Aerospike Python benchmark code


A while ago QingCloud held a technical competition, roughly something around high-concurrency workloads.
I'm not skilled enough, and didn't have the time, to enter.
Still, I set up Aerospike and put together a benchmark of a small prototype.
Here is the Python multi-process benchmark code:


#encoding=utf-8
import aerospike
import random
import time
from datetime import datetime
import traceback

# Configure the client
config = {
    'hosts': [('10.1.1.xxx', 3002)],
    'policies': {'timeout': 12000}
}

# Create a client and connect it to the cluster
try:
    client = aerospike.client(config).connect()
except Exception as e:
    print "failed to connect aerospike ", e
    import sys
    sys.exit(1)

NAMESPACE = "benchmark"
# https://www.aerospike.com/apidocs/python/client.html
# https://www.aerospike.com/apidocs/python/genindex.html

map_policy={'map_write_mode': aerospike.MAP_UPDATE, 'map_order':aerospike.MAP_KEY_VALUE_ORDERED}

def insert_friends(num=0):
    count = 0
    for p_id in person:
        key = (NAMESPACE, 'users', p_id)
        client.map_set_policy(key, 'user_friends', map_policy)
        items = {}
        for i in range(1, 1000):
            tmp = str(random.randint(p_begin, p_end))
            items[tmp] = 1
        client.map_put_items(key, "user_friends", items)
        count += 1
        #if num == 0 and count % 2 == 1:
        if num == 0:
            print datetime.now(), "insert_friends:%d" % count

def insert_article(num=0):
    count = 0
    for a_id in article[860000:]:
        key = (NAMESPACE, 'articles', a_id)
        res = client.put(key, {'author': str(random.randint(p_begin, p_end))})
        count += 1
        #if num == 0 and count % 50 == 1:
        if num == 0:
            print datetime.now(), "insert_articles:%d" % count

list_policy = {}

def insert_like(num=0):
    index = 0
    #for a_id in article[20000:]:
    for a_id in article[17200:]:
        try:
            key = (NAMESPACE, 'articles', a_id)
            author = client.get((NAMESPACE, 'articles', a_id))[2]['author']

            like_person = str(random.randint(p_begin, p_end))
            for i in range(index % 2 + 1):
                if index % 100 == 1:  # for ~1% of likes, check the author's friend map and route the like accordingly
                    count = client.map_get_by_key((NAMESPACE, 'users', author), 'user_friends', like_person, aerospike.MAP_RETURN_COUNT)
                    if count > 0:
                        client.list_insert(key, 'friend', 0, like_person)
                    else:
                        client.list_insert(key, 'normal', 0, like_person)
                else:
                    client.list_insert(key, 'normal', 0, like_person)

            index += 1
            #if num == 0 and index % 5 == 1:
            if num == 0:
                print datetime.now(), "insert_like:%d" % index
        except aerospike.exception.RecordNotFound as e:
            continue
        except Exception as e:
            print "insert like error, %s" % (traceback.format_exc())


def search_like():
    import multiprocessing
    try:
        parts = 100
        procs = []
        for i in range(parts):
            #proc = multiprocessing.Process(target = search_like_sub, args=(i, ))
            proc = multiprocessing.Process(target = search_like_sub_op, args=(i, ))
            procs.append(proc)
        for proc in procs:
            proc.start()
        print parts
    except Exception as e:
        print e
        print "Error: unable to start worker processes"


def search_like_sub(seed):
    begin = time.time()
    count = 0
    random.seed(seed)
    for a_id in range(len(article)):
        a_id = random.choice(article)
        try:
            count += 1
            try:
                friend_count = client.list_size((NAMESPACE, 'like', a_id), 'friend')
                client.list_get_range((NAMESPACE, 'like', a_id), 'friend', 0, 50)
            except aerospike.exception.RecordNotFound as e:
                friend_count = -1
            try:
                normal_count = client.list_size((NAMESPACE, 'like', a_id), 'normal')  # mistaken set name kept as-is on purpose
                client.list_get_range((NAMESPACE, 'like', a_id), 'normal', 0, 50)
            except aerospike.exception.RecordNotFound as e:
                normal_count = -1

            if count % 3000 == 1:
                print a_id, friend_count, normal_count
        except Exception as e:
            print count, e
    print time.time() - begin


def search_like_sub_op(seed):
    begin = time.time()
    count = 0
    random.seed(seed + begin)
    for xx in range(12000):
        #for a_id in range(len(article)):
        r_id = random.randint(15000000, 15000000 + 1500000 * 50)
        if (xx + seed) % (999) > 0:
            # ~99.9% of requests are redirected to a small "hot" set of ~3000 article ids
            r_id = 15000000 + random.randint(0, 3000 * 1) * 18517
        a_id = "a_%d" % r_id
        #a_id = random.choice(article)
        try:
            count += 1
            ops = [
                {
                    "op": aerospike.OP_LIST_GET_RANGE,
                    "bin": "friend",
                    "index": 0,
                    "val": 100
                },
                {
                    "op": aerospike.OP_LIST_GET_RANGE,
                    "bin": "normal",
                    "index": 0,
                    "val": 100
                }
            ]
            try:
                data = client.operate((NAMESPACE, 'articles', a_id), ops, {'ttl': 360}, {'timeout': 500})[2]
                friend_count = len(data.get('friend', []))
                normal_count = len(data.get('normal', []))
            except aerospike.exception.RecordNotFound as e:
                normal_count = -1
                friend_count = -1

            if count % 800 == 1:
                print a_id, friend_count, normal_count
        except Exception as e:
            print "error", e, count
    print time.time() - begin


p_begin = 0
p_end = 0
person = []
article = []

def insert_test_data_sub(num):
    global p_begin, p_end, person, article
    begin = time.time()
    step = 100000
    #step = 100
    p_begin = 10000000 + num * step
    p_end = 10000000 + (num + 1) * step
    person = [str(i) for i in range(p_begin, p_end)]
    article = ["a_" + str(j) for j in range(15000000 + num * step * 20, 15000000 + (num + 1) * step * 20)]
    #insert_friends(num)
    #insert_article(num)
    insert_like(num)
    print "total_cost_time", time.time() - begin

def insert_test_data():
    import multiprocessing
    try:
        parts = 50
        procs = []
        for i in range(parts):
            proc = multiprocessing.Process(target = insert_test_data_sub, args=(i, ))
            procs.append(proc)
        for proc in procs:
            proc.start()
    except Exception as e:
        print e
        print "Error: unable to start worker processes"


if __name__ == "__main__":
    insert_test_data()
    search_like()
    time.sleep(60)

The code mainly covers:

1. insert_test_data() to load the test data
2. search_like() to run the read-side check
3. initializing the Aerospike client
4. using Aerospike's two basic collection types, list and map

For performance reasons, both the data load and the search use multiprocessing (a minimal sketch of the worker handling follows).
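
The script only starts the worker processes and then sleeps for a fixed time in the main process; it never joins them. A minimal variant that waits for the workers explicitly, using a hypothetical run_workers helper that is not part of the original script, could look like this:

import multiprocessing

def run_workers(target, parts):
    # hypothetical helper: start one worker per partition and
    # block until every worker has exited (instead of a fixed sleep)
    procs = [multiprocessing.Process(target=target, args=(i,)) for i in range(parts)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()

# usage, matching the counts used in the script:
#   run_workers(insert_test_data_sub, 50)
#   run_workers(search_like_sub_op, 100)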

Test results:
The Aerospike server had 16 cores, 20 GB of RAM, and a 1.6 TB spinning disk (a very weak machine).
With 100 million articles, each with a single-digit number of likes,
and with more than 99.8% of the requests hitting the same ~3,000 articles while the rest were fully random lookups, the search side reached about 80,000 QPS. Aerospike does its own caching of hot versus cold data.
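
That skew comes from the condition (xx + seed) % 999 > 0 in search_like_sub_op, which redirects 998 out of every 999 requests to the ~3,000 hot article ids. A quick sanity check of that ratio (a sketch, not part of the benchmark itself):

hot, total = 0, 200000
for xx in range(total):
    if (xx + 0) % 999 > 0:   # same condition as in search_like_sub_op, with seed = 0
        hot += 1
print("hot fraction: %.4f" % (hot / float(total)))   # ~0.999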

For the insert phase, 5 million users were configured, with 1,000 friends each.
The full data set was loaded within one day.
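
Those totals follow from the constants in insert_test_data() and insert_test_data_sub(); a quick back-of-the-envelope check (a sketch, mirroring the names in the script):

parts = 50                 # insert processes started by insert_test_data()
step = 100000              # users handled per insert_test_data_sub(num)
friends_per_user = 1000    # entries written per user by insert_friends()
users = parts * step                       # 5,000,000 users
friend_entries = users * friends_per_user  # ~5,000,000,000 friend map entries
articles = parts * step * 20               # 100,000,000 articles
print("%d users, %d friend entries, %d articles" % (users, friend_entries, articles))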
The Aerospike namespace conf:

namespace benchmark {
    memory-size 15G                  # Maximum memory allocation for data and
                                     # primary and secondary indexes.
    storage-engine device {          # Configure the storage-engine to use
                                     # persistence. Maximum size is 2 TiB
        file /opt/aerospike/benchmark_db_store    # Location of data file on server.
        # file /opt/aerospike/<another>           # (optional) Location of data file on server.
        filesize 500G                # Max size of each file in GiB.
        data-in-memory false         # Indicates that all data should also be
                                     # in memory.
    }
}

Problems encountered:

1. Aerospike maps and lists are bounded by the record size limit, which is typically on the order of 1 MB, so an article with 1 million likes probably could not be stored.
2. Even if 1 million likes could be stored, checking whether a particular id is present in the like list would likely still be a problem.
3. Whether or not Aerospike runs in in-memory mode, every key occupies at least 64 bytes of primary-index space in RAM, so 1 billion keys would need roughly 64 GB of memory for the index alone.
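
A quick check of point 3 (a sketch; 64 bytes is the per-record index figure quoted above):

keys = 10 ** 9                # one billion keys
index_entry_bytes = 64        # primary-index entry size per record
total = keys * index_entry_bytes
print("%.1f GiB" % (total / float(1024 ** 3)))   # ~59.6 GiB, i.e. roughly 64 GB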
Copyright notice: this is an original article by CSDN blogger "bairongdong1", licensed under CC 4.0 BY-SA. Please include the original source link and this notice when reposting.
Original link: https://blog.csdn.net/bairongdong1/article/details/62138955
