Merged

Changes from all commits (42 commits)
f1e7bff
add onnx_ort_runtime demo
wjj19950828 Nov 11, 2022
25fdeb0
Merge remote-tracking branch 'upstream/develop' into update_runtime
wjj19950828 Nov 11, 2022
b5dff8d
rm in requirements
wjj19950828 Nov 11, 2022
5566e88
Merge remote-tracking branch 'upstream/develop' into update_runtime
wjj19950828 Nov 14, 2022
4e1f35a
support batch eval
wjj19950828 Nov 14, 2022
87dba08
Merge remote-tracking branch 'upstream/develop' into update_runtime
wjj19950828 Nov 15, 2022
99c610c
fixed MattingResults bug
wjj19950828 Nov 15, 2022
c59fd5c
move assignment for DetectionResult
wjj19950828 Nov 15, 2022
f82486d
Merge remote-tracking branch 'upstream/develop' into update_runtime
wjj19950828 Nov 15, 2022
ec475d0
Merge remote-tracking branch 'upstream/develop' into update_runtime
wjj19950828 Nov 16, 2022
7f68106
Merge remote-tracking branch 'upstream/develop' into update_runtime
wjj19950828 Nov 16, 2022
8bbea4f
Merge remote-tracking branch 'upstream/develop' into update_runtime
wjj19950828 Nov 16, 2022
b0f7c90
Merge remote-tracking branch 'upstream/develop' into update_runtime
wjj19950828 Nov 17, 2022
d428d95
Merge remote-tracking branch 'upstream/develop' into add_x2paddle
wjj19950828 Nov 17, 2022
0371811
integrated x2paddle
wjj19950828 Nov 18, 2022
c16eb42
Merge remote-tracking branch 'upstream/develop' into add_x2paddle
wjj19950828 Nov 18, 2022
279f160
Merge remote-tracking branch 'upstream/develop' into add_x2paddle
wjj19950828 Nov 20, 2022
cb9c966
add model convert readme
wjj19950828 Nov 20, 2022
010c0b3
Merge remote-tracking branch 'upstream/develop' into add_x2paddle
wjj19950828 Nov 20, 2022
670cefb
update readme
wjj19950828 Nov 20, 2022
6e9e6a5
re-lint
wjj19950828 Nov 20, 2022
c76f960
Merge remote-tracking branch 'upstream/develop' into add_api_doc
wjj19950828 Nov 22, 2022
2996743
Merge remote-tracking branch 'upstream/develop' into add_api_doc
wjj19950828 Nov 22, 2022
b0f45f7
add processor api
wjj19950828 Nov 22, 2022
6f797da
Merge remote-tracking branch 'upstream/develop' into add_api_doc
wjj19950828 Nov 22, 2022
5725868
Merge remote-tracking branch 'upstream/develop' into add_api_doc
wjj19950828 Nov 24, 2022
05a198b
Merge remote-tracking branch 'upstream/develop' into add_api_doc
wjj19950828 Nov 25, 2022
b85cc13
Add MattingResult Free
wjj19950828 Nov 25, 2022
1cfa9a6
Merge remote-tracking branch 'upstream/develop' into add_api_doc
wjj19950828 Nov 28, 2022
52180e7
change valid_cpu_backends order
wjj19950828 Nov 28, 2022
a71c403
Merge branch 'develop' into add_api_doc
jiangjiajun Nov 28, 2022
273e778
Merge remote-tracking branch 'upstream/develop' into add_api_doc
wjj19950828 Dec 1, 2022
43c9567
add ppocr benchmark
wjj19950828 Dec 1, 2022
f666166
Merge branch 'add_api_doc' of https://github.com/wjj19950828/FastDepl…
wjj19950828 Dec 1, 2022
ccfe6ff
mv bs from 64 to 32
wjj19950828 Dec 2, 2022
cb5436b
Merge remote-tracking branch 'upstream/develop' into add_api_doc
wjj19950828 Dec 2, 2022
90bd93e
Merge remote-tracking branch 'upstream/develop' into add_api_doc
wjj19950828 Dec 5, 2022
910bc4a
fixed quantize.md
wjj19950828 Dec 5, 2022
8d8362b
fixed quantize bugs
wjj19950828 Dec 5, 2022
bcf6c62
Add Monitor for benchmark
wjj19950828 Dec 6, 2022
1bdc355
Merge remote-tracking branch 'upstream/develop' into add_api_doc
wjj19950828 Dec 6, 2022
4eeb797
update mem monitor
wjj19950828 Dec 6, 2022
161 changes: 127 additions & 34 deletions benchmark/benchmark_ppcls.py
@@ -113,27 +113,109 @@ def build_option(args):
    return option


def get_current_memory_mb(gpu_id=None):
    import pynvml
    import psutil
    pid = os.getpid()
    p = psutil.Process(pid)
    info = p.memory_full_info()
    cpu_mem = info.uss / 1024. / 1024.
    gpu_mem = 0
    if gpu_id is not None:
        pynvml.nvmlInit()
        handle = pynvml.nvmlDeviceGetHandleByIndex(0)
        meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
        gpu_mem = meminfo.used / 1024. / 1024.
    return cpu_mem, gpu_mem


def get_current_gputil(gpu_id):
    import GPUtil
    GPUs = GPUtil.getGPUs()
    gpu_load = GPUs[gpu_id].load
    return gpu_load
class StatBase(object):
    """StatBase"""
    nvidia_smi_path = "nvidia-smi"
    gpu_keys = ('index', 'uuid', 'name', 'timestamp', 'memory.total',
                'memory.free', 'memory.used', 'utilization.gpu',
                'utilization.memory')
    nu_opt = ',nounits'
    cpu_keys = ('cpu.util', 'memory.util', 'memory.used')


class Monitor(StatBase):
    """Monitor"""

    def __init__(self, use_gpu=False, gpu_id=0, interval=0.1):
        self.result = {}
        self.gpu_id = gpu_id
        self.use_gpu = use_gpu
        self.interval = interval
        self.cpu_stat_q = multiprocessing.Queue()

    def start(self):
        cmd = '%s --id=%s --query-gpu=%s --format=csv,noheader%s -lms 50' % (
            StatBase.nvidia_smi_path, self.gpu_id, ','.join(StatBase.gpu_keys),
            StatBase.nu_opt)
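        # With the default gpu_id=0 this expands to roughly:
        #   nvidia-smi --id=0 --query-gpu=index,uuid,name,timestamp,memory.total,
        #       memory.free,memory.used,utilization.gpu,utilization.memory
        #       --format=csv,noheader,nounits -lms 50
        # i.e. nvidia-smi keeps re-sampling these fields every 50 ms until stop() kills it.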
        if self.use_gpu:
            self.gpu_stat_worker = subprocess.Popen(
                cmd,
                stderr=subprocess.STDOUT,
                stdout=subprocess.PIPE,
                shell=True,
                close_fds=True,
                preexec_fn=os.setsid)
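            # preexec_fn=os.setsid puts the shell and nvidia-smi into their own
            # process group, so stop() can end them together via os.killpg().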
        # cpu stat
        pid = os.getpid()
        self.cpu_stat_worker = multiprocessing.Process(
            target=self.cpu_stat_func,
            args=(self.cpu_stat_q, pid, self.interval))
        self.cpu_stat_worker.start()

    def stop(self):
        try:
            if self.use_gpu:
                os.killpg(self.gpu_stat_worker.pid, signal.SIGUSR1)
            # os.killpg(p.pid, signal.SIGTERM)
            self.cpu_stat_worker.terminate()
            self.cpu_stat_worker.join(timeout=0.01)
        except Exception as e:
            print(e)
            return

        # gpu
        if self.use_gpu:
            lines = self.gpu_stat_worker.stdout.readlines()
            lines = [
                line.strip().decode("utf-8") for line in lines
                if line.strip() != ''
            ]
            gpu_info_list = [{
                k: v
                for k, v in zip(StatBase.gpu_keys, line.split(', '))
            } for line in lines]
            if len(gpu_info_list) == 0:
                return
            result = gpu_info_list[0]
            for item in gpu_info_list:
                for k in item.keys():
                    if k not in ["name", "uuid", "timestamp"]:
                        result[k] = max(int(result[k]), int(item[k]))
                    else:
                        result[k] = max(result[k], item[k])
            self.result['gpu'] = result

        # cpu
        cpu_result = {}
        if self.cpu_stat_q.qsize() > 0:
            cpu_result = {
                k: v
                for k, v in zip(StatBase.cpu_keys, self.cpu_stat_q.get())
            }
            while not self.cpu_stat_q.empty():
                item = {
                    k: v
                    for k, v in zip(StatBase.cpu_keys, self.cpu_stat_q.get())
                }
                for k in StatBase.cpu_keys:
                    cpu_result[k] = max(cpu_result[k], item[k])
            cpu_result['name'] = cpuinfo.get_cpu_info()['brand_raw']
        self.result['cpu'] = cpu_result

    def output(self):
        return self.result

    def cpu_stat_func(self, q, pid, interval=0.0):
        """cpu stat function"""
        stat_info = psutil.Process(pid)
        while True:
            # pid = os.getpid()
            cpu_util, mem_util, mem_use = stat_info.cpu_percent(
            ), stat_info.memory_percent(), round(stat_info.memory_info().rss /
                                                 1024.0 / 1024.0, 4)
            q.put([cpu_util, mem_util, mem_use])
            time.sleep(interval)
        return


if __name__ == '__main__':
@@ -146,6 +146,7 @@ def get_current_gputil(gpu_id):

    gpu_id = args.device_id
    enable_collect_memory_info = args.enable_collect_memory_info
    dump_result = dict()
    end2end_statis = list()
    cpu_mem = list()
    gpu_mem = list()
@@ -165,45 +248,55 @@ def get_current_gputil(gpu_id):
    try:
        model = fd.vision.classification.PaddleClasModel(
            model_file, params_file, config_file, runtime_option=option)
        if enable_collect_memory_info:
            import multiprocessing
            import subprocess
            import psutil
            import signal
            import cpuinfo
            enable_gpu = args.device == "gpu"
            monitor = Monitor(enable_gpu, gpu_id)
            monitor.start()

        model.enable_record_time_of_runtime()
        im_ori = cv2.imread(args.image)
        for i in range(args.iter_num):
            im = im_ori
            start = time.time()
            result = model.predict(im)
            end2end_statis.append(time.time() - start)
            if enable_collect_memory_info:
                gpu_util.append(get_current_gputil(gpu_id))
                cm, gm = get_current_memory_mb(gpu_id)
                cpu_mem.append(cm)
                gpu_mem.append(gm)

        runtime_statis = model.print_statis_info_of_runtime()

        warmup_iter = args.iter_num // 5
        end2end_statis_repeat = end2end_statis[warmup_iter:]
        if enable_collect_memory_info:
            cpu_mem_repeat = cpu_mem[warmup_iter:]
            gpu_mem_repeat = gpu_mem[warmup_iter:]
            gpu_util_repeat = gpu_util[warmup_iter:]
            monitor.stop()
            mem_info = monitor.output()
            dump_result["cpu_rss_mb"] = mem_info['cpu'][
                'memory.used'] if 'cpu' in mem_info else 0
            dump_result["gpu_rss_mb"] = mem_info['gpu'][
                'memory.used'] if 'gpu' in mem_info else 0
            dump_result["gpu_util"] = mem_info['gpu'][
                'utilization.gpu'] if 'gpu' in mem_info else 0

        dump_result = dict()
        dump_result["runtime"] = runtime_statis["avg_time"] * 1000
        dump_result["end2end"] = np.mean(end2end_statis_repeat) * 1000
        if enable_collect_memory_info:
            dump_result["cpu_rss_mb"] = np.mean(cpu_mem_repeat)
            dump_result["gpu_rss_mb"] = np.mean(gpu_mem_repeat)
            dump_result["gpu_util"] = np.mean(gpu_util_repeat)

f.writelines("Runtime(ms): {} \n".format(str(dump_result["runtime"])))
f.writelines("End2End(ms): {} \n".format(str(dump_result["end2end"])))
print("Runtime(ms): {} \n".format(str(dump_result["runtime"])))
print("End2End(ms): {} \n".format(str(dump_result["end2end"])))
if enable_collect_memory_info:
f.writelines("cpu_rss_mb: {} \n".format(
str(dump_result["cpu_rss_mb"])))
f.writelines("gpu_rss_mb: {} \n".format(
str(dump_result["gpu_rss_mb"])))
f.writelines("gpu_util: {} \n".format(
str(dump_result["gpu_util"])))
print("cpu_rss_mb: {} \n".format(str(dump_result["cpu_rss_mb"])))
print("gpu_rss_mb: {} \n".format(str(dump_result["gpu_rss_mb"])))
print("gpu_util: {} \n".format(str(dump_result["gpu_util"])))
except:
f.writelines("!!!!!Infer Failed\n")

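Below is a minimal usage sketch (not part of this diff) of the Monitor class introduced above, driven on its own rather than from benchmark_ppcls.py. It assumes StatBase/Monitor are copied into an importable module — the name monitor_util is hypothetical — with their dependencies (os, time, signal, subprocess, multiprocessing, psutil, cpuinfo) imported at module scope, and that nvidia-smi is on PATH when use_gpu=True.

import time

from monitor_util import Monitor  # hypothetical module holding StatBase/Monitor

monitor = Monitor(use_gpu=True, gpu_id=0, interval=0.1)  # CPU sampled every 0.1 s, GPU every 50 ms
monitor.start()

time.sleep(5)  # stand-in for the real workload (e.g. the predict loop above)

monitor.stop()
mem_info = monitor.output()
if 'cpu' in mem_info and mem_info['cpu']:
    print("cpu_rss_mb:", mem_info['cpu']['memory.used'])    # peak process RSS, MB
if 'gpu' in mem_info and mem_info['gpu']:
    print("gpu_rss_mb:", mem_info['gpu']['memory.used'])     # peak GPU memory used, MB
    print("gpu_util:", mem_info['gpu']['utilization.gpu'])   # peak GPU utilization, %

The keys mirror StatBase.cpu_keys and StatBase.gpu_keys; stop() reduces all collected samples to their per-field maxima, which is what benchmark_ppcls.py writes out as cpu_rss_mb, gpu_rss_mb and gpu_util.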