#!/usr/bin/env python
# coding: utf-8
# # Generate [dividiti](http://dividiti.com)'s submissions to [MLPerf Inference v0.5](https://github.com/mlperf/inference/tree/master/v0.5)
#
# ## Overview
# This Jupyter notebook covers [dividiti](http://dividiti.com)'s submissions to [MLPerf Inference v0.5](https://github.com/mlperf/inference/tree/master/v0.5). It validates that experimental data obtained via automated, portable and reproducible [Collective Knowledge](http://cknowledge.org) workflows conforms to [General MLPerf Submission Rules](https://github.com/mlperf/policies/blob/master/submission_rules.adoc)
# and [MLPerf Inference Rules](https://github.com/mlperf/inference_policies/blob/master/inference_rules.adoc), including running the official [`submission-checker.py`](https://github.com/mlperf/inference/blob/master/v0.5/tools/submission/submission-checker.py).
# A live version of this Jupyter Notebook can be viewed [here](https://nbviewer.jupyter.org/urls/dl.dropbox.com/s/1xlv5oacgobrfd4/mlperf-inference-v0.5-dividiti.ipynb).
# ## Table of Contents
# 1. [Overview](#overview)
# 1. [Includes](#includes)
# 1. [System templates](#templates)
#    1. [Firefly RK3399](#templates_firefly)
#    1. [Linaro HiKey960](#templates_hikey960)
#    1. [Huawei Mate 10 Pro](#templates_mate10pro)
#    1. [Raspberry Pi 4](#templates_rpi4)
#    1. [HP Z640](#templates_velociti)
#    1. [Default](#templates_default)
# 1. [Systems](#systems)
# 1. [Implementations](#implementations)
# 1. [Get the experimental data](#get)
#    1. [Image Classification - Closed](#get_image_classification_closed)
#    1. [Image Classification - Open](#get_image_classification_open)
#    1. [Object Detection - Open](#get_object_detection_open)
# 1. [Generate the submission checklist](#checklist)
# 1. [Check the experimental data](#check)
#
# ## Includes
# ### Standard
# In[ ]:
import os
import sys
import json
import re
from pprint import pprint
from shutil import copy2
from copy import deepcopy
# ### Scientific
# If some of the scientific packages are missing, please install them using:
# ```
# # python3 -m pip install jupyter pandas numpy matplotlib seaborn --user
# ```
# In[ ]:
import pandas as pd
import numpy as np
import subprocess
print('Pandas version: %s' % pd.__version__)
print('NumPy version: %s' % np.__version__)
# No need to hardcode e.g. as:
# sys.path.append('$CK_TOOLS/tool-coco-master-gcc-8.3.0-compiler.python-3.6.10-linux-64/')
# since it gets added to the Python path automatically via the dependency.
from pycocotools.coco import COCO
# No need to hardcode (e.g. as '$CK_TOOLS/dataset-coco-2017-val'),
# since it gets added to the path automatically via the dependency.
coco_dir = os.environ.get('CK_ENV_DATASET_COCO','')
if coco_dir=='':
print('Error: Path to COCO dataset not defined!')
exit(1)
# No need to hardcode (e.g. as '$CK_TOOLS/dataset-imagenet-ilsvrc2012-aux/val.txt'),
# since it gets added to the path automatically via the dependency.
imagenet_val_file = os.environ.get('CK_CAFFE_IMAGENET_VAL_TXT','')
if imagenet_val_file=='':
print('Error: Path to ImageNet labels not defined!')
exit(1)
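# The same check-or-abort pattern recurs for every CK-resolved path. A minimal sketch
# of a reusable helper (hypothetical; the cells above inline the pattern instead):
# ```
# def require_env_path(var_name, description):
#     """Return the path stored in a CK environment variable, or abort."""
#     path = os.environ.get(var_name, '')
#     if path == '':
#         print('Error: %s not defined!' % description)
#         exit(1)
#     return path
#
# coco_dir = require_env_path('CK_ENV_DATASET_COCO', 'Path to COCO dataset')
# imagenet_val_file = require_env_path('CK_CAFFE_IMAGENET_VAL_TXT', 'Path to ImageNet labels')
# ```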
# ### Collective Knowledge
# If CK is not installed, please install it using:
# ```
# # python -m pip install ck
# ```
# In[ ]:
import ck.kernel as ck
print('CK version: %s' % ck.__version__)
#
# ## Systems
# Load platform_templates from CK SUT entries
#
r = ck.access({'action':'list', 'module_uoa':'sut'})
if r['return']>0:
print('Error: %s' % r['error'])
exit(1)
platform_templates = { sut['data_uoa']: sut['meta']['data'] for sut in r['lst'] }
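# Each template carries the fields of the corresponding MLPerf system description.
# For illustration only (hypothetical, abridged values; the actual keys come from the
# CK SUT entries), an entry might look like:
# ```
# platform_templates['rpi4'] == {
#     'system_name': 'Raspberry Pi 4 (rpi4)',
#     'host_processor_model_name': 'Broadcom BCM2711B0 (Cortex-A72)',
#     'accelerator_model_name': '-',
#     'accelerators_per_node': '0',
#     # ... further system description fields ...
# }
# ```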
inference_engine_to_printable = {
'armnn': 'ArmNN',
'tflite': 'TFLite',
'tensorrt': 'TensorRT',
'tensorflow': 'TensorFlow',
}
backend_to_printable = {
'neon': 'Neon',
'opencl': 'OpenCL',
'ruy': 'Ruy',
'cpu': 'CPU',
'cuda': 'CUDA',
'tensorrt': 'TensorRT-static',
'tensorrt-dynamic': 'TensorRT-dynamic',
}
system_description_cache = {}
def dump_system_description_dictionary(target_path, division, platform, inference_engine, inference_engine_version, backend):
if target_path in system_description_cache:
return system_description_cache[target_path]
library_backend = inference_engine + '-' + inference_engine_version + (('-' + backend) if backend else '') # joined with '-' to match the comparisons below
division_system = division + '-' + platform + '-' + library_backend
if library_backend == 'tensorflow-v1.14-cpu':
status = 'RDI'
elif library_backend == 'tflite-v1.15.0' or library_backend == 'tensorrt-v6.0':
status = 'unofficial'
else:
status = 'available'
framework = inference_engine_to_printable[inference_engine] + ' ' + inference_engine_version + \
(' ({})'.format(backend_to_printable[backend]) if backend else '')
template = deepcopy(platform_templates[platform])
template.update({
'division' : division,
'submitter' : 'dividiti', # 'dividiti' if platform != 'velociti' else 'dividiti, Politecnico di Milano'
'status' : status,
'framework' : framework,
})
if (not library_backend.startswith('tensorrt') and not library_backend.startswith('tensorflow') and not library_backend.endswith('opencl')) or library_backend.endswith('cpu'):
template.update({
'accelerator_frequency' : '-',
'accelerator_memory_capacity' : '-',
'accelerator_memory_configuration': '-',
'accelerator_model_name' : '-',
'accelerator_on-chip_memories': '-',
'accelerators_per_node' : '0',
})
with open(target_path, 'w') as system_description_file:
json.dump(template, system_description_file, indent=2)
system_description_cache[target_path] = template
return template
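# For example (hypothetical call; platform and versions as used elsewhere in this
# notebook), describing an ArmNN v19.08 run with the Neon backend on `firefly`:
# ```
# desc = dump_system_description_dictionary(
#     'systems/firefly-armnn-v19.08-neon.json',
#     division='closed', platform='firefly',
#     inference_engine='armnn', inference_engine_version='v19.08', backend='neon')
# print(desc['framework'])  # 'ArmNN v19.08 (Neon)'
# print(desc['status'])     # 'available'
# ```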
#
# ## Implementations
implementation_cache = {}
def dump_implementation_dictionary(target_path, model_dict, inference_engine, program_name, benchmark):
if target_path in implementation_cache:
return implementation_cache[target_path]
model_env = model_dict['cus']['install_env']
model_tags = model_dict['dict']['tags']
recorded_model_retraining = model_env.get('ML_MODEL_RETRAINING', 'no')
## fetch recorded model data types, if available, guess if unavailable:
recorded_model_data_type = model_env.get('ML_MODEL_DATA_TYPE')
recorded_model_input_data_type = model_env.get('ML_MODEL_INPUT_DATA_TYPE')
if not recorded_model_data_type:
if {'non-quantized', 'fp32', 'float', 'float32'} & set(model_tags):
recorded_model_data_type = 'fp32'
elif {'quantized', 'quant', 'uint8'} & set(model_tags):
recorded_model_data_type = 'uint8'
else:
print("Warning: could not guess whether the model is quantized or not - please add tags or attributes")
recorded_model_data_type = 'fp32'
if not recorded_model_input_data_type: # assume the same
recorded_model_input_data_type = recorded_model_data_type
## recorded_model_input_data_type may need translating from NumPy name into MLPerf's vocabulary:
model_input_type_mapping = {'float32': 'fp32', 'float16': 'fp16' }
if recorded_model_input_data_type in model_input_type_mapping:
recorded_model_input_data_type = model_input_type_mapping[recorded_model_input_data_type]
## fetching/constructing the URL of the (original) model:
if 'PACKAGE_URL' not in model_env: # this model is a result of conversion
model_env = model_dict['dict']['deps']['model-source']['dict']['customize']['install_env']
recorded_model_url = model_env['PACKAGE_URL'].rstrip('/') + '/' + model_env['PACKAGE_NAME']
## figure out the transformation path:
if program_name in [ 'image-classification-tflite-loadgen', 'image-classification-armnn-tflite-loadgen' ]:
if benchmark in ['resnet', 'resnet50']:
recorded_transformation_path = 'TF -> TFLite'
else:
recorded_transformation_path = 'TFLite'
elif program_name == 'image-classification-tensorrt-loadgen-py':
if benchmark in ['resnet', 'resnet50']:
recorded_transformation_path = 'ONNX'
else:
recorded_transformation_path = 'TF'
elif program_name == 'mlperf-inference-vision':
recorded_transformation_path = 'None (TensorFlow)'
else:
raise Exception("Don't know how to derive the transformation path of the model")
# The original model is never supplied in ArmNN or TensorRT format, so there must have been a transformation:
if inference_engine in ['armnn', 'tensorrt']:
recorded_transformation_path += ' -> '+inference_engine_to_printable[inference_engine]
implementation_dictionary = {
'retraining': recorded_model_retraining,
'input_data_types': recorded_model_input_data_type,
'weight_data_types': recorded_model_data_type,
'starting_weights_filename': recorded_model_url,
'weight_transformations': recorded_transformation_path,
}
with open(target_path, 'w') as implementation_file:
json.dump(implementation_dictionary, implementation_file, indent=2)
implementation_cache[target_path] = implementation_dictionary
return implementation_dictionary
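# The resulting JSON carries the five model-related fields required under "measurements"
# (see below). For a quantized MobileNet-v1 under TFLite it might look like this
# (illustrative values; the URL is elided):
# ```
# {
#   "retraining": "no",
#   "input_data_types": "uint8",
#   "weight_data_types": "uint8",
#   "starting_weights_filename": "https://.../mobilenet_v1_1.0_224_quant.tgz",
#   "weight_transformations": "TFLite"
# }
# ```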
# In[ ]:
implementation_readmes = {}
implementation_readmes['image-classification-tflite-loadgen'] = """# MLPerf Inference - Image Classification - TFLite
This C++ implementation uses TFLite to run TFLite models for Image Classification on CPUs.
## Links
- [Jupyter notebook](https://nbviewer.jupyter.org/urls/dl.dropbox.com/s/1xlv5oacgobrfd4/mlperf-inference-v0.5-dividiti.ipynb)
- [Source code](https://github.com/ctuning/ck-mlperf/tree/master/program/image-classification-tflite-loadgen).
- [Instructions](https://github.com/mlperf/inference/blob/master/v0.5/classification_and_detection/optional_harness_ck/classification/tflite/README.md).
"""
implementation_readmes['image-classification-armnn-tflite-loadgen'] = """# MLPerf Inference - Image Classification - ArmNN-TFLite
This C++ implementation uses ArmNN with the TFLite frontend to run TFLite models for Image Classification on Arm Cortex CPUs and Arm Mali GPUs.
## Links
- [Jupyter notebook](https://nbviewer.jupyter.org/urls/dl.dropbox.com/s/1xlv5oacgobrfd4/mlperf-inference-v0.5-dividiti.ipynb)
- [Source code](https://github.com/ctuning/ck-mlperf/tree/master/program/image-classification-armnn-tflite-loadgen).
- [Instructions](https://github.com/ARM-software/armnn-mlperf/blob/master/README.md).
"""
implementation_readmes['image-classification-tensorrt-loadgen-py'] = """# MLPerf Inference - Image Classification - TensorRT
This Python implementation uses TensorRT to run models for Image Classification on NVIDIA GPUs.
## Links
- [Source code](https://github.com/ctuning/ck-mlperf/tree/master/program/image-classification-tensorrt-loadgen-py).
"""
implementation_readmes['mlperf-inference-vision'] = """# MLPerf Inference - Object Detection - TensorFlow
This Python implementation is the official MLPerf Inference vision application, modified to support other
object detection models and run with TensorRT.
## Links
- [CK wrapper](https://github.com/ctuning/ck-object-detection/tree/master/program/mlperf-inference-vision).
- [vision_with_ck branch in dividiti's fork of mlperf/inference](https://github.com/dividiti/inference/tree/vision_with_ck).
- [Docker image with instructions](https://github.com/ctuning/ck-mlperf/tree/master/docker/mlperf-inference-vision-with-ck.tensorrt.ubuntu-18.04).
- [Jupyter notebook](https://nbviewer.jupyter.org/urls/dl.dropbox.com/s/1xlv5oacgobrfd4/mlperf-inference-v0.5-dividiti.ipynb)
"""
# In[ ]:
def get_program_path(program_name):
r = ck.access({'action':'find', 'repo_uoa':'*', 'module_uoa':'program', 'data_uoa':program_name})
if r['return']>0:
print('Error: %s' % r['error'])
exit(1)
return r['path']
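# For example (hypothetical path):
# ```
# get_program_path('image-classification-tflite-loadgen')
# # -> '/home/user/CK_REPOS/ck-mlperf/program/image-classification-tflite-loadgen'
# ```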
# In[ ]:
measurements_readmes = {}
task = 'image-classification'
for division_upper in [ 'Closed', 'Open' ]:
division_lower = division_upper.lower()
measurements_readmes[division_lower+'-'+task] = '''# MLPerf Inference - {} Division - Image Classification
We performed our measurements using automated, customizable, portable and reproducible
[Collective Knowledge](http://cknowledge.org) workflows. Our workflows automatically
install dependencies (models, datasets, etc.), preprocess input data in the correct way,
and so on.
## CK repositories
As CK is always evolving, it is hard to pin particular revisions of all repositories.
The most relevant repositories and their latest revisions on the submission date (11/Oct/2019):
- [ck-mlperf](https://github.com/ctuning/ck-mlperf) @ [ee77cfd](https://github.com/ctuning/ck-mlperf/commit/ee77cfd3ddfa30739a8c2f483fe9ba83a233a000) (contains programs integrated with LoadGen, model packages and scripts).
- [ck-env](https://github.com/ctuning/ck-env) @ [f9ac337](https://github.com/ctuning/ck-env/commit/f9ac3372cdc82fa46b2839e45fc67848ab4bac03) (contains dataset descriptions, preprocessing methods, etc.)
- [ck-tensorflow](https://github.com/ctuning/ck-tensorflow) @ [eff8bec](https://github.com/ctuning/ck-tensorflow/commit/eff8bec192021162e4a336dbd3e795afa30b7d26) (contains TFLite packages).
- [armnn-mlperf](https://github.com/arm-software/armnn-mlperf) @ [42f44a2](https://github.com/ARM-software/armnn-mlperf/commit/42f44a266b6b4e04901255f46f6d34d12589208f) (contains ArmNN/ArmCL packages).
## Links
- [Bash script](https://github.com/ctuning/ck-mlperf/tree/master/script/mlperf-inference-v0.5.{}.image-classification) used to invoke benchmarking on Linux systems or Android devices.
'''.format(division_upper, division_lower)
task = 'object-detection'
for division_upper in [ 'Closed', 'Open' ]:
division_lower = division_upper.lower()
measurements_readmes[division_lower+'-'+task] = '''# MLPerf Inference - {} Division - Object Detection
We performed our measurements using automated, customizable, portable and reproducible
[Collective Knowledge](http://cknowledge.org) workflows. Our workflows automatically
install dependencies (models, datasets, etc.), preprocess input data in the correct way,
and so on.
## CK repositories
As CK is always evolving, it is hard to pin particular revisions of all repositories.
The most relevant repositories and their latest revisions on the submission date (18/Oct/2019):
- [ck-mlperf](https://github.com/ctuning/ck-mlperf) @ [ef1fced](https://github.com/ctuning/ck-mlperf/commit/ef1fcedd495fd03b5ad6d62d62c8ba271854f2ad) (contains the CK program wrapper, MLPerf SSD-MobileNet model packages and scripts).
- [ck-object-detection](https://github.com/ctuning/ck-object-detection) @ [780d328](https://github.com/ctuning/ck-object-detection/commit/780d3288ec19656cb60c5ad39b2486bbf0fbf97a) (contains most model packages)
- [ck-env](https://github.com/ctuning/ck-env) @ [5af9fbd](https://github.com/ctuning/ck-env/commit/5af9fbd93ad6c6465b631716645ad9442a333442) (contains dataset descriptions, preprocessing methods, etc.)
## Links
- [Docker image with instructions](https://github.com/ctuning/ck-mlperf/tree/master/docker/mlperf-inference-vision-with-ck.tensorrt.ubuntu-18.04).
- [Bash script](https://github.com/ctuning/ck-mlperf/tree/master/script/mlperf-inference-v0.5.{}.object-detection) used to invoke benchmarking via the Docker image.
'''.format(division_upper, division_lower)
# In[ ]:
# Snapshot of https://github.com/dividiti/inference/blob/61220457dec221ed1984c62bd9d382698bd71bc6/v0.5/mlperf.conf
mlperf_conf_6122045 = '''
# The format of this config file is 'key = value'.
# The key has the format 'model.scenario.key'. Value is mostly int64_t.
# Model maybe '*' as wildcard. In that case the value applies to all models.
# All times are in milli seconds
*.SingleStream.target_latency = 10
*.SingleStream.target_latency_percentile = 90
*.SingleStream.min_duration = 60000
*.SingleStream.min_query_count = 1024
*.MultiStream.target_qps = 20
*.MultiStream.target_latency_percentile = 99
*.MultiStream.samples_per_query = 4
*.MultiStream.max_async_queries = 1
*.MultiStream.target_latency = 50
*.MultiStream.min_duration = 60000
*.MultiStream.min_query_count = 270336
ssd-resnet34.MultiStream.target_qps = 15
ssd-resnet34.MultiStream.target_latency = 66
gnmt.MultiStream.min_query_count = 90112
gnmt.MultiStream.target_latency = 100
gnmt.MultiStream.target_qps = 10
gnmt.MultiStream.target_latency_percentile = 97
*.Server.target_qps = 1.0
*.Server.target_latency = 10
*.Server.target_latency_percentile = 99
*.Server.target_duration = 0
*.Server.min_duration = 60000
*.Server.min_query_count = 270336
resnet50.Server.target_latency = 15
ssd-resnet34.Server.target_latency = 100
gnmt.Server.min_query_count = 90112
gnmt.Server.target_latency = 250
gnmt.Server.target_latency_percentile = 97
*.Offline.target_qps = 1.0
*.Offline.target_latency_percentile = 90
*.Offline.min_duration = 60000
*.Offline.min_query_count = 1
'''
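# The 'model.scenario.key = value' format above is easy to inspect programmatically.
# A minimal parser sketch (hypothetical helper; the notebook only writes the snapshot verbatim):
# ```
# def parse_mlperf_conf(text):
#     """Parse 'model.scenario.key = value' lines into a nested dictionary."""
#     conf = {}
#     for line in text.splitlines():
#         line = line.split('#', 1)[0].strip()  # drop comments and whitespace
#         if not line:
#             continue
#         key, _, value = line.partition('=')
#         model, scenario, setting = key.strip().split('.', 2)
#         conf.setdefault(model, {}).setdefault(scenario, {})[setting] = value.strip()
#     return conf
#
# parse_mlperf_conf(mlperf_conf_6122045)['*']['SingleStream']['target_latency']  # '10'
# ```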
#
# ## Get the experimental data
# Download experimental data and add CK repositories as follows.
#
# ### Image Classification - Closed (MobileNet, ResNet)
# #### `firefly`
# ```
# $ wget https://www.dropbox.com/s/3md826fk7k1taf3/mlperf.closed.image-classification.firefly.tflite-v1.15.zip
# $ ck add repo --zip=mlperf.closed.image-classification.firefly.tflite-v1.15.zip
#
# $ wget https://www.dropbox.com/s/jusoz329mhixpxm/mlperf.closed.image-classification.firefly.armnn-v19.08.neon.zip
# $ ck add repo --zip=mlperf.closed.image-classification.firefly.armnn-v19.08.neon.zip
#
# $ wget https://www.dropbox.com/s/08lzbz7jl2w5jhu/mlperf.closed.image-classification.firefly.armnn-v19.08.opencl.zip
# $ ck add repo --zip=mlperf.closed.image-classification.firefly.armnn-v19.08.opencl.zip
# ```
# #### `hikey960`
# ```
# $ wget https://www.dropbox.com/s/lqnffl6wbaeceul/mlperf.closed.image-classification.hikey960.tflite-v1.15.zip
# $ ck add repo --zip=mlperf.closed.image-classification.hikey960.tflite-v1.15.zip
#
# $ wget https://www.dropbox.com/s/6m6uv1d33yc82f8/mlperf.closed.image-classification.hikey960.armnn-v19.08.neon.zip
# $ ck add repo --zip=mlperf.closed.image-classification.hikey960.armnn-v19.08.neon.zip
#
# $ wget https://www.dropbox.com/s/bz56y4damfqggr8/mlperf.closed.image-classification.hikey960.armnn-v19.08.opencl.zip
# $ ck add repo --zip=mlperf.closed.image-classification.hikey960.armnn-v19.08.opencl.zip
# ```
# #### `rpi4`
# ```
# $ wget https://www.dropbox.com/s/ig97x9cqoxfs3ne/mlperf.closed.image-classification.rpi4.tflite-v1.15.zip
# $ ck add repo --zip=mlperf.closed.image-classification.rpi4.tflite-v1.15.zip
#
# $ wget https://www.dropbox.com/s/ohcuyes409h66tx/mlperf.closed.image-classification.rpi4.armnn-v19.08.neon.zip
# $ ck add repo --zip=mlperf.closed.image-classification.rpi4.armnn-v19.08.neon.zip
# ```
# #### `mate10pro`
# ```
# $ wget https://www.dropbox.com/s/r7hss1sd0268b9j/mlperf.closed.image-classification.mate10pro.armnn-v19.08.neon.zip
# $ ck add repo --zip=mlperf.closed.image-classification.mate10pro.armnn-v19.08.neon.zip
#
# $ wget https://www.dropbox.com/s/iflzxbxcv3qka9x/mlperf.closed.image-classification.mate10pro.armnn-v19.08.opencl.zip
# $ ck add repo --zip=mlperf.closed.image-classification.mate10pro.armnn-v19.08.opencl.zip
# ```
# **NB:** We aborted the ResNet accuracy experiment with TFLite, as it was estimated to take 17 hours.
# #### `mate10pro` (only for testing the checker)
# ##### BAD_LOADGEN
# ```
# $ wget https://www.dropbox.com/s/nts8e7unb7vm68f/mlperf.closed.image-classification.mate10pro.tflite-v1.13.mobilenet.BAD_LOADGEN.zip
# $ ck add repo --zip=mlperf.closed.image-classification.mate10pro.tflite-v1.13.mobilenet.BAD_LOADGEN.zip
# ```
# ##### BAD_RESNET
# ```
# $ wget https://www.dropbox.com/s/bi2owxxpcfm6n2s/mlperf.closed.image-classification.mate10pro.armnn-v19.08.opencl.BAD_RESNET.zip
# $ ck add repo --zip=mlperf.closed.image-classification.mate10pro.armnn-v19.08.opencl.BAD_RESNET.zip
#
# $ wget https://www.dropbox.com/s/t2o2elqdyitqlpi/mlperf.closed.image-classification.mate10pro.armnn-v19.08.neon.BAD_RESNET.zip
# $ ck add repo --zip=mlperf.closed.image-classification.mate10pro.armnn-v19.08.neon.BAD_RESNET.zip
# ```
#
# ### Image Classification - Open (MobileNets-v1,v2)
# #### `firefly`
# ```
# $ wget https://www.dropbox.com/s/q8ieqgnr3zn6w4y/mlperf.open.image-classification.firefly.tflite-v1.15.zip
# $ ck add repo --zip=mlperf.open.image-classification.firefly.tflite-v1.15.zip
#
# $ wget https://www.dropbox.com/s/zpenduz1i4qt651/mlperf.open.image-classification.firefly.tflite-v1.15.mobilenet-v1-quantized.zip
# $ ck add repo --zip=mlperf.open.image-classification.firefly.tflite-v1.15.mobilenet-v1-quantized.zip
#
# $ wget https://www.dropbox.com/s/3mmefvxc15m9o5b/mlperf.open.image-classification.firefly.armnn-v19.08.opencl.zip
# $ ck add repo --zip=mlperf.open.image-classification.firefly.armnn-v19.08.opencl.zip
#
# $ wget https://www.dropbox.com/s/hrupp4o4apo3dfa/mlperf.open.image-classification.firefly.armnn-v19.08.neon.zip
# $ ck add repo --zip=mlperf.open.image-classification.firefly.armnn-v19.08.neon.zip
# ```
# #### `hikey960`
# ```
# $ wget https://www.dropbox.com/s/2gbbpsd2pjurvc8/mlperf.open.image-classification.hikey960.tflite-v1.15.zip
# $ ck add repo --zip=mlperf.open.image-classification.hikey960.tflite-v1.15.zip
#
# $ wget https://www.dropbox.com/s/rmttjnxzih9snzh/mlperf.open.image-classification.hikey960.tflite-v1.15.mobilenet-v1-quantized.zip
# $ ck add repo --zip=mlperf.open.image-classification.hikey960.tflite-v1.15.mobilenet-v1-quantized.zip
#
# $ wget https://www.dropbox.com/s/m5illg8i2tse5hg/mlperf.open.image-classification.hikey960.armnn-v19.08.opencl.zip
# $ ck add repo --zip=mlperf.open.image-classification.hikey960.armnn-v19.08.opencl.zip
#
# $ wget https://www.dropbox.com/s/3cujqfe4ps0g66h/mlperf.open.image-classification.hikey960.armnn-v19.08.neon.zip
# $ ck add repo --zip=mlperf.open.image-classification.hikey960.armnn-v19.08.neon.zip
# ```
# #### `rpi4`
# ```
# $ wget https://www.dropbox.com/s/awhdqjq3p4tre2q/mlperf.open.image-classification.rpi4.tflite-v1.15.zip
# $ ck add repo --zip=mlperf.open.image-classification.rpi4.tflite-v1.15.zip
#
# $ wget https://www.dropbox.com/s/rf8vsg5firhjzf8/mlperf.open.image-classification.rpi4.tflite-v1.15.mobilenet-v1-quantized.zip
# $ ck add repo --zip=mlperf.open.image-classification.rpi4.tflite-v1.15.mobilenet-v1-quantized.zip
#
# $ wget https://www.dropbox.com/s/0oketvqml7gyzl0/mlperf.open.image-classification.rpi4.armnn-v19.08.neon.zip
# $ ck add repo --zip=mlperf.open.image-classification.rpi4.armnn-v19.08.neon.zip
# ```
# #### `mate10pro`
# ```
# $ wget https://www.dropbox.com/s/avi6h9m2demz5zr/mlperf.open.image-classification.mate10pro.tflite-v1.13.mobilenet.zip
# $ ck add repo --zip=mlperf.open.image-classification.mate10pro.tflite-v1.13.mobilenet.zip
#
# $ wget https://www.dropbox.com/s/soaw27zcjb8hhww/mlperf.open.image-classification.mate10pro.tflite-v1.13.mobilenet-v1-quantized.zip
# $ ck add repo --zip=mlperf.open.image-classification.mate10pro.tflite-v1.13.mobilenet-v1-quantized.zip
# ```
# **NB:** `mate10pro.tflite-v1.13.mobilenet` would have been a perfectly valid closed submission, but it finished a little after the deadline. `mate10pro.tflite-v1.13.mobilenet-v1-quantized` is a valid open submission, however, as dividiti had not declared its intention to submit quantized results before the deadline.
#
# ### Object Detection - Open
# #### `velociti`
# ```
# $ wget https://www.dropbox.com/s/wiea3a8zf077jsv/mlperf.open.object-detection.velociti.zip
# $ ck add repo --zip=mlperf.open.object-detection.velociti.zip
# ```
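# Downloading and registering many archives can be scripted. A minimal sketch
# (assuming `wget` and `ck` are on the PATH, and given a list of URLs like those above):
# ```
# import subprocess
# def add_repos(zip_urls):
#     for url in zip_urls:
#         zip_name = url.rsplit('/', 1)[-1]
#         subprocess.run(['wget', url], check=True)
#         subprocess.run(['ck', 'add', 'repo', '--zip=%s' % zip_name], check=True)
# ```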
#
# ## Generate the submission checklist
# In[ ]:
checklist_template = """MLPerf Inference 0.5 Self-Certification Checklist
Name of Certifying Engineer(s): %(name)s
Email of Certifying Engineer(s): %(email)s
Name of System(s) Under Test: %(system_name)s
Division (check one):
- [%(open)s] Open
- [%(closed)s] Closed
Category (check one):
- [%(category_available)s] Available
- [%(category_preview)s] Preview
- [%(category_rdi)s] Research, Development, and Internal (RDI)
Benchmark (check one):
- [%(benchmark_mobilenet)s] MobileNet
- [ ] SSD-MobileNet
- [%(benchmark_resnet)s] ResNet
- [ ] SSD-1200
- [ ] NMT
- [%(benchmark_other)s] Other, please specify: %(benchmark_other_specify)s
Please fill in the following tables adding lines as necessary:
97%%-tile latency is required for NMT only. 99%%-tile is required for all other models.
### Single Stream Results Table
| SUT Name | Benchmark | Query Count | Accuracy |
|----------|-----------|-------------|----------|
| %(system)s | %(benchmark)s | %(query_count)s | %(accuracy_pc)s%% |
### Multi-Stream Results Table
| SUT Name | Benchmark | Query Count | Accuracy | 97%%-tile Latency | 99%%-tile Latency |
|----------|-----------|-------------|-----------|------------------|------------------|
| | | | | | |
### Server Results Table
| SUT Name | Benchmark | Query Count | Accuracy | 97%%-tile Latency | 99%%-tile Latency |
|----------|-----------|-------------|----------|------------------|------------------|
| | | | | | |
### Offline Results Table
| SUT Name | Benchmark | Sample Count | Accuracy |
|----------|-----------|--------------|----------|
| | | | |
Scenario (check all that apply):
- [%(scenario_singlestream)s] Single-Stream
- [%(scenario_multistream)s] Multi-Stream
- [%(scenario_server)s] Server
- [%(scenario_offline)s] Offline
For each SUT, does the submission meet the latency target for each
combination of benchmark and scenario? (check all that apply)
- [x] Yes (Single-Stream and Offline no requirements)
- [ ] Yes (MobileNet x Multi-Stream 50 ms @ 99%%)
- [ ] Yes (MobileNet x Server 10 ms @ 99%%)
- [ ] Yes (SSD-MobileNet x Multi-Stream 50 ms @ 99%%)
- [ ] Yes (SSD-MobileNet x Server 10 ms @ 99%%)
- [ ] Yes (ResNet x Multi-Stream 50 ms @ 99%%)
- [ ] Yes (ResNet x Server 15 ms @ 99%%)
- [ ] Yes (SSD-1200 x Multi-Stream 66 ms @ 99%%).
- [ ] Yes (SSD-1200 x Server 100 ms @ 99%%)
- [ ] Yes (NMT x Multi-Stream 100 ms @ 97%%)
- [ ] Yes (NMT x Server 250 ms @ 97%%)
- [ ] No
For each SUT, is the appropriate minimum number of queries or samples
met, depending on the Scenario x Benchmark? (check all that apply)
- [x] Yes (Single-Stream 1,024 queries)
- [ ] Yes (Offline 24,576 samples)
- [ ] Yes (NMT Server and Multi-Stream 90,112 queries)
- [ ] Yes (Image Models Server and Multi-Stream 270,336 queries)
- [ ] No
For each SUT and scenario, is the benchmark accuracy target met?
(check all that apply)
- [%(mobilenet_accuracy_met)s] Yes (MobileNet 71.68%% x 98%%)
- [ ] Yes (SSD-MobileNet 0.22 mAP x 99%%)
- [%(resnet_accuracy_met)s] Yes (ResNet 76.46%% x 99%%)
- [ ] Yes (SSD-1200 0.20 mAP x 99%%)
- [ ] Yes (NMT 23.9 BLEU x 99%%)
- [%(accuracy_not_met)s] No
For each SUT and scenario, did the submission run on the whole
validation set in accuracy mode? (check one)
- [x] Yes
- [ ] No
How many samples are loaded into the QSL in performance mode?
%(performance_sample_count)s
For each SUT and scenario, does the number of loaded samples in the
QSL in performance mode meet the minimum requirement? (check all that
apply)
- [%(performance_sample_count_1024)s] Yes (ResNet and MobileNet 1,024 samples)
- [%(performance_sample_count_256)s] Yes (SSD-MobileNet 256 samples)
- [%(performance_sample_count_64)s] Yes (SSD-1200 64 samples)
- [ ] Yes (NMT 3,903,900 samples)
- [%(performance_sample_count_not_met)s] No
For each SUT and scenario, is the experimental duration greater than
or equal to 60 seconds? (check one)
- [x] Yes
- [ ] No
Does the submission use LoadGen? (check one)
- [x] Yes
- [ ] No
Is your loadgen commit from one of these allowed commit hashes?
- [%(revision_61220457de)s] 61220457dec221ed1984c62bd9d382698bd71bc6
- [%(revision_5684c11e39)s] 5684c11e3987b614aae830390fa0e92f56b7e800
- [%(revision_55c0ea4e77)s] 55c0ea4e772634107f3e67a6d0da61e6a2ca390d
- [%(revision_d31c18fbd9)s] d31c18fbd9854a4f1c489ca1bc4cd818e48f2bc5
- [%(revision_1d0e06e54a)s] 1d0e06e54a7d763cf228bdfd8b1e987976e4222f
- [%(revision_other)s] Other, please specify: %(revision_other_specify)s
Do you have any additional change to LoadGen? (check one)
- [ ] Yes, please specify:
- [x] No
Does the submission run the same code in accuracy and performance
modes? (check one)
- [x] Yes
- [ ] No
Where is the LoadGen trace stored? (check one)
- [x] Host DRAM
- [ ] Other, please specify:
For the submitted result, what is the QSL random number generator seed?
- [x] 0x2b7e151628aed2a6ULL (3133965575612453542)
- [ ] Other, please specify:
For the submitted results, what is the sample index random number generator seed?
- [x] 0x093c467e37db0c7aULL (665484352860916858)
- [ ] Other, please specify:
For the submitted results, what is the schedule random number generator seed?
- [x] 0x3243f6a8885a308dULL (3622009729038561421)
- [ ] Other, please specify:
For each SUT and scenario, is the submission run the correct number of
times for the relevant scenario? (check one)
- [x] Yes (Accuracy 1x Performance 1x Single-Stream, Multi-Stream, Offline)
- [ ] Yes (Accuracy 1x Performance 5x Server)
- [ ] No
Are the weights calibrated using data outside of the calibration set?
(check one)
- [ ] Yes
- [x] No
What untimed pre-processing does the submission use? (check all that apply)
- [x] Resize
- [ ] Reorder channels or transpose
- [ ] Pad
- [x] A single crop
- [x] Mean subtraction and normalization
- [ ] Convert to whitelisted format
- [ ] No pre-processing
- [ ] Other, please specify:
What numerics does the submission use? (check all that apply)
- [ ] INT4
- [ ] INT8
- [ ] INT16
- [%(numerics_uint8)s] UINT8
- [ ] UINT16
- [ ] FP11
- [ ] FP16
- [ ] BF16
- [%(numerics_fp32)s] FP32
- [ ] Other, please specify:
Which of the following techniques does the submission use? (check all that apply)
- [ ] Wholesale weight replacement
- [ ] Weight supplements
- [ ] Discarding non-zero weight elements
- [ ] Pruning
- [ ] Caching queries
- [ ] Caching responses
- [ ] Caching intermediate computations
- [ ] Modifying weights during the timed portion of an inference run
- [ ] Weight quantization algorithms that are similar in size to the
non-zero weights they produce
- [ ] Hard coding the total number of queries
- [ ] Techniques that boost performance for fixed length experiments but
are inapplicable to long-running services except in the offline
scenario
- [ ] Using knowledge of the LoadGen implementation to predict upcoming
lulls or spikes in the server scenario
- [ ] Treating beams in a beam search differently. For example,
employing different precision for different beams
- [ ] Changing the number of beams per beam search relative to the reference
- [ ] Incorporating explicit statistical information about the performance or accuracy sets
- [ ] Techniques that take advantage of upsampled images.
- [ ] Techniques that only improve performance when there are identical samples in a query.
- [x] None of the above
Is the submission congruent with all relevant MLPerf rules?
- [x] Yes
- [ ] No
For each SUT, does the submission accurately reflect the real-world
performance of the SUT?
- [x] Yes
- [ ] No"""
def get_checklist(checklist_template=checklist_template, name='Anton Lokhmotov', email='anton@dividiti.com',
system='rpi4-tflite-v1.15', system_name='Raspberry Pi 4 (rpi4)', revision='61220457de',
division='closed', category='available', task='image-classification', benchmark='mobilenet', scenario='singlestream',
performance_sample_count=1024, performance_sample_count_met=True,
accuracy_pc=12.345, accuracy_met=True, numerics='fp32'):
def tick(var): return "x" if var else " "
print("=" * 100)
print(system)
print("=" * 100)
revision_other = revision not in [ '61220457de', '5684c11e39', '55c0ea4e77', 'd31c18fbd9', '1d0e06e54a' ]
benchmark_other = benchmark not in [ 'mobilenet', 'resnet']
if benchmark=='mobilenet':
accuracy_met = accuracy_pc >= 71.676*0.98
elif benchmark=='resnet':
accuracy_met = accuracy_pc >= 76.456*0.99
else:
accuracy_met = accuracy_met and accuracy_pc > 0
checklist = checklist_template % {
"name" : name,
"email" : email,
"system_name": system_name,
# Division.
"closed" : tick(division=='closed'),
"open" : tick(division=='open'),
# Category.
"category_available" : tick(category.lower()=='available'),
"category_preview" : tick(category.lower()=='preview'),
"category_rdi" : tick(category.lower()=='rdi'),
# Benchmark.
"benchmark_mobilenet": tick(benchmark=='mobilenet'),
"benchmark_resnet": tick(benchmark=='resnet'),
"benchmark_other": tick(benchmark_other),
"benchmark_other_specify": benchmark if benchmark_other else '',
# Table.
"system" : system,
"benchmark" : benchmark,
"query_count": 50000 if task=='image-classification' else 5000,
"accuracy_pc" : "%.3f" % accuracy_pc,
# Scenario.
"scenario_singlestream": tick(scenario=='singlestream'),
"scenario_multistream": tick(scenario=='multistream'),
"scenario_server": tick(scenario=='server'),
"scenario_offline": tick(scenario=='offline'),
# Accuracy.
"mobilenet_accuracy_met" : tick(benchmark=='mobilenet' and accuracy_met),
"resnet_accuracy_met" : tick(benchmark=='resnet' and accuracy_met),
"accuracy_not_met" : tick(not accuracy_met),
# "How many samples are loaded into the QSL in performance mode?"
"performance_sample_count": performance_sample_count,
"performance_sample_count_1024": tick(performance_sample_count==1024),
"performance_sample_count_256": tick(performance_sample_count==256),
"performance_sample_count_64": tick(performance_sample_count==64),
"performance_sample_count_not_met": tick(not performance_sample_count_met), # TODO
# LoadGen revision.
"revision_61220457de": tick(revision=='61220457de'),
"revision_5684c11e39": tick(revision=='5684c11e39'),
"revision_55c0ea4e77": tick(revision=='55c0ea4e77'),
"revision_d31c18fbd9": tick(revision=='d31c18fbd9'),
"revision_1d0e06e54a": tick(revision=='1d0e06e54a'),
"revision_other": tick(revision_other),
"revision_other_specify": revision if revision_other else '',
# Numerics.
"numerics_uint8": tick(numerics=='uint8'),
"numerics_fp32": tick(numerics=='fp32'),
}
print(checklist)
print("-" * 100)
return checklist
# null = get_checklist(system='rpi4-armnn-v19.08-neon', system_name='Raspberry Pi 4 (rpi4)', benchmark='mobilenet', accuracy_pc=70.241, numerics='uint8')
# null = get_checklist(system='hikey960-tflite-v1.15', system_name='Linaro HiKey 960 (hikey960)', benchmark='resnet', accuracy_pc=75.692, revision='deadbeef')
null = get_checklist(system='velociti-tensorflow-v1.14-cpu', name='Anton Lokhmotov; Emanuele Vitali', email='anton@dividiti.com; emanuele.vitali@polimi.it', system_name='HP Z640 G1X62EA workstation (velociti)', division='open', category='RDI', benchmark='ssd-mobilenet-fpn')
#
# ## Check the experimental data
# In[ ]:
#
# Image Classification - Closed (MobileNet, ResNet).
#
repos_image_classification_closed = [
# firefly
'mlperf.closed.image-classification.firefly.tflite-v1.15', # https://github.com/mlperf/submissions_inference_0_5/pull/18
'mlperf.closed.image-classification.firefly.armnn-v19.08.neon', # https://github.com/mlperf/submissions_inference_0_5/pull/21
'mlperf.closed.image-classification.firefly.armnn-v19.08.opencl', #https://github.com/mlperf/submissions_inference_0_5/pull/22
# hikey960
'mlperf.closed.image-classification.hikey960.tflite-v1.15', # https://github.com/mlperf/submissions_inference_0_5/pull/23
'mlperf.closed.image-classification.hikey960.armnn-v19.08.neon', # https://github.com/mlperf/submissions_inference_0_5/pull/24
'mlperf.closed.image-classification.hikey960.armnn-v19.08.opencl', # https://github.com/mlperf/submissions_inference_0_5/pull/25
# rpi4
'mlperf.closed.image-classification.rpi4.tflite-v1.15', # https://github.com/mlperf/submissions_inference_0_5/pull/26/
'mlperf.closed.image-classification.rpi4.armnn-v19.08.neon', # https://github.com/mlperf/submissions_inference_0_5/pull/30
# mate10pro
'mlperf.closed.image-classification.mate10pro.armnn-v19.08.neon', # https://github.com/mlperf/submissions_inference_0_5/pull/32
'mlperf.closed.image-classification.mate10pro.armnn-v19.08.opencl', # https://github.com/mlperf/submissions_inference_0_5/pull/35
]
repos_image_classification_closed_audit = [
'mlperf.closed.image-classification.firefly.audit', # https://github.com/mlperf/submissions_inference_0_5/pull/234
'mlperf.closed.image-classification.hikey960.audit', # https://github.com/mlperf/submissions_inference_0_5/pull/236
'mlperf.closed.image-classification.rpi4.audit', # https://github.com/mlperf/submissions_inference_0_5/pull/238
#'mlperf.closed.image-classification.mate10pro.audit',
]
#
# Image Classification - Open (MobileNets-v1,v2).
#
repos_image_classification_open = [
# firefly
'mlperf.open.image-classification.firefly.tflite-v1.15', # https://github.com/mlperf/submissions_inference_0_5/pull/39
'mlperf.open.image-classification.firefly.tflite-v1.15.mobilenet-v1-quantized', # https://github.com/mlperf/submissions_inference_0_5/pull/127
'mlperf.open.image-classification.firefly.armnn-v19.08.opencl', # https://github.com/mlperf/submissions_inference_0_5/pull/40
'mlperf.open.image-classification.firefly.armnn-v19.08.neon', # https://github.com/mlperf/submissions_inference_0_5/pull/120
# hikey960
'mlperf.open.image-classification.hikey960.tflite-v1.15', # https://github.com/mlperf/submissions_inference_0_5/pull/37
'mlperf.open.image-classification.hikey960.tflite-v1.15.mobilenet-v1-quantized', # https://github.com/mlperf/submissions_inference_0_5/pull/128
'mlperf.open.image-classification.hikey960.armnn-v19.08.opencl', # https://github.com/mlperf/submissions_inference_0_5/pull/38
'mlperf.open.image-classification.hikey960.armnn-v19.08.neon', # https://github.com/mlperf/submissions_inference_0_5/pull/121
# rpi4
'mlperf.open.image-classification.rpi4.tflite-v1.15', # https://github.com/mlperf/submissions_inference_0_5/pull/122
'mlperf.open.image-classification.rpi4.tflite-v1.15.mobilenet-v1-quantized', # https://github.com/mlperf/submissions_inference_0_5/pull/129
'mlperf.open.image-classification.rpi4.armnn-v19.08.neon', # https://github.com/mlperf/submissions_inference_0_5/pull/123
# mate10pro
'mlperf.open.image-classification.mate10pro.tflite-v1.13.mobilenet', # https://github.com/mlperf/submissions_inference_0_5/pull/130
'mlperf.open.image-classification.mate10pro.tflite-v1.13.mobilenet-v1-quantized', # https://github.com/mlperf/submissions_inference_0_5/pull/135
]
repos_image_classification_open_audit = [
'mlperf.open.image-classification.firefly.audit', # https://github.com/mlperf/submissions_inference_0_5/pull/255
'mlperf.open.image-classification.hikey960.audit', # https://github.com/mlperf/submissions_inference_0_5/pull/257
'mlperf.open.image-classification.rpi4.audit', # https://github.com/mlperf/submissions_inference_0_5/pull/258
#'mlperf.open.image-classification.mate10pro.audit',
]
#
# Object Detection - Open (TensorFlow Model Zoo + YOLO-v3)
#
repos_object_detection_open = [
# velociti
'mlperf.open.object-detection.velociti', # https://www.dropbox.com/s/wiea3a8zf077jsv/mlperf.open.object-detection.velociti.zip
]
# In[ ]:
# repos_for_testing = [
# 'mlperf.closed.image-classification.mate10pro.tflite-v1.13.mobilenet.BAD_LOADGEN',
# 'mlperf.closed.image-classification.mate10pro.armnn-v19.08.opencl.BAD_RESNET',
# 'mlperf.closed.image-classification.mate10pro.armnn-v19.08.neon.BAD_RESNET',
# 'mlperf-inference-vision-experiments-count5'
# ]
# In[ ]:
# #!ck recache repo
# for repo_uoa in repos:
# print("=" * 100)
# print(repo_uoa)
# print("=" * 100)
# !ck list $repo_uoa:experiment:* | sort
# print("-" * 100)
# print("")
# In[ ]:
upstream_path=os.environ.get('CK_ENV_MLPERF_INFERENCE','')
# In[ ]:
root_dir=os.environ.get('CK_MLPERF_SUBMISSION_ROOT','')
def check_experimental_results(repo_uoa, module_uoa='experiment', tags='mlperf', submitter='dividiti', path=None, audit=False):
if not os.path.exists(root_dir): os.mkdir(root_dir)
print("Storing results under '%s'" % root_dir)
r = ck.access({'action':'search', 'repo_uoa':repo_uoa, 'module_uoa':module_uoa, 'tags':tags})
if r['return']>0:
print('Error: %s' % r['error'])
exit(1)
experiments = r['lst']
for experiment in experiments:
data_uoa = experiment['data_uoa']
r = ck.access({'action':'list_points', 'repo_uoa':repo_uoa, 'module_uoa':module_uoa, 'data_uoa':data_uoa})
if r['return']>0:
print('Error: %s' % r['error'])
exit(1)
experiment_tags = r['dict']['tags']
experiment_points = r['points']
experiment_path = r['path']
# Load pipeline to determine the original program_name
load_pipeline_adict = { 'action': 'load_pipeline',
'repo_uoa': repo_uoa,
'module_uoa': module_uoa,
'data_uoa': data_uoa,
}
r=ck.access( load_pipeline_adict )
if r['return']>0:
print('Error: %s' % r['error'])
exit(1)
pipeline = r['pipeline']
program_name = pipeline['choices']['data_uoa']
print("*" * 100)
division=task=platform=library=inference_engine=inference_engine_version=backend=benchmark=scenario=mode=preprocessing=test=notes = ''
for atag in experiment_tags:
if '.' in atag: # Expected format: attribute1.value1, attribute2.value2, etc. (in any order)
# Example: "division.open", "submitter.dividiti", "task.image-classification", "platform.xavier",
# "inference_engine.tflite", "inference_engine_version.v2.1.1", "inference_engine_backend.dummy",
# "workload.mobilenet-v2-1.4-224", "scenario.singlestream", "mode.performance"
(attribute, value) = atag.split('.', 1) # protection from dotted version notation!
if attribute == 'division':
division = value
elif attribute == 'task':
task = value
elif attribute == 'platform':
platform = value
elif attribute == 'inference_engine':
inference_engine = value
elif attribute == 'inference_engine_version':
inference_engine_version = value
elif attribute == 'inference_engine_backend':
backend = value if value!='dummy' else ''
elif attribute == 'workload': # actually, the model!
benchmark = value
elif attribute == 'scenario':
scenario = value
elif attribute == 'mode':
mode = value
if division and task and platform and inference_engine and benchmark and scenario and mode:
library = inference_engine + (('-' + inference_engine_version) if inference_engine_version else '')
elif 'velociti' in experiment_tags:
# Expected format: [ "mlperf", "open", "object-detection", "velociti", "cpu", "rcnn-inception-resnet-v2-lowproposals", "singlestream", "accuracy" ]
(_, division, task, platform, backend, benchmark, scenario, mode) = experiment_tags
if task == 'object-detection':
library = 'tensorflow-v1.14'
else:
library = 'tensorrt-v6.0'
backend = ''
notes = '======= DEMO ======='
elif 'accuracy' in experiment_tags:
# FIXME: With the benefit of hindsight, [ ..., "armnn-v19.08", "neon", ... ] should have come
# as one tag "armnn-v19.08-neon", since we join them in this notebook anyway.
if 'neon' in experiment_tags or 'opencl' in experiment_tags:
# Expected format: [ "mlperf", "open", "image-classification", "firefly", "armnn-v19.08", "neon", "mobilenet-v1-0.5-128", "singlestream", "accuracy", "using-opencv" ]
(_, division, task, platform, library, backend, benchmark, scenario, mode, preprocessing) = experiment_tags
else:
# Expected format: [ "mlperf", "open", "image-classification", "firefly", "tflite-v1.15", "mobilenet-v1-0.5-128", "singlestream", "accuracy", "using-opencv" ]
(_, division, task, platform, library, benchmark, scenario, mode, preprocessing) = experiment_tags
elif 'performance' in experiment_tags:
if 'neon' in experiment_tags or 'opencl' in experiment_tags:
# Expected format: [ "mlperf", "open", "image-classification", "firefly", "armnn-v19.08", "neon", "mobilenet-v1-0.5-128", "singlestream", "performance" ]
(_, division, task, platform, library, backend, benchmark, scenario, mode) = experiment_tags
else:
# Expected format: [ "mlperf", "open", "image-classification", "firefly", "tflite-v1.15", "mobilenet-v1-0.5-128", "singlestream", "performance" ]
(_, division, task, platform, library, benchmark, scenario, mode) = experiment_tags
elif 'audit' in experiment_tags: # As accuracy but with the test name instead of the preprocessing method.
if 'neon' in experiment_tags or 'opencl' in experiment_tags:
# Expected format: [ "mlperf", "open", "image-classification", "firefly", "armnn-v19.08", "neon", "mobilenet-v1-0.5-128", "singlestream", "audit", "TEST03" ]
(_, division, task, platform, library, backend, benchmark, scenario, mode, test) = experiment_tags
else:
# Expected format: [ "mlperf", "open", "image-classification", "firefly", "tflite-v1.15", "mobilenet-v1-0.5-128", "singlestream", "audit", "TEST03" ]
(_, division, task, platform, library, benchmark, scenario, mode, test) = experiment_tags
else:
raise Exception("Expected 'accuracy' or 'performance' or 'audit' in experiment_tags!")
# if mode == 'accuracy': continue
organization = submitter
if not inference_engine:
(inference_engine, inference_engine_version) = library.split('-')
if backend != '':
system = platform+'-'+library+'-'+backend
else:
system = platform+'-'+library
division_system = division+'-'+system
program_and_model_combination = program_name+'-'+benchmark
#
# Directory structure according to the Inference section of the General MLPerf Submission Rules:
# https://github.com/mlperf/policies/blob/master/submission_rules.adoc#552-inference
#
# <division>/
#   <organization>/
#
division_dir = os.path.join(root_dir, division)
if not os.path.exists(division_dir): os.mkdir(division_dir)
organization_dir = os.path.join(division_dir, organization)
if not os.path.exists(organization_dir): os.mkdir(organization_dir)
#
# "systems"/
# .json
#
systems_dir = os.path.join(organization_dir, 'systems')
if not os.path.exists(systems_dir): os.mkdir(systems_dir)
system_json_name = '%s.json' % system
system_json_path = os.path.join(systems_dir, system_json_name)
system_json = dump_system_description_dictionary(system_json_path, division, platform, inference_engine, inference_engine_version, backend)
print('%s' % systems_dir)
print(' |_ %s [%s]' % (system_json_name, division_system))
#
# "code"/
# /
# /
#
#
code_dir = os.path.join(organization_dir, 'code')
if not os.path.exists(code_dir): os.mkdir(code_dir)
# FIXME: For now, not always "per reference".
benchmark_dir = os.path.join(code_dir, benchmark)
if not os.path.exists(benchmark_dir): os.mkdir(benchmark_dir)
implementation_dir = os.path.join(benchmark_dir, program_name)
if not os.path.exists(implementation_dir): os.mkdir(implementation_dir)
print('%s' % code_dir)
# Create 'README.md'.
implementation_readme_name = 'README.md'
implementation_readme_path = os.path.join(implementation_dir, implementation_readme_name)
# pprint(program_name)
# pprint(implementation_readmes)
implementation_readme = implementation_readmes.get(program_name, '')
with open(implementation_readme_path, 'w') as implementation_readme_file:
implementation_readme_file.writelines(implementation_readme)
if implementation_readme == '':
print(' |_ %s [EMPTY]' % implementation_readme_name)
raise Exception("Empty implementation README for '%s'!" % program_name)
else:
print(' |_ %s' % implementation_readme_name)
#
# "measurements"/
# /
# /
# /
# _.json
# README.md
# user.conf
# mlperf.conf
# calibration_process.adoc (?)
# submission_checklist.txt
#
measurements_dir = os.path.join(organization_dir, 'measurements')
if not os.path.exists(measurements_dir): os.mkdir(measurements_dir)
system_dir = os.path.join(measurements_dir, system)
if not os.path.exists(system_dir): os.mkdir(system_dir)
benchmark_dir = os.path.join(system_dir, benchmark)
if not os.path.exists(benchmark_dir): os.mkdir(benchmark_dir)
scenario_dir = os.path.join(benchmark_dir, scenario)
if not os.path.exists(scenario_dir): os.mkdir(scenario_dir)
print(scenario_dir)
# Create '<system_desc_id>_<implementation_id>.json'.
system_implementation_json_name = system+'_'+program_name+'.json'
system_implementation_json_path = os.path.join(scenario_dir, system_implementation_json_name)
implementation_benchmark_json = dump_implementation_dictionary(system_implementation_json_path, pipeline['dependencies']['weights'], inference_engine, program_name, benchmark)
print(' |_ %s [for %s]' % (system_implementation_json_name, program_and_model_combination))
# Create 'README.md' based on the division and task (basically, mentions a division- and task-specific script).
measurements_readme_name = 'README.md'
measurements_readme_path = os.path.join(scenario_dir, measurements_readme_name)
measurements_readme = measurements_readmes.get(division+'-'+task, '')
if measurements_readme != '':
with open(measurements_readme_path, 'w') as measurements_readme_file:
measurements_readme_file.writelines(measurements_readme)
print(' |_ %s [for %s %s]' % (measurements_readme_name, division, task))
else:
raise Exception("Invalid measurements README!")
# Create 'NOTES.txt'.
measurements_notes_name = 'NOTES.txt'
measurements_notes_path = os.path.join(scenario_dir, measurements_notes_name)
measurements_notes = notes
if measurements_notes != '':
with open(measurements_notes_path, 'w') as measurements_notes_file:
measurements_notes_file.writelines(measurements_notes)
print(' |_ %s [for %s %s]' % (measurements_notes_name, division, task))
# Try to find environment for 'user.conf'.
if program_name.endswith('-loadgen'):
program_config_tag = program_name[:-len('-loadgen')]
else:
program_config_tag = program_name
loadgen_config_tags='loadgen,config,'+program_config_tag # FIXME: needs to be fixed on the soft: entry side
lgc = ck.access({'action':'search', 'module_uoa':'env', 'tags':loadgen_config_tags})
if lgc['return']>0:
print('Error: %s' % lgc['error'])
exit(1)
envs = lgc['lst']
if len(envs) > 1:
# Found several environments.
print('Error: More than one environment found with tags=\'%s\'' % loadgen_config_tags)
exit(1)
elif len(envs) == 1:
# Found exactly one environment.
lgc = ck.access({'action':'load', 'module_uoa':'env', 'data_uoa':envs[0]['data_uoa']})
if lgc['return']>0:
print('Error: %s' % lgc['error'])
exit(1)
# CK_ENV_LOADGEN_CONFIG=/home/anton/CK_REPOS/ck-mlperf/soft/config.loadgen/image-classification-armnn-tflite-loadgen-conf
# CK_ENV_LOADGEN_CONFIG_FILE=/home/anton/CK_REPOS/ck-mlperf/soft/config.loadgen/image-classification-armnn-tflite-loadgen-conf/user.conf
user_conf_path=lgc['dict']['env']['CK_ENV_LOADGEN_CONFIG_FILE']
user_conf_name=user_conf_path[len(lgc['dict']['env']['CK_ENV_LOADGEN_CONFIG'])+1:]
elif len(envs) == 0:
# Not found any environments: copy 'user.conf' from implementation source.
user_conf_name = 'user.conf'
implementation_path = get_program_path(program_name)
if not implementation_path:
raise Exception("Invalid implementation path!")
user_conf_path = os.path.join(implementation_path, user_conf_name)
copy2(user_conf_path, scenario_dir)
print(' |_ %s [from %s]' % (user_conf_name, user_conf_path))
# Copy 'mlperf.conf' from MLPerf Inference source.
mlperf_conf_name = 'mlperf.conf'
mlperf_conf_path = os.path.join(scenario_dir, mlperf_conf_name)
if program_name in [ 'image-classification-tflite-loadgen', 'image-classification-armnn-tflite-loadgen' ]:
# Write a snapshot from https://github.com/dividiti/inference/blob/61220457dec221ed1984c62bd9d382698bd71bc6/v0.5/mlperf.conf
with open(mlperf_conf_path, 'w') as mlperf_conf_file:
mlperf_conf_file.writelines(mlperf_conf_6122045)
print(' |_ %s [from %s]' % (mlperf_conf_name, 'github.com/mlperf/inference@6122045'))
else:
upstream_mlperf_conf_path = os.path.join(upstream_path, 'v0.5', 'mlperf.conf')
copy2(upstream_mlperf_conf_path, mlperf_conf_path)
print(' |_ %s [from %s]' % (mlperf_conf_name, upstream_mlperf_conf_path))
# Write submission_checklist.txt into the same directory later, once accuracy.txt is parsed.
#
# "results"/
# /
# /
# /
# performance/
# run_x/ # 1 run for single stream and offline, 5 otherwise
# mlperf_log_summary.txt
# mlperf_log_detail.txt
# mlperf_log_trace.json
# accuracy/
# mlperf_log_accuracy.json
# compliance_checker_log.txt
#
results_dir = os.path.join(organization_dir, 'results')
if not os.path.exists(results_dir): os.mkdir(results_dir)
system_dir = os.path.join(results_dir, system)
if not os.path.exists(system_dir): os.mkdir(system_dir)
benchmark_dir = os.path.join(system_dir, benchmark)
if not os.path.exists(benchmark_dir): os.mkdir(benchmark_dir)
scenario_dir = os.path.join(benchmark_dir, scenario)
if not os.path.exists(scenario_dir): os.mkdir(scenario_dir)
mode_dir = os.path.join(scenario_dir, mode)
if not os.path.exists(mode_dir): os.mkdir(mode_dir)
print(mode_dir)
if audit:
# Deal with a subset of audit tests.
# if test not in [ 'TEST03' ]: # [ 'TEST01', 'TEST03', 'TEST04-A', 'TEST04-B', 'TEST05' ]:
# continue
# Save the accuracy and performance dirs for the corresponding submission experiment.
accuracy_dir = os.path.join(scenario_dir, 'accuracy')
performance_dir = os.path.join(scenario_dir, 'performance', 'run_1')
# Use the mode expected for each test.
mode = 'performance' if test != 'TEST03' else 'submission'
# Create a similar directory structure to results_dir, with another level, test_dir,
# between scenario_dir and mode_dir.
audit_dir = os.path.join(organization_dir, 'audit')
if not os.path.exists(audit_dir): os.mkdir(audit_dir)
system_dir = os.path.join(audit_dir, system)
if not os.path.exists(system_dir): os.mkdir(system_dir)
benchmark_dir = os.path.join(system_dir, benchmark)
if not os.path.exists(benchmark_dir): os.mkdir(benchmark_dir)
scenario_dir = os.path.join(benchmark_dir, scenario)
if not os.path.exists(scenario_dir): os.mkdir(scenario_dir)
test_dir = os.path.join(scenario_dir, test)
if not os.path.exists(test_dir): os.mkdir(test_dir)
mode_dir = os.path.join(test_dir, mode)
if not os.path.exists(mode_dir): os.mkdir(mode_dir)
# For each point (should be one point for each performance run).
for point_idx, point in enumerate(experiment_points, start=1):
point_file_path = os.path.join(experiment_path, 'ckp-%s.0001.json' % point)
with open(point_file_path) as point_file:
point_data_raw = json.load(point_file)
characteristics_list = point_data_raw['characteristics_list']
characteristics = characteristics_list[0]
# Set the leaf directory.
if mode == 'performance':
run_dir = os.path.join(mode_dir, 'run_%d' % point_idx)
if not os.path.exists(run_dir): os.mkdir(run_dir)
last_dir = run_dir
# Performance notes. Should ideally go inside the run_x dir, but the checker complains.
if 'velociti' in experiment_tags and 'tensorrt' in experiment_tags:
num_streams = point_data_raw['choices']['env'].get('CK_LOADGEN_MULTISTREAMNESS', '')
if num_streams == '': num_streams = '?'
performance_notes = 'uid={}: {} streams'.format(point, num_streams)
performance_notes_name = run_dir + '.txt'
performance_notes_path = os.path.join(mode_dir, performance_notes_name)
with open(performance_notes_path, 'w') as performance_notes_file:
performance_notes_file.writelines(performance_notes)
print(' |_ %s' % performance_notes_name)
else:
last_dir = mode_dir
print(last_dir)
# Dump files in the leaf directory.
mlperf_log = characteristics['run'].get('mlperf_log',{})
# Summary file (with errors and warnings in accuracy mode, with statistics in performance mode).
summary_txt_name = 'mlperf_log_summary.txt'
summary_txt_path = os.path.join(last_dir, summary_txt_name)
summary = mlperf_log.get('summary','')
with open(summary_txt_path, 'w') as summary_txt_file:
summary_txt_file.writelines(summary)
print(' |_ %s' % summary_txt_name)
# Detail file (with settings).
detail_txt_name = 'mlperf_log_detail.txt'
detail_txt_path = os.path.join(last_dir, detail_txt_name)
detail = mlperf_log.get('detail','')
with open(detail_txt_path, 'w') as detail_txt_file:
detail_txt_file.writelines(detail)
print(' |_ %s' % detail_txt_name)
# Accuracy file (with accuracy dictionary).
# TODO: Move the next 5 lines into the (if mode == 'accuracy') block,
# once the submission checker no longer complains as follows:
# "performance/run_1 has file list mismatch (['mlperf_log_accuracy.json'])"
accuracy_json_name = 'mlperf_log_accuracy.json'
accuracy_json_path = os.path.join(last_dir, accuracy_json_name)
with open(accuracy_json_path, 'w') as accuracy_json_file:
json.dump(mlperf_log.get('accuracy',{}), accuracy_json_file, indent=2)
print(' |_ %s' % accuracy_json_name)
# Do what's required by NVIDIA's audit tests.
if audit:
test_path = os.path.join(upstream_path, 'v0.5', 'audit', 'nvidia', test)
if 'TEST01' in experiment_tags:
# Verify that the accuracy (partially) dumped for the audit test matches that for the submission.
verify_accuracy_py = os.path.join(test_path, 'verify_accuracy.py')
submission_accuracy_json_path = os.path.join(accuracy_dir, accuracy_json_name)
verify_accuracy_txt = subprocess.getoutput('python3 {} -a {} -p {}'.format(verify_accuracy_py, submission_accuracy_json_path, accuracy_json_path))
verify_accuracy_txt_name = 'verify_accuracy.txt'
verify_accuracy_txt_path = os.path.join(test_dir, verify_accuracy_txt_name)
with open(verify_accuracy_txt_path, 'w') as verify_accuracy_txt_file:
verify_accuracy_txt_file.write(verify_accuracy_txt)
print('%s' % test_dir)
print(' |_ %s' % verify_accuracy_txt_name)
if test in [ 'TEST01', 'TEST03', 'TEST05' ]:
# Verify that the performance for the audit test matches that for the submission.
verify_performance_py = os.path.join(test_path, 'verify_performance.py')
submission_summary_txt_path = os.path.join(performance_dir, summary_txt_name)
verify_performance_txt = subprocess.getoutput('python3 {} -r {} -t {}'.format(verify_performance_py, submission_summary_txt_path, summary_txt_path))
verify_performance_txt_name = 'verify_performance.txt'
verify_performance_txt_path = os.path.join(test_dir, verify_performance_txt_name)
with open(verify_performance_txt_path, 'w') as verify_performance_txt_file:
verify_performance_txt_file.write(verify_performance_txt)
print('%s' % test_dir)
print(' |_ %s' % verify_performance_txt_name)
if test in [ 'TEST04-A', 'TEST04-B' ]:
test04a_summary_txt_path = os.path.join(scenario_dir, 'TEST04-A', 'performance', 'run_1', summary_txt_name)
test04b_summary_txt_path = os.path.join(scenario_dir, 'TEST04-B', 'performance', 'run_1', summary_txt_name)
if os.path.exists(test04a_summary_txt_path) and os.path.exists(test04b_summary_txt_path):
# If both tests have been processed, verify that their performance matches.
verify_performance_py = os.path.join(upstream_path, 'v0.5', 'audit', 'nvidia', 'TEST04-A', 'verify_test4_performance.py')
#print("python3 {} -u {} -s {}".format(verify_performance_py, test04a_summary_txt_path, test04b_summary_txt_path))
verify_performance_txt = subprocess.getoutput('python3 {} -u {} -s {}'.format(verify_performance_py, test04a_summary_txt_path, test04b_summary_txt_path))
#print(verify_performance_txt)
verify_performance_txt_name = 'verify_performance.txt'
verify_performance_txt_path = os.path.join(scenario_dir, 'TEST04-A', verify_performance_txt_name)
with open(verify_performance_txt_path, 'w') as verify_performance_txt_file:
verify_performance_txt_file.write(verify_performance_txt)
print('%s' % os.path.join(scenario_dir, 'TEST04-A'))
print(' |_ %s' % verify_performance_txt_name)
else:
# Need both A/B tests to be processed. Wait for the other one.
continue
# Generate accuracy.txt.
if mode == 'accuracy' or mode == 'submission':
accuracy_txt_name = 'accuracy.txt'
accuracy_txt_path = os.path.join(last_dir, accuracy_txt_name)
if task == 'image-classification':
accuracy_imagenet_py = os.path.join(upstream_path, 'v0.5', 'classification_and_detection', 'tools', 'accuracy-imagenet.py')
accuracy_txt = subprocess.getoutput('python3 {} --imagenet-val-file {} --mlperf-accuracy-file {}'.format(accuracy_imagenet_py, imagenet_val_file, accuracy_json_path))
# The last (and only) line is e.g. "accuracy=76.442%, good=38221, total=50000".
accuracy_line = accuracy_txt.split('\n')[-1]
match = re.match(r'accuracy=(.+)%, good=(\d+), total=(\d+)', accuracy_line)
accuracy_pc = float(match.group(1))
elif task == 'object-detection':
accuracy_coco_py = os.path.join(upstream_path, 'v0.5', 'classification_and_detection', 'tools', 'accuracy-coco.py')
# os.environ['PYTHONPATH'] = pythonpath_coco+':'+os.environ.get('PYTHONPATH','')
accuracy_txt = subprocess.getoutput('python3 {} --coco-dir {} --mlperf-accuracy-file {}'.format(accuracy_coco_py, coco_dir, accuracy_json_path))
# The last line is e.g. "mAP=13.323%".
accuracy_line = accuracy_txt.split('\n')[-1]
match = re.match(r'mAP=([\d.]+)%', accuracy_line)
if match:
accuracy_pc = float(match.group(1))
else:
raise Exception("Could not parse accuracy from: "+accuracy_txt)
else:
raise Exception("Invalid task '%s'!" % task)
with open(accuracy_txt_path, 'w') as accuracy_txt_file:
accuracy_txt_file.write(accuracy_txt)
print(' |_ %s [%.3f%% parsed from "%s"]' % (accuracy_txt_name, accuracy_pc, accuracy_line))
# Generate submission_checklist.txt for each system, benchmark and scenario under "measurements/".
if mode == 'accuracy' and not audit:
checklist_name = 'submission_checklist.txt'
checklist_path = os.path.join(measurements_dir, system, benchmark, scenario, checklist_name)
# Extract LoadGen revision from the second line of e.g.
# "pid": 28660, "tid": 28660, "ts": 8750ns : version : .5a1 @ 61220457de
# FIXME: In practice, the revision may be different for accuracy and performance runs
# (happened on rpi4 due to a late LoadGen fix). We would prefer to use one from
# the performance one, as it may be more critical for performance evaluation.
# However, as we only write the checklist from the accuracy run, we are somewhat stuck.
loadgen_revision = detail[1].split('@')[1].strip()
# FIXME: The actual performance_sample_count can be extracted from the performance run.
# Again, this is not available to us here.
# We could check in user.conf, but we would need to parse it.
performance_sample_count = 1024 if task == 'image-classification' else 256
# Write the checklist.
if division == 'open' and task == 'object-detection':
# Collaboration between dividiti and Politecnico di Milano.
print(system)
checklist = get_checklist(name='Anton Lokhmotov; Emanuele Vitali',
email='anton@dividiti.com; emanuele.vitali@polimi.it',
division=division, task=task, system=system,
system_name=system_json['system_name'], category=system_json['status'],
revision=loadgen_revision, benchmark=benchmark, accuracy_pc=accuracy_pc,
performance_sample_count=performance_sample_count,
numerics=implementation_benchmark_json['weight_data_types'])
else:
checklist = get_checklist(division=division, task=task, system=system,
system_name=system_json['system_name'], category=system_json['status'],
revision=loadgen_revision, benchmark=benchmark, accuracy_pc=accuracy_pc,
performance_sample_count=performance_sample_count,
numerics=implementation_benchmark_json['weight_data_types'])
with open(checklist_path, 'w') as checklist_file:
checklist_file.writelines(checklist)
# # Trace file (should omit trace from v0.5).
# trace_json_name = 'mlperf_log_trace.json'
# trace_json_path = os.path.join(last_dir, trace_json_name)
# with open(trace_json_path, 'w') as trace_json_file:
# json.dump(mlperf_log.get('trace',{}), trace_json_file, indent=2)
return
repo = os.environ.get('CK_MLPERF_SUBMISSION_REPO','')
repos = [ repo ] if repo != '' else []
submitter = os.environ.get('CK_MLPERF_SUBMISSION_SUBMITTER','dividiti')
for repo_uoa in repos:
    check_experimental_results(repo_uoa, submitter=submitter, audit=False)
# ### Extract audit repos
# In[ ]:
# # audit_repos = repos_image_classification_closed_audit + repos_image_classification_open_audit
# audit_repos = [ 'mlperf.closed.image-classification.mate10pro.audit' ]
# for repo_uoa in audit_repos:
# check_experimental_results(repo_uoa, path=path, submitter=submitter, audit=True)
# ### Run submission checker
# In[ ]:
print("*" * 100)
submission_checker_py = os.path.join(upstream_path, 'v0.5', 'tools', 'submission', 'submission-checker.py')
# The checker has a weird bug: when submitting to open, 'closed/<submitter>/results' must exist on disk;
# vice versa, when submitting to closed, 'open/<submitter>/results' must exist on disk.
# Therefore, create both directories if they do not exist before invoking the checker.
open_org_results_dir = os.path.join(root_dir, 'open', submitter, 'results')
closed_org_results_dir = os.path.join(root_dir, 'closed', submitter, 'results')
os.makedirs(open_org_results_dir, exist_ok=True)
os.makedirs(closed_org_results_dir, exist_ok=True)
# Run the checker.
checker_log = subprocess.getoutput('python3 {} --input {} --submitter {}'.format(submission_checker_py, root_dir, submitter))
print(checker_log)
checker_log_name = 'compliance_checker_log.txt'
# Write the checker results once under closed/ and once under open/.
for results_dir in [ open_org_results_dir, closed_org_results_dir ]:
checker_log_path = os.path.join(results_dir, checker_log_name)
with open(checker_log_path, 'w') as checker_log_file:
checker_log_file.write(checker_log)
print(results_dir)
print(' |_ %s' % checker_log_name)