#!/usr/bin/env python
# coding: utf-8
# # Generate [dividiti](http://dividiti.com)'s submissions to [MLPerf Inference v0.5](https://github.com/mlperf/inference/tree/master/v0.5)
#
# ## Overview
# This Jupyter notebook covers [dividiti](http://dividiti.com)'s submissions to [MLPerf Inference v0.5](https://github.com/mlperf/inference/tree/master/v0.5). It validates that experimental data obtained via automated, portable and reproducible [Collective Knowledge](http://cknowledge.org) workflows conforms to [General MLPerf Submission Rules](https://github.com/mlperf/policies/blob/master/submission_rules.adoc)
# and [MLPerf Inference Rules](https://github.com/mlperf/inference_policies/blob/master/inference_rules.adoc), including running the official [`submission_checker.py`](https://github.com/mlperf/inference/blob/master/v0.5/tools/submission/submission-checker.py).
# A live version of this Jupyter Notebook can be viewed [here](https://nbviewer.jupyter.org/urls/dl.dropbox.com/s/1xlv5oacgobrfd4/mlperf-inference-v0.5-dividiti.ipynb).
# ## Table of Contents
# 1. [Overview](#overview)
# 1. [Includes](#includes)
# 1. [System templates](#templates)
# 1. [Firefly RK3399](#templates_firefly)
# 1. [Linaro HiKey960](#templates_hikey960)
# 1. [Huawei Mate 10 Pro](#templates_mate10pro)
# 1. [Raspberry Pi 4](#templates_rpi4)
# 1. [HP Z640](#templates_velociti)
# 1. [Default](#templates_default)
# 1. [Systems](#systems)
# 1. [Implementations](#implementations)
# 1. [Get the experimental data](#get)
# 1. [Image Classification - Closed](#get_image_classification_closed)
# 1. [Image Classification - Open](#get_image_classification_open)
# 1. [Object Detection - Open](#get_object_detection_open)
# 1. [Generate the submission checklist](#checklist)
# 1. [Check the experimental data](#check)
#
# ## Includes
# ### Standard
# In[ ]:
import os
import sys
import json
import re
from pprint import pprint
from shutil import copy2
from copy import deepcopy
# ### Scientific
# If some of the scientific packages are missing, please install them using:
# ```
# # python3 -m pip install jupyter pandas numpy matplotlib seaborn --user
# ```
# In[ ]:
import IPython as ip
import pandas as pd
import numpy as np
import matplotlib as mp
import seaborn as sb
# In[ ]:
# Record the versions of the scientific stack for reproducibility.
_version_reports = (
    ('IPython version: %s', ip.__version__),
    ('Pandas version: %s', pd.__version__),
    ('NumPy version: %s', np.__version__),
    ('Matplotlib version: %s', mp.__version__),
    ('Seaborn version: %s', sb.__version__),
)
for _fmt, _ver in _version_reports:
    print(_fmt % _ver)
# In[ ]:
from IPython.display import Image, display
def display_in_full(df):
    """Display *df* without pandas truncating any rows or columns.

    NOTE: mutates the global pandas display options as a side effect
    (they stay set for subsequent cells).
    """
    pd.set_option('display.max_columns', len(df.columns))
    pd.set_option('display.max_rows', len(df.index))
    display(df)
# In[ ]:
import matplotlib.pyplot as plt
from matplotlib import cm
# Render figures inline (this file is an exported Jupyter notebook; the
# magic only works when executed inside IPython).
get_ipython().run_line_magic('matplotlib', 'inline')
# In[ ]:
# Plotting defaults shared by all figures in this notebook.
default_colormap = cm.autumn
default_fontsize = 16
default_barwidth = 0.8
default_figdpi = 200
# Wide, short figures suit the per-benchmark bar charts below.
default_figwidth = 24
default_figheight = 3
default_figsize = [default_figwidth, default_figheight]
# In[ ]:
# Keep the Matplotlib 1.x look under Matplotlib 2.x.
if mp.__version__[0] == '2':
    mp.style.use('classic')
# Apply the notebook-wide figure/font defaults in one shot.
mp.rcParams.update({
    'figure.max_open_warning': 200,
    'figure.dpi': default_figdpi,
    'font.size': default_fontsize,
    'legend.fontsize': 'medium',
})
# In[ ]:
# FIXME: Do not hardcode - locate via CK.
# Hardcoded path to a CK-built pycocotools installation; needed so that the
# COCO accuracy (mAP) data can be loaded below. Machine-specific.
pythonpath_coco = '/home/anton/CK_TOOLS/tool-coco-master-gcc-8.3.0-compiler.python-3.6.9-linux-64/'
sys.path.append(pythonpath_coco)
from pycocotools.coco import COCO
# ### Collective Knowledge
# If CK is not installed, please install it using:
# ```
# # python -m pip install ck
# ```
# In[ ]:
import ck.kernel as ck
# Record the Collective Knowledge version for reproducibility.
print ('CK version: %s' % ck.__version__)
#
# ## System templates
#
# ### [Firefly-RK3399](http://en.t-firefly.com/product/rk3399/)
# In[ ]:
# System description template for the Firefly-RK3399 board (big.LITTLE
# Cortex-A72/A53 CPU + Mali-T860 GPU). `division` and `framework` are left
# empty here and filled in per submission below.
firefly = {
    "division": "",
    "submitter": "dividiti",
    "status": "available",
    "system_name": "Firefly-RK3399 (firefly)",
    "number_of_nodes": "1",
    "host_processor_model_name": "Arm Cortex-A72 MP2 (big); Arm Cortex-A53 MP4 (LITTLE)",
    "host_processors_per_node": "1",
    "host_processor_core_count": "2 (big); 4 (LITTLE)",
    "host_processor_frequency": "1800 MHz (big), 1400 MHz (LITTLE)",
    "host_processor_caches": "L1I$ 48 KiB, L1D$ 32 KiB, L2$ 1 MiB (big); L1I$ 32 KiB, L1D$ 32 KiB, L2$ 512 KiB (LITTLE)",
    "host_memory_configuration": "-",
    "host_memory_capacity": "4 GiB",
    "host_storage_capacity": "128 GiB",
    "host_storage_type": "SanDisk Extreme microSD",
    "host_processor_interconnect": "-",
    "host_networking": "-",
    "host_networking_topology": "-",
    "accelerators_per_node": "1",
    "accelerator_model_name": "Arm Mali-T860 MP4",
    "accelerator_frequency": "800 MHz",
    "accelerator_host_interconnect": "-",
    "accelerator_interconnect": "-",
    "accelerator_interconnect_topology": "-",
    "accelerator_memory_capacity": "4 GiB (shared with host)",
    "accelerator_memory_configuration": "-",
    "accelerator_on-chip_memories": "-",
    "cooling": "on-board fan",
    "hw_notes": "http://en.t-firefly.com/product/rk3399/; http://opensource.rock-chips.com/wiki_RK3399",
    "framework": "",
    "operating_system": "Ubuntu 16.04.6 LTS; kernel 4.4.77 #554 (Thu Nov 30 11:30:11 HKT 2017)",
    "other_software_stack": "GCC 7.4.0; Python 3.5.2; OpenCL driver 1.2 v1.r13p0-00rel0-git(a4271c9).31ba04af2d3c01618138bef3aed66c2c",
    "sw_notes": "Powered by Collective Knowledge v1.11.1"
}
#
# ### [Linaro HiKey960](https://www.96boards.org/product/hikey960/)
# In[ ]:
# System description template for the Linaro HiKey960 board (big.LITTLE
# Cortex-A73/A53 CPU + Mali-G71 GPU). `division` and `framework` are filled
# in per submission below.
hikey960 = {
    "division": "",
    "submitter": "dividiti",
    "status": "available",
    "system_name": "Linaro HiKey960 (hikey960)",
    "number_of_nodes": "1",
    "host_processor_model_name": "Arm Cortex-A73 MP4 (big); Arm Cortex-A53 MP4 (LITTLE)",
    "host_processors_per_node": "1",
    "host_processor_core_count": "4 (big); 4 (LITTLE)",
    "host_processor_frequency": "2362 MHz (big), 1844 MHz (LITTLE)",
    "host_processor_caches": "L1I$ 256=4x64 KiB, L1D$ 256=4x64 KiB, L2$ 2 MiB (big); L1I$ 128=4x32 KiB, L1D$ 128=4x32 KiB, L2$ 1 MiB (LITTLE)",
    "host_memory_configuration": "-",
    "host_memory_capacity": "3 GiB",
    "host_storage_capacity": "128 GiB",
    "host_storage_type": "SanDisk Extreme microSD",
    "host_processor_interconnect": "-",
    "host_networking": "-",
    "host_networking_topology": "-",
    "accelerators_per_node": "1",
    "accelerator_model_name": "Arm Mali-G71 MP8",
    "accelerator_frequency": "800 MHz",
    "accelerator_host_interconnect": "-",
    "accelerator_interconnect": "-",
    "accelerator_interconnect_topology": "-",
    "accelerator_memory_capacity": "3 GiB (shared with host)",
    "accelerator_memory_configuration": "-",
    "accelerator_on-chip_memories": "-",
    "cooling": "small external fan",
    "hw_notes": "http://www.hisilicon.com/en/Products/ProductList/Kirin",
    "framework": "",
    "operating_system": "Debian 9; kernel 4.19.5-hikey #26 (Thu Aug 22 07:58:35 UTC 2019)",
    "other_software_stack": "GCC 7.4.0; Python 3.5.3; OpenCL driver 2.0 v1.r16p0",
    "sw_notes": "Powered by Collective Knowledge v1.11.1"
}
#
# ### Huawei Mate 10 Pro
# In[ ]:
# System description template for the Huawei Mate 10 Pro phone (Kirin 970:
# big.LITTLE Cortex-A73/A53 CPU + Mali-G72 GPU). `division` and `framework`
# are filled in per submission below.
mate10pro = {
    "division": "",
    "submitter": "dividiti",
    "status": "available",
    "system_name": "Huawei Mate 10 Pro (mate10pro)",
    "number_of_nodes": "1",
    "host_processor_model_name": "Arm Cortex-A73 MP4 (big); Arm Cortex-A53 MP4 (LITTLE)",
    "host_processors_per_node": "1",
    "host_processor_core_count": "4 (big); 4 (LITTLE)",
    "host_processor_frequency": "2360 MHz (big), 1800 MHz (LITTLE)",
    "host_processor_caches": "L1I$ 256=4x64 KiB, L1D$ 256=4x64 KiB, L2$ 2 MiB (big); L1I$ 128=4x32 KiB, L1D$ 128=4x32 KiB, L2$ 1 MiB (LITTLE)",
    "host_memory_configuration": "-",
    "host_memory_capacity": "6 GiB",
    "host_storage_capacity": "128 GiB",
    "host_storage_type": "Flash",
    "host_processor_interconnect": "-",
    "host_networking": "-",
    "host_networking_topology": "-",
    "accelerators_per_node": "1",
    "accelerator_model_name": "Arm Mali-G72 MP12",
    "accelerator_frequency": "850 MHz",
    "accelerator_host_interconnect": "-",
    "accelerator_interconnect": "-",
    "accelerator_interconnect_topology": "-",
    "accelerator_memory_capacity": "6 GiB (shared with host)",
    "accelerator_memory_configuration": "-",
    "accelerator_on-chip_memories": "-",
    "cooling": "phone case",
    "hw_notes": "https://en.wikichip.org/wiki/hisilicon/kirin/970",
    "framework": "",
    "operating_system": "Android 9.1.0.300(C782E5R1P11); kernel 4.9.148 (Sat Jun 29 20:41:06 CST 2019)",
    "other_software_stack": "Android NDK 17c (LLVM 6.0.2); OpenCL driver 2.0 v1.r14p0-00cet0.0416641283c5d6e2d53c163d0ca99357",
    "sw_notes": "Powered by Collective Knowledge v1.11.1"
}
#
# ### Raspberry Pi 4
# In[ ]:
# System description template for the Raspberry Pi 4 (CPU-only: the
# accelerator fields are all "-"/"0"). `division` and `framework` are
# filled in per submission below.
rpi4 = {
    "division": "",
    "submitter": "dividiti",
    "status": "available",
    "system_name": "Raspberry Pi 4 (rpi4)",
    "number_of_nodes": "1",
    "host_processor_model_name": "Arm Cortex-A72 MP4",
    "host_processors_per_node": "1",
    "host_processor_core_count": "4",
    "host_processor_frequency": "1500 MHz",
    "host_processor_caches": "L1I$ 128=4x32 KiB, L1D$ 128=4x32 KiB, L2$ 1 MiB",
    "host_memory_configuration": "-",
    "host_memory_capacity": "4 GiB",
    "host_storage_capacity": "128 GiB",
    "host_storage_type": "SanDisk Extreme Pro microSD",
    "host_processor_interconnect": "-",
    "host_networking": "-",
    "host_networking_topology": "-",
    "accelerators_per_node": "0",
    "accelerator_model_name": "-",
    "accelerator_frequency": "-",
    "accelerator_host_interconnect": "-",
    "accelerator_interconnect": "-",
    "accelerator_interconnect_topology": "-",
    "accelerator_memory_capacity": "-",
    "accelerator_memory_configuration": "-",
    "accelerator_on-chip_memories": "-",
    "cooling": "http://www.raspberrypiwiki.com/index.php/Armor_Case_B",
    "hw_notes": "https://www.raspberrypi.org/products/raspberry-pi-4-model-b/specifications/",
    "framework": "",
    "operating_system": "Raspbian Buster (Debian 10); kernel 4.19.66-v7l+ #1253 (Thu Aug 15 12:02:08 BST 2019)",
    "other_software_stack": "GCC 8.3.0; Python 3.7.3",
    "sw_notes": "Powered by Collective Knowledge v1.11.1"
}
#
# ### HP Z640 workstation
# In[ ]:
# System description template for the HP Z640 workstation (Xeon E5-2650 v3
# + GTX 1080). Unlike the other templates, `framework` is pre-set here and
# overwritten per backend below; `division` is filled in per submission.
velociti = {
    "division": "",
    "submitter": "dividiti",
    "status": "available",
    "system_name": "HP Z640 G1X62EA workstation (velociti)",
    "number_of_nodes": "1",
    "host_processor_model_name": "Intel Xeon CPU E5-2650 v3",
    "host_processors_per_node": "1",
    "host_processor_core_count": "10",
    "host_processor_frequency": "2300 MHz (base); 3000 MHz (turbo)",
    "host_processor_caches": "L1I$ 10x32 KiB, L1D$ 10x32 KiB; L2$ 10x256 KiB; L3$ 25 MiB",
    "host_memory_configuration": "DDR4 (max bandwidth 68 GB/s)",
    "host_memory_capacity": "32 GiB",
    "host_storage_capacity": "512 GiB",
    "host_storage_type": "SSD",
    "host_processor_interconnect": "-",
    "host_networking": "-",
    "host_networking_topology": "-",
    "accelerators_per_node": "1",
    "accelerator_model_name": "NVIDIA GeForce GTX 1080",
    "accelerator_frequency": "1607 MHz (base); 1733 MHz (boost)",
    "accelerator_host_interconnect": "-",
    "accelerator_interconnect": "-",
    "accelerator_interconnect_topology": "-",
    "accelerator_memory_capacity": "8 GiB",
    "accelerator_memory_configuration": "GDDR5X (max bandwidth 320 GB/s)",
    "accelerator_on-chip_memories": "20x48 KiB",
    "cooling": "standard",
    "hw_notes": "The Intel CPU has reached its end-of-life (EOL). http://h20195.www2.hp.com/v2/default.aspx?cc=ie&lc=en&oid=7528701; https://ark.intel.com/products/81705/Intel-Xeon-Processor-E5-2650-v3-25M-Cache-2_30-GHz; http://www.cpu-world.com/CPUs/Xeon/Intel-Xeon%20E5-2650%20v3.html; http://www.geforce.co.uk/hardware/10series/geforce-gtx-1080/",
    "framework": "TensorFlow v1.14",
    "operating_system": "Ubuntu 16.04.6 LTS; kernel 4.4.0-112-generic #135-Ubuntu SMP (Fri Jan 19 11:48:36 UTC 2018)",
    "other_software_stack": "Driver 430.50; CUDA 10.1; TensorRT 5.1.5; Docker 19.03.3 (build a872fc2); GCC 7.4.0; Python 3.5.2",
    "sw_notes": "Powered by Collective Knowledge v1.11.4"
}
#
# ### Default
# In[ ]:
# Default `system_desc_id.json` (to catch uninitialized descriptions).
# Fields that the MLPerf submission rules mandate default to the sentinel
# string "required" so that any description left uninitialized is easy to
# spot in the generated submission; optional fields default to "".
# FIX: "division" previously held the misspelled sentinel "reqired",
# which would defeat any check for the literal "required".
default_system_json = {
    "division": "required",
    "submitter": "required",
    "status": "required",
    "system_name": "required",
    "number_of_nodes": "required",
    "host_processor_model_name": "required",
    "host_processors_per_node": "required",
    "host_processor_core_count": "required",
    "host_processor_frequency": "",
    "host_processor_caches": "",
    "host_memory_configuration": "",
    "host_memory_capacity": "required",
    "host_storage_capacity": "required",
    "host_storage_type": "required",
    "host_processor_interconnect": "",
    "host_networking": "",
    "host_networking_topology": "",
    "accelerators_per_node": "required",
    "accelerator_model_name": "required",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "required",
    "accelerator_memory_configuration": "",
    "accelerator_on-chip_memories": "",
    "cooling": "",
    "hw_notes": "",
    "framework": "required",
    "operating_system": "required",
    "other_software_stack": "required",
    "sw_notes": ""
}
#
# ## Systems
# In[ ]:
# Generate the division_systems dictionary: one fully-populated system
# description per (division, platform, library-backend) combination.
division_systems = {}
platform_templates = {
    'firefly'   : firefly,
    'hikey960'  : hikey960,
    'mate10pro' : mate10pro,
    'rpi4'      : rpi4,
    'velociti'  : velociti
}
divisions = [ 'open', 'closed' ]
platforms = [ 'firefly', 'hikey960', 'mate10pro', 'rpi4', 'velociti' ]
# Human-readable framework names keyed by library-backend tag.
# FIX: hoisted out of the innermost loop below — the mapping is
# loop-invariant, so rebuilding it on every iteration was wasted work.
frameworks = {
    'armnn-v19.08-opencl' : 'ArmNN v19.08 (OpenCL)',
    'armnn-v19.08-neon' : 'ArmNN v19.08 (Neon)',
    'tflite-v1.13': 'TFLite v1.13.1',
    'tflite-v1.15': 'TFLite v1.15.0-rc2',
    'tensorflow-v1.14-cpu': 'TensorFlow v1.14 (CPU)',
    'tensorflow-v1.14-cuda': 'TensorFlow v1.14 (CUDA)',
    'tensorflow-v1.14-tensorrt': 'TensorFlow v1.14 (TensorRT-static)',
    'tensorflow-v1.14-tensorrt-dynamic': 'TensorFlow v1.14 (TensorRT-dynamic)',
}
for division in divisions:
    for platform in platforms:
        # Each platform supports a different set of inference libraries.
        if platform == 'velociti':
            libraries = [ 'tensorflow-v1.14' ]
        elif platform == 'mate10pro':
            libraries = [ 'tflite-v1.13', 'armnn-v19.08' ]
        else:
            libraries = [ 'tflite-v1.15', 'armnn-v19.08' ]
        for library in libraries:
            # Expand each library into its backend variants.
            if library == 'armnn-v19.08':
                if platform == 'rpi4':
                    backends = [ 'neon' ]  # rpi4 has no OpenCL-capable GPU
                else:
                    backends = [ 'neon', 'opencl' ]
                library_backends = [ library+'-'+backend for backend in backends ]
            elif library == 'tensorflow-v1.14':
                backends = [ 'cpu', 'cuda', 'tensorrt', 'tensorrt-dynamic' ]
                library_backends = [ library+'-'+backend for backend in backends ]
            else:
                library_backends = [ library ]
            for library_backend in library_backends:
                division_system = division+'-'+platform+'-'+library_backend
                # Deep-copy so per-combination updates never leak into the
                # shared platform template.
                template = deepcopy(platform_templates[platform])
                template.update({
                    'division'  : division,
                    'submitter' : 'dividiti', # 'dividiti' if platform != 'velociti' else 'dividiti, Politecnico di Milano'
                    'status'    : 'available' if library_backend != 'tensorflow-v1.14-cpu' else 'RDI',
                    'framework' : frameworks[library_backend]
                })
                # Blank out the accelerator fields for CPU-only runs: any
                # non-TensorFlow backend that is not OpenCL (i.e. TFLite and
                # ArmNN-Neon), plus the explicit TensorFlow CPU backend.
                if (not library_backend.startswith('tensorflow') and not library_backend.endswith('opencl')) or library_backend.endswith('cpu'):
                    template.update({
                        'accelerator_frequency' : '-',
                        'accelerator_memory_capacity' : '-',
                        'accelerator_memory_configuration': '-',
                        'accelerator_model_name' : '-',
                        'accelerator_on-chip_memories': '-',
                        'accelerators_per_node' : '0',
                    })
                division_systems[division_system] = template
                print("=" * 100)
                print(division_system)
                print("=" * 100)
                pprint(template)
                print("-" * 100)
                print("")
#
# ## Implementations
# ### Image classification
# In[ ]:
# Generate the implementation_benchmarks dictionary: one model description
# (`system_desc_id_imp.json` contents) per implementation-benchmark pair.
implementation_benchmarks = {}
# Default `system_desc_id_imp.json` (to catch uninitialized descriptions).
default_implementation_benchmark_json = {
    "input_data_types": "required",
    "retraining": "required",
    "starting_weights_filename": "required",
    "weight_data_types": "required",
    "weight_transformations": "required"
}
# For each image classification implementation.
for implementation in [ 'image-classification-tflite', 'image-classification-armnn-tflite' ]:
    # Add MobileNet.
    implementation_mobilenet = implementation+'-'+'mobilenet'
    implementation_benchmarks[implementation_mobilenet] = {
        "input_data_types": "fp32",
        "weight_data_types": "fp32",
        "retraining": "no",
        "starting_weights_filename": "https://zenodo.org/record/2269307/files/mobilenet_v1_1.0_224.tgz",
        "weight_transformations": "TFLite"
    }
    # Add MobileNet quantized.
    implementation_mobilenet_quantized = implementation+'-'+'mobilenet-quantized'
    implementation_benchmarks[implementation_mobilenet_quantized] = {
        "input_data_types": "uint8",
        "weight_data_types": "uint8",
        "retraining": "no",
        "starting_weights_filename": "https://zenodo.org/record/2269307/files/mobilenet_v1_1.0_224_quant.tgz",
        "weight_transformations": "TFLite"
    }
    # Add ResNet.
    implementation_resnet = implementation+'-'+'resnet'
    implementation_benchmarks[implementation_resnet] = {
        "input_data_types": "fp32",
        "weight_data_types": "fp32",
        "retraining": "no",
        "starting_weights_filename": "https://zenodo.org/record/2535873/files/resnet50_v1.pb",
        "weight_transformations": "TF -> TFLite"
    }
    # Add any MobileNets-v1,v2 model.
    # NB: deliberately defined inside the loop — it reads `implementation`
    # from the enclosing scope.
    def add_implementation_mobilenet(implementation_benchmarks, version, multiplier, resolution, quantized=False):
        base_url = 'https://zenodo.org/record/2269307/files' if version == 1 else 'https://zenodo.org/record/2266646/files'
        url = '{}/mobilenet_v{}_{}_{}{}.tgz'.format(base_url, version, multiplier, resolution, '_quant' if quantized else '')
        benchmark = 'mobilenet-v{}-{}-{}{}'.format(version, multiplier, resolution, '-quantized' if quantized else '')
        # Quantized variants are only registered for MobileNet-v1 with TFLite.
        if quantized and (version != 1 or implementation != 'image-classification-tflite'):
            return
        if implementation == 'image-classification-tflite':
            weights_transformations = 'TFLite'
        elif implementation == 'image-classification-armnn-tflite':
            weights_transformations = 'TFLite -> ArmNN'
        else:
            # FIX: was `raise "<string>"`, which itself raises TypeError
            # ("exceptions must derive from BaseException") under Python 3.
            raise ValueError("Unknown implementation '%s'!" % implementation)
        implementation_benchmark = implementation+'-'+benchmark
        implementation_benchmarks[implementation_benchmark] = {
            "input_data_types": "uint8" if quantized else "fp32",
            "weight_data_types": "uint8" if quantized else "fp32",
            "retraining": "no",
            "starting_weights_filename": url,
            "weight_transformations": weights_transformations
        }
        return
    # MobileNet-v1.
    version = 1
    for multiplier in [ 1.0, 0.75, 0.5, 0.25 ]:
        for resolution in [ 224, 192, 160, 128 ]:
            add_implementation_mobilenet(implementation_benchmarks, version, multiplier, resolution, quantized=False)
            add_implementation_mobilenet(implementation_benchmarks, version, multiplier, resolution, quantized=True)
    # MobileNet-v2.
    version = 2
    for multiplier in [ 1.0, 0.75, 0.5, 0.35 ]:
        for resolution in [ 224, 192, 160, 128, 96 ]:
            add_implementation_mobilenet(implementation_benchmarks, version, multiplier, resolution)
    add_implementation_mobilenet(implementation_benchmarks, version=2, multiplier=1.3, resolution=224)
    add_implementation_mobilenet(implementation_benchmarks, version=2, multiplier=1.4, resolution=224)
# ### Object detection
# In[ ]:
# Object detection models for the open division: display name, download URL
# and the input resolution (width x height) each model expects.
object_detection_benchmarks = {
    'rcnn-nas-lowproposals' : {
        "name" : "Faster-RCNN-NAS lowproposals",
        "url" : "http://download.tensorflow.org/models/object_detection/faster_rcnn_nas_lowproposals_coco_2018_01_28.tar.gz",
        "width" : 1200,
        "height" : 1200,
    },
    'rcnn-resnet50-lowproposals' : {
        "name" : "Faster-RCNN-ResNet50 lowproposals",
        "url" : "http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_lowproposals_coco_2018_01_28.tar.gz",
        "width" : 1024,
        "height" : 600,
    },
    'rcnn-resnet101-lowproposals' : {
        "name" : "Faster-RCNN-ResNet101 lowproposals",
        "url" : "http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet101_lowproposals_coco_2018_01_28.tar.gz",
        "width" : 1024,
        "height" : 600,
    },
    'rcnn-inception-resnet-v2-lowproposals' : {
        "name" : "Faster-RCNN-Inception-ResNet-v2 lowproposals",
        "url" : "http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_resnet_v2_atrous_lowproposals_coco_2018_01_28.tar.gz",
        "width" : 1024,
        "height" : 600,
    },
    'rcnn-inception-v2' : {
        "name" : "Faster-RCNN Inception-v2",
        "url" : "http://download.tensorflow.org/models/object_detection/faster_rcnn_inception_v2_coco_2018_01_28.tar.gz",
        "width" : 1024,
        "height" : 600,
    },
    'ssd-inception-v2' : {
        "name" : "SSD-Inception-v2",
        "url" : "http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2018_01_28.tar.gz",
        "width" : 300,
        "height" : 300,
    },
    # NOTE(review): the next two entries look swapped relative to their keys:
    # the '...-quantized-mlperf' key carries the regular Google model
    # (name "MLPerf SSD-MobileNet", fp32 URL), while '...-non-quantized-mlperf'
    # carries Habana's 8-bit finetuned model (name "... quantized").
    # Confirm against the actual v0.5 submission before changing either key,
    # since the keys feed into submission directory names.
    'ssd-mobilenet-v1-quantized-mlperf' : {
        "name" : "MLPerf SSD-MobileNet",
        "url" : "https://zenodo.org/record/3361502/files/ssd_mobilenet_v1_coco_2018_01_28.tar.gz",
        "width" : 300,
        "height" : 300,
        "provenance" : "Google",
    },
    'ssd-mobilenet-v1-non-quantized-mlperf' : {
        "name" : "MLPerf SSD-MobileNet quantized",
        "url" : "https://zenodo.org/record/3252084/files/mobilenet_v1_ssd_8bit_finetuned.tar.gz",
        "width" : 300,
        "height" : 300,
        "provenance" : "Habana"
    },
    'ssd-mobilenet-v1-fpn' : {
        "name" : "SSD-MobileNet-v1 FPN SBP",
        "url" : "http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz",
        "width" : 640,
        "height" : 640,
    },
    'ssd-resnet50-fpn' : {
        "name" : "SSD-ResNet50-v1 FPN SBP",
        "url" : "http://download.tensorflow.org/models/object_detection/ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz",
        "width" : 640,
        "height" : 640,
    },
    'ssdlite-mobilenet-v2' : {
        "name" : "SSDLite-MobileNet-v2",
        "url" : "http://download.tensorflow.org/models/object_detection/ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz",
        "width" : 300,
        "height" : 300,
    },
    'yolo-v3' : {
        "name" : "YOLO-v3",
        "url" : "https://zenodo.org/record/3386327/files/yolo_v3_coco.tar.gz",
        "width" : 416,
        "height" : 416,
        "provenance" : "https://github.com/YunYang1994/tensorflow-yolov3/"
    }
}
# Register every object detection benchmark under each implementation.
for implementation in [ 'mlperf-inference-vision' ]:
    for benchmark, spec in object_detection_benchmarks.items():
        implementation_benchmark = implementation+'-'+benchmark
        implementation_benchmarks[implementation_benchmark] = {
            "input_data_types": "fp32",
            "weight_data_types": "fp32",
            "retraining": "no",
            "starting_weights_filename": spec['url'],
            # "name" : spec['name'], # submission checker complains about "unknwon field name"
            "weight_transformations": "None (TensorFlow)"
        }
# from pprint import pprint
# pprint(implementation_benchmarks)
# In[ ]:
# Per-implementation README text, written verbatim into each submission's
# code/<implementation>/ directory.
implementation_readmes = {}
implementation_readmes['image-classification-tflite'] = """# MLPerf Inference - Image Classification - TFLite
This C++ implementation uses TFLite to run TFLite models for Image Classification on CPUs.
## Links
- [Jupyter notebook](https://nbviewer.jupyter.org/urls/dl.dropbox.com/s/1xlv5oacgobrfd4/mlperf-inference-v0.5-dividiti.ipynb)
- [Source code](https://github.com/ctuning/ck-mlperf/tree/master/program/image-classification-tflite-loadgen).
- [Instructions](https://github.com/mlperf/inference/blob/master/v0.5/classification_and_detection/optional_harness_ck/classification/tflite/README.md).
"""
implementation_readmes['image-classification-armnn-tflite'] = """# MLPerf Inference - Image Classification - ArmNN-TFLite
This C++ implementation uses ArmNN with the TFLite frontend to run TFLite models for Image Classification on Arm Cortex CPUs and Arm Mali GPUs.
## Links
- [Jupyter notebook](https://nbviewer.jupyter.org/urls/dl.dropbox.com/s/1xlv5oacgobrfd4/mlperf-inference-v0.5-dividiti.ipynb)
- [Source code](https://github.com/ctuning/ck-mlperf/tree/master/program/image-classification-armnn-tflite-loadgen).
- [Instructions](https://github.com/ARM-software/armnn-mlperf/blob/master/README.md).
"""
implementation_readmes['mlperf-inference-vision'] = """# MLPerf Inference - Object Detection - TensorFlow
This Python implementation is the official MLPerf Inference vision application, modified to support other
object detection models and run with TensorRT.
## Links
- [CK wrapper](https://github.com/ctuning/ck-object-detection/tree/master/program/mlperf-inference-vision).
- [vision_with_ck branch in dividiti's fork of mlperf/inference](https://github.com/dividiti/inference/tree/vision_with_ck).
- [Docker image with instructions](https://github.com/ctuning/ck-mlperf/tree/master/docker/mlperf-inference-vision-with-ck.tensorrt.ubuntu-18.04).
- [Jupyter notebook](https://nbviewer.jupyter.org/urls/dl.dropbox.com/s/1xlv5oacgobrfd4/mlperf-inference-v0.5-dividiti.ipynb)
"""
# In[ ]:
# Locate each implementation's CK program entry on disk via `ck find`.
implementation_paths = {}
for implementation in [ 'image-classification-tflite', 'image-classification-armnn-tflite', 'mlperf-inference-vision' ]:
    implementation_uoa = implementation
    if implementation.startswith('image-classification'):
        # The LoadGen-integrated programs carry a '-loadgen' suffix in CK.
        implementation_uoa += '-loadgen'
        repo_uoa = 'ck-mlperf'
    else: # TODO: move to ck-mlperf, then no need for special case.
        repo_uoa = 'ck-object-detection'
    r = ck.access({'action':'find', 'repo_uoa':repo_uoa, 'module_uoa':'program', 'data_uoa':implementation_uoa})
    if r['return']>0:
        print('Error: %s' % r['error'])
        # FIX: use sys.exit instead of the `exit` builtin — the latter is a
        # site.py convenience helper and is not guaranteed to exist when the
        # script runs non-interactively (e.g. with `python -S`).
        sys.exit(1)
    implementation_paths[implementation] = r['path']
# In[ ]:
# Per-(division, task) README text for the measurements directories,
# keyed as '<division>-<task>'. The templates embed the division name
# (both capitalized and lowercase) via str.format.
measurements_readmes = {}
task = 'image-classification'
for division_upper in [ 'Closed', 'Open' ]:
    division_lower = division_upper.lower()
    measurements_readmes[division_lower+'-'+task] = '''# MLPerf Inference - {} Division - Image Classification
We performed our measurements using automated, customizable, portable and reproducible
[Collective Knowledge](http://cknowledge.org) workflows. Our workflows automatically
install dependencies (models, datasets, etc.), preprocess input data in the correct way,
and so on.
## CK repositories
As CK is always evolving, it is hard to pin particular revisions of all repositories.
The most relevant repositories and their latest revisions on the submission date (11/Oct/2019):
- [ck-mlperf](https://github.com/ctuning/ck-mlperf) @ [ee77cfd](https://github.com/ctuning/ck-mlperf/commit/ee77cfd3ddfa30739a8c2f483fe9ba83a233a000) (contains programs integrated with LoadGen, model packages and scripts).
- [ck-env](https://github.com/ctuning/ck-env) @ [f9ac337](https://github.com/ctuning/ck-env/commit/f9ac3372cdc82fa46b2839e45fc67848ab4bac03) (contains dataset descriptions, preprocessing methods, etc.)
- [ck-tensorflow](https://github.com/ctuning/ck-tensorflow) @ [eff8bec](https://github.com/ctuning/ck-tensorflow/commit/eff8bec192021162e4a336dbd3e795afa30b7d26) (contains TFLite packages).
- [armnn-mlperf](https://github.com/arm-software/armnn-mlperf) @ [42f44a2](https://github.com/ARM-software/armnn-mlperf/commit/42f44a266b6b4e04901255f46f6d34d12589208f) (contains ArmNN/ArmCL packages).
## Links
- [Bash script](https://github.com/ctuning/ck-mlperf/tree/master/script/mlperf-inference-v0.5.{}.image-classification) used to invoke benchmarking on Linux systems or Android devices.
'''.format(division_upper, division_lower)
# Same structure for the object detection task.
task = 'object-detection'
for division_upper in [ 'Closed', 'Open' ]:
    division_lower = division_upper.lower()
    measurements_readmes[division_lower+'-'+task] = '''# MLPerf Inference - {} Division - Object Detection
We performed our measurements using automated, customizable, portable and reproducible
[Collective Knowledge](http://cknowledge.org) workflows. Our workflows automatically
install dependencies (models, datasets, etc.), preprocess input data in the correct way,
and so on.
## CK repositories
As CK is always evolving, it is hard to pin particular revisions of all repositories.
The most relevant repositories and their latest revisions on the submission date (18/Oct/2019):
- [ck-mlperf](https://github.com/ctuning/ck-mlperf) @ [ef1fced](https://github.com/ctuning/ck-mlperf/commit/ef1fcedd495fd03b5ad6d62d62c8ba271854f2ad) (contains the CK program wrapper, MLPerf SSD-MobileNet model packages and scripts).
- [ck-object-detection](https://github.com/ctuning/ck-object-detection) @ [780d328](https://github.com/ctuning/ck-object-detection/commit/780d3288ec19656cb60c5ad39b2486bbf0fbf97a) (contains most model packages)
- [ck-env](https://github.com/ctuning/ck-env) @ [5af9fbd](https://github.com/ctuning/ck-env/commit/5af9fbd93ad6c6465b631716645ad9442a333442) (contains dataset descriptions, preprocessing methods, etc.)
## Links
- [Docker image with instructions](https://github.com/ctuning/ck-mlperf/tree/master/docker/mlperf-inference-vision-with-ck.tensorrt.ubuntu-18.04).
- [Bash script](https://github.com/ctuning/ck-mlperf/tree/master/script/mlperf-inference-v0.5.{}.object-detection) used to invoke benchmarking via the Docker image.
'''.format(division_upper, division_lower)
# In[ ]:
# Snapshot of https://github.com/dividiti/inference/blob/61220457dec221ed1984c62bd9d382698bd71bc6/v0.5/mlperf.conf
# Embedded verbatim so each generated submission can ship the exact
# LoadGen configuration used for the measurements.
mlperf_conf_6122045 = '''
# The format of this config file is 'key = value'.
# The key has the format 'model.scenario.key'. Value is mostly int64_t.
# Model maybe '*' as wildcard. In that case the value applies to all models.
# All times are in milli seconds
*.SingleStream.target_latency = 10
*.SingleStream.target_latency_percentile = 90
*.SingleStream.min_duration = 60000
*.SingleStream.min_query_count = 1024
*.MultiStream.target_qps = 20
*.MultiStream.target_latency_percentile = 99
*.MultiStream.samples_per_query = 4
*.MultiStream.max_async_queries = 1
*.MultiStream.target_latency = 50
*.MultiStream.min_duration = 60000
*.MultiStream.min_query_count = 270336
ssd-resnet34.MultiStream.target_qps = 15
ssd-resnet34.MultiStream.target_latency = 66
gnmt.MultiStream.min_query_count = 90112
gnmt.MultiStream.target_latency = 100
gnmt.MultiStream.target_qps = 10
gnmt.MultiStream.target_latency_percentile = 97
*.Server.target_qps = 1.0
*.Server.target_latency = 10
*.Server.target_latency_percentile = 99
*.Server.target_duration = 0
*.Server.min_duration = 60000
*.Server.min_query_count = 270336
resnet50.Server.target_latency = 15
ssd-resnet34.Server.target_latency = 100
gnmt.Server.min_query_count = 90112
gnmt.Server.target_latency = 250
gnmt.Server.target_latency_percentile = 97
*.Offline.target_qps = 1.0
*.Offline.target_latency_percentile = 90
*.Offline.min_duration = 60000
*.Offline.min_query_count = 1
'''
#
# ## Get the experimental data
# Download experimental data and add CK repositories as follows.
#
# ### Image Classification - Closed (MobileNet, ResNet)
# #### `firefly`
# ```
# $ wget https://www.dropbox.com/s/3md826fk7k1taf3/mlperf.closed.image-classification.firefly.tflite-v1.15.zip
# $ ck add repo --zip=mlperf.closed.image-classification.firefly.tflite-v1.15.zip
#
# $ wget https://www.dropbox.com/s/jusoz329mhixpxm/mlperf.closed.image-classification.firefly.armnn-v19.08.neon.zip
# $ ck add repo --zip=mlperf.closed.image-classification.firefly.armnn-v19.08.neon.zip
#
# $ wget https://www.dropbox.com/s/08lzbz7jl2w5jhu/mlperf.closed.image-classification.firefly.armnn-v19.08.opencl.zip
# $ ck add repo --zip=mlperf.closed.image-classification.firefly.armnn-v19.08.opencl.zip
# ```
# #### `hikey960`
# ```
# $ wget https://www.dropbox.com/s/lqnffl6wbaeceul/mlperf.closed.image-classification.hikey960.tflite-v1.15.zip
# $ ck add repo --zip=mlperf.closed.image-classification.hikey960.tflite-v1.15.zip
#
# $ wget https://www.dropbox.com/s/6m6uv1d33yc82f8/mlperf.closed.image-classification.hikey960.armnn-v19.08.neon.zip
# $ ck add repo --zip=mlperf.closed.image-classification.hikey960.armnn-v19.08.neon.zip
#
# $ wget https://www.dropbox.com/s/bz56y4damfqggr8/mlperf.closed.image-classification.hikey960.armnn-v19.08.opencl.zip
# $ ck add repo --zip=mlperf.closed.image-classification.hikey960.armnn-v19.08.opencl.zip
# ```
# #### `rpi4`
# ```
# $ wget https://www.dropbox.com/s/ig97x9cqoxfs3ne/mlperf.closed.image-classification.rpi4.tflite-v1.15.zip
# $ ck add repo --zip=mlperf.closed.image-classification.rpi4.tflite-v1.15.zip
#
# $ wget https://www.dropbox.com/s/ohcuyes409h66tx/mlperf.closed.image-classification.rpi4.armnn-v19.08.neon.zip
# $ ck add repo --zip=mlperf.closed.image-classification.rpi4.armnn-v19.08.neon.zip
# ```
# #### `mate10pro`
# ```
# $ wget https://www.dropbox.com/s/r7hss1sd0268b9j/mlperf.closed.image-classification.mate10pro.armnn-v19.08.neon.zip
# $ ck add repo --zip=mlperf.closed.image-classification.mate10pro.armnn-v19.08.neon.zip
#
# $ wget https://www.dropbox.com/s/iflzxbxcv3qka9x/mlperf.closed.image-classification.mate10pro.armnn-v19.08.opencl.zip
# $ ck add repo --zip=mlperf.closed.image-classification.mate10pro.armnn-v19.08.opencl.zip
# ```
# **NB:** We aborted the ResNet accuracy experiment with TFLite, as it was estimated to take 17 hours.
# #### `mate10pro` (only for testing the checker)
# ##### BAD_LOADGEN
# ```
# $ wget https://www.dropbox.com/s/nts8e7unb7vm68f/mlperf.closed.image-classification.mate10pro.tflite-v1.13.mobilenet.BAD_LOADGEN.zip
# $ ck add repo --zip=mlperf.closed.image-classification.mate10pro.tflite-v1.13.mobilenet.BAD_LOADGEN.zip
# ```
# ##### BAD_RESNET
# ```
# $ wget https://www.dropbox.com/s/bi2owxxpcfm6n2s/mlperf.closed.image-classification.mate10pro.armnn-v19.08.opencl.BAD_RESNET.zip
# $ ck add repo --zip=mlperf.closed.image-classification.mate10pro.armnn-v19.08.opencl.BAD_RESNET.zip
#
# $ wget https://www.dropbox.com/s/t2o2elqdyitqlpi/mlperf.closed.image-classification.mate10pro.armnn-v19.08.neon.BAD_RESNET.zip
# $ ck add repo --zip=mlperf.closed.image-classification.mate10pro.armnn-v19.08.neon.BAD_RESNET.zip
# ```
#
# ### Image Classification - Open (MobileNets-v1,v2)
# #### `firefly`
# ```
# $ wget https://www.dropbox.com/s/q8ieqgnr3zn6w4y/mlperf.open.image-classification.firefly.tflite-v1.15.zip
# $ ck add repo --zip=mlperf.open.image-classification.firefly.tflite-v1.15.zip
#
# $ wget https://www.dropbox.com/s/zpenduz1i4qt651/mlperf.open.image-classification.firefly.tflite-v1.15.mobilenet-v1-quantized.zip
# $ ck add repo --zip=mlperf.open.image-classification.firefly.tflite-v1.15.mobilenet-v1-quantized.zip
#
# $ wget https://www.dropbox.com/s/3mmefvxc15m9o5b/mlperf.open.image-classification.firefly.armnn-v19.08.opencl.zip
# $ ck add repo --zip=mlperf.open.image-classification.firefly.armnn-v19.08.opencl.zip
#
# $ wget https://www.dropbox.com/s/hrupp4o4apo3dfa/mlperf.open.image-classification.firefly.armnn-v19.08.neon.zip
# $ ck add repo --zip=mlperf.open.image-classification.firefly.armnn-v19.08.neon.zip
# ```
# #### `hikey960`
# ```
# $ wget https://www.dropbox.com/s/2gbbpsd2pjurvc8/mlperf.open.image-classification.hikey960.tflite-v1.15.zip
# $ ck add repo --zip=mlperf.open.image-classification.hikey960.tflite-v1.15.zip
#
# $ wget https://www.dropbox.com/s/rmttjnxzih9snzh/mlperf.open.image-classification.hikey960.tflite-v1.15.mobilenet-v1-quantized.zip
# $ ck add repo --zip=mlperf.open.image-classification.hikey960.tflite-v1.15.mobilenet-v1-quantized.zip
#
# $ wget https://www.dropbox.com/s/m5illg8i2tse5hg/mlperf.open.image-classification.hikey960.armnn-v19.08.opencl.zip
# $ ck add repo --zip=mlperf.open.image-classification.hikey960.armnn-v19.08.opencl.zip
#
# $ wget https://www.dropbox.com/s/3cujqfe4ps0g66h/mlperf.open.image-classification.hikey960.armnn-v19.08.neon.zip
# $ ck add repo --zip=mlperf.open.image-classification.hikey960.armnn-v19.08.neon.zip
# ```
# #### `rpi4`
# ```
# $ wget https://www.dropbox.com/s/awhdqjq3p4tre2q/mlperf.open.image-classification.rpi4.tflite-v1.15.zip
# $ ck add repo --zip=mlperf.open.image-classification.rpi4.tflite-v1.15.zip
#
# $ wget https://www.dropbox.com/s/rf8vsg5firhjzf8/mlperf.open.image-classification.rpi4.tflite-v1.15.mobilenet-v1-quantized.zip
# $ ck add repo --zip=mlperf.open.image-classification.rpi4.tflite-v1.15.mobilenet-v1-quantized.zip
#
# $ wget https://www.dropbox.com/s/0oketvqml7gyzl0/mlperf.open.image-classification.rpi4.armnn-v19.08.neon.zip
# $ ck add repo --zip=mlperf.open.image-classification.rpi4.armnn-v19.08.neon.zip
# ```
# #### `mate10pro`
# ```
# $ wget https://www.dropbox.com/s/avi6h9m2demz5zr/mlperf.open.image-classification.mate10pro.tflite-v1.13.mobilenet.zip
# $ ck add repo --zip=mlperf.open.image-classification.mate10pro.tflite-v1.13.mobilenet.zip
#
# $ wget https://www.dropbox.com/s/soaw27zcjb8hhww/mlperf.open.image-classification.mate10pro.tflite-v1.13.mobilenet-v1-quantized.zip
# $ ck add repo --zip=mlperf.open.image-classification.mate10pro.tflite-v1.13.mobilenet-v1-quantized.zip
# ```
# **NB:** `mate10pro.tflite-v1.13.mobilenet` would have been a perfectly valid closed submission, but it finished a little too late, after the deadline. `mate10pro.tflite-v1.13.mobilenet-v1-quantized` is a genuinely open submission, as dividiti had not declared submitting quantized results before the deadline.
#
# ### Object Detection - Open
# #### `velociti`
# ```
# $ wget https://www.dropbox.com/s/wiea3a8zf077jsv/mlperf.open.object-detection.velociti.zip
# $ ck add repo --zip=mlperf.open.object-detection.velociti.zip
# ```
#
# ## Generate the submission checklist
# In[ ]:
# Markdown template for the MLPerf Inference v0.5 self-certification checklist.
# The '%(key)s' placeholders are filled in via %-formatting with a dict (see
# get_checklist() below); literal percent signs are escaped as '%%' so that
# they survive the formatting step.
checklist_template = """MLPerf Inference 0.5 Self-Certification Checklist
Name of Certifying Engineer(s): %(name)s
Email of Certifying Engineer(s): %(email)s
Name of System(s) Under Test: %(system_name)s
Division (check one):
- [%(open)s] Open
- [%(closed)s] Closed
Category (check one):
- [%(category_available)s] Available
- [%(category_preview)s] Preview
- [%(category_rdi)s] Research, Development, and Internal (RDI)
Benchmark (check one):
- [%(benchmark_mobilenet)s] MobileNet
- [ ] SSD-MobileNet
- [%(benchmark_resnet)s] ResNet
- [ ] SSD-1200
- [ ] NMT
- [%(benchmark_other)s] Other, please specify: %(benchmark_other_specify)s
Please fill in the following tables adding lines as necessary:
97%%-tile latency is required for NMT only. 99%%-tile is required for all other models.
### Single Stream Results Table
| SUT Name | Benchmark | Query Count | Accuracy |
|----------|-----------|-------------|----------|
| %(system)s | %(benchmark)s | %(query_count)s | %(accuracy_pc)s%% |
### Multi-Stream Results Table
| SUT Name | Benchmark | Query Count | Accuracy | 97%%-tile Latency | 99%%-tile Latency |
|----------|-----------|-------------|-----------|------------------|------------------|
| | | | | | |
### Server Results Table
| SUT Name | Benchmark | Query Count | Accuracy | 97%%-tile Latency | 99%%-tile Latency |
|----------|-----------|-------------|----------|------------------|------------------|
| | | | | | |
### Offline Results Table
| SUT Name | Benchmark | Sample Count | Accuracy |
|----------|-----------|--------------|----------|
| | | | |
Scenario (check all that apply):
- [%(scenario_singlestream)s] Single-Stream
- [%(scenario_multistream)s] Multi-Stream
- [%(scenario_server)s] Server
- [%(scenario_offline)s] Offline
For each SUT, does the submission meet the latency target for each
combination of benchmark and scenario? (check all that apply)
- [x] Yes (Single-Stream and Offline no requirements)
- [ ] Yes (MobileNet x Multi-Stream 50 ms @ 99%%)
- [ ] Yes (MobileNet x Server 10 ms @ 99%%)
- [ ] Yes (SSD-MobileNet x Multi-Stream 50 ms @ 99%%)
- [ ] Yes (SSD-MobileNet x Server 10 ms @ 99%%)
- [ ] Yes (ResNet x Multi-Stream 50 ms @ 99%%)
- [ ] Yes (ResNet x Server 15 ms @ 99%%)
- [ ] Yes (SSD-1200 x Multi-Stream 66 ms @ 99%%).
- [ ] Yes (SSD-1200 x Server 100 ms @ 99%%)
- [ ] Yes (NMT x Multi-Stream 100 ms @ 97%%)
- [ ] Yes (NMT x Server 250 ms @ 97%%)
- [ ] No
For each SUT, is the appropriate minimum number of queries or samples
met, depending on the Scenario x Benchmark? (check all that apply)
- [x] Yes (Single-Stream 1,024 queries)
- [ ] Yes (Offline 24,576 samples)
- [ ] Yes (NMT Server and Multi-Stream 90,112 queries)
- [ ] Yes (Image Models Server and Multi-Stream 270,336 queries)
- [ ] No
For each SUT and scenario, is the benchmark accuracy target met?
(check all that apply)
- [%(mobilenet_accuracy_met)s] Yes (MobileNet 71.68%% x 98%%)
- [ ] Yes (SSD-MobileNet 0.22 mAP x 99%%)
- [%(resnet_accuracy_met)s] Yes (ResNet 76.46%% x 99%%)
- [ ] Yes (SSD-1200 0.20 mAP x 99%%)
- [ ] Yes (NMT 23.9 BLEU x 99%%)
- [%(accuracy_not_met)s] No
For each SUT and scenario, did the submission run on the whole
validation set in accuracy mode? (check one)
- [x] Yes
- [ ] No
How many samples are loaded into the QSL in performance mode?
%(performance_sample_count)s
For each SUT and scenario, does the number of loaded samples in the
QSL in performance mode meet the minimum requirement? (check all that
apply)
- [%(performance_sample_count_1024)s] Yes (ResNet and MobileNet 1,024 samples)
- [%(performance_sample_count_256)s] Yes (SSD-MobileNet 256 samples)
- [%(performance_sample_count_64)s] Yes (SSD-1200 64 samples)
- [ ] Yes (NMT 3,903,900 samples)
- [%(performance_sample_count_not_met)s] No
For each SUT and scenario, is the experimental duration greater than
or equal to 60 seconds? (check one)
- [x] Yes
- [ ] No
Does the submission use LoadGen? (check one)
- [x] Yes
- [ ] No
Is your loadgen commit from one of these allowed commit hashes?
- [%(revision_61220457de)s] 61220457dec221ed1984c62bd9d382698bd71bc6
- [%(revision_5684c11e39)s] 5684c11e3987b614aae830390fa0e92f56b7e800
- [%(revision_55c0ea4e77)s] 55c0ea4e772634107f3e67a6d0da61e6a2ca390d
- [%(revision_d31c18fbd9)s] d31c18fbd9854a4f1c489ca1bc4cd818e48f2bc5
- [%(revision_1d0e06e54a)s] 1d0e06e54a7d763cf228bdfd8b1e987976e4222f
- [%(revision_other)s] Other, please specify: %(revision_other_specify)s
Do you have any additional change to LoadGen? (check one)
- [ ] Yes, please specify:
- [x] No
Does the submission run the same code in accuracy and performance
modes? (check one)
- [x] Yes
- [ ] No
Where is the LoadGen trace stored? (check one)
- [x] Host DRAM
- [ ] Other, please specify:
For the submitted result, what is the QSL random number generator seed?
- [x] 0x2b7e151628aed2a6ULL (3133965575612453542)
- [ ] Other, please specify:
For the submitted results, what is the sample index random number generator seed?
- [x] 0x093c467e37db0c7aULL (665484352860916858)
- [ ] Other, please specify:
For the submitted results, what is the schedule random number generator seed?
- [x] 0x3243f6a8885a308dULL (3622009729038561421)
- [ ] Other, please specify:
For each SUT and scenario, is the submission run the correct number of
times for the relevant scenario? (check one)
- [x] Yes (Accuracy 1x Performance 1x Single-Stream, Multi-Stream, Offline)
- [ ] Yes (Accuracy 1x Performance 5x Server)
- [ ] No
Are the weights calibrated using data outside of the calibration set?
(check one)
- [ ] Yes
- [x] No
What untimed pre-processing does the submission use? (check all that apply)
- [x] Resize
- [ ] Reorder channels or transpose
- [ ] Pad
- [x] A single crop
- [x] Mean subtraction and normalization
- [ ] Convert to whitelisted format
- [ ] No pre-processing
- [ ] Other, please specify:
What numerics does the submission use? (check all that apply)
- [ ] INT4
- [ ] INT8
- [ ] INT16
- [%(numerics_uint8)s] UINT8
- [ ] UINT16
- [ ] FP11
- [ ] FP16
- [ ] BF16
- [%(numerics_fp32)s] FP32
- [ ] Other, please specify:
Which of the following techniques does the submission use? (check all that apply)
- [ ] Wholesale weight replacement
- [ ] Weight supplements
- [ ] Discarding non-zero weight elements
- [ ] Pruning
- [ ] Caching queries
- [ ] Caching responses
- [ ] Caching intermediate computations
- [ ] Modifying weights during the timed portion of an inference run
- [ ] Weight quantization algorithms that are similar in size to the
non-zero weights they produce
- [ ] Hard coding the total number of queries
- [ ] Techniques that boost performance for fixed length experiments but
are inapplicable to long-running services except in the offline
scenario
- [ ] Using knowledge of the LoadGen implementation to predict upcoming
lulls or spikes in the server scenario
- [ ] Treating beams in a beam search differently. For example,
employing different precision for different beams
- [ ] Changing the number of beams per beam search relative to the reference
- [ ] Incorporating explicit statistical information about the performance or accuracy sets
- [ ] Techniques that take advantage of upsampled images.
- [ ] Techniques that only improve performance when there are identical samples in a query.
- [x] None of the above
Is the submission congruent with all relevant MLPerf rules?
- [x] Yes
- [ ] No
For each SUT, does the submission accurately reflect the real-world
performance of the SUT?
- [x] Yes
- [ ] No"""
def get_checklist(checklist_template=checklist_template, name='Anton Lokhmotov', email='anton@dividiti.com',
                  system='rpi4-tflite-v1.15', system_name='Raspberry Pi 4 (rpi4)', revision='61220457de',
                  division='closed', category='available', task='image-classification', benchmark='mobilenet', scenario='singlestream',
                  performance_sample_count=1024, performance_sample_count_met=True,
                  accuracy_pc=12.345, accuracy_met=True, numerics='fp32'):
    """Render the self-certification checklist for one submission.

    Substitutes the '%(key)s' placeholders of checklist_template, prints the
    rendered checklist framed by separator lines, and returns it as a string.
    """
    def box(checked):
        # Markdown checkbox mark: 'x' when ticked, ' ' otherwise.
        return "x" if checked else " "
    print("=" * 100)
    print(system)
    print("=" * 100)
    # LoadGen commits explicitly listed in the checklist template.
    known_revisions = ('61220457de', '5684c11e39', '55c0ea4e77', 'd31c18fbd9', '1d0e06e54a')
    revision_other = revision not in known_revisions
    benchmark_other = benchmark not in ('mobilenet', 'resnet')
    # For the known benchmarks, recompute accuracy_met from the official
    # targets (MobileNet 71.676% x 98%, ResNet 76.456% x 99%); otherwise
    # trust the caller, provided the accuracy is positive.
    if benchmark == 'mobilenet':
        accuracy_met = accuracy_pc >= 71.676*0.98
    elif benchmark == 'resnet':
        accuracy_met = accuracy_pc >= 76.456*0.99
    else:
        accuracy_met = accuracy_met and accuracy_pc > 0
    fields = {
        "name": name,
        "email": email,
        "system_name": system_name,
        # Division.
        "closed": box(division == 'closed'),
        "open": box(division == 'open'),
        # Category.
        "category_available": box(category.lower() == 'available'),
        "category_preview": box(category.lower() == 'preview'),
        "category_rdi": box(category.lower() == 'rdi'),
        # Benchmark.
        "benchmark_mobilenet": box(benchmark == 'mobilenet'),
        "benchmark_resnet": box(benchmark == 'resnet'),
        "benchmark_other": box(benchmark_other),
        "benchmark_other_specify": benchmark if benchmark_other else '',
        # Single Stream results table.
        "system": system,
        "benchmark": benchmark,
        "query_count": 50000 if task == 'image-classification' else 5000,
        "accuracy_pc": "%.3f" % accuracy_pc,
        # Scenario.
        "scenario_singlestream": box(scenario == 'singlestream'),
        "scenario_multistream": box(scenario == 'multistream'),
        "scenario_server": box(scenario == 'server'),
        "scenario_offline": box(scenario == 'offline'),
        # Accuracy targets.
        "mobilenet_accuracy_met": box(benchmark == 'mobilenet' and accuracy_met),
        "resnet_accuracy_met": box(benchmark == 'resnet' and accuracy_met),
        "accuracy_not_met": box(not accuracy_met),
        # "How many samples are loaded into the QSL in performance mode?"
        "performance_sample_count": performance_sample_count,
        "performance_sample_count_1024": box(performance_sample_count == 1024),
        "performance_sample_count_256": box(performance_sample_count == 256),
        "performance_sample_count_64": box(performance_sample_count == 64),
        "performance_sample_count_not_met": box(not performance_sample_count_met),  # TODO
        # LoadGen revision.
        "revision_other": box(revision_other),
        "revision_other_specify": revision if revision_other else '',
        # Numerics.
        "numerics_uint8": box(numerics == 'uint8'),
        "numerics_fp32": box(numerics == 'fp32'),
    }
    # One tick box per allowed LoadGen commit.
    for rev in known_revisions:
        fields["revision_%s" % rev] = box(revision == rev)
    checklist = checklist_template % fields
    print(checklist)
    print("-" * 100)
    return checklist
# Earlier example invocations, kept for reference:
# null = get_checklist(system='rpi4-armnn-v19.08-neon', system_name='Raspberry Pi 4 (rpi4)', benchmark='mobilenet', accuracy_pc=70.241, numerics='uint8')
# null = get_checklist(system='hikey960-tflite-v1.15', system_name='Linaro HiKey 960 (hikey960)', benchmark='resnet', accuracy_pc=75.692, revision='deadbeef')
# Print a sample checklist for the open object-detection submission on velociti.
null = get_checklist(system='velociti-tensorflow-v1.14-cpu', name='Anton Lokhmotov; Emanuele Vitali', email='anton@dividiti.com; emanuele.vitali@polimi.it', system_name='HP Z640 G1X62EA workstation (velociti)', division='open', category='RDI', benchmark='ssd-mobilenet-fpn')
#
# ## Check the experimental data
# In[ ]:
#
# Image Classification - Closed (MobileNet, ResNet).
#
# Closed-division image-classification CK repositories, one group per platform.
# (Pull requests: https://github.com/mlperf/submissions_inference_0_5)
repos_image_classification_closed = (
    # firefly: pulls 18, 21, 22.
    [
        'mlperf.closed.image-classification.firefly.tflite-v1.15',
        'mlperf.closed.image-classification.firefly.armnn-v19.08.neon',
        'mlperf.closed.image-classification.firefly.armnn-v19.08.opencl',
    ] +
    # hikey960: pulls 23, 24, 25.
    [
        'mlperf.closed.image-classification.hikey960.tflite-v1.15',
        'mlperf.closed.image-classification.hikey960.armnn-v19.08.neon',
        'mlperf.closed.image-classification.hikey960.armnn-v19.08.opencl',
    ] +
    # rpi4: pulls 26, 30.
    [
        'mlperf.closed.image-classification.rpi4.tflite-v1.15',
        'mlperf.closed.image-classification.rpi4.armnn-v19.08.neon',
    ] +
    # mate10pro: pulls 32, 35.
    [
        'mlperf.closed.image-classification.mate10pro.armnn-v19.08.neon',
        'mlperf.closed.image-classification.mate10pro.armnn-v19.08.opencl',
    ]
)
# Audit repositories for the closed image-classification submissions
# (pulls 234, 236, 238); the mate10pro audit repository is deliberately excluded.
repos_image_classification_closed_audit = [
    'mlperf.closed.image-classification.%s.audit' % platform
    for platform in ('firefly', 'hikey960', 'rpi4')
]
#
# Image Classification - Open (MobileNets-v1,v2).
#
# Open-division image-classification CK repositories, one group per platform.
# (Pull requests: https://github.com/mlperf/submissions_inference_0_5)
repos_image_classification_open = (
    # firefly: pulls 39, 127, 40, 120.
    [
        'mlperf.open.image-classification.firefly.tflite-v1.15',
        'mlperf.open.image-classification.firefly.tflite-v1.15.mobilenet-v1-quantized',
        'mlperf.open.image-classification.firefly.armnn-v19.08.opencl',
        'mlperf.open.image-classification.firefly.armnn-v19.08.neon',
    ] +
    # hikey960: pulls 37, 128, 38, 121.
    [
        'mlperf.open.image-classification.hikey960.tflite-v1.15',
        'mlperf.open.image-classification.hikey960.tflite-v1.15.mobilenet-v1-quantized',
        'mlperf.open.image-classification.hikey960.armnn-v19.08.opencl',
        'mlperf.open.image-classification.hikey960.armnn-v19.08.neon',
    ] +
    # rpi4: pulls 122, 129, 123.
    [
        'mlperf.open.image-classification.rpi4.tflite-v1.15',
        'mlperf.open.image-classification.rpi4.tflite-v1.15.mobilenet-v1-quantized',
        'mlperf.open.image-classification.rpi4.armnn-v19.08.neon',
    ] +
    # mate10pro: pulls 130, 135.
    [
        'mlperf.open.image-classification.mate10pro.tflite-v1.13.mobilenet',
        'mlperf.open.image-classification.mate10pro.tflite-v1.13.mobilenet-v1-quantized',
    ]
)
# Audit repositories for the open image-classification submissions
# (pulls 255, 257, 258); the mate10pro audit repository is deliberately excluded.
repos_image_classification_open_audit = [
    'mlperf.open.image-classification.%s.audit' % platform
    for platform in ('firefly', 'hikey960', 'rpi4')
]
#
# Object Detection - Open (TensorFlow Model Zoo + YOLO-v3)
#
# velociti is the only platform with open object-detection results
# (archive: mlperf.open.object-detection.velociti.zip, see the wget commands above).
repos_object_detection_open = ['mlperf.open.object-detection.velociti']
# In[ ]:
# repos_for_testing = [
# 'mlperf.closed.image-classification.mate10pro.tflite-v1.13.mobilenet.BAD_LOADGEN',
# 'mlperf.closed.image-classification.mate10pro.armnn-v19.08.opencl.BAD_RESNET',
# 'mlperf.closed.image-classification.mate10pro.armnn-v19.08.neon.BAD_RESNET',
# 'mlperf-inference-vision-experiments-count5'
# ]
# In[ ]:
# #!ck recache repo
# for repo_uoa in repos:
# print("=" * 100)
# print(repo_uoa)
# print("=" * 100)
# !ck list $repo_uoa:experiment:* | sort
# print("-" * 100)
# print("")
# In[ ]:
# Locate upstream master.
# r = ck.access({'action':'locate', 'module_uoa':'env', 'tags':'mlperf,inference,source,upstream.master'})
# Locate variation with audit test fixes.
# NOTE(review): 'ck' is not imported in this chunk -- presumably the Collective
# Knowledge kernel, imported in an earlier notebook cell; confirm before
# running this file standalone.
r = ck.access({'action':'locate', 'module_uoa':'env', 'tags':'mlperf,inference,source,upstream.pr518'})
# CK calls report failure via a non-zero 'return' code rather than exceptions.
if r['return']>0:
    print('Error: %s' % r['error'])
    exit(1)
# Pick any source location and look under 'inference/v0.5/mlperf.conf'.
upstream_path = os.path.join(list(r['install_locations'].values())[0], 'inference')
# Bare expression: displays the chosen path when executed as a notebook cell.
upstream_path
# In[ ]:
def check_experimental_results(repo_uoa, module_uoa='experiment', tags='mlperf', submitter='dividiti', path=None, audit=False):
if not path:
path_list = get_ipython().getoutput('ck find repo:$repo_uoa')
path = path_list[0]
root_dir = os.path.join(path, 'submissions_inference_0_5')
if not os.path.exists(root_dir): os.mkdir(root_dir)
print("Storing results under '%s'" % root_dir)
r = ck.access({'action':'search', 'repo_uoa':repo_uoa, 'module_uoa':module_uoa, 'tags':tags})
if r['return']>0:
print('Error: %s' % r['error'])
exit(1)
experiments = r['lst']
for experiment in experiments:
data_uoa = experiment['data_uoa']
r = ck.access({'action':'list_points', 'repo_uoa':repo_uoa, 'module_uoa':module_uoa, 'data_uoa':data_uoa})
if r['return']>0:
print('Error: %s' % r['error'])
exit(1)
print("*" * 100)
tags = r['dict']['tags']
#print(tags)
backend = ''
preprocessing = ''
if 'velociti' in tags:
# Expected format: [ "mlperf", "open", "object-detection", "velociti", "cpu", "rcnn-inception-resnet-v2-lowproposals", "singlestream", "accuracy" ]
(_, division, task, platform, backend, benchmark, scenario, mode) = tags
library = 'tensorflow-v1.14'
elif 'accuracy' in tags:
# FIXME: With the benefit of hindsight, [ ..., "armnn-v19.08", "neon", ... ] should have come
# as one tag "armnn-v19.08-neon", since we join them in this notebook anyway.
if 'neon' in tags or 'opencl' in tags:
# Expected format: [ "mlperf", "open", "image-classification", "firefly", "armnn-v19.08", "neon", "mobilenet-v1-0.5-128", "singlestream", "accuracy", "using-opencv" ]
(_, division, task, platform, library, backend, benchmark, scenario, mode, preprocessing) = tags
else:
# Expected format: [ "mlperf", "open", "image-classification", "firefly", "tflite-v1.15", "mobilenet-v1-0.5-128", "singlestream", "accuracy", "using-opencv" ]
(_, division, task, platform, library, benchmark, scenario, mode, preprocessing) = tags
elif 'performance' in tags:
if 'neon' in tags or 'opencl' in tags:
# Expected format: [ "mlperf", "open", "image-classification", "firefly", "armnn-v19.08", "neon", "mobilenet-v1-0.5-128", "singlestream", "performance" ]
(_, division, task, platform, library, backend, benchmark, scenario, mode) = tags
else:
# Expected format: [ "mlperf", "open", "image-classification", "firefly", "tflite-v1.15", "mobilenet-v1-0.5-128", "singlestream", "performance" ]
(_, division, task, platform, library, benchmark, scenario, mode) = tags
elif 'audit' in tags: # As accuracy but with the test name instead of the preprocessing method.
if 'neon' in tags or 'opencl' in tags:
# Expected format: [ "mlperf", "open", "image-classification", "firefly", "armnn-v19.08", "neon", "mobilenet-v1-0.5-128", "singlestream", "audit", "TEST03" ]
(_, division, task, platform, library, backend, benchmark, scenario, mode, test) = tags
else:
# Expected format: [ "mlperf", "open", "image-classification", "firefly", "tflite-v1.15", "mobilenet-v1-0.5-128", "singlestream", "audit", "TEST03" ]
(_, division, task, platform, library, benchmark, scenario, mode, test) = tags
else:
raise "Expected 'accuracy' or 'performance' or 'audit' in tags!"
# if mode == 'accuracy': continue
organization = submitter
if backend != '':
system = platform+'-'+library+'-'+backend
else:
system = platform+'-'+library
division_system = division+'-'+system
if library.startswith('tflite'):
implementation = task+'-tflite'
elif library.startswith('armnn'):
implementation = task+'-armnn-tflite'
else: # Official app with CK adaptations.
implementation = 'mlperf-inference-vision'
implementation_benchmark = implementation+'-'+benchmark
#
# Directory structure according to the Inference section of the General MLPerf Submission Rules:
# https://github.com/mlperf/policies/blob/master/submission_rules.adoc#552-inference
#
# /
# /
#
division_dir = os.path.join(root_dir, division)
if not os.path.exists(division_dir): os.mkdir(division_dir)
organization_dir = os.path.join(division_dir, organization)
if not os.path.exists(organization_dir): os.mkdir(organization_dir)
#
# "systems"/
# .json
#
systems_dir = os.path.join(organization_dir, 'systems')
if not os.path.exists(systems_dir): os.mkdir(systems_dir)
system_json_name = '%s.json' % system
system_json_path = os.path.join(systems_dir, system_json_name)
with open(system_json_path, 'w') as system_json_file:
# pprint(division_system)
# pprint(division_systems)
system_json = division_systems.get(division_system, default_system_json)
json.dump(system_json, system_json_file, indent=2)
print('%s' % systems_dir)
if system_json == default_system_json:
print(' |_ %s [DEFAULT]' % system_json_name)
raise
else:
print(' |_ %s [%s]' % (system_json_name, division_system))
#
# "code"/
# /
# /
#
#
code_dir = os.path.join(organization_dir, 'code')
if not os.path.exists(code_dir): os.mkdir(code_dir)
# FIXME: For now, not always "per reference".
benchmark_dir = os.path.join(code_dir, benchmark)
if not os.path.exists(benchmark_dir): os.mkdir(benchmark_dir)
implementation_dir = os.path.join(benchmark_dir, implementation)
if not os.path.exists(implementation_dir): os.mkdir(implementation_dir)
print('%s' % code_dir)
# Create 'README.md'.
implementation_readme_name = 'README.md'
implementation_readme_path = os.path.join(implementation_dir, implementation_readme_name)
# pprint(implementation)
# pprint(implementation_readmes)
implementation_readme = implementation_readmes.get(implementation, '')
with open(implementation_readme_path, 'w') as implementation_readme_file:
implementation_readme_file.writelines(implementation_readme)
if implementation_readme == '':
print(' |_ %s [EMPTY]' % implementation_readme_name)
raise
else:
print(' |_ %s' % implementation_readme_name)
#
# "measurements"/
# /
# /
# /
# _.json
# README.md
# user.conf
# mlperf.conf
# calibration_process.adoc (?)
# submission_checklist.txt
#
measurements_dir = os.path.join(organization_dir, 'measurements')
if not os.path.exists(measurements_dir): os.mkdir(measurements_dir)
system_dir = os.path.join(measurements_dir, system)
if not os.path.exists(system_dir): os.mkdir(system_dir)
benchmark_dir = os.path.join(system_dir, benchmark)
if not os.path.exists(benchmark_dir): os.mkdir(benchmark_dir)
scenario_dir = os.path.join(benchmark_dir, scenario)
if not os.path.exists(scenario_dir): os.mkdir(scenario_dir)
print(scenario_dir)
# Create '_.json'.
system_implementation_json_name = system+'_'+implementation+'.json'
system_implementation_json_path = os.path.join(scenario_dir, system_implementation_json_name)
with open(system_implementation_json_path, 'w') as system_implementation_json_file:
implementation_benchmark_json = implementation_benchmarks.get(implementation_benchmark, default_implementation_benchmark_json)
if implementation_benchmark_json != default_implementation_benchmark_json:
print(' |_ %s [for %s]' % (system_implementation_json_name, implementation_benchmark))
json.dump(implementation_benchmark_json, system_implementation_json_file, indent=2)
else:
print(' |_ %s [DEFAULT]' % system_implementation_json_name)
raise "Default implementation!"
# Create 'README.md' based on the division and task (basically, mentions a division- and task-specific script).
measurements_readme_name = 'README.md'
measurements_readme_path = os.path.join(scenario_dir, measurements_readme_name)
measurements_readme = measurements_readmes.get(division+'-'+task, '')
if measurements_readme != '':
with open(measurements_readme_path, 'w') as measurements_readme_file:
measurements_readme_file.writelines(measurements_readme)
print(' |_ %s [for %s %s]' % (measurements_readme_name, division, task))
else:
raise "Invalid measurements README!"
# Copy 'user.conf' from implementation source.
user_conf_name = 'user.conf'
implementation_path = implementation_paths.get(implementation, '')
# pprint(implementation)
# pprint(implementation_paths)
if implementation_path != '':
user_conf_path = os.path.join(implementation_path, user_conf_name)
copy2(user_conf_path, scenario_dir)
print(' |_ %s [from %s]' % (user_conf_name, user_conf_path))
else:
raise "Invalid implementation path!"
# Copy 'mlperf.conf' from MLPerf Inference source.
mlperf_conf_name = 'mlperf.conf'
mlperf_conf_path = os.path.join(scenario_dir, mlperf_conf_name)
if implementation in [ 'image-classification-tflite', 'image-classification-armnn-tflite' ]:
# Write a snapshot from https://github.com/dividiti/inference/blob/61220457dec221ed1984c62bd9d382698bd71bc6/v0.5/mlperf.conf
with open(mlperf_conf_path, 'w') as mlperf_conf_file:
mlperf_conf_file.writelines(mlperf_conf_6122045)
print(' |_ %s [from %s]' % (mlperf_conf_name, 'github.com/mlperf/inference@6122045'))
else:
upstream_mlperf_conf_path = os.path.join(upstream_path, 'v0.5', 'mlperf.conf')
copy2(upstream_mlperf_conf_path, mlperf_conf_path)
print(' |_ %s [from %s]' % (mlperf_conf_name, upstream_mlperf_conf_path))
# Write submission_checklist.txt into the same directory later, once accuracy.txt is parsed.
#
# "results"/
# /
# /
# /
# performance/
# run_x/ # 1 run for single stream and offline, 5 otherwise
# mlperf_log_summary.txt
# mlperf_log_detail.txt
# mlperf_log_trace.json
# accuracy/
# mlperf_log_accuracy.json
# compliance_checker_log.txt
#
results_dir = os.path.join(organization_dir, 'results')
if not os.path.exists(results_dir): os.mkdir(results_dir)
system_dir = os.path.join(results_dir, system)
if not os.path.exists(system_dir): os.mkdir(system_dir)
benchmark_dir = os.path.join(system_dir, benchmark)
if not os.path.exists(benchmark_dir): os.mkdir(benchmark_dir)
scenario_dir = os.path.join(benchmark_dir, scenario)
if not os.path.exists(scenario_dir): os.mkdir(scenario_dir)
mode_dir = os.path.join(scenario_dir, mode)
if not os.path.exists(mode_dir): os.mkdir(mode_dir)
print(mode_dir)
if audit:
# Deal with a subset of audit tests.
# if test not in [ 'TEST03' ]: # [ 'TEST01', 'TEST03', 'TEST04-A', 'TEST04-B', 'TEST05' ]:
# continue
# Save the accuracy and performance dirs for the corresponding submission experiment.
accuracy_dir = os.path.join(scenario_dir, 'accuracy')
performance_dir = os.path.join(scenario_dir, 'performance', 'run_1')
# Use the mode expected for each test.
mode = 'performance' if test != 'TEST03' else 'submission'
# Create a similar directory structure to results_dir, with another level, test_dir,
# between scenario_dir and mode_dir.
audit_dir = os.path.join(organization_dir, 'audit')
if not os.path.exists(audit_dir): os.mkdir(audit_dir)
system_dir = os.path.join(audit_dir, system)
if not os.path.exists(system_dir): os.mkdir(system_dir)
benchmark_dir = os.path.join(system_dir, benchmark)
if not os.path.exists(benchmark_dir): os.mkdir(benchmark_dir)
scenario_dir = os.path.join(benchmark_dir, scenario)
if not os.path.exists(scenario_dir): os.mkdir(scenario_dir)
test_dir = os.path.join(scenario_dir, test)
if not os.path.exists(test_dir): os.mkdir(test_dir)
mode_dir = os.path.join(test_dir, mode)
if not os.path.exists(mode_dir): os.mkdir(mode_dir)
# For each point (should be one point for each performance run).
points = r['points']
for (point, point_idx) in zip(points, range(1,len(points)+1)):
point_file_path = os.path.join(r['path'], 'ckp-%s.0001.json' % point)
with open(point_file_path) as point_file:
point_data_raw = json.load(point_file)
characteristics_list = point_data_raw['characteristics_list']
characteristics = characteristics_list[0]
# Set the leaf directory.
if mode == 'performance':
run_dir = os.path.join(mode_dir, 'run_%d' % point_idx)
if not os.path.exists(run_dir): os.mkdir(run_dir)
last_dir = run_dir
else:
last_dir = mode_dir
print(last_dir)
# Dump files in the leaf directory.
mlperf_log = characteristics['run'].get('mlperf_log',{})
# Summary file (with errors and warnings in accuracy mode, with statistics in performance mode).
summary_txt_name = 'mlperf_log_summary.txt'
summary_txt_path = os.path.join(last_dir, summary_txt_name)
summary = mlperf_log.get('summary','')
with open(summary_txt_path, 'w') as summary_txt_file:
summary_txt_file.writelines(summary)
print(' |_ %s' % summary_txt_name)
# Detail file (with settings).
detail_txt_name = 'mlperf_log_detail.txt'
detail_txt_path = os.path.join(last_dir, detail_txt_name)
detail = mlperf_log.get('detail','')
with open(detail_txt_path, 'w') as detail_txt_file:
detail_txt_file.writelines(detail)
print(' |_ %s' % detail_txt_name)
# Accuracy file (with accuracy dictionary).
# TODO: Move the next 5 lines into the (if mode == 'accuracy') block,
# once the submission checker no longer complains as follows:
# "performance/run_1 has file list mismatch (['mlperf_log_accuracy.json'])"
accuracy_json_name = 'mlperf_log_accuracy.json'
accuracy_json_path = os.path.join(last_dir, accuracy_json_name)
with open(accuracy_json_path, 'w') as accuracy_json_file:
json.dump(mlperf_log.get('accuracy',{}), accuracy_json_file, indent=2)
print(' |_ %s' % accuracy_json_name)
# Do what's required by NVIDIA's audit tests.
if audit:
test_path = os.path.join(upstream_path, 'v0.5', 'audit', 'nvidia', test)
if 'TEST01' in tags:
# Verify that the accuracy (partially) dumped for the audit test matches that for the submission.
# NOTE(review): variables such as test_path, test_dir, test, accuracy_dir,
# accuracy_json_name, accuracy_json_path, performance_dir, summary_txt_name,
# summary_txt_path, scenario_dir and upstream_path are defined earlier in the
# enclosing function, outside this excerpt — confirm against the full file.
verify_accuracy_py = os.path.join(test_path, 'verify_accuracy.py')
submission_accuracy_json_path = os.path.join(accuracy_dir, accuracy_json_name)
# IPython shell magic: $name interpolates the Python locals into the command line.
verify_accuracy_txt = get_ipython().getoutput('python3 $verify_accuracy_py -a $submission_accuracy_json_path -p $accuracy_json_path')
verify_accuracy_txt_name = 'verify_accuracy.txt'
verify_accuracy_txt_path = os.path.join(test_dir, verify_accuracy_txt_name)
# Persist the captured stdout of verify_accuracy.py as verify_accuracy.txt.
with open(verify_accuracy_txt_path, 'w') as verify_accuracy_txt_file:
verify_accuracy_txt_file.writelines('\n'.join(verify_accuracy_txt))
print('%s' % test_dir)
print(' |_ %s' % verify_accuracy_txt_name)
# TEST01/TEST03/TEST05 additionally compare performance against the submission run.
if test in [ 'TEST01', 'TEST03', 'TEST05' ]:
# Verify that the performance for the audit test matches that for the submission.
verify_performance_py = os.path.join(test_path, 'verify_performance.py')
submission_summary_txt_path = os.path.join(performance_dir, summary_txt_name)
# -r takes the submission summary, -t the audit-test summary (see command line).
verify_performance_txt = get_ipython().getoutput('python3 $verify_performance_py -r $submission_summary_txt_path -t $summary_txt_path')
verify_performance_txt_name = 'verify_performance.txt'
verify_performance_txt_path = os.path.join(test_dir, verify_performance_txt_name)
with open(verify_performance_txt_path, 'w') as verify_performance_txt_file:
verify_performance_txt_file.writelines('\n'.join(verify_performance_txt))
print('%s' % test_dir)
print(' |_ %s' % verify_performance_txt_name)
# TEST04 comes in two halves: performance can only be compared once both
# TEST04-A and TEST04-B summaries are present on disk.
if test in [ 'TEST04-A', 'TEST04-B' ]:
test04a_summary_txt_path = os.path.join(scenario_dir, 'TEST04-A', 'performance', 'run_1', summary_txt_name)
test04b_summary_txt_path = os.path.join(scenario_dir, 'TEST04-B', 'performance', 'run_1', summary_txt_name)
if os.path.exists(test04a_summary_txt_path) and os.path.exists(test04b_summary_txt_path):
# If both tests have been processed, verify that their performance matches
# using NVIDIA's upstream verify_test4_performance.py (-u/-s take the two runs).
verify_performance_py = os.path.join(upstream_path, 'v0.5', 'audit', 'nvidia', 'TEST04-A', 'verify_test4_performance.py')
#print("python3 {} -u {} -s {}".format(verify_performance_py, test04a_summary_txt_path, test04b_summary_txt_path))
verify_performance_txt = get_ipython().getoutput('python3 $verify_performance_py -u $test04a_summary_txt_path -s $test04b_summary_txt_path')
#print(verify_performance_txt)
verify_performance_txt_name = 'verify_performance.txt'
# The combined A/B verdict is stored under the TEST04-A directory only.
verify_performance_txt_path = os.path.join(scenario_dir, 'TEST04-A', verify_performance_txt_name)
with open(verify_performance_txt_path, 'w') as verify_performance_txt_file:
verify_performance_txt_file.writelines('\n'.join(verify_performance_txt))
print('%s' % os.path.join(scenario_dir, 'TEST04-A'))
print(' |_ %s' % verify_performance_txt_name)
else:
# Need both A/B tests to be processed. Wait for the other one.
continue
# Generate accuracy.txt.
# Re-score mlperf_log_accuracy.json with the official reference scripts and
# store the captured script output as accuracy.txt next to it.
if mode == 'accuracy' or mode == 'submission':
accuracy_txt_name = 'accuracy.txt'
accuracy_txt_path = os.path.join(last_dir, accuracy_txt_name)
if task == 'image-classification':
accuracy_imagenet_py = os.path.join(upstream_path, 'v0.5', 'classification_and_detection', 'tools', 'accuracy-imagenet.py')
imagenet_val_file = '$HOME/CK_TOOLS/dataset-imagenet-ilsvrc2012-aux/val.txt' # FIXME: Do not hardcode - locate via CK.
accuracy_txt = get_ipython().getoutput('python3 $accuracy_imagenet_py --imagenet-val-file $imagenet_val_file --mlperf-accuracy-file $accuracy_json_path')
# The last (and only line) is e.g. "accuracy=76.442%, good=38221, total=50000".
accuracy_line = accuracy_txt[-1]
# NOTE(review): use a raw string (r'...') for regexes — '\d' is an invalid
# escape sequence and warns on newer Python versions.
match = re.match('accuracy=(.+)%, good=(\d+), total=(\d+)', accuracy_line)
accuracy_pc = float(match.group(1))
elif task == 'object-detection':
accuracy_coco_py = os.path.join(upstream_path, 'v0.5', 'classification_and_detection', 'tools', 'accuracy-coco.py')
coco_dir = '/home/anton/CK_TOOLS/dataset-coco-2017-val' # FIXME: Do not hardcode - locate via CK.
# pythonpath_coco is defined elsewhere in the file; the COCO script needs it
# on PYTHONPATH (presumably for pycocotools — confirm against the full file).
os.environ['PYTHONPATH'] = pythonpath_coco+':'+os.environ.get('PYTHONPATH','')
accuracy_txt = get_ipython().getoutput('python3 $accuracy_coco_py --coco-dir $coco_dir --mlperf-accuracy-file $accuracy_json_path')
# The last line is e.g. "mAP=13.323%".
accuracy_line = accuracy_txt[-1]
match = re.match('mAP\=([\d\.]+)\%', accuracy_line)
accuracy_pc = float(match.group(1))
else:
# NOTE(review): BUG — raising a plain string is a TypeError in Python 3.
# This should be e.g. raise ValueError("Invalid task '%s'!" % task).
raise "Invalid task '%s'!" % task
with open(accuracy_txt_path, 'w') as accuracy_txt_file:
accuracy_txt_file.writelines('\n'.join(accuracy_txt))
print(' |_ %s [%.3f%% parsed from "%s"]' % (accuracy_txt_name, accuracy_pc, accuracy_line))
# Generate submission_checklist.txt for each system, benchmark and scenario under "measurements/".
# Only written once per (system, benchmark, scenario), from the non-audit accuracy run.
# NOTE(review): division, task, system, benchmark, scenario, measurements_dir,
# division_system, division_systems, default_system_json, detail, audit and
# implementation_benchmark_json are defined earlier in the enclosing function.
if mode == 'accuracy' and not audit:
checklist_name = 'submission_checklist.txt'
checklist_path = os.path.join(measurements_dir, system, benchmark, scenario, checklist_name)
# Fall back to the default system description if this division/system pair is unknown.
system_json = division_systems.get(division_system, default_system_json)
# Extract LoadGen revision from the second line of e.g.
# "pid": 28660, "tid": 28660, "ts": 8750ns : version : .5a1 @ 61220457de
# FIXME: In practice, the revision may be different for accuracy and performance runs
# (happened on rpi4 due to a late LoadGen fix). We would prefer to use one from
# the performance one, as it may be more critical for performance evaluation.
# However, as we only write the checklist from the accuracy run, we are somewhat stuck.
loadgen_revision = detail[1].split('@')[1].strip()
# FIXME: The actual performance_sample_count can be extracted from the performance run.
# Again, this is not available to us here.
# We could check in user.conf, but we would need to parse it.
performance_sample_count = 1024 if task == 'image-classification' else 256
# Write the checklist.
if division == 'open' and task == 'object-detection':
# Collaboration between dividiti and Politecnico di Milano.
print(system)
checklist = get_checklist(name='Anton Lokhmotov; Emanuele Vitali',
email='anton@dividiti.com; emanuele.vitali@polimi.it',
division=division, task=task, system=system,
system_name=system_json['system_name'], category=system_json['status'],
revision=loadgen_revision, benchmark=benchmark, accuracy_pc=accuracy_pc,
performance_sample_count=performance_sample_count,
numerics=implementation_benchmark_json['weight_data_types'])
else:
# Default attribution: get_checklist() presumably fills in its own
# name/email defaults here — confirm against its definition.
checklist = get_checklist(division=division, task=task, system=system,
system_name=system_json['system_name'], category=system_json['status'],
revision=loadgen_revision, benchmark=benchmark, accuracy_pc=accuracy_pc,
performance_sample_count=performance_sample_count,
numerics=implementation_benchmark_json['weight_data_types'])
with open(checklist_path, 'w') as checklist_file:
checklist_file.writelines(checklist)
# # Trace file (should omit trace from v0.5).
# trace_json_name = 'mlperf_log_trace.json'
# trace_json_path = os.path.join(last_dir, trace_json_name)
# with open(trace_json_path, 'w') as trace_json_file:
# json.dump(mlperf_log.get('trace',{}), trace_json_file, indent=2)
return
# In[ ]:
# The path is where mlperf/submissions_inference_0_5 is cloned under.
# NOTE(review): user-specific absolute path — adjust per machine (FIXME: locate via CK).
path = '/home/anton/projects/mlperf/'
# Submitter name: used both in the submission directory layout and by the checker.
submitter = 'dividiti'
# ### Extract submission repos
# In[ ]:
# # repos = repos_image_classification_closed + repos_image_classification_open + repos_object_detection_open
# repos = [ 'mlperf.open.image-classification.firefly.tflite-v1.15.mobilenet-v1-quantized' ]
# for repo_uoa in repos:
# check_experimental_results(repo_uoa, path=path, submitter=submitter, audit=False)
# ### Extract audit repos
# In[ ]:
# # audit_repos = repos_image_classification_closed_audit + repos_image_classification_open_audit
# audit_repos = [ 'mlperf.closed.image-classification.mate10pro.audit' ]
# for repo_uoa in audit_repos:
# check_experimental_results(repo_uoa, path=path, submitter=submitter, audit=True)
# ### Run submission checker
# In[ ]:
# Locate and run the official MLPerf submission checker over the submission
# tree, then store its log once under closed/ and once under open/.
print("*" * 100)
submission_checker_py = os.path.join(upstream_path, 'v0.5', 'tools', 'submission', 'submission-checker.py')
# Workaround for a quirk in the official checker: validating the 'open'
# division requires 'closed/<submitter>/results' to exist on disk, and vice
# versa. Pre-create both results directories so either run succeeds.
root_dir = os.path.join(path, 'submissions_inference_0_5')
open_org_results_dir = os.path.join(root_dir, 'open', submitter, 'results')
closed_org_results_dir = os.path.join(root_dir, 'closed', submitter, 'results')
get_ipython().system('mkdir -p $open_org_results_dir')
get_ipython().system('mkdir -p $closed_org_results_dir')
# Invoke the checker, capturing its stdout as a list of lines.
checker_output_lines = get_ipython().getoutput('python3 $submission_checker_py --input $root_dir --submitter $submitter')
checker_log = "\n".join(checker_output_lines)
print(checker_log)
checker_log_name = 'compliance_checker_log.txt'
# Persist the checker log under both divisions' results directories.
for results_dir in (open_org_results_dir, closed_org_results_dir):
    checker_log_path = os.path.join(results_dir, checker_log_name)
    with open(checker_log_path, 'w') as checker_log_file:
        checker_log_file.write(checker_log)
    print(results_dir)
    print(' |_%s' % checker_log_name)