# [MLPerf Inference Results v0.5](https://github.com/mlperf/inference/tree/master/v0.5)
## Automatic results table generation (c) [dividiti](http://dividiti.com/)

## Includes

In [None]:
import os
import re
import json
from pprint import pprint

In [None]:
import IPython as ip
import pandas as pd
import numpy as np
import matplotlib as mp
# import seaborn as sb

In [None]:
# Report the versions of the main packages this notebook was run with.
for package_label, package in [('IPython', ip), ('Pandas', pd), ('NumPy', np), ('Matplotlib', mp)]:
    print('%s version: %s' % (package_label, package.__version__))
# print ('Seaborn version: %s' % sb.__version__)

In [None]:
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline

# Figure defaults applied through matplotlib's global rcParams.
default_dpi = 300
default_fontsize = 12
mp.rcParams.update({
    'figure.dpi': default_dpi,
    'font.size': default_fontsize,
})

In [None]:
from IPython.display import Image, display
def display_in_full(df):
    """Display `df` with every row and every column visible.

    The pandas display limits are raised to the size of `df` only for the
    duration of this call, so the global `pd.options.display` settings are
    restored afterwards instead of being overwritten permanently.
    """
    with pd.option_context('display.max_columns', len(df.columns),
                           'display.max_rows', len(df.index)):
        display(df)

## Definitions

### Divisions

In [None]:
# Submission divisions; each one names a top-level directory under the results path.
divisions = ['closed', 'open']

### System JSON

In [None]:
# Default `system_desc_id.json` (to catch uninitialized descriptions).
# The key order below is the column order of the generated table.
# NOTE(review): "required" presumably marks mandatory fields and "" optional
# ones; only the keys are consulted when the table is built.
_REQUIRED = "required"
_BLANK = ""
default_system_json = {
    # Submission identity.
    "division": _REQUIRED,
    "submitter": _REQUIRED,
    "status": _REQUIRED,
    "system_name": _REQUIRED,

    # Host.
    "number_of_nodes": _REQUIRED,
    "host_processor_model_name": _REQUIRED,
    "host_processors_per_node": _REQUIRED,
    "host_processor_core_count": _REQUIRED,
    "host_processor_frequency": _BLANK,
    "host_processor_caches": _BLANK,
    "host_memory_configuration": _BLANK,
    "host_memory_capacity": _REQUIRED,
    "host_storage_capacity": _REQUIRED,
    "host_storage_type": _REQUIRED,
    "host_processor_interconnect": _BLANK,
    "host_networking": _BLANK,
    "host_networking_topology": _BLANK,

    # Accelerators and other hardware.
    "accelerators_per_node": _REQUIRED,
    "accelerator_model_name": _REQUIRED,
    "accelerator_frequency": _BLANK,
    "accelerator_host_interconnect": _BLANK,
    "accelerator_interconnect": _BLANK,
    "accelerator_interconnect_topology": _BLANK,
    "accelerator_memory_capacity": _REQUIRED,
    "accelerator_memory_configuration": _BLANK,
    "accelerator_on-chip_memories": _BLANK,
    "cooling": _BLANK,
    "hw_notes": _BLANK,

    # Software.
    "framework": _REQUIRED,
    "operating_system": _REQUIRED,
    "other_software_stack": _REQUIRED,
    "sw_notes": _BLANK,
}

### Maps for DataFrame construction

In [None]:
# Maps both the lower-case and the capitalised spelling of a division
# to its capitalised display name.
division_to_str = {
    spelling: display_name
    for display_name in ('Open', 'Closed')
    for spelling in (display_name.lower(), display_name)
}

## Code

In [None]:
# git clone https://github.com/mlperf/inference_results_v0.5
#results_path = '/home/anton/projects/mlperf/inference_results_v0.5'
# The location can be overridden without editing the notebook by setting the
# MLPERF_INFERENCE_RESULTS_PATH environment variable before starting Jupyter;
# when it is unset or empty, the original default below is used.
results_path = (os.environ.get('MLPERF_INFERENCE_RESULTS_PATH')
                or '/home/anton/projects/mlperf/submissions_inference_0_5')

In [None]:
def _list_subdirs(parent_dir):
    """Return the sorted names of the immediate subdirectories of `parent_dir`."""
    # os.listdir() returns entries in arbitrary order; sorting keeps the row
    # order of the generated table stable between runs and machines.
    return sorted(fn for fn in os.listdir(parent_dir) if os.path.isdir(os.path.join(parent_dir, fn)))


def _warn_on_key_mismatch(system_json, key, expected, system_json_name):
    """Print a warning if `key` is absent (or null) in `system_json` or differs from `expected`."""
    actual = system_json.get(key)
    if actual is None:
        print("[WARNING] no {} key in {}".format(key, system_json_name))
    elif actual != expected:
        # Only reached when the key is present, so a missing key yields one warning, not two.
        print("[WARNING] bad {} key in {}: {} != {}".format(key, system_json_name, actual, expected))


# Walk <results_path>/<division>/<submitter>/results/<system>/ and, for each
# system, load <results_path>/<division>/<submitter>/systems/<system>.json
# into a one-row DataFrame indexed by the JSON file name.
dfs = []
# FOR EACH division.
for division in divisions:
    #if division == 'open': continue # skip
    # FOR EACH submitter.
    submitters_dir = os.path.join(results_path, division)
    submitters = _list_subdirs(submitters_dir)
    for submitter in submitters:
        # Selectively filter out submitters.
        #all_submitters_closed = [ 'Alibaba', 'CentaurTechnology', 'DellEMC', 'dividiti', 'FuriosaAI', 'Google', 'Habana', 'Hailo', 'Intel', 'NVIDIA', 'Qualcomm', 'Tencent' ]
        #if division == 'closed' and submitter not in all_submitters_closed: continue
        #all_submitters_open = [ 'dividiti', 'Habana', 'Inspur', 'NVIDIA', 'Qualcomm' ]
        #if division == 'open' and submitter not in all_submitters_open: continue
        # FOR EACH system.
        results_dir = os.path.join(submitters_dir, submitter, 'results')
        systems = _list_subdirs(results_dir)
        for system in systems:
            system_json_name = system + '.json'
            system_json_path = os.path.join(submitters_dir, submitter, 'systems', system_json_name)
            # Read as UTF-8 explicitly rather than relying on the locale's default encoding.
            with open(system_json_path, encoding='utf-8') as system_json_file:
                system_json = json.load(system_json_file)

            # Validate that the description agrees with the directory it was found in.
            _warn_on_key_mismatch(system_json, 'division', division, system_json_name)
            _warn_on_key_mismatch(system_json, 'submitter', submitter, system_json_name)

            # Create DataFrame for each row of the final table.
            row = {
                'JSON': system_json_name,
                'URL': 'https://github.com/mlperf/inference_results_v0.5/blob/master/{}/{}/systems/{}'.format(
                    division, submitter, system_json_name),
            }
            # Keys absent from the description are made visible as 'NO_KEY'.
            for key in default_system_json:
                row[key] = system_json.get(key, 'NO_KEY')
            dfs.append(pd.DataFrame([row]).set_index(['JSON']))
        # END OF FOR EACH system
    # END OF FOR EACH submitter
# END OF FOR EACH division

# Concatenate all thus constructed DataFrames (i.e. stack on top of each other).
df = pd.concat(dfs)
display_in_full(df)

## Dump Excel (into separate sheets for different system keys)

In [None]:
# Create a Pandas Excel writer using XlsxWriter as the engine.
from pandas import ExcelWriter
# NB: Cannot use dot for 'v0.5', as otherwise the engine complains about an unknown extension.
xlsx_filename = 'MLPerf Inference v0_5 - Systems (Automatically Generated).xlsx'
xlsx_writer = ExcelWriter(xlsx_filename, engine='xlsxwriter', options={'strings_to_urls': True})
# Write the DataFrame to one sheet and the transposed DataFrame to another.
df.to_excel(xlsx_writer, sheet_name='ALL', index=True)
df.T.to_excel(xlsx_writer, sheet_name='ALL-TRANSPOSED', index=True)
# Write different system keys to different sheets.
for key in df.columns:
    if key == 'URL': continue
    print('*' * 100)
    print('* Key: %s' % (key))
    print('*' * 100)
    df_xlsx = df[[key,'URL']]
    # Limit sheet name to 31 symbols. Do not omit index (JSON).
    df_xlsx.to_excel(xlsx_writer, sheet_name='{}'.format(key[:31]), index=True)
    display_in_full(df_xlsx)
    print('')
xlsx_writer.save()
!cp "$xlsx_filename" ~/Downloads

## Display HTML with embedded links (TODO)

In [None]:
# df = df.set_index(['Submitter', 'System', 'Benchmark', 'Software'], append=True)
# def link_code(url): return '<a target="_blank" href="{}">Code</a>'.format(url)
# def link_details(url): return '<a target="_blank" href="{}">Details</a>'.format(url)
# display_in_full(df.style.format({'Code': link_code, 'Details': link_details}))