[ { "50.00 percentile latency (ns)": 341926540846, "90.00 percentile latency (ns)": 614575339639, "95.00 percentile latency (ns)": 648605798912, "97.00 percentile latency (ns)": 662080953027, "99.00 percentile latency (ns)": 675683030080, "99.90 percentile latency (ns)": 681853381597, "Max latency (ns)": 682335659421, "Mean latency (ns)": 341902879695, "Min duration satisfied": "Yes", "Min latency (ns)": 1225270205, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "PySUT", "Samples per second": 885.048, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "N/A", "accelerator_memory_configuration": "", "accelerator_model_name": "N/A", "accelerator_on-chip_memories": "", "accelerators_per_node": 0, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.844, "characteristics.good": 37922, "characteristics.samples_per_second": 885.048, "characteristics.samples_per_second.normalized_per_core": 73.754, "characteristics.samples_per_second.normalized_per_processor": 885.048, "characteristics.total": 50000, "ck_system": "aws-m5zn.6xlarge-tvm", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "open", "formal_model": "resnet50", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TVM v0.8-dev (DNNL)", "host_memory_capacity": "384GB", "host_memory_configuration": "6 slots / 32GB each / 2934 MT/s per socket", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "L1d cache: 384 KiB; L1i cache: 384 KiB; L2 cache: 12 MiB; L3 cache: 24.8 MiB", "host_processor_core_count": 12, "host_processor_frequency": "3800 MHz", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz", "host_processors_per_node": 1, "host_storage_capacity": "50 GiB", "host_storage_type": "SSD", "hw_notes": "https://aws.amazon.com/blogs/aws/new-ec2-m5zn-instances-fastest-intel-xeon-scalable-cpu-in-the-cloud ; vCPU: 24", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 610000, "max_query_count": 0, "min_duration (ms)": 610000, "min_query_count": 1, "mlperf_version": 1.1, "normalize_cores": 12, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/open/OctoML/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/open/OctoML/results/aws-m5zn.6xlarge-tvm", "number_of_nodes": 1, "operating_system": "Ubuntu 20.04.2 LTS (Linux-5.8.0-1041-aws-x86_64-with-glibc2.29)", "other_software_stack": "5.4.0-45-generic; Python 3.8.10; GCC 9.3.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 1024, "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "no", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 603900, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "https://zenodo.org/record/4589637/files/resnet50_INT8bit_quantized.pt", "status": "available", "submitter": "OctoML", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/OctoML", "sw_notes": "Automated by MLCommons Collective Knowledge v2.5.8 (https://github.com/mlcommons/ck) and the CK-powered MLPerf submission workflow (https://github.com/mlcommons/ck-mlops/tree/main/module/bench.mlperf.inference)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/aws-m5zn.6xlarge-tvm", "system_name": "Amazon EC2 (m5zn.6xlarge)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 900, "task": "image classification", "task2": "image classification", "total_cores": 12, "uid": "5b7d46fca689ab54", "use_accelerator": false, "weight_data_types": "int8", "weight_transformations": "no" }, { "50.00 percentile latency (ns)": 339060365460, "90.00 percentile latency (ns)": 609590646273, "95.00 percentile latency (ns)": 643481901225, "97.00 percentile latency (ns)": 656910592424, "99.00 percentile latency (ns)": 670442921740, "99.90 percentile latency (ns)": 676701658598, "Max latency (ns)": 676815643660, "Mean latency (ns)": 339704999355, "Min duration satisfied": "Yes", "Min latency (ns)": 2447993079, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "PySUT", "Samples per second": 2081.96, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "N/A", "accelerator_memory_configuration": "", "accelerator_model_name": "N/A", "accelerator_on-chip_memories": "", "accelerators_per_node": 0, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.844, "characteristics.good": 37922, "characteristics.samples_per_second": 2081.96, "characteristics.samples_per_second.normalized_per_core": 52.049, "characteristics.samples_per_second.normalized_per_processor": 1040.98, "characteristics.total": 50000, "ck_system": "gcp-n2-standard-80-tvm", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "open", "formal_model": "resnet50", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TVM v0.8-dev (DNNL)", "host_memory_capacity": "320GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "L1d cache: 1.3 MiB; L1i cache: 1.3 MiB; L2 cache: 40 MiB; L3 cache: 66 MiB", "host_processor_core_count": 20, "host_processor_frequency": "2800MHz", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) CPU (Intel Cascade Lake CPU platform)", "host_processors_per_node": 2, "host_storage_capacity": "150 GiB", "host_storage_type": "Balanced", "hw_notes": "vCPU: 80", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 610000, "max_query_count": 0, "min_duration (ms)": 610000, "min_query_count": 1, "mlperf_version": 1.1, "normalize_cores": 40, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/open/OctoML/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/open/OctoML/results/gcp-n2-standard-80-tvm", "number_of_nodes": 1, "operating_system": "Ubuntu 20.04.2 LTS (Linux-5.8.0-1038-gcp-x86_64-with-glibc2.29)", "other_software_stack": "5.4.0-45-generic; Python 3.8.10; GCC 9.3.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 1024, "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "no", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1409100, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "https://zenodo.org/record/4589637/files/resnet50_INT8bit_quantized.pt", "status": "available", "submitter": "OctoML", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/OctoML", "sw_notes": "Automated by MLCommons Collective Knowledge v2.5.8 (https://github.com/mlcommons/ck) and the CK-powered MLPerf submission workflow (https://github.com/mlcommons/ck-mlops/tree/main/module/bench.mlperf.inference)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/gcp-n2-standard-80-tvm", "system_name": "Google (Google Compute Engine) n2-standard-80", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 2100, "task": "image classification", "task2": "image classification", "total_cores": 40, "uid": "b322561d64783937", "use_accelerator": false, "weight_data_types": "int8", "weight_transformations": "no" } ]