[
  {
    "50.00 percentile latency (ns)": 322967819756,
    "90.00 percentile latency (ns)": 581611144071,
    "95.00 percentile latency (ns)": 613957395552,
    "97.00 percentile latency (ns)": 626848549972,
    "99.00 percentile latency (ns)": 639740402823,
    "99.90 percentile latency (ns)": 645549075667,
    "Max latency (ns)": 646191094997,
    "Mean latency (ns)": 323004989827,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 127041716,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 19303.9,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.042,
    "characteristics.good": 38021,
    "characteristics.samples_per_second": 19303.9,
    "characteristics.samples_per_second.normalized_per_core": 19303.9,
    "characteristics.samples_per_second.normalized_per_processor": 19303.9,
    "characteristics.total": 50000,
    "ck_system": "A30x1_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30x1_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 12474000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x1_TRT_Triton",
    "system_name": "Gigabyte G482-Z54 (1x A30, TensorRT, Triton)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 18900,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "cc86494d9d539d11",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 349468277162,
    "90.00 percentile latency (ns)": 629729967119,
    "95.00 percentile latency (ns)": 664741887480,
    "97.00 percentile latency (ns)": 678797571622,
    "99.00 percentile latency (ns)": 692823334296,
    "99.90 percentile latency (ns)": 699083218914,
    "Max latency (ns)": 699836421702,
    "Mean latency (ns)": 349556832786,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 174203087,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 1092.08,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "Shared with host",
    "accelerator_memory_configuration": "SRAM",
    "accelerator_model_name": "NVIDIA Xavier NX",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.062,
    "characteristics.good": 38031,
    "characteristics.power": 19.967452857142877,
    "characteristics.power.normalized_per_core": 19.967452857142877,
    "characteristics.power.normalized_per_processor": 19.967452857142877,
    "characteristics.samples_per_second": 1092.08,
    "characteristics.samples_per_second.normalized_per_core": 1092.08,
    "characteristics.samples_per_second.normalized_per_processor": 1092.08,
    "characteristics.total": 50000,
    "ck_system": "Xavier_NX_TRT_MaxQ",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2",
    "host_memory_capacity": "8 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 6,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)",
    "host_processors_per_node": 1,
    "host_storage_capacity": "32 GB",
    "host_storage_type": "Micro SD Card",
    "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline scenario",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/Xavier_NX_TRT_MaxQ",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04",
    "other_hardware": "",
    "other_software_stack": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2, cuDNN 8.2.3, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 764280,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Xavier_NX_TRT_MaxQ",
    "system_name": "Auvidea JNX30 Xavier NX (MaxQ, TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 1158,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 6,
    "uid": "48dfcc163355ef63",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 326288470460,
    "90.00 percentile latency (ns)": 588914614587,
    "95.00 percentile latency (ns)": 631267055296,
    "97.00 percentile latency (ns)": 648217611579,
    "99.00 percentile latency (ns)": 665145599978,
    "99.90 percentile latency (ns)": 672762621373,
    "Max latency (ns)": 673583332665,
    "Mean latency (ns)": 327412630896,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 103262149,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "DLA_Triton_Server",
    "Samples per second": 1959.67,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "Shared with host",
    "accelerator_memory_configuration": "SRAM",
    "accelerator_model_name": "NVIDIA AGX Xavier",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.05,
    "characteristics.good": 38025,
    "characteristics.samples_per_second": 1959.67,
    "characteristics.samples_per_second.normalized_per_core": 1959.67,
    "characteristics.samples_per_second.normalized_per_processor": 1959.67,
    "characteristics.total": 50000,
    "ck_system": "AGX_Xavier_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2",
    "host_memory_capacity": "32 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 8,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)",
    "host_processors_per_node": 1,
    "host_storage_capacity": "32 GB",
    "host_storage_type": "eMMC 5.1",
    "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline scenario",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/AGX_Xavier_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04",
    "other_hardware": "",
    "other_software_stack": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2, cuDNN 8.2.3, DALI 0.31.0, Triton 21.07",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 1320000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/AGX_Xavier_TRT_Triton",
    "system_name": "NVIDIA Jetson AGX Xavier 32GB (TensorRT, Triton)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 2000,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 8,
    "uid": "8f7a636346641298",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 328284581222,
    "90.00 percentile latency (ns)": 591252771414,
    "95.00 percentile latency (ns)": 624087996103,
    "97.00 percentile latency (ns)": 637283799410,
    "99.00 percentile latency (ns)": 650417411957,
    "99.90 percentile latency (ns)": 656347525913,
    "Max latency (ns)": 656953993541,
    "Mean latency (ns)": 328290258942,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 123367292,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 32148.4,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe-40GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.042,
    "characteristics.good": 38021,
    "characteristics.samples_per_second": 32148.4,
    "characteristics.samples_per_second.normalized_per_core": 32148.4,
    "characteristics.samples_per_second.normalized_per_processor": 32148.4,
    "characteristics.total": 50000,
    "ck_system": "A100-PCIex1_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIex1_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 21120000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex1_TRT_Triton",
    "system_name": "Gigabyte G482-Z54 (1x A100-PCIe, TensorRT, Triton)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 32000,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "8054b694e71860bd",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 353258637116,
    "90.00 percentile latency (ns)": 635460962183,
    "95.00 percentile latency (ns)": 670678476430,
    "97.00 percentile latency (ns)": 684760042819,
    "99.00 percentile latency (ns)": 698855858777,
    "99.90 percentile latency (ns)": 705177525431,
    "Max latency (ns)": 705924832390,
    "Mean latency (ns)": 353178813958,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 65878989,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 2039.11,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "Shared with host",
    "accelerator_memory_configuration": "SRAM",
    "accelerator_model_name": "NVIDIA AGX Xavier",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.042,
    "characteristics.good": 38021,
    "characteristics.samples_per_second": 2039.11,
    "characteristics.samples_per_second.normalized_per_core": 2039.11,
    "characteristics.samples_per_second.normalized_per_processor": 2039.11,
    "characteristics.total": 50000,
    "ck_system": "AGX_Xavier_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2",
    "host_memory_capacity": "32 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 8,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)",
    "host_processors_per_node": 1,
    "host_storage_capacity": "32 GB",
    "host_storage_type": "eMMC 5.1",
    "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline scenario",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/AGX_Xavier_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04",
    "other_hardware": "",
    "other_software_stack": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2, cuDNN 8.2.3, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 1439460,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/AGX_Xavier_TRT",
    "system_name": "NVIDIA Jetson AGX Xavier 32GB (TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 2181,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 8,
    "uid": "b45f1bffd5baca08",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 327872759360,
    "90.00 percentile latency (ns)": 590099425137,
    "95.00 percentile latency (ns)": 622907204589,
    "97.00 percentile latency (ns)": 635969997780,
    "99.00 percentile latency (ns)": 649081918183,
    "99.90 percentile latency (ns)": 655014590489,
    "Max latency (ns)": 655634610719,
    "Mean latency (ns)": 327854903878,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 52498870,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 5133.96,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB (1x1g.10gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.042,
    "characteristics.good": 38021,
    "characteristics.samples_per_second": 5133.96,
    "characteristics.samples_per_second.normalized_per_core": 5133.96,
    "characteristics.samples_per_second.normalized_per_processor": 5133.96,
    "characteristics.total": 50000,
    "ck_system": "DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 3366000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_Triton",
    "system_name": "NVIDIA DGX A100 (1x A100-SXM-80GB-MIG-1x1g.10gb, TensorRT, Triton)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 5100,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "d9e32156740e9a25",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 330496946058,
    "90.00 percentile latency (ns)": 595496193193,
    "95.00 percentile latency (ns)": 628610482111,
    "97.00 percentile latency (ns)": 641883721845,
    "99.00 percentile latency (ns)": 655099294866,
    "99.90 percentile latency (ns)": 661042222001,
    "Max latency (ns)": 661696977026,
    "Mean latency (ns)": 330569082075,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 217468238,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 36905.1,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.042,
    "characteristics.good": 38021,
    "characteristics.samples_per_second": 36905.1,
    "characteristics.samples_per_second.normalized_per_core": 36905.1,
    "characteristics.samples_per_second.normalized_per_processor": 36905.1,
    "characteristics.total": 50000,
    "ck_system": "A100-PCIe-80GBx1_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIe-80GBx1_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 24420000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIe-80GBx1_TRT",
    "system_name": "Gigabyte G482-Z54 (1x A100-PCIe-80GB, TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 37000,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "74d17296b24b30b2",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 305016996243,
    "90.00 percentile latency (ns)": 548933228747,
    "95.00 percentile latency (ns)": 579375238446,
    "97.00 percentile latency (ns)": 591513352082,
    "99.00 percentile latency (ns)": 603854658781,
    "99.90 percentile latency (ns)": 609204386915,
    "Max latency (ns)": 609790033692,
    "Mean latency (ns)": 305002523442,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 384763347,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 39830.1,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.042,
    "characteristics.good": 38021,
    "characteristics.samples_per_second": 39830.1,
    "characteristics.samples_per_second.normalized_per_core": 39830.1,
    "characteristics.samples_per_second.normalized_per_processor": 39830.1,
    "characteristics.total": 50000,
    "ck_system": "DGX-A100_A100-SXM-80GBx1_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx1_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 24288000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx1_TRT",
    "system_name": "NVIDIA DGX A100 (1x A100-SXM-80GB, TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 36800,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "854422c66d74f59a",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 291440132056,
    "90.00 percentile latency (ns)": 553528013308,
    "95.00 percentile latency (ns)": 590594098771,
    "97.00 percentile latency (ns)": 605428964925,
    "99.00 percentile latency (ns)": 620263278138,
    "99.90 percentile latency (ns)": 626929982400,
    "Max latency (ns)": 627648946065,
    "Mean latency (ns)": 297869154774,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 335371536,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "DLA_Triton_Server",
    "Samples per second": 1156.7,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "Shared with host",
    "accelerator_memory_configuration": "SRAM",
    "accelerator_model_name": "NVIDIA Xavier NX",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.05,
    "characteristics.good": 38025,
    "characteristics.samples_per_second": 1156.7,
    "characteristics.samples_per_second.normalized_per_core": 1156.7,
    "characteristics.samples_per_second.normalized_per_processor": 1156.7,
    "characteristics.total": 50000,
    "ck_system": "Xavier_NX_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2",
    "host_memory_capacity": "8 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 6,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)",
    "host_processors_per_node": 1,
    "host_storage_capacity": "32 GB",
    "host_storage_type": "Micro SD Card",
    "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline scenario",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/Xavier_NX_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04",
    "other_hardware": "",
    "other_software_stack": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2, cuDNN 8.2.3, DALI 0.31.0, Triton 21.07",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 726000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Xavier_NX_TRT_Triton",
    "system_name": "NVIDIA Jetson Xavier NX (TensorRT, Triton)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 1100,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 6,
    "uid": "3479da77377bc69b",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 325641686834,
    "90.00 percentile latency (ns)": 586166374659,
    "95.00 percentile latency (ns)": 618718734709,
    "97.00 percentile latency (ns)": 631784218685,
    "99.00 percentile latency (ns)": 644765108714,
    "99.90 percentile latency (ns)": 650634681158,
    "Max latency (ns)": 651243386660,
    "Mean latency (ns)": 325688808224,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 131489983,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 4965.89,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30 (1x1g.6gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.042,
    "characteristics.good": 38021,
    "characteristics.samples_per_second": 4965.89,
    "characteristics.samples_per_second.normalized_per_core": 4965.89,
    "characteristics.samples_per_second.normalized_per_processor": 4965.89,
    "characteristics.total": 50000,
    "ck_system": "A30-MIG_1x1g.6gb_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30-MIG_1x1g.6gb_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 3234000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30-MIG_1x1g.6gb_TRT",
    "system_name": "Gigabyte G482-Z54 (1x A30-MIG-1x1g.6gb, TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 4900,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "f0416fa8eaaa72e0",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 335346945065,
    "90.00 percentile latency (ns)": 603305056926,
    "95.00 percentile latency (ns)": 636810784375,
    "97.00 percentile latency (ns)": 650206943937,
    "99.00 percentile latency (ns)": 663611451324,
    "99.90 percentile latency (ns)": 669648558129,
    "Max latency (ns)": 670282144159,
    "Mean latency (ns)": 335333693261,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 157765404,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 1506.53,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "Shared with host",
    "accelerator_memory_configuration": "SRAM",
    "accelerator_model_name": "NVIDIA AGX Xavier",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 75.99,
    "characteristics.good": 37995,
    "characteristics.power": 25.240244776119393,
    "characteristics.power.normalized_per_core": 25.240244776119393,
    "characteristics.power.normalized_per_processor": 25.240244776119393,
    "characteristics.samples_per_second": 1506.53,
    "characteristics.samples_per_second.normalized_per_core": 1506.53,
    "characteristics.samples_per_second.normalized_per_processor": 1506.53,
    "characteristics.total": 50000,
    "ck_system": "AGX_Xavier_TRT_MaxQ",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2",
    "host_memory_capacity": "32 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 8,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)",
    "host_processors_per_node": 1,
    "host_storage_capacity": "32 GB",
    "host_storage_type": "eMMC 5.1",
    "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline scenario",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/AGX_Xavier_TRT_MaxQ",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04",
    "other_hardware": "",
    "other_software_stack": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2, cuDNN 8.2.3, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 1009800,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/AGX_Xavier_TRT_MaxQ",
    "system_name": "Auvidea X220-LC AGX Xavier 32GB (MaxQ, TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 1530,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 8,
    "uid": "bf428cb0fdda363e",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 330468785751,
    "90.00 percentile latency (ns)": 595352871940,
    "95.00 percentile latency (ns)": 628455726712,
    "97.00 percentile latency (ns)": 641727318295,
    "99.00 percentile latency (ns)": 654947293450,
    "99.90 percentile latency (ns)": 660889549630,
    "Max latency (ns)": 661587711846,
    "Mean latency (ns)": 330537834723,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 74421072,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 36911.2,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.042,
    "characteristics.good": 38021,
    "characteristics.samples_per_second": 36911.2,
    "characteristics.samples_per_second.normalized_per_core": 36911.2,
    "characteristics.samples_per_second.normalized_per_processor": 36911.2,
    "characteristics.total": 50000,
    "ck_system": "A100-PCIe-80GBx1_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIe-80GBx1_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 24420000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIe-80GBx1_TRT_Triton",
    "system_name": "Gigabyte G482-Z54 (1x A100-PCIe-80GB, TensorRT, Triton)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 37000,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "d5cc484889286ae6",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 310635832982,
    "90.00 percentile latency (ns)": 558943385836,
    "95.00 percentile latency (ns)": 589912904050,
    "97.00 percentile latency (ns)": 602355906049,
    "99.00 percentile latency (ns)": 614800502270,
    "99.90 percentile latency (ns)": 620358270719,
    "Max latency (ns)": 620945049877,
    "Mean latency (ns)": 310615449448,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 82494137,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 13817.6,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA A10",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.03,
    "characteristics.good": 38015,
    "characteristics.samples_per_second": 13817.6,
    "characteristics.samples_per_second.normalized_per_core": 13817.6,
    "characteristics.samples_per_second.normalized_per_processor": 13817.6,
    "characteristics.total": 50000,
    "ck_system": "A10x1_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A10x1_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 8580000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x1_TRT_Triton",
    "system_name": "Supermicro 4029GP-TRT-OTO-28 (1x A10, TensorRT, Triton)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 13000,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 56,
    "uid": "4c7abc4f55026aaf",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 305416316387,
    "90.00 percentile latency (ns)": 549788000721,
    "95.00 percentile latency (ns)": 580335217881,
    "97.00 percentile latency (ns)": 592542799302,
    "99.00 percentile latency (ns)": 604752061414,
    "99.90 percentile latency (ns)": 610263600972,
    "Max latency (ns)": 610850202110,
    "Mean latency (ns)": 305428390107,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 87278663,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 39761,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.042,
    "characteristics.good": 38021,
    "characteristics.samples_per_second": 39761,
    "characteristics.samples_per_second.normalized_per_core": 39761.0,
    "characteristics.samples_per_second.normalized_per_processor": 39761.0,
    "characteristics.total": 50000,
    "ck_system": "DGX-A100_A100-SXM-80GBx1_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx1_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 24288000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx1_TRT_Triton",
    "system_name": "NVIDIA DGX A100 (1x A100-SXM-80GB, TensorRT, Triton)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 36800,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "fb6319d962d41443",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 310813514942,
    "90.00 percentile latency (ns)": 559428727743,
    "95.00 percentile latency (ns)": 590434364590,
    "97.00 percentile latency (ns)": 602893100358,
    "99.00 percentile latency (ns)": 615354502280,
    "99.90 percentile latency (ns)": 620916724785,
    "Max latency (ns)": 621503988450,
    "Mean latency (ns)": 310830312799,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 186420392,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 13805.2,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA A10",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.03,
    "characteristics.good": 38015,
    "characteristics.samples_per_second": 13805.2,
    "characteristics.samples_per_second.normalized_per_core": 13805.2,
    "characteristics.samples_per_second.normalized_per_processor": 13805.2,
    "characteristics.total": 50000,
    "ck_system": "A10x1_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A10x1_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 8580000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x1_TRT",
    "system_name": "Supermicro 4029GP-TRT-OTO-28 (1x A10, TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 13000,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 56,
    "uid": "2fd910e581c1f45b",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 328974940263,
    "90.00 percentile latency (ns)": 592026562371,
    "95.00 percentile latency (ns)": 624811427781,
    "97.00 percentile latency (ns)": 637932953264,
    "99.00 percentile latency (ns)": 651226267188,
    "99.90 percentile latency (ns)": 657113460441,
    "Max latency (ns)": 657650657247,
    "Mean latency (ns)": 328902513748,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 301240215,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 18265,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.042,
    "characteristics.good": 38021,
    "characteristics.samples_per_second": 18265,
    "characteristics.samples_per_second.normalized_per_core": 18265.0,
    "characteristics.samples_per_second.normalized_per_processor": 18265.0,
    "characteristics.total": 50000,
    "ck_system": "A30x1_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30x1_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 12012000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x1_TRT",
    "system_name": "Gigabyte G482-Z54 (1x A30, TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 18200,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "bc921a82c3ee4bd7",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 316686070606,
    "90.00 percentile latency (ns)": 569861695580,
    "95.00 percentile latency (ns)": 601547107136,
    "97.00 percentile latency (ns)": 614162450789,
    "99.00 percentile latency (ns)": 626874650552,
    "99.90 percentile latency (ns)": 632557726894,
    "Max latency (ns)": 633159375455,
    "Mean latency (ns)": 316642232718,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 116792421,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 5316.2,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB (1x1g.10gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.042,
    "characteristics.good": 38021,
    "characteristics.samples_per_second": 5316.2,
    "characteristics.samples_per_second.normalized_per_core": 5316.2,
    "characteristics.samples_per_second.normalized_per_processor": 5316.2,
    "characteristics.total": 50000,
    "ck_system": "DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 3366000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT",
    "system_name": "NVIDIA DGX A100 (1x A100-SXM-80GB-MIG-1x1g.10gb, TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 5100,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "d2e3b060f4f0292b",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 313026382610,
    "90.00 percentile latency (ns)": 563391048535,
    "95.00 percentile latency (ns)": 594659721149,
    "97.00 percentile latency (ns)": 607211681184,
    "99.00 percentile latency (ns)": 619697849775,
    "99.90 percentile latency (ns)": 625345383246,
    "Max latency (ns)": 625931508388,
    "Mean latency (ns)": 313057351521,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 113948517,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 5166.7,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30 (1x1g.6gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.042,
    "characteristics.good": 38021,
    "characteristics.samples_per_second": 5166.7,
    "characteristics.samples_per_second.normalized_per_core": 5166.7,
    "characteristics.samples_per_second.normalized_per_processor": 5166.7,
    "characteristics.total": 50000,
    "ck_system": "A30-MIG_1x1g.6gb_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30-MIG_1x1g.6gb_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 3234000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30-MIG_1x1g.6gb_TRT_Triton",
    "system_name": "Gigabyte G482-Z54 (1x A30-MIG-1x1g.6gb, TensorRT, Triton)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 4900,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "ab3fb3493781ee60",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 322064794338,
    "90.00 percentile latency (ns)": 579908438152,
    "95.00 percentile latency (ns)": 612096897437,
    "97.00 percentile latency (ns)": 625032248202,
    "99.00 percentile latency (ns)": 637910885494,
    "99.90 percentile latency (ns)": 643724337808,
    "Max latency (ns)": 644319062549,
    "Mean latency (ns)": 322068308495,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 244408380,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 32778.8,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe-40GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.042,
    "characteristics.good": 38021,
    "characteristics.samples_per_second": 32778.8,
    "characteristics.samples_per_second.normalized_per_core": 32778.8,
    "characteristics.samples_per_second.normalized_per_processor": 32778.8,
    "characteristics.total": 50000,
    "ck_system": "A100-PCIex1_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIex1_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 21120000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex1_TRT",
    "system_name": "Gigabyte G482-Z54 (1x A100-PCIe, TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 32000,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "f5d2323114c22889",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 306930141746,
    "90.00 percentile latency (ns)": 552464208708,
    "95.00 percentile latency (ns)": 583131704588,
    "97.00 percentile latency (ns)": 595424284378,
    "99.00 percentile latency (ns)": 607677499518,
    "99.90 percentile latency (ns)": 613244782121,
    "Max latency (ns)": 613830033056,
    "Mean latency (ns)": 306972158930,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 153883401,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 1245.1,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "Shared with host",
    "accelerator_memory_configuration": "SRAM",
    "accelerator_model_name": "NVIDIA Xavier NX",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.002,
    "characteristics.good": 38001,
    "characteristics.samples_per_second": 1245.1,
    "characteristics.samples_per_second.normalized_per_core": 1245.1,
    "characteristics.samples_per_second.normalized_per_processor": 1245.1,
    "characteristics.total": 50000,
    "ck_system": "Xavier_NX_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2",
    "host_memory_capacity": "8 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 6,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)",
    "host_processors_per_node": 1,
    "host_storage_capacity": "32 GB",
    "host_storage_type": "Micro SD Card",
    "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline scenario",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/Xavier_NX_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04",
    "other_hardware": "",
    "other_software_stack": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2, cuDNN 8.2.3, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 764280,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Xavier_NX_TRT",
    "system_name": "NVIDIA Jetson Xavier NX (TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 1158,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 6,
    "uid": "9242f2142474d65b",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 351462896747,
    "90.00 percentile latency (ns)": 632711750174,
    "95.00 percentile latency (ns)": 667950963087,
    "97.00 percentile latency (ns)": 681981011585,
    "99.00 percentile latency (ns)": 696015262434,
    "99.90 percentile latency (ns)": 702334702893,
    "Max latency (ns)": 703035660372,
    "Mean latency (ns)": 351466255336,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 137572234,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 17743.1,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.03,
    "characteristics.good": 38015,
    "characteristics.samples_per_second": 17743.1,
    "characteristics.samples_per_second.normalized_per_core": 17743.1,
    "characteristics.samples_per_second.normalized_per_processor": 17743.1,
    "characteristics.total": 50000,
    "ck_system": "A30x1_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A30x1_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.46, DALI 0.30.0, Triton 21.02",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 12474000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "preview",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x1_TRT_Triton",
    "system_name": "Gigabyte G482-Z54 (1x A30, TensorRT, Triton)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 18900,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "088957a728bd443c",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 351474152001,
    "90.00 percentile latency (ns)": 632512736535,
    "95.00 percentile latency (ns)": 667623637550,
    "97.00 percentile latency (ns)": 681660495379,
    "99.00 percentile latency (ns)": 695670043790,
    "99.90 percentile latency (ns)": 702012637829,
    "Max latency (ns)": 702671319689,
    "Mean latency (ns)": 351446696742,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 201415526,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 1087.68,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "Shared with host",
    "accelerator_memory_configuration": "SRAM",
    "accelerator_model_name": "NVIDIA Xavier NX",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.056,
    "characteristics.good": 38028,
    "characteristics.power": 19.697395448079668,
    "characteristics.power.normalized_per_core": 19.697395448079668,
    "characteristics.power.normalized_per_processor": 19.697395448079668,
    "characteristics.samples_per_second": 1087.68,
    "characteristics.samples_per_second.normalized_per_core": 1087.68,
    "characteristics.samples_per_second.normalized_per_processor": 1087.68,
    "characteristics.total": 50000,
    "ck_system": "Xavier_NX_TRT_MaxQ",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "21.03 Jetson CUDA-X AI Developer Preview, TensorRT 7.2.3, CUDA 10.2",
    "host_memory_capacity": "8 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 6,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)",
    "host_processors_per_node": 1,
    "host_storage_capacity": "32 GB",
    "host_storage_type": "Micro SD Card",
    "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline and MultiStream scenarios",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/Xavier_NX_TRT_MaxQ",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "21.03 Jetson CUDA-X AI Developer Preview, TensorRT 7.2.3, CUDA 10.2, cuDNN 8.0.0, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 764280,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Xavier_NX_TRT_MaxQ",
    "system_name": "NVIDIA Jetson Xavier NX (MaxQ, TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 1158,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 6,
    "uid": "0ae50e22414c6a72",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 385387293599,
    "90.00 percentile latency (ns)": 694620171331,
    "95.00 percentile latency (ns)": 733282194189,
    "97.00 percentile latency (ns)": 748737047233,
    "99.00 percentile latency (ns)": 764184379349,
    "99.90 percentile latency (ns)": 771163887138,
    "Max latency (ns)": 771904835642,
    "Mean latency (ns)": 385490698525,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 131809238,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 31465,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe-40GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.03,
    "characteristics.good": 38015,
    "characteristics.samples_per_second": 31465,
    "characteristics.samples_per_second.normalized_per_core": 31465.0,
    "characteristics.samples_per_second.normalized_per_processor": 31465.0,
    "characteristics.total": 50000,
    "ck_system": "A100-PCIex1_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A100-PCIex1_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0, Triton 21.02",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 24288000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex1_TRT_Triton",
    "system_name": "Gigabyte G482-Z54 (1x A100-PCIe, TensorRT, Triton)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 36800,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "7ccdf8d35d40420f",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 347633525564,
    "90.00 percentile latency (ns)": 625493747051,
    "95.00 percentile latency (ns)": 660132023394,
    "97.00 percentile latency (ns)": 673971062890,
    "99.00 percentile latency (ns)": 687925655654,
    "99.90 percentile latency (ns)": 694191665306,
    "Max latency (ns)": 694885933720,
    "Mean latency (ns)": 347619529025,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 69029256,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 2071.51,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "Shared with host",
    "accelerator_memory_configuration": "SRAM",
    "accelerator_model_name": "NVIDIA AGX Xavier",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.074,
    "characteristics.good": 38037,
    "characteristics.samples_per_second": 2071.51,
    "characteristics.samples_per_second.normalized_per_core": 2071.51,
    "characteristics.samples_per_second.normalized_per_processor": 2071.51,
    "characteristics.total": 50000,
    "ck_system": "AGX_Xavier_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "21.03 Jetson CUDA-X AI Developer Preview, TensorRT 7.2.3, CUDA 10.2",
    "host_memory_capacity": "32 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 8,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)",
    "host_processors_per_node": 1,
    "host_storage_capacity": "32 GB",
    "host_storage_type": "eMMC 5.1",
    "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline and MultiStream scenarios",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/AGX_Xavier_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "21.03 Jetson CUDA-X AI Developer Preview, TensorRT 7.2.3, CUDA 10.2, cuDNN 8.0.0, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 1439460,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/AGX_Xavier_TRT",
    "system_name": "NVIDIA Jetson AGX Xavier 32GB (TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 2181,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 8,
    "uid": "5026ec8d0a7f44d2",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 319633640398,
    "90.00 percentile latency (ns)": 575242056333,
    "95.00 percentile latency (ns)": 607135182102,
    "97.00 percentile latency (ns)": 619844492931,
    "99.00 percentile latency (ns)": 632778921274,
    "99.90 percentile latency (ns)": 638382136860,
    "Max latency (ns)": 638995757223,
    "Mean latency (ns)": 319637175007,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 431233807,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 38009.6,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.03,
    "characteristics.good": 38015,
    "characteristics.samples_per_second": 38009.6,
    "characteristics.samples_per_second.normalized_per_core": 38009.6,
    "characteristics.samples_per_second.normalized_per_processor": 38009.6,
    "characteristics.total": 50000,
    "ck_system": "DGX-A100_A100-SXM-80GBx1_TRT_edge",
    "ck_used": true,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 120,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx1_TRT_edge",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 24288000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx1_TRT_edge",
    "system_name": "Microsoft Corporation 7.0 (Virtual Machine)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 36800,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 240,
    "uid": "78aba7f8f73bbc56",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 333259113598,
    "90.00 percentile latency (ns)": 599783376511,
    "95.00 percentile latency (ns)": 633129884119,
    "97.00 percentile latency (ns)": 646409518593,
    "99.00 percentile latency (ns)": 659738273915,
    "99.90 percentile latency (ns)": 665769341267,
    "Max latency (ns)": 666399739872,
    "Mean latency (ns)": 333238077054,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 52996588,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 5051.02,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB (1x1g.10gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.03,
    "characteristics.good": 38015,
    "characteristics.samples_per_second": 5051.02,
    "characteristics.samples_per_second.normalized_per_core": 5051.02,
    "characteristics.samples_per_second.normalized_per_processor": 5051.02,
    "characteristics.total": 50000,
    "ck_system": "DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0, Triton 21.02",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 3366000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_Triton",
    "system_name": "NVIDIA DGX-A100 (1x A100-SXM-80GB-MIG-1x1g.10gb, TensorRT, Triton)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 5100,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "fb1d93b26450e99a",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 359795986750,
    "90.00 percentile latency (ns)": 647400324820,
    "95.00 percentile latency (ns)": 683379334067,
    "97.00 percentile latency (ns)": 697778700990,
    "99.00 percentile latency (ns)": 712163619656,
    "99.90 percentile latency (ns)": 718640631114,
    "Max latency (ns)": 719325342693,
    "Mean latency (ns)": 359748305745,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 141929075,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 1403.82,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "Shared with host",
    "accelerator_memory_configuration": "SRAM",
    "accelerator_model_name": "NVIDIA AGX Xavier",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 75.942,
    "characteristics.good": 37971,
    "characteristics.power": 25.204840055632843,
    "characteristics.power.normalized_per_core": 25.204840055632843,
    "characteristics.power.normalized_per_processor": 25.204840055632843,
    "characteristics.samples_per_second": 1403.82,
    "characteristics.samples_per_second.normalized_per_core": 1403.82,
    "characteristics.samples_per_second.normalized_per_processor": 1403.82,
    "characteristics.total": 50000,
    "ck_system": "AGX_Xavier_TRT_MaxQ",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "21.03 Jetson CUDA-X AI Developer Preview, TensorRT 7.2.3, CUDA 10.2",
    "host_memory_capacity": "32 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 8,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)",
    "host_processors_per_node": 1,
    "host_storage_capacity": "32 GB",
    "host_storage_type": "eMMC 5.1",
    "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline and MultiStream scenarios",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/AGX_Xavier_TRT_MaxQ",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "21.03 Jetson CUDA-X AI Developer Preview, TensorRT 7.2.3, CUDA 10.2, cuDNN 8.0.0, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 1009800,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/AGX_Xavier_TRT_MaxQ",
    "system_name": "NVIDIA Jetson AGX Xavier 32GB (MaxQ, TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 1530,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 8,
    "uid": "b159e0d5aa356b82",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 330103498059,
    "90.00 percentile latency (ns)": 594138200769,
    "95.00 percentile latency (ns)": 627148064306,
    "97.00 percentile latency (ns)": 640362990917,
    "99.00 percentile latency (ns)": 653578286906,
    "99.90 percentile latency (ns)": 659532311093,
    "Max latency (ns)": 660150780953,
    "Mean latency (ns)": 330119998291,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 70909323,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 4049.07,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30 (1x1g.3gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.03,
    "characteristics.good": 38015,
    "characteristics.samples_per_second": 4049.07,
    "characteristics.samples_per_second.normalized_per_core": 4049.07,
    "characteristics.samples_per_second.normalized_per_processor": 4049.07,
    "characteristics.total": 50000,
    "ck_system": "A30-MIG_1x1g.3gb_TRT",
    "ck_used": true,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 120,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A30-MIG_1x1g.3gb_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.46, DALI 0.30.0; GCC 7.5.0; Python 3.7.10",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 2673000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "preview",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30-MIG_1x1g.3gb_TRT",
    "system_name": "Microsoft Corporation 7.0 (Virtual Machine)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 4050,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 240,
    "uid": "588bb461d3ec9c54",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 322358998321,
    "90.00 percentile latency (ns)": 581112417823,
    "95.00 percentile latency (ns)": 613386079132,
    "97.00 percentile latency (ns)": 626353383600,
    "99.00 percentile latency (ns)": 639321866318,
    "99.90 percentile latency (ns)": 645112939687,
    "Max latency (ns)": 645723538232,
    "Mean latency (ns)": 322466184189,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 84497360,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 13287.4,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA A10",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.03,
    "characteristics.good": 38015,
    "characteristics.samples_per_second": 13287.4,
    "characteristics.samples_per_second.normalized_per_core": 13287.4,
    "characteristics.samples_per_second.normalized_per_processor": 13287.4,
    "characteristics.total": 50000,
    "ck_system": "A10x1_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A10x1_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0, Triton 21.02",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 8580000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "preview",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x1_TRT_Triton",
    "system_name": "Supermicro 4029GP-TRT-OTO-28 (1x A10, TensorRT, Triton)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 13000,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 56,
    "uid": "d9d881f604683875",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 319453417540,
    "90.00 percentile latency (ns)": 575090323108,
    "95.00 percentile latency (ns)": 607050125337,
    "97.00 percentile latency (ns)": 619817296654,
    "99.00 percentile latency (ns)": 632585932327,
    "99.90 percentile latency (ns)": 638350012152,
    "Max latency (ns)": 638963355886,
    "Mean latency (ns)": 319466977522,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 90520472,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 38011.6,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.03,
    "characteristics.good": 38015,
    "characteristics.samples_per_second": 38011.6,
    "characteristics.samples_per_second.normalized_per_core": 38011.6,
    "characteristics.samples_per_second.normalized_per_processor": 38011.6,
    "characteristics.total": 50000,
    "ck_system": "DGX-A100_A100-SXM-80GBx1_TRT_Triton_edge",
    "ck_used": true,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 120,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx1_TRT_Triton_edge",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0, Triton 21.02; GCC 7.5.0; Python 3.7.10",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 24288000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx1_TRT_Triton_edge",
    "system_name": "Microsoft Corporation 7.0 (Virtual Machine)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 36800,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 240,
    "uid": "c7542058d1628a0e",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 322651684294,
    "90.00 percentile latency (ns)": 581618445085,
    "95.00 percentile latency (ns)": 613917015909,
    "97.00 percentile latency (ns)": 626897189615,
    "99.00 percentile latency (ns)": 639873975224,
    "99.90 percentile latency (ns)": 645668715420,
    "Max latency (ns)": 646281200290,
    "Mean latency (ns)": 322772677752,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 192028133,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 13276,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA A10",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.03,
    "characteristics.good": 38015,
    "characteristics.samples_per_second": 13276,
    "characteristics.samples_per_second.normalized_per_core": 13276.0,
    "characteristics.samples_per_second.normalized_per_processor": 13276.0,
    "characteristics.total": 50000,
    "ck_system": "A10x1_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A10x1_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 8580000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "preview",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x1_TRT",
    "system_name": "Supermicro 4029GP-TRT-OTO-28 (1x A10, TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 13000,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 56,
    "uid": "1a8e5524e47e18f6",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 343713652714,
    "90.00 percentile latency (ns)": 618487370341,
    "95.00 percentile latency (ns)": 652750871925,
    "97.00 percentile latency (ns)": 666454812223,
    "99.00 percentile latency (ns)": 680334144311,
    "99.90 percentile latency (ns)": 686482783588,
    "Max latency (ns)": 687042061363,
    "Mean latency (ns)": 343612980214,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 329693970,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 17483.6,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.03,
    "characteristics.good": 38015,
    "characteristics.samples_per_second": 17483.6,
    "characteristics.samples_per_second.normalized_per_core": 17483.6,
    "characteristics.samples_per_second.normalized_per_processor": 17483.6,
    "characteristics.total": 50000,
    "ck_system": "A30x1_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A30x1_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.46, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 12012000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "preview",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x1_TRT",
    "system_name": "Gigabyte G482-Z54 (1x A30, TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 18200,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "d45d69cc3d1d2428",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 330818236190,
    "90.00 percentile latency (ns)": 595448633336,
    "95.00 percentile latency (ns)": 628533907593,
    "97.00 percentile latency (ns)": 641776416499,
    "99.00 percentile latency (ns)": 655018803476,
    "99.90 percentile latency (ns)": 660985766011,
    "Max latency (ns)": 661605457357,
    "Mean latency (ns)": 330839596872,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 68592672,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 4040.17,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30 (1x1g.3gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.03,
    "characteristics.good": 38015,
    "characteristics.samples_per_second": 4040.17,
    "characteristics.samples_per_second.normalized_per_core": 4040.17,
    "characteristics.samples_per_second.normalized_per_processor": 4040.17,
    "characteristics.total": 50000,
    "ck_system": "A30-MIG_1x1g.3gb_TRT_Triton",
    "ck_used": true,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 120,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A30-MIG_1x1g.3gb_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.46, DALI 0.30.0, Triton 21.02; GCC 7.5.0; Python 3.7.10",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 2673000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "preview",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30-MIG_1x1g.3gb_TRT_Triton",
    "system_name": "Microsoft Corporation 7.0 (Virtual Machine)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 4050,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 240,
    "uid": "c91f645a879d733f",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 325544107864,
    "90.00 percentile latency (ns)": 585854500469,
    "95.00 percentile latency (ns)": 618436246369,
    "97.00 percentile latency (ns)": 631407670675,
    "99.00 percentile latency (ns)": 644478740895,
    "99.90 percentile latency (ns)": 650322197744,
    "Max latency (ns)": 650939350702,
    "Mean latency (ns)": 325514934710,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 123821126,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 5170.99,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB (1x1g.10gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.03,
    "characteristics.good": 38015,
    "characteristics.samples_per_second": 5170.99,
    "characteristics.samples_per_second.normalized_per_core": 5170.99,
    "characteristics.samples_per_second.normalized_per_processor": 5170.99,
    "characteristics.total": 50000,
    "ck_system": "DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 3366000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT",
    "system_name": "NVIDIA DGX-A100 (1x A100-SXM-80GB-MIG-1x1g.10gb, TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 5100,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "787114f61be67af2",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 390704595246,
    "90.00 percentile latency (ns)": 703745866595,
    "95.00 percentile latency (ns)": 742806909382,
    "97.00 percentile latency (ns)": 758417278824,
    "99.00 percentile latency (ns)": 774272925773,
    "99.90 percentile latency (ns)": 781131661243,
    "Max latency (ns)": 781881456198,
    "Mean latency (ns)": 390777143303,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 515177038,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 31063.5,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe-40GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.03,
    "characteristics.good": 38015,
    "characteristics.samples_per_second": 31063.5,
    "characteristics.samples_per_second.normalized_per_core": 31063.5,
    "characteristics.samples_per_second.normalized_per_processor": 31063.5,
    "characteristics.total": 50000,
    "ck_system": "A100-PCIex1_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A100-PCIex1_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 24288000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex1_TRT",
    "system_name": "Gigabyte G482-Z54 (1x A100-PCIe, TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 36800,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "338fc3a40008e246",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 328123766723,
    "90.00 percentile latency (ns)": 590607789167,
    "95.00 percentile latency (ns)": 623393529243,
    "97.00 percentile latency (ns)": 636533771976,
    "99.00 percentile latency (ns)": 649604984238,
    "99.90 percentile latency (ns)": 655551053365,
    "Max latency (ns)": 656182507951,
    "Mean latency (ns)": 328134807863,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 100736136,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 1164.74,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "Shared with host",
    "accelerator_memory_configuration": "SRAM",
    "accelerator_model_name": "NVIDIA Xavier NX",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 76.008,
    "characteristics.good": 38004,
    "characteristics.samples_per_second": 1164.74,
    "characteristics.samples_per_second.normalized_per_core": 1164.74,
    "characteristics.samples_per_second.normalized_per_processor": 1164.74,
    "characteristics.total": 50000,
    "ck_system": "Xavier_NX_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "21.03 Jetson CUDA-X AI Developer Preview, TensorRT 7.2.3, CUDA 10.2",
    "host_memory_capacity": "8 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 6,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)",
    "host_processors_per_node": 1,
    "host_storage_capacity": "32 GB",
    "host_storage_type": "Micro SD Card",
    "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline and MultiStream scenarios",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/Xavier_NX_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "21.03 Jetson CUDA-X AI Developer Preview, TensorRT 7.2.3, CUDA 10.2, cuDNN 8.0.0, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2048,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 764280,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Xavier_NX_TRT",
    "system_name": "NVIDIA Jetson Xavier NX (TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 1158,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 6,
    "uid": "940ccb4ed707f8e3",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 32528506943,
    "90.00 percentile latency (ns)": 58363474181,
    "95.00 percentile latency (ns)": 61640343107,
    "97.00 percentile latency (ns)": 62952835737,
    "99.00 percentile latency (ns)": 64262605329,
    "99.90 percentile latency (ns)": 64803192636,
    "Max latency (ns)": 64803451601,
    "Mean latency (ns)": 32488938767,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 231712947,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 37479.5,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-SXM4",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.accuracy": 75.92,
    "characteristics.good": 37960,
    "characteristics.samples_per_second": 37479.5,
    "characteristics.samples_per_second.normalized_per_core": 37479.5,
    "characteristics.samples_per_second.normalized_per_processor": 37479.5,
    "characteristics.total": 50000,
    "ck_system": "DGX-A100_A100-SXM4x1_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2, CUDA 11.0 Update 1",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM4x1_TRT",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.4",
    "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 2048,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 2428800,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM4x1_TRT",
    "system_name": "NVIDIA DGX-A100 (1x A100-SXM4, TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 36800,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "f5b9b74896a048bc",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 247516698022,
    "90.00 percentile latency (ns)": 445525416180,
    "95.00 percentile latency (ns)": 470307505775,
    "97.00 percentile latency (ns)": 480168307095,
    "99.00 percentile latency (ns)": 490081844695,
    "99.90 percentile latency (ns)": 494568340229,
    "Max latency (ns)": 495012769998,
    "Mean latency (ns)": 247531985092,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 56202692,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 4906.54,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "5GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-SXM4 (1x1g.5gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.accuracy": 75.92,
    "characteristics.good": 37960,
    "characteristics.samples_per_second": 4906.54,
    "characteristics.samples_per_second.normalized_per_core": 4906.54,
    "characteristics.samples_per_second.normalized_per_processor": 4906.54,
    "characteristics.total": 50000,
    "ck_system": "DGX-A100_A100-SXM4x1-MIG_1x1g.5gb_TRT_Triton",
    "ck_used": true,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2, CUDA 11.0 Update 1",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 120,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM4x1-MIG_1x1g.5gb_TRT_Triton",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)",
    "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0, Triton 20.09; GCC 7.5.0; Python 3.7.10",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 2048,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 2428800,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM4x1-MIG_1x1g.5gb_TRT_Triton",
    "system_name": "Microsoft Corporation 7.0 (Virtual Machine)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 36800,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 240,
    "uid": "0bf791407481db12",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 36212344586,
    "90.00 percentile latency (ns)": 65392056636,
    "95.00 percentile latency (ns)": 69027857523,
    "97.00 percentile latency (ns)": 70508313208,
    "99.00 percentile latency (ns)": 71987853538,
    "99.90 percentile latency (ns)": 72610744892,
    "Max latency (ns)": 72669258464,
    "Mean latency (ns)": 36275886656,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 81792095,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 33422.7,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.accuracy": 75.92,
    "characteristics.good": 37960,
    "characteristics.samples_per_second": 33422.7,
    "characteristics.samples_per_second.normalized_per_core": 33422.7,
    "characteristics.samples_per_second.normalized_per_processor": 33422.7,
    "characteristics.total": 50000,
    "ck_system": "A100-PCIex1_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2, CUDA 11.0 Update 1",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/A100-PCIex1_TRT_Triton",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.4",
    "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0, Triton 20.09",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 2048,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 2428800,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex1_TRT_Triton",
    "system_name": "Gigabyte G482-Z52 (1x A100-PCIe, TensorRT, Triton)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 36800,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "ad8b3af49a53ef89",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 34680455964,
    "90.00 percentile latency (ns)": 62399971442,
    "95.00 percentile latency (ns)": 65896640134,
    "97.00 percentile latency (ns)": 67286382889,
    "99.00 percentile latency (ns)": 68684653619,
    "99.90 percentile latency (ns)": 69289858533,
    "Max latency (ns)": 69387274072,
    "Mean latency (ns)": 34678919143,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 47291628,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 2074.53,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "Shared with host",
    "accelerator_memory_configuration": "SRAM",
    "accelerator_model_name": "NVIDIA AGX Xavier",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.accuracy": 76.004,
    "characteristics.good": 38002,
    "characteristics.samples_per_second": 2074.53,
    "characteristics.samples_per_second.normalized_per_core": 2074.53,
    "characteristics.samples_per_second.normalized_per_processor": 2074.53,
    "characteristics.total": 50000,
    "ck_system": "AGX_Xavier_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "20.09 Jetson CUDA-X AI Developer Preview, TensorRT 7.2, CUDA 10.2",
    "host_memory_capacity": "32GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 8,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)",
    "host_processors_per_node": 1,
    "host_storage_capacity": "32GB",
    "host_storage_type": "eMMC 5.1",
    "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline and MultiStream scenarios",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/AGX_Xavier_TRT",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.4",
    "other_software_stack": "20.09 Jetson CUDA-X AI Developer Preview, TensorRT 7.2, CUDA 10.2, cuDNN 8.0.2, DALI 0.25.0",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 2048,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 143946,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/AGX_Xavier_TRT",
    "system_name": "NVIDIA Jetson AGX Xavier 32GB (TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 2181,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 8,
    "uid": "472b7c32d244e3ba",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 32302534797,
    "90.00 percentile latency (ns)": 58230343632,
    "95.00 percentile latency (ns)": 61451645596,
    "97.00 percentile latency (ns)": 62759933097,
    "99.00 percentile latency (ns)": 64068780359,
    "99.90 percentile latency (ns)": 64615804441,
    "Max latency (ns)": 64667616920,
    "Mean latency (ns)": 32341275416,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 77394751,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 37558.2,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-SXM4",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.accuracy": 75.92,
    "characteristics.good": 37960,
    "characteristics.samples_per_second": 37558.2,
    "characteristics.samples_per_second.normalized_per_core": 37558.2,
    "characteristics.samples_per_second.normalized_per_processor": 37558.2,
    "characteristics.total": 50000,
    "ck_system": "DGX-A100_A100-SXM4x1_TRT_Triton",
    "ck_used": true,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2, CUDA 11.0 Update 1",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 120,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM4x1_TRT_Triton",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)",
    "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0, Triton 20.09; GCC 7.5.0; Python 3.7.10",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 2048,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 2428800,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM4x1_TRT_Triton",
    "system_name": "Microsoft Corporation 7.0 (Virtual Machine)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 36800,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 240,
    "uid": "7d23cf4a83a93fdf",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 244544780257,
    "90.00 percentile latency (ns)": 440205887834,
    "95.00 percentile latency (ns)": 464635770058,
    "97.00 percentile latency (ns)": 474427615236,
    "99.00 percentile latency (ns)": 484222988670,
    "99.90 percentile latency (ns)": 488654901721,
    "Max latency (ns)": 489042139072,
    "Mean latency (ns)": 244581352269,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 121269449,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 4966.44,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "5GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-SXM4 (1x1g.5gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.accuracy": 75.92,
    "characteristics.good": 37960,
    "characteristics.samples_per_second": 4966.44,
    "characteristics.samples_per_second.normalized_per_core": 4966.44,
    "characteristics.samples_per_second.normalized_per_processor": 4966.44,
    "characteristics.total": 50000,
    "ck_system": "DGX-A100_A100-SXM4x1-MIG_1x1g.5gb_TRT",
    "ck_used": true,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2, CUDA 11.0 Update 1",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 120,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM4x1-MIG_1x1g.5gb_TRT",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)",
    "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0; GCC 7.5.0; Python 3.7.10",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 2048,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 2428800,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM4x1-MIG_1x1g.5gb_TRT",
    "system_name": "Microsoft Corporation 7.0 (Virtual Machine)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 36800,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 240,
    "uid": "aa930f2fd469cb1e",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 33457832988,
    "90.00 percentile latency (ns)": 60278769159,
    "95.00 percentile latency (ns)": 63658462115,
    "97.00 percentile latency (ns)": 64983405720,
    "99.00 percentile latency (ns)": 66311664833,
    "99.90 percentile latency (ns)": 66953511856,
    "Max latency (ns)": 66982998730,
    "Mean latency (ns)": 33479817621,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 76266622,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 6010.48,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA T4",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.accuracy": 75.92,
    "characteristics.good": 37960,
    "characteristics.samples_per_second": 6010.48,
    "characteristics.samples_per_second.normalized_per_core": 6010.48,
    "characteristics.samples_per_second.normalized_per_processor": 6010.48,
    "characteristics.total": 50000,
    "ck_system": "T4x1_TRT_Triton",
    "ck_used": true,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2, CUDA 11.0 Update 1",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 120,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "ECC off",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/T4x1_TRT_Triton",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)",
    "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0, Triton 20.09; GCC 7.5.0; Python 3.7.10",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 2048,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 402600,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x1_TRT_Triton",
    "system_name": "Microsoft Corporation 7.0 (Virtual Machine)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 6100,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 240,
    "uid": "b5862fac7ff4b29b",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 33255380983,
    "90.00 percentile latency (ns)": 60015237975,
    "95.00 percentile latency (ns)": 63390155083,
    "97.00 percentile latency (ns)": 64712811909,
    "99.00 percentile latency (ns)": 66035478769,
    "99.90 percentile latency (ns)": 66675066906,
    "Max latency (ns)": 66704545042,
    "Mean latency (ns)": 33292417809,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 41964764,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 6035.57,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA T4",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.accuracy": 75.92,
    "characteristics.good": 37960,
    "characteristics.samples_per_second": 6035.57,
    "characteristics.samples_per_second.normalized_per_core": 6035.57,
    "characteristics.samples_per_second.normalized_per_processor": 6035.57,
    "characteristics.total": 50000,
    "ck_system": "T4x1_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2, CUDA 11.0 Update 1",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "ECC off",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/T4x1_TRT",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.4",
    "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 2048,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 402600,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x1_TRT",
    "system_name": "Supermicro 4029GP-TRT-OTO-28 (1x T4, TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 6100,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 56,
    "uid": "a4ac1b12c069d3e1",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 37995670884,
    "90.00 percentile latency (ns)": 68294246754,
    "95.00 percentile latency (ns)": 72155735537,
    "97.00 percentile latency (ns)": 73701068003,
    "99.00 percentile latency (ns)": 75238359606,
    "99.90 percentile latency (ns)": 75880185133,
    "Max latency (ns)": 75880666664,
    "Mean latency (ns)": 37982048262,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 268158113,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 32008.2,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.accuracy": 75.92,
    "characteristics.good": 37960,
    "characteristics.samples_per_second": 32008.2,
    "characteristics.samples_per_second.normalized_per_core": 32008.2,
    "characteristics.samples_per_second.normalized_per_processor": 32008.2,
    "characteristics.total": 50000,
    "ck_system": "A100-PCIex1_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2, CUDA 11.0 Update 1",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/A100-PCIex1_TRT",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.4",
    "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 2048,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 2428800,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex1_TRT",
    "system_name": "Gigabyte G482-Z52 (1x A100-PCIe, TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 36800,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 128,
    "uid": "9a4aed34a419e3b9",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 32774724408,
    "90.00 percentile latency (ns)": 59214271801,
    "95.00 percentile latency (ns)": 62508961339,
    "97.00 percentile latency (ns)": 63833898753,
    "99.00 percentile latency (ns)": 65205994231,
    "99.90 percentile latency (ns)": 65749030801,
    "Max latency (ns)": 65855841356,
    "Mean latency (ns)": 32797227936,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 93143984,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 1032.25,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "Shared with host",
    "accelerator_memory_configuration": "SRAM",
    "accelerator_model_name": "NVIDIA Xavier NX",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.accuracy": 75.994,
    "characteristics.good": 37997,
    "characteristics.samples_per_second": 1032.25,
    "characteristics.samples_per_second.normalized_per_core": 1032.25,
    "characteristics.samples_per_second.normalized_per_processor": 1032.25,
    "characteristics.total": 50000,
    "ck_system": "Xavier_NX_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "ImageNet 2012",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "resnet50-v1.5",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "20.09 Jetson CUDA-X AI Developer Preview, TensorRT 7.2, CUDA 10.2",
    "host_memory_capacity": "8GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 6,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)",
    "host_processors_per_node": 1,
    "host_storage_capacity": "32GB",
    "host_storage_type": "Micro SD Card",
    "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline and MultiStream scenarios",
    "informal_model": "resnet50",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.accuracy",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/Xavier_NX_TRT",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.4",
    "other_software_stack": "20.09 Jetson CUDA-X AI Developer Preview, TensorRT 7.2, CUDA 10.2, cuDNN 8.0.2, DALI 0.25.0",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 2048,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 67980,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "resnet50_v1.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Xavier_NX_TRT",
    "system_name": "NVIDIA Jetson Xavier NX (TensorRT)",
    "system_type": "edge",
    "target_latency (ns)": 0,
    "target_qps": 1030,
    "task": "image classification",
    "task2": "image classification",
    "total_cores": 6,
    "uid": "efec8f424f548a0b",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  }
]