[
  {
    "50.00 percentile latency (ns)": 328741755573,
    "90.00 percentile latency (ns)": 592475607362,
    "95.00 percentile latency (ns)": 625440731611,
    "97.00 percentile latency (ns)": 638629063833,
    "99.00 percentile latency (ns)": 651807039556,
    "99.90 percentile latency (ns)": 657765839725,
    "Max latency (ns)": 658394355893,
    "Mean latency (ns)": 328827803826,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 113174815,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 2476.02,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA A10",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.113,
    "characteristics.samples_per_second": 2476.02,
    "characteristics.samples_per_second.normalized_per_core": 309.5025,
    "characteristics.samples_per_second.normalized_per_processor": 309.5025,
    "ck_system": "A10x8_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A10x8_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 1630200,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x8_TRT_Triton",
    "system_name": "Supermicro 4029GP-TRT-OTO-28 (8x A10, TensorRT, Triton)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 2470,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 56,
    "uid": "70402e951d7770fe",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 336900555282,
    "90.00 percentile latency (ns)": 606487268279,
    "95.00 percentile latency (ns)": 640167047114,
    "97.00 percentile latency (ns)": 653620055564,
    "99.00 percentile latency (ns)": 667104480897,
    "99.90 percentile latency (ns)": 678053057352,
    "Max latency (ns)": 682029438456,
    "Mean latency (ns)": 336926736501,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 303075512,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_MultiMigServer",
    "Samples per second": 7315.81,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB (7x1g.10gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.114,
    "characteristics.samples_per_second": 7315.81,
    "characteristics.samples_per_second.normalized_per_core": 914.47625,
    "characteristics.samples_per_second.normalized_per_processor": 914.47625,
    "ck_system": "DGX-A100_A100-SXM-80GB-MIG_56x1g.10gb_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_56x1g.10gb_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 4989600,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_56x1g.10gb_TRT_Triton",
    "system_name": "NVIDIA DGX A100 (8x A100-SXM-80GB-MIG-7x1g.10gb, TensorRT, Triton)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 7560,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "d4a4339e41b089e2",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 372443197361,
    "90.00 percentile latency (ns)": 673556525056,
    "95.00 percentile latency (ns)": 711278899772,
    "97.00 percentile latency (ns)": 726392974200,
    "99.00 percentile latency (ns)": 741472090908,
    "99.90 percentile latency (ns)": 748249784682,
    "Max latency (ns)": 749003393030,
    "Mean latency (ns)": 373151017374,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 96400204,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 3383.7,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 3383.7,
    "characteristics.samples_per_second.normalized_per_core": 845.925,
    "characteristics.samples_per_second.normalized_per_processor": 845.925,
    "ck_system": "DGX-Station-A100_A100-SXM-80GBx4_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "512 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 1,
    "host_storage_capacity": "10 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-Station-A100_A100-SXM-80GBx4_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 2534400,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-Station-A100_A100-SXM-80GBx4_TRT_Triton",
    "system_name": "NVIDIA DGX Station A100 (4x A100-SXM-80GB, TensorRT, Triton)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 3840,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 64,
    "uid": "654d029cd43bf4fb",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 361594803099,
    "90.00 percentile latency (ns)": 651346513308,
    "95.00 percentile latency (ns)": 687570677456,
    "97.00 percentile latency (ns)": 702058383809,
    "99.00 percentile latency (ns)": 716552347357,
    "99.90 percentile latency (ns)": 730169280855,
    "Max latency (ns)": 736593845110,
    "Mean latency (ns)": 361659458120,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 65003427,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_MultiMigServer",
    "Samples per second": 3727.43,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30 (4x1g.6gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.116,
    "characteristics.samples_per_second": 3727.43,
    "characteristics.samples_per_second.normalized_per_core": 465.92875,
    "characteristics.samples_per_second.normalized_per_processor": 465.92875,
    "ck_system": "A30-MIG_32x1g.6gb_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30-MIG_32x1g.6gb_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 2745600,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30-MIG_32x1g.6gb_TRT_Triton",
    "system_name": "Gigabyte G482-Z54 (8x A30-MIG-4x1g.6gb, TensorRT, Triton)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 4160,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "04ebb0b62df4a4e8",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 351157111198,
    "90.00 percentile latency (ns)": 632534026344,
    "95.00 percentile latency (ns)": 667746564744,
    "97.00 percentile latency (ns)": 681802698794,
    "99.00 percentile latency (ns)": 695857161040,
    "99.90 percentile latency (ns)": 702216326057,
    "Max latency (ns)": 702867593352,
    "Mean latency (ns)": 351151913809,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 165837240,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 3605.8,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 3605.8,
    "characteristics.samples_per_second.normalized_per_core": 901.45,
    "characteristics.samples_per_second.normalized_per_processor": 901.45,
    "ck_system": "A100-PCIe-80GB_aarch64x4_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 80,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Ampere Altra Q80-30",
    "host_processors_per_node": 1,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIe-80GB_aarch64x4_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 2534400,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIe-80GB_aarch64x4_TRT",
    "system_name": "Gigabyte G242-P31 (4x A100-PCIe-80GB_aarch64, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 3840,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 80,
    "uid": "c0242daaa849017e",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 327525747427,
    "90.00 percentile latency (ns)": 589626947656,
    "95.00 percentile latency (ns)": 622385182071,
    "97.00 percentile latency (ns)": 635507049497,
    "99.00 percentile latency (ns)": 648613542516,
    "99.90 percentile latency (ns)": 654509867792,
    "Max latency (ns)": 655116840231,
    "Mean latency (ns)": 327507952063,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 157510911,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 3788.03,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 3788.03,
    "characteristics.samples_per_second.normalized_per_core": 473.50375,
    "characteristics.samples_per_second.normalized_per_processor": 473.50375,
    "ck_system": "A30x8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30x8_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 2481600,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x8_TRT",
    "system_name": "Gigabyte G482-Z54 (8x A30, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 3760,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "bbd909af04d29055",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 328189634641,
    "90.00 percentile latency (ns)": 590996257353,
    "95.00 percentile latency (ns)": 623852421264,
    "97.00 percentile latency (ns)": 636981424424,
    "99.00 percentile latency (ns)": 650110848823,
    "99.90 percentile latency (ns)": 656028726743,
    "Max latency (ns)": 656696492480,
    "Mean latency (ns)": 328207381619,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 76925165,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 3778.91,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 3778.91,
    "characteristics.samples_per_second.normalized_per_core": 472.36375,
    "characteristics.samples_per_second.normalized_per_processor": 472.36375,
    "ck_system": "A30x8_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30x8_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 2481600,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x8_TRT_Triton",
    "system_name": "Gigabyte G482-Z54 (8x A30, TensorRT, Triton)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 3760,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "973184e73d34b689",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 346669951301,
    "90.00 percentile latency (ns)": 624630614077,
    "95.00 percentile latency (ns)": 659381772168,
    "97.00 percentile latency (ns)": 673295796066,
    "99.00 percentile latency (ns)": 687176345710,
    "99.90 percentile latency (ns)": 693432673011,
    "Max latency (ns)": 694058132601,
    "Mean latency (ns)": 346747096729,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 183881092,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 7303.13,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 7303.13,
    "characteristics.samples_per_second.normalized_per_core": 912.89125,
    "characteristics.samples_per_second.normalized_per_processor": 912.89125,
    "ck_system": "A100-PCIe-80GBx8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIe-80GBx8_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 5068800,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIe-80GBx8_TRT",
    "system_name": "Gigabyte G482-Z54 (8x A100-PCIe-80GB, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 7680,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "bdd21c4cbabe2f07",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 326148317564,
    "90.00 percentile latency (ns)": 587283125286,
    "95.00 percentile latency (ns)": 619930024237,
    "97.00 percentile latency (ns)": 633000002623,
    "99.00 percentile latency (ns)": 646080715082,
    "99.90 percentile latency (ns)": 651926300990,
    "Max latency (ns)": 652532926729,
    "Mean latency (ns)": 326171277144,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 193823630,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 5866.37,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe-40GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.power": 2164.566921898927,
    "characteristics.power.normalized_per_core": 270.57086523736587,
    "characteristics.power.normalized_per_processor": 270.57086523736587,
    "characteristics.samples_per_second": 5866.37,
    "characteristics.samples_per_second.normalized_per_core": 733.29625,
    "characteristics.samples_per_second.normalized_per_processor": 733.29625,
    "ck_system": "A100-PCIex8_TRT_MaxQ",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIex8_TRT_MaxQ",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 3828000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex8_TRT_MaxQ",
    "system_name": "Gigabyte G482-Z54 (8x A100-PCIe, MaxQ, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 5800,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "cb458de91e4ec50c",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 327846242712,
    "90.00 percentile latency (ns)": 590164842431,
    "95.00 percentile latency (ns)": 622921499724,
    "97.00 percentile latency (ns)": 636038461542,
    "99.00 percentile latency (ns)": 649179783337,
    "99.90 percentile latency (ns)": 655067677307,
    "Max latency (ns)": 655690445566,
    "Mean latency (ns)": 327848203210,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 155608246,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 7851.27,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 7851.27,
    "characteristics.samples_per_second.normalized_per_core": 981.40875,
    "characteristics.samples_per_second.normalized_per_processor": 981.40875,
    "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 5148000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT",
    "system_name": "NVIDIA DGX A100 (8x A100-SXM-80GB, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 7800,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "462dce4441a9cdd6",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 371509934897,
    "90.00 percentile latency (ns)": 671962722834,
    "95.00 percentile latency (ns)": 709575583142,
    "97.00 percentile latency (ns)": 724625314588,
    "99.00 percentile latency (ns)": 739697300930,
    "99.90 percentile latency (ns)": 746446631875,
    "Max latency (ns)": 747191340955,
    "Mean latency (ns)": 372243042554,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 178277806,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 3391.9,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 3391.9,
    "characteristics.samples_per_second.normalized_per_core": 847.975,
    "characteristics.samples_per_second.normalized_per_processor": 847.975,
    "ck_system": "DGX-Station-A100_A100-SXM-80GBx4_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "512 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 1,
    "host_storage_capacity": "10 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-Station-A100_A100-SXM-80GBx4_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 2534400,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-Station-A100_A100-SXM-80GBx4_TRT",
    "system_name": "NVIDIA DGX Station A100 (4x A100-SXM-80GB, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 3840,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 64,
    "uid": "ce2f428faf70f388",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 327846622906,
    "90.00 percentile latency (ns)": 591295528277,
    "95.00 percentile latency (ns)": 624211315279,
    "97.00 percentile latency (ns)": 637356066187,
    "99.00 percentile latency (ns)": 650530794724,
    "99.90 percentile latency (ns)": 656461405602,
    "Max latency (ns)": 657088284719,
    "Mean latency (ns)": 327770323784,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 32073247,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 115.51,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30 (1x1g.6gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 115.51,
    "characteristics.samples_per_second.normalized_per_core": 115.51,
    "characteristics.samples_per_second.normalized_per_processor": 115.51,
    "ck_system": "A30-MIG_1x1g.6gb_TRT_HeteroMultiUse",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30-MIG_1x1g.6gb_TRT_HeteroMultiUse",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 75900,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30-MIG_1x1g.6gb_TRT_HeteroMultiUse",
    "system_name": "Gigabyte G482-Z54 (1x A30-MIG-1x1g.6gb, TensorRT, HeteroMultiUse)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 115,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "833f3614191bd701",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 333590611706,
    "90.00 percentile latency (ns)": 599604020494,
    "95.00 percentile latency (ns)": 633101693114,
    "97.00 percentile latency (ns)": 646498244287,
    "99.00 percentile latency (ns)": 659892851815,
    "99.90 percentile latency (ns)": 665634255414,
    "Max latency (ns)": 666202816035,
    "Mean latency (ns)": 333339177572,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 474104960,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 133.743,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB (1x1g.10gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 133.743,
    "characteristics.samples_per_second.normalized_per_core": 133.743,
    "characteristics.samples_per_second.normalized_per_processor": 133.743,
    "ck_system": "DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 89100,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse",
    "system_name": "NVIDIA DGX A100 (1x A100-SXM-80GB-MIG-1x1g.10gb, TensorRT, HeteroMultiUse)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 135,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "b34aeeb915476dfd",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 384029941686,
    "90.00 percentile latency (ns)": 692082552082,
    "95.00 percentile latency (ns)": 730611925644,
    "97.00 percentile latency (ns)": 746042499458,
    "99.00 percentile latency (ns)": 761457521614,
    "99.90 percentile latency (ns)": 768367656069,
    "Max latency (ns)": 769063494265,
    "Mean latency (ns)": 384205271406,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 183431165,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 3295.44,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.power": 1245.2193758127446,
    "characteristics.power.normalized_per_core": 311.30484395318615,
    "characteristics.power.normalized_per_processor": 311.30484395318615,
    "characteristics.samples_per_second": 3295.44,
    "characteristics.samples_per_second.normalized_per_core": 823.86,
    "characteristics.samples_per_second.normalized_per_processor": 823.86,
    "ck_system": "DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "512 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 1,
    "host_storage_capacity": "10 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 2534400,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ",
    "system_name": "NVIDIA DGX Station A100 (4x A100-SXM-80GB, MaxQ, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 3840,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 64,
    "uid": "57d9943becdd5749",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 383886299381,
    "90.00 percentile latency (ns)": 691515428896,
    "95.00 percentile latency (ns)": 729975736504,
    "97.00 percentile latency (ns)": 745360020519,
    "99.00 percentile latency (ns)": 760730685046,
    "99.90 percentile latency (ns)": 767644178323,
    "Max latency (ns)": 768352893666,
    "Mean latency (ns)": 383901901881,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 196252054,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 6596.97,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe-40GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 6596.97,
    "characteristics.samples_per_second.normalized_per_core": 824.62125,
    "characteristics.samples_per_second.normalized_per_processor": 824.62125,
    "ck_system": "A100-PCIex8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIex8_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 5068800,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex8_TRT",
    "system_name": "Gigabyte G482-Z54 (8x A100-PCIe, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 7680,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "b980f73f4154ade4",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 384799662311,
    "90.00 percentile latency (ns)": 693281871248,
    "95.00 percentile latency (ns)": 731833328386,
    "97.00 percentile latency (ns)": 747255120886,
    "99.00 percentile latency (ns)": 762683850138,
    "99.90 percentile latency (ns)": 769616125096,
    "Max latency (ns)": 770390943451,
    "Mean latency (ns)": 384843918416,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 95371431,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 6579.52,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe-40GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 6579.52,
    "characteristics.samples_per_second.normalized_per_core": 822.44,
    "characteristics.samples_per_second.normalized_per_processor": 822.44,
    "ck_system": "A100-PCIex8_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIex8_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 5068800,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex8_TRT_Triton",
    "system_name": "Gigabyte G482-Z54 (8x A100-PCIe, TensorRT, Triton)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 7680,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "c4216bd34770d95f",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 331360108551,
    "90.00 percentile latency (ns)": 596595629325,
    "95.00 percentile latency (ns)": 629752187224,
    "97.00 percentile latency (ns)": 643021242456,
    "99.00 percentile latency (ns)": 656270357017,
    "99.90 percentile latency (ns)": 662245372031,
    "Max latency (ns)": 662902315112,
    "Mean latency (ns)": 331363870868,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 111097792,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 7765.85,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 7765.85,
    "characteristics.samples_per_second.normalized_per_core": 970.73125,
    "characteristics.samples_per_second.normalized_per_processor": 970.73125,
    "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 5148000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT_Triton",
    "system_name": "NVIDIA DGX A100 (8x A100-SXM-80GB, TensorRT, Triton)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 7800,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "ac8c643b83aeed49",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 396234696043,
    "90.00 percentile latency (ns)": 714588336206,
    "95.00 percentile latency (ns)": 754423147669,
    "97.00 percentile latency (ns)": 770354018419,
    "99.00 percentile latency (ns)": 786277453351,
    "99.90 percentile latency (ns)": 793437982146,
    "Max latency (ns)": 794201430669,
    "Mean latency (ns)": 396482272162,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 172320800,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 6481.98,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.power": 3171.4148614609558,
    "characteristics.power.normalized_per_core": 396.42685768261947,
    "characteristics.power.normalized_per_processor": 396.42685768261947,
    "characteristics.samples_per_second": 6481.98,
    "characteristics.samples_per_second.normalized_per_core": 810.2475,
    "characteristics.samples_per_second.normalized_per_processor": 810.2475,
    "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT_MaxQ",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT_MaxQ",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 5148000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT_MaxQ",
    "system_name": "NVIDIA DGX A100 (8x A100-SXM-80GB, MaxQ, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 7800,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "d9cc926fda3a8a6e",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 347394346319,
    "90.00 percentile latency (ns)": 626008948444,
    "95.00 percentile latency (ns)": 660847093740,
    "97.00 percentile latency (ns)": 674767414245,
    "99.00 percentile latency (ns)": 688702760667,
    "99.90 percentile latency (ns)": 694970564218,
    "Max latency (ns)": 695655127553,
    "Mean latency (ns)": 347473749136,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 92169701,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 7286.37,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 7286.37,
    "characteristics.samples_per_second.normalized_per_core": 910.79625,
    "characteristics.samples_per_second.normalized_per_processor": 910.79625,
    "ck_system": "A100-PCIe-80GBx8_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIe-80GBx8_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 5068800,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIe-80GBx8_TRT_Triton",
    "system_name": "Gigabyte G482-Z54 (8x A100-PCIe-80GB, TensorRT, Triton)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 7680,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "c24f0baaa8273878",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 330497245829,
    "90.00 percentile latency (ns)": 595553763755,
    "95.00 percentile latency (ns)": 628667095562,
    "97.00 percentile latency (ns)": 641905423281,
    "99.00 percentile latency (ns)": 655159499504,
    "99.90 percentile latency (ns)": 661122208030,
    "Max latency (ns)": 661770508101,
    "Mean latency (ns)": 330502722203,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 128105290,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 2473.36,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA A10",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.113,
    "characteristics.samples_per_second": 2473.36,
    "characteristics.samples_per_second.normalized_per_core": 309.17,
    "characteristics.samples_per_second.normalized_per_processor": 309.17,
    "ck_system": "A10x8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A10x8_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 1636800,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x8_TRT",
    "system_name": "Supermicro 4029GP-TRT-OTO-28 (8x A10, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 2480,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 56,
    "uid": "973ad4d10a0e71f5",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 331067839953,
    "90.00 percentile latency (ns)": 596078510090,
    "95.00 percentile latency (ns)": 629013194551,
    "97.00 percentile latency (ns)": 642270984669,
    "99.00 percentile latency (ns)": 655617877674,
    "99.90 percentile latency (ns)": 661480271469,
    "Max latency (ns)": 662053137050,
    "Mean latency (ns)": 331207179185,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 871929862,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 116.637,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "N/A",
    "accelerator_memory_configuration": "",
    "accelerator_model_name": "N/A",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 0,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 19.858,
    "characteristics.samples_per_second": 116.637,
    "characteristics.samples_per_second.normalized_per_core": 1.0414017857142857,
    "characteristics.samples_per_second.normalized_per_processor": 29.15925,
    "ck_system": "Triton_CPU_4S_8380Hx1",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "OpenVino 2021",
    "host_memory_capacity": "1536 GB",
    "host_memory_configuration": "12 slots / 32GB each / 3200 MT/s per socket",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Gold 8380H CPU @ 2.70GHz",
    "host_processors_per_node": 4,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "fp32",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 112,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/Triton_CPU_4S_8380Hx1",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "Tensorflow 2.4.0, OpenVino 2021, Triton 21.07",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 77220,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "The original weight filename: https://zenodo.org/record/3228411/files/resnet34-ssd1200.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "CPU Inference on Triton Inference Server",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Triton_CPU_4S_8380Hx1",
    "system_name": "Supermicro SYS-240P-TNRT (Cooper Lake running Triton)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 117,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 112,
    "uid": "103fd44ceead5c9c",
    "use_accelerator": false,
    "weight_data_types": "int8",
    "weight_transformations": "We transfer the weight from fp32 datatype in ONNX file to int8 datatype in OpenVino IR file."
  },
  {
    "50.00 percentile latency (ns)": 327574410523,
    "90.00 percentile latency (ns)": 590307086976,
    "95.00 percentile latency (ns)": 623105845692,
    "97.00 percentile latency (ns)": 636238596901,
    "99.00 percentile latency (ns)": 649393477374,
    "99.90 percentile latency (ns)": 655276087439,
    "Max latency (ns)": 655947565036,
    "Mean latency (ns)": 327602292135,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 109954610,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 2495.32,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA A10",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.111,
    "characteristics.samples_per_second": 2495.32,
    "characteristics.samples_per_second.normalized_per_core": 311.915,
    "characteristics.samples_per_second.normalized_per_processor": 311.915,
    "ck_system": "A10x8_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A10x8_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0, Triton 21.02",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 1636800,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "preview",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x8_TRT_Triton",
    "system_name": "Supermicro 4029GP-TRT-OTO-28 (8x A10, TensorRT, Triton)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 2480,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 56,
    "uid": "3be0bb8154df4935",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 337447251427,
    "90.00 percentile latency (ns)": 607613821729,
    "95.00 percentile latency (ns)": 641402444720,
    "97.00 percentile latency (ns)": 654893453073,
    "99.00 percentile latency (ns)": 668711643163,
    "99.90 percentile latency (ns)": 680081682021,
    "Max latency (ns)": 685045578549,
    "Mean latency (ns)": 337527846921,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 305261702,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_MultiMigServer",
    "Samples per second": 7283.6,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB (7x1g.10gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.111,
    "characteristics.samples_per_second": 7283.6,
    "characteristics.samples_per_second.normalized_per_core": 910.45,
    "characteristics.samples_per_second.normalized_per_processor": 910.45,
    "ck_system": "DGX-A100_A100-SXM-80GB-MIG_56x1g.10gb_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_56x1g.10gb_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0, Triton 21.02",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 4989600,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_56x1g.10gb_TRT_Triton",
    "system_name": "NVIDIA DGX-A100 (8x A100-SXM-80GB-MIG-7x1g.10gb, TensorRT, Triton)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 7560,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "8ab95a6a71d5124b",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 320264565293,
    "90.00 percentile latency (ns)": 576562877898,
    "95.00 percentile latency (ns)": 608537919834,
    "97.00 percentile latency (ns)": 621351693650,
    "99.00 percentile latency (ns)": 634161279644,
    "99.90 percentile latency (ns)": 639973991175,
    "Max latency (ns)": 640491612199,
    "Mean latency (ns)": 320218700825,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 128797096,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 989.24,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.111,
    "characteristics.samples_per_second": 989.24,
    "characteristics.samples_per_second.normalized_per_core": 989.24,
    "characteristics.samples_per_second.normalized_per_processor": 989.24,
    "ck_system": "DGX-A100_A100-SXM-80GBx1_TRT_datacenter",
    "ck_used": true,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 120,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx1_TRT_datacenter",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 633600,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx1_TRT_datacenter",
    "system_name": "Microsoft Corporation 7.0 (Virtual Machine)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 960,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 240,
    "uid": "250af29406e9e7de",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 337430866301,
    "90.00 percentile latency (ns)": 606567407868,
    "95.00 percentile latency (ns)": 640241113212,
    "97.00 percentile latency (ns)": 653721103294,
    "99.00 percentile latency (ns)": 667162053493,
    "99.90 percentile latency (ns)": 673130533799,
    "Max latency (ns)": 673693595825,
    "Mean latency (ns)": 337309253135,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 1033726779,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 49.9634,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "N/A",
    "accelerator_memory_configuration": "",
    "accelerator_model_name": "N/A",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 0,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 19.858,
    "characteristics.samples_per_second": 49.9634,
    "characteristics.samples_per_second.normalized_per_core": 0.20818083333333334,
    "characteristics.samples_per_second.normalized_per_processor": 24.9817,
    "ck_system": "Triton_CPU_2S_6258Rx1",
    "ck_used": true,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "OpenVino 2021.2.200",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "6 slots / 32GB each / 2934 MT/s per socket",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 120,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "fp32",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 240,
    "normalize_processors": 2,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/Triton_CPU_2S_6258Rx1",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)",
    "other_hardware": "",
    "other_software_stack": "OpenVino 2021.2.200, Triton 21.02; GCC 7.5.0; Python 3.7.10",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "No",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 33660,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "The original weight filename: https://zenodo.org/record/3228411/files/resnet34-ssd1200.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "CPU Inference on Triton Inference ServerPowered by CK v2.5.8 (https://github.com/ctuning/ck)",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Triton_CPU_2S_6258Rx1",
    "system_name": "Microsoft Corporation 7.0 (Virtual Machine)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 51,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 240,
    "uid": "2aa164615a54b2b6",
    "use_accelerator": false,
    "weight_data_types": "int8",
    "weight_transformations": "We transfer the weight from fp32 datatype in ONNX file to int8 datatype in OpenVino IR file."
  },
  {
    "50.00 percentile latency (ns)": 321370061522,
    "90.00 percentile latency (ns)": 578659440729,
    "95.00 percentile latency (ns)": 610812579253,
    "97.00 percentile latency (ns)": 623679949336,
    "99.00 percentile latency (ns)": 636544225590,
    "99.90 percentile latency (ns)": 642326254172,
    "Max latency (ns)": 642910930576,
    "Mean latency (ns)": 321364843771,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 64015399,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 985.518,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.111,
    "characteristics.samples_per_second": 985.518,
    "characteristics.samples_per_second.normalized_per_core": 985.518,
    "characteristics.samples_per_second.normalized_per_processor": 985.518,
    "ck_system": "DGX-A100_A100-SXM-80GBx1_TRT_Triton_datacenter",
    "ck_used": true,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 120,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx1_TRT_Triton_datacenter",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0, Triton 21.02; GCC 7.5.0; Python 3.7.10",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 633600,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx1_TRT_Triton_datacenter",
    "system_name": "Microsoft Corporation 7.0 (Virtual Machine)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 960,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 240,
    "uid": "32c6a5ac02c5435d",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 330365884247,
    "90.00 percentile latency (ns)": 594695520159,
    "95.00 percentile latency (ns)": 627740585791,
    "97.00 percentile latency (ns)": 640952937227,
    "99.00 percentile latency (ns)": 654184149008,
    "99.90 percentile latency (ns)": 660133263752,
    "Max latency (ns)": 660743714015,
    "Mean latency (ns)": 330349222070,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 161691849,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 3755.77,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.111,
    "characteristics.samples_per_second": 3755.77,
    "characteristics.samples_per_second.normalized_per_core": 469.47125,
    "characteristics.samples_per_second.normalized_per_processor": 469.47125,
    "ck_system": "A30x8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A30x8_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.46, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 2481600,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "preview",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x8_TRT",
    "system_name": "Gigabyte G482-Z54 (8x A30, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 3760,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "fd135cfd3934fa6b",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 329420628890,
    "90.00 percentile latency (ns)": 593031832251,
    "95.00 percentile latency (ns)": 625966837841,
    "97.00 percentile latency (ns)": 639146416012,
    "99.00 percentile latency (ns)": 652337559485,
    "99.90 percentile latency (ns)": 658264058058,
    "Max latency (ns)": 658916410232,
    "Mean latency (ns)": 329393789445,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 76333117,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 3766.18,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.111,
    "characteristics.samples_per_second": 3766.18,
    "characteristics.samples_per_second.normalized_per_core": 470.7725,
    "characteristics.samples_per_second.normalized_per_processor": 470.7725,
    "ck_system": "A30x8_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A30x8_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.46, DALI 0.30.0, Triton 21.02",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 2481600,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "preview",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x8_TRT_Triton",
    "system_name": "Gigabyte G482-Z54 (8x A30, TensorRT, Triton)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 3760,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "920fc04013a59860",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 328402716772,
    "90.00 percentile latency (ns)": 591554875525,
    "95.00 percentile latency (ns)": 624469110698,
    "97.00 percentile latency (ns)": 637639955294,
    "99.00 percentile latency (ns)": 650787917616,
    "99.90 percentile latency (ns)": 656700056950,
    "Max latency (ns)": 657307516827,
    "Mean latency (ns)": 328456795204,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 212814563,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 5823.76,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe-40GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.111,
    "characteristics.power": 2244.0585996955815,
    "characteristics.power.normalized_per_core": 280.5073249619477,
    "characteristics.power.normalized_per_processor": 280.5073249619477,
    "characteristics.samples_per_second": 5823.76,
    "characteristics.samples_per_second.normalized_per_core": 727.97,
    "characteristics.samples_per_second.normalized_per_processor": 727.97,
    "ck_system": "A100-PCIex8_TRT_MaxQ",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A100-PCIex8_TRT_MaxQ",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 3828000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex8_TRT_MaxQ",
    "system_name": "Gigabyte G482-Z54 (8x A100-PCIe, MaxQ, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 5800,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "2d1336c19845c6a2",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 326517797686,
    "90.00 percentile latency (ns)": 587998552822,
    "95.00 percentile latency (ns)": 620666257163,
    "97.00 percentile latency (ns)": 633718759505,
    "99.00 percentile latency (ns)": 646809270727,
    "99.90 percentile latency (ns)": 652707950238,
    "Max latency (ns)": 653342446661,
    "Mean latency (ns)": 326556367049,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 160685227,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 7879.48,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.111,
    "characteristics.samples_per_second": 7879.48,
    "characteristics.samples_per_second.normalized_per_core": 984.935,
    "characteristics.samples_per_second.normalized_per_processor": 984.935,
    "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 5148000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT",
    "system_name": "NVIDIA DGX-A100 (8x A100-SXM-80GB, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 7800,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "18c048a643ceadfa",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 373818762656,
    "90.00 percentile latency (ns)": 676383304534,
    "95.00 percentile latency (ns)": 714226431634,
    "97.00 percentile latency (ns)": 729404369852,
    "99.00 percentile latency (ns)": 744544674229,
    "99.90 percentile latency (ns)": 751346992272,
    "Max latency (ns)": 752060867852,
    "Mean latency (ns)": 374565991714,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 186982342,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 3369.94,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.111,
    "characteristics.samples_per_second": 3369.94,
    "characteristics.samples_per_second.normalized_per_core": 842.485,
    "characteristics.samples_per_second.normalized_per_processor": 842.485,
    "ck_system": "DGX-Station-A100_A100-SXM-80GBx4_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "512 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 1,
    "host_storage_capacity": "10 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-Station-A100_A100-SXM-80GBx4_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 2534400,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-Station-A100_A100-SXM-80GBx4_TRT",
    "system_name": "NVIDIA DGX Station A100 (4x A100-SXM-80GB, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 3840,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 64,
    "uid": "b1f44a897d612d49",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 333635239659,
    "90.00 percentile latency (ns)": 599642793344,
    "95.00 percentile latency (ns)": 633129465394,
    "97.00 percentile latency (ns)": 646530954155,
    "99.00 percentile latency (ns)": 659926658639,
    "99.90 percentile latency (ns)": 665672419683,
    "Max latency (ns)": 666241931174,
    "Mean latency (ns)": 333431654962,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 479184833,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 133.735,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB (1x1g.10gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.111,
    "characteristics.samples_per_second": 133.735,
    "characteristics.samples_per_second.normalized_per_core": 133.735,
    "characteristics.samples_per_second.normalized_per_processor": 133.735,
    "ck_system": "DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 89100,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse",
    "system_name": "NVIDIA DGX-A100 (1x A100-SXM-80GB-MIG-1x1g.10gb, TensorRT, HeteroMultiUse)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 135,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "ef29a71744365957",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 406745722907,
    "90.00 percentile latency (ns)": 733281665658,
    "95.00 percentile latency (ns)": 774193331972,
    "97.00 percentile latency (ns)": 790553358341,
    "99.00 percentile latency (ns)": 806902681982,
    "99.90 percentile latency (ns)": 814246956500,
    "Max latency (ns)": 815012768613,
    "Mean latency (ns)": 406908569410,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 195567530,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 3109.64,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.111,
    "characteristics.power": 1275.1095705521473,
    "characteristics.power.normalized_per_core": 318.7773926380368,
    "characteristics.power.normalized_per_processor": 318.7773926380368,
    "characteristics.samples_per_second": 3109.64,
    "characteristics.samples_per_second.normalized_per_core": 777.41,
    "characteristics.samples_per_second.normalized_per_processor": 777.41,
    "ck_system": "DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "512 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 1,
    "host_storage_capacity": "10 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 2534400,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ",
    "system_name": "NVIDIA DGX Station A100 (4x A100-SXM-80GB, MaxQ, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 3840,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 64,
    "uid": "35c891d7594d83ad",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 384385355752,
    "90.00 percentile latency (ns)": 692681209242,
    "95.00 percentile latency (ns)": 731214446616,
    "97.00 percentile latency (ns)": 746616470204,
    "99.00 percentile latency (ns)": 762032375616,
    "99.90 percentile latency (ns)": 768984458436,
    "Max latency (ns)": 769686117664,
    "Mean latency (ns)": 384472883128,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 176038912,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 6585.54,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe-40GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.111,
    "characteristics.samples_per_second": 6585.54,
    "characteristics.samples_per_second.normalized_per_core": 823.1925,
    "characteristics.samples_per_second.normalized_per_processor": 823.1925,
    "ck_system": "A100-PCIex8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A100-PCIex8_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 5068800,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex8_TRT",
    "system_name": "Gigabyte G482-Z54 (8x A100-PCIe, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 7680,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "76a7bbb614d039aa",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 327612408243,
    "90.00 percentile latency (ns)": 589918158944,
    "95.00 percentile latency (ns)": 622721431982,
    "97.00 percentile latency (ns)": 635840694415,
    "99.00 percentile latency (ns)": 648957775774,
    "99.90 percentile latency (ns)": 654860473882,
    "Max latency (ns)": 655506716770,
    "Mean latency (ns)": 327630884219,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 108629099,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 7853.47,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.111,
    "characteristics.samples_per_second": 7853.47,
    "characteristics.samples_per_second.normalized_per_core": 981.68375,
    "characteristics.samples_per_second.normalized_per_processor": 981.68375,
    "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0, Triton 21.02",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 5148000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT_Triton",
    "system_name": "NVIDIA DGX-A100 (8x A100-SXM-80GB, TensorRT, Triton)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 7800,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "17a643debc97e476",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 374466390061,
    "90.00 percentile latency (ns)": 673989726624,
    "95.00 percentile latency (ns)": 711393490200,
    "97.00 percentile latency (ns)": 726376141461,
    "99.00 percentile latency (ns)": 741350513376,
    "99.90 percentile latency (ns)": 748094981606,
    "Max latency (ns)": 748817193377,
    "Mean latency (ns)": 374436828157,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 172340458,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 6874.84,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.111,
    "characteristics.power": 3503.1100267379707,
    "characteristics.power.normalized_per_core": 437.88875334224633,
    "characteristics.power.normalized_per_processor": 437.88875334224633,
    "characteristics.samples_per_second": 6874.84,
    "characteristics.samples_per_second.normalized_per_core": 859.355,
    "characteristics.samples_per_second.normalized_per_processor": 859.355,
    "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT_MaxQ",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT_MaxQ",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 5148000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT_MaxQ",
    "system_name": "NVIDIA DGX-A100 (8x A100-SXM-80GB, MaxQ, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 7800,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "f8adb1350f14922b",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 327679292252,
    "90.00 percentile latency (ns)": 590230712394,
    "95.00 percentile latency (ns)": 623047985756,
    "97.00 percentile latency (ns)": 636169646815,
    "99.00 percentile latency (ns)": 649298036376,
    "99.90 percentile latency (ns)": 655199445415,
    "Max latency (ns)": 655870851098,
    "Mean latency (ns)": 327649023828,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 124789670,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 2495.61,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA A10",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.111,
    "characteristics.samples_per_second": 2495.61,
    "characteristics.samples_per_second.normalized_per_core": 311.95125,
    "characteristics.samples_per_second.normalized_per_processor": 311.95125,
    "ck_system": "A10x8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A10x8_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 1636800,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "preview",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x8_TRT",
    "system_name": "Supermicro 4029GP-TRT-OTO-28 (8x A10, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 2480,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 56,
    "uid": "8f214e64fd8921c2",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 330645868815,
    "90.00 percentile latency (ns)": 594675652382,
    "95.00 percentile latency (ns)": 627649206139,
    "97.00 percentile latency (ns)": 640879101592,
    "99.00 percentile latency (ns)": 654062066282,
    "99.90 percentile latency (ns)": 659993687306,
    "Max latency (ns)": 660477990941,
    "Mean latency (ns)": 330617303184,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 933172395,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 111.919,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "N/A",
    "accelerator_memory_configuration": "",
    "accelerator_model_name": "N/A",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 0,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 19.858,
    "characteristics.samples_per_second": 111.919,
    "characteristics.samples_per_second.normalized_per_core": 0.9992767857142857,
    "characteristics.samples_per_second.normalized_per_processor": 27.97975,
    "ck_system": "Triton_CPU_4S_8380Hx1",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "OpenVino 2021.2.200",
    "host_memory_capacity": "1536 GB",
    "host_memory_configuration": "6 slots / 32GB each / 3200 MT/s per socket",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8380H CPU @ 2.90GHz",
    "host_processors_per_node": 4,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "fp32",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 112,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/Triton_CPU_4S_8380Hx1",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.5 LTS",
    "other_hardware": "",
    "other_software_stack": "Tensorflow 2.4.0, OpenVino 2021.2.200, Triton 21.02",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "No",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 73920,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "The original weight filename: https://zenodo.org/record/3228411/files/resnet34-ssd1200.onnx",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "CPU Inference on Triton Inference Server",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Triton_CPU_4S_8380Hx1",
    "system_name": "Supermicro SYS-240P-TNRT (Cooper Lake running Triton)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 112,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 112,
    "uid": "d934fac8ef303153",
    "use_accelerator": false,
    "weight_data_types": "int8",
    "weight_transformations": "We transfer the weight from fp32 datatype in ONNX file to int8 datatype in OpenVino IR file."
  },
  {
    "50.00 percentile latency (ns)": 32590041184,
    "90.00 percentile latency (ns)": 58637430271,
    "95.00 percentile latency (ns)": 61892713301,
    "97.00 percentile latency (ns)": 63199303457,
    "99.00 percentile latency (ns)": 64499614462,
    "99.90 percentile latency (ns)": 65087471084,
    "Max latency (ns)": 65148797534,
    "Mean latency (ns)": 32593705296,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 67714790,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 91779.7,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA TITAN RTX",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "characteristics.mAP": 22.912,
    "characteristics.samples_per_second": 91779.7,
    "characteristics.samples_per_second.normalized_per_core": 22944.925,
    "characteristics.samples_per_second.normalized_per_processor": 22944.925,
    "ck_system": "TitanRTXx4",
    "ck_used": false,
    "cooling": "watercooled",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 6.0, CUDA 10.1, cuDNN 7.6.3, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 24,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) 8268",
    "host_processors_per_node": 2,
    "host_storage_capacity": "3.84 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-small",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.5,
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/results/TitanRTXx4",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.3",
    "other_software_stack": "docker 18.09.2, python 3.6.8,gcc 5.5.0,onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 256,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 3133965575612453542,
    "retraining": "N",
    "sample_index_rng_seed": 665484352860916858,
    "samples_per_query": 5979336,
    "schedule_rng_seed": 3622009729038561421,
    "starting_weights_filename": "frozen_inference_graph.pb",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/TitanRTXx4",
    "system_name": "SCAN 3XS DBP T496X2 Fluid",
    "target_latency (ns)": 0,
    "target_qps": 90596,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 48,
    "uid": "b60d653f97440780",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 32337106921,
    "90.00 percentile latency (ns)": 58184487813,
    "95.00 percentile latency (ns)": 61437358509,
    "97.00 percentile latency (ns)": 62754189290,
    "99.00 percentile latency (ns)": 64065019932,
    "99.90 percentile latency (ns)": 64613028998,
    "Max latency (ns)": 64677492021,
    "Mean latency (ns)": 32346601073,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 86248046,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 1661.29,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA TITAN RTX",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "characteristics.mAP": 20.067,
    "characteristics.samples_per_second": 1661.29,
    "characteristics.samples_per_second.normalized_per_core": 415.3225,
    "characteristics.samples_per_second.normalized_per_processor": 415.3225,
    "ck_system": "TitanRTXx4",
    "ck_used": false,
    "cooling": "watercooled",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 6.0, CUDA 10.1, cuDNN 7.6.3, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 24,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) 8268",
    "host_processors_per_node": 2,
    "host_storage_capacity": "3.84 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-large",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.5,
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/results/TitanRTXx4",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.3",
    "other_software_stack": "docker 18.09.2, python 3.6.8,gcc 5.5.0,onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 64,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 3133965575612453542,
    "retraining": "N",
    "sample_index_rng_seed": 665484352860916858,
    "samples_per_query": 107448,
    "schedule_rng_seed": 3622009729038561421,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/TitanRTXx4",
    "system_name": "SCAN 3XS DBP T496X2 Fluid",
    "target_latency (ns)": 0,
    "target_qps": 1628,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 48,
    "uid": "4baaad281aa59daf",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 32882294118,
    "90.00 percentile latency (ns)": 59132229510,
    "95.00 percentile latency (ns)": 62419244380,
    "97.00 percentile latency (ns)": 63702720143,
    "99.00 percentile latency (ns)": 65036336829,
    "99.90 percentile latency (ns)": 65595120120,
    "Max latency (ns)": 65793722371,
    "Mean latency (ns)": 32865761363,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 68568924,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 2485.77,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "Shared with host",
    "accelerator_memory_configuration": "SRAM",
    "accelerator_model_name": "NVIDIA Xavier",
    "accelerator_on-chip_memories": "1MB (128KB/SM) L1 + 512KB L2 + 4MB (DLA)",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "characteristics.mAP": 22.926,
    "characteristics.samples_per_second": 2485.77,
    "characteristics.samples_per_second.normalized_per_core": 2485.77,
    "characteristics.samples_per_second.normalized_per_processor": 2485.77,
    "ck_system": "Xavier",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "JetPack 4.3 DP, TensorRT 6.0, cuDNN 7.6.3, CUDA 10.0, cub 1.8.0",
    "host_memory_capacity": "16 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "8MB L2 (2MB per dual cluster)/4 MB L3 (shared)",
    "host_processor_core_count": 8,
    "host_processor_frequency": "2265.5 MHz",
    "host_processor_interconnect": "",
    "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)",
    "host_processors_per_node": 1,
    "host_storage_capacity": "32 GB",
    "host_storage_type": "eMMC 5.1",
    "hw_notes": "GPU and both DLAs are used in Offline and MultiStream scenarios",
    "informal_model": "ssd-small",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.5,
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/results/Xavier",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.3",
    "other_software_stack": "pycuda 2019.1, pytorch 1.1, torchvision 0.2.2.post3",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 256,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 3133965575612453542,
    "retraining": "N",
    "sample_index_rng_seed": 665484352860916858,
    "samples_per_query": 163548,
    "schedule_rng_seed": 3622009729038561421,
    "starting_weights_filename": "frozen_inference_graph.pb",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Xavier",
    "system_name": "NVIDIA Jetson AGX Xavier",
    "target_latency (ns)": 0,
    "target_qps": 2478,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 8,
    "uid": "5a10da6e80963554",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 248302143947,
    "90.00 percentile latency (ns)": 446843674201,
    "95.00 percentile latency (ns)": 471622790543,
    "97.00 percentile latency (ns)": 481559201342,
    "99.00 percentile latency (ns)": 491494910408,
    "99.90 percentile latency (ns)": 495920091502,
    "Max latency (ns)": 496486165893,
    "Mean latency (ns)": 248272510287,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 59450575,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 49.4999,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "Shared with host",
    "accelerator_memory_configuration": "SRAM",
    "accelerator_model_name": "NVIDIA Xavier",
    "accelerator_on-chip_memories": "1MB (128KB/SM) L1 + 512KB L2 + 4MB (DLA)",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "characteristics.mAP": 20.057,
    "characteristics.samples_per_second": 49.4999,
    "characteristics.samples_per_second.normalized_per_core": 49.4999,
    "characteristics.samples_per_second.normalized_per_processor": 49.4999,
    "ck_system": "Xavier",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "JetPack 4.3 DP, TensorRT 6.0, cuDNN 7.6.3, CUDA 10.0, cub 1.8.0",
    "host_memory_capacity": "16 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "8MB L2 (2MB per dual cluster)/4 MB L3 (shared)",
    "host_processor_core_count": 8,
    "host_processor_frequency": "2265.5 MHz",
    "host_processor_interconnect": "",
    "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)",
    "host_processors_per_node": 1,
    "host_storage_capacity": "32 GB",
    "host_storage_type": "eMMC 5.1",
    "hw_notes": "GPU and both DLAs are used in Offline and MultiStream scenarios",
    "informal_model": "ssd-large",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.5,
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/results/Xavier",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.3",
    "other_software_stack": "pycuda 2019.1, pytorch 1.1, torchvision 0.2.2.post3",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 64,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 3133965575612453542,
    "retraining": "N",
    "sample_index_rng_seed": 665484352860916858,
    "samples_per_query": 24576,
    "schedule_rng_seed": 3622009729038561421,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Xavier",
    "system_name": "NVIDIA Jetson AGX Xavier",
    "target_latency (ns)": 0,
    "target_qps": 50,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 8,
    "uid": "f108617042580e4a",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 34305726774,
    "90.00 percentile latency (ns)": 61917972007,
    "95.00 percentile latency (ns)": 65380104228,
    "97.00 percentile latency (ns)": 66771242096,
    "99.00 percentile latency (ns)": 68155573649,
    "99.90 percentile latency (ns)": 68779300476,
    "Max latency (ns)": 68848870687,
    "Mean latency (ns)": 34368119791,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 116920460,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 143084,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA Tesla T4",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 20,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "characteristics.mAP": 22.912,
    "characteristics.samples_per_second": 143084,
    "characteristics.samples_per_second.normalized_per_core": 7154.2,
    "characteristics.samples_per_second.normalized_per_processor": 7154.2,
    "ck_system": "T4x20",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 6.0, CUDA 10.1, cuDNN 7.6.3, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "ECC off",
    "informal_model": "ssd-small",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.5,
    "normalize_cores": 20,
    "normalize_processors": 20,
    "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/results/T4x20",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.3",
    "other_software_stack": "docker 18.09.2, python 3.6.8,gcc 5.5.0,onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 256,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 3133965575612453542,
    "retraining": "N",
    "sample_index_rng_seed": 665484352860916858,
    "samples_per_query": 9851160,
    "schedule_rng_seed": 3622009729038561421,
    "starting_weights_filename": "frozen_inference_graph.pb",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x20",
    "system_name": "Supermicro 6049GP-TRT-OTO-29 20xT4",
    "target_latency (ns)": 0,
    "target_qps": 149260,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 56,
    "uid": "145a0a5376692458",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 31863080615,
    "90.00 percentile latency (ns)": 57541465186,
    "95.00 percentile latency (ns)": 60752974240,
    "97.00 percentile latency (ns)": 62040205616,
    "99.00 percentile latency (ns)": 63325326945,
    "99.90 percentile latency (ns)": 63900443315,
    "Max latency (ns)": 63955066872,
    "Mean latency (ns)": 31916684723,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 88756294,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 2765.69,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA Tesla T4",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 20,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "characteristics.mAP": 20.067,
    "characteristics.samples_per_second": 2765.69,
    "characteristics.samples_per_second.normalized_per_core": 138.2845,
    "characteristics.samples_per_second.normalized_per_processor": 138.2845,
    "ck_system": "T4x20",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 6.0, CUDA 10.1, cuDNN 7.6.3, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "ECC off",
    "informal_model": "ssd-large",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.5,
    "normalize_cores": 20,
    "normalize_processors": 20,
    "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/results/T4x20",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.3",
    "other_software_stack": "docker 18.09.2, python 3.6.8,gcc 5.5.0,onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 64,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 3133965575612453542,
    "retraining": "N",
    "sample_index_rng_seed": 665484352860916858,
    "samples_per_query": 176880,
    "schedule_rng_seed": 3622009729038561421,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x20",
    "system_name": "Supermicro 6049GP-TRT-OTO-29 20xT4",
    "target_latency (ns)": 0,
    "target_qps": 2680,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 56,
    "uid": "3e55f84852a43916",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 32237545326,
    "90.00 percentile latency (ns)": 58217740073,
    "95.00 percentile latency (ns)": 61478437514,
    "97.00 percentile latency (ns)": 62786622311,
    "99.00 percentile latency (ns)": 64090969983,
    "99.90 percentile latency (ns)": 64671774306,
    "Max latency (ns)": 64734010021,
    "Mean latency (ns)": 32282204228,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 52891639,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 60871.6,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA Tesla T4",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "characteristics.mAP": 22.912,
    "characteristics.samples_per_second": 60871.6,
    "characteristics.samples_per_second.normalized_per_core": 7608.95,
    "characteristics.samples_per_second.normalized_per_processor": 7608.95,
    "ck_system": "T4x8",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 6.0, CUDA 10.1, cuDNN 7.6.3, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "ECC off",
    "informal_model": "ssd-small",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.5,
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/results/T4x8",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.3",
    "other_software_stack": "docker 18.09.2, python 3.6.8,gcc 5.5.0,onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 256,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 3133965575612453542,
    "retraining": "N",
    "sample_index_rng_seed": 665484352860916858,
    "samples_per_query": 3940464,
    "schedule_rng_seed": 3622009729038561421,
    "starting_weights_filename": "frozen_inference_graph.pb",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x8",
    "system_name": "Supermicro 4029GP-TRT-OTO-28 8xT4",
    "target_latency (ns)": 0,
    "target_qps": 59704,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 56,
    "uid": "a699543f99551719",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 32178954093,
    "90.00 percentile latency (ns)": 58108269585,
    "95.00 percentile latency (ns)": 61353496966,
    "97.00 percentile latency (ns)": 62659351625,
    "99.00 percentile latency (ns)": 63942731711,
    "99.90 percentile latency (ns)": 64541132014,
    "Max latency (ns)": 64607480650,
    "Mean latency (ns)": 32222073007,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 82755724,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 1095.11,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA Tesla T4",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "characteristics.mAP": 20.067,
    "characteristics.samples_per_second": 1095.11,
    "characteristics.samples_per_second.normalized_per_core": 136.88875,
    "characteristics.samples_per_second.normalized_per_processor": 136.88875,
    "ck_system": "T4x8",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 6.0, CUDA 10.1, cuDNN 7.6.3, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "ECC off",
    "informal_model": "ssd-large",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.5,
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/results/T4x8",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.3",
    "other_software_stack": "docker 18.09.2, python 3.6.8,gcc 5.5.0,onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 64,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 3133965575612453542,
    "retraining": "N",
    "sample_index_rng_seed": 665484352860916858,
    "samples_per_query": 70752,
    "schedule_rng_seed": 3622009729038561421,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x8",
    "system_name": "Supermicro 4029GP-TRT-OTO-28 8xT4",
    "target_latency (ns)": 0,
    "target_qps": 1072,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 56,
    "uid": "a9de7c063f1eac63",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 31887151177,
    "90.00 percentile latency (ns)": 57470534496,
    "95.00 percentile latency (ns)": 60676992387,
    "97.00 percentile latency (ns)": 61946981994,
    "99.00 percentile latency (ns)": 63250849180,
    "99.90 percentile latency (ns)": 63815413060,
    "Max latency (ns)": 63863943442,
    "Mean latency (ns)": 31912274130,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 110229473,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 7750.85,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-SXM4",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.mAP": 20.111,
    "characteristics.samples_per_second": 7750.85,
    "characteristics.samples_per_second.normalized_per_core": 968.85625,
    "characteristics.samples_per_second.normalized_per_processor": 968.85625,
    "ck_system": "DGX-A100_A100-SXM4x8_TRT_Triton",
    "ck_used": true,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2, CUDA 11.0 Update 1",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 120,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM4x8_TRT_Triton",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)",
    "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0, Triton 20.09; GCC 7.5.0; Python 3.7.10",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 64,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 495000,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM4x8_TRT_Triton",
    "system_name": "Microsoft Corporation 7.0 (Virtual Machine)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 7500,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 240,
    "uid": "a9680b5f8b614d2d",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 33076459328,
    "90.00 percentile latency (ns)": 59871573629,
    "95.00 percentile latency (ns)": 63247165577,
    "97.00 percentile latency (ns)": 64592696099,
    "99.00 percentile latency (ns)": 65938420773,
    "99.90 percentile latency (ns)": 66538682564,
    "Max latency (ns)": 66607160900,
    "Mean latency (ns)": 33160229866,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 80836625,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 2774.48,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA T4",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 20,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.mAP": 20.111,
    "characteristics.samples_per_second": 2774.48,
    "characteristics.samples_per_second.normalized_per_core": 138.724,
    "characteristics.samples_per_second.normalized_per_processor": 138.724,
    "ck_system": "T4x20_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2, CUDA 11.0 Update 1",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "ECC off",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 20,
    "normalize_processors": 20,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/T4x20_TRT",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.4",
    "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 64,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 184800,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x20_TRT",
    "system_name": "Supermicro 6049GP-TRT-OTO-29 (20x T4, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 2800,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 56,
    "uid": "7cd605423d8ec02d",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 33719678815,
    "90.00 percentile latency (ns)": 60790200438,
    "95.00 percentile latency (ns)": 64164298835,
    "97.00 percentile latency (ns)": 65524285040,
    "99.00 percentile latency (ns)": 66886776667,
    "99.90 percentile latency (ns)": 67505566631,
    "Max latency (ns)": 67529908249,
    "Mean latency (ns)": 33735412373,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 158578613,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 1688.85,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 2,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.mAP": 20.111,
    "characteristics.samples_per_second": 1688.85,
    "characteristics.samples_per_second.normalized_per_core": 844.425,
    "characteristics.samples_per_second.normalized_per_processor": 844.425,
    "ck_system": "A100-PCIex2_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2, CUDA 11.0 Update 1",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 2,
    "normalize_processors": 2,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/A100-PCIex2_TRT",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.4",
    "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 64,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 114048,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex2_TRT",
    "system_name": "Gigabyte G482-Z52 (2x A100-PCIe, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 1728,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "75cd50c4e4ba50f9",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 31719098742,
    "90.00 percentile latency (ns)": 57212626129,
    "95.00 percentile latency (ns)": 60389113008,
    "97.00 percentile latency (ns)": 61657466405,
    "99.00 percentile latency (ns)": 62943088452,
    "99.90 percentile latency (ns)": 63507434343,
    "Max latency (ns)": 63561625733,
    "Mean latency (ns)": 31750420557,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 67032994,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 7787.72,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-SXM4",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.mAP": 20.111,
    "characteristics.samples_per_second": 7787.72,
    "characteristics.samples_per_second.normalized_per_core": 973.465,
    "characteristics.samples_per_second.normalized_per_processor": 973.465,
    "ck_system": "DGX-A100_A100-SXM4x8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2, CUDA 11.0 Update 1",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM4x8_TRT",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.4",
    "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 64,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 495000,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM4x8_TRT",
    "system_name": "NVIDIA DGX-A100 (8x A100-SXM4, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 7500,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "d28749a392e620a7",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 32934076506,
    "90.00 percentile latency (ns)": 59577103989,
    "95.00 percentile latency (ns)": 62916483692,
    "97.00 percentile latency (ns)": 64238140951,
    "99.00 percentile latency (ns)": 65578303476,
    "99.90 percentile latency (ns)": 66184920032,
    "Max latency (ns)": 66249571966,
    "Mean latency (ns)": 33012894105,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 80594832,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 1111.8,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA T4",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.mAP": 20.111,
    "characteristics.samples_per_second": 1111.8,
    "characteristics.samples_per_second.normalized_per_core": 138.975,
    "characteristics.samples_per_second.normalized_per_processor": 138.975,
    "ck_system": "T4x8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2, CUDA 11.0 Update 1",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "ECC off",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/T4x8_TRT",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.4",
    "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 64,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 73656,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x8_TRT",
    "system_name": "Supermicro 4029GP-TRT-OTO-28 (8x T4, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 1116,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 56,
    "uid": "41c9b30ee6571842",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 33350208247,
    "90.00 percentile latency (ns)": 60366225014,
    "95.00 percentile latency (ns)": 63745303622,
    "97.00 percentile latency (ns)": 65113242161,
    "99.00 percentile latency (ns)": 66469152364,
    "99.90 percentile latency (ns)": 67078599116,
    "Max latency (ns)": 67160358317,
    "Mean latency (ns)": 33431853846,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 88673682,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 2751.62,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA T4",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 20,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.mAP": 20.111,
    "characteristics.samples_per_second": 2751.62,
    "characteristics.samples_per_second.normalized_per_core": 137.581,
    "characteristics.samples_per_second.normalized_per_processor": 137.581,
    "ck_system": "T4x20_TRT_Triton",
    "ck_used": true,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2, CUDA 11.0 Update 1",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 120,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "ECC off",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 20,
    "normalize_processors": 20,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/T4x20_TRT_Triton",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)",
    "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0, Triton 20.09; GCC 7.5.0; Python 3.7.10",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 64,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 184800,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x20_TRT_Triton",
    "system_name": "Microsoft Corporation 7.0 (Virtual Machine)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 2800,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 240,
    "uid": "6bb7dff40b2aa23a",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 32985326381,
    "90.00 percentile latency (ns)": 59622533252,
    "95.00 percentile latency (ns)": 62957233557,
    "97.00 percentile latency (ns)": 64297502947,
    "99.00 percentile latency (ns)": 65636051794,
    "99.90 percentile latency (ns)": 66239157191,
    "Max latency (ns)": 66299890423,
    "Mean latency (ns)": 33043012682,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 80073590,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "TRTIS_Server",
    "Samples per second": 1110.95,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA T4",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.mAP": 20.111,
    "characteristics.samples_per_second": 1110.95,
    "characteristics.samples_per_second.normalized_per_core": 138.86875,
    "characteristics.samples_per_second.normalized_per_processor": 138.86875,
    "ck_system": "T4x8_TRT_Triton",
    "ck_used": true,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2, CUDA 11.0 Update 1",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 120,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "ECC off",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/T4x8_TRT_Triton",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)",
    "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0, Triton 20.09; GCC 7.5.0; Python 3.7.10",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 64,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 73656,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x8_TRT_Triton",
    "system_name": "Microsoft Corporation 7.0 (Virtual Machine)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 1116,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 240,
    "uid": "5b97bf014a318ce9",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 33876432112,
    "90.00 percentile latency (ns)": 61137093076,
    "95.00 percentile latency (ns)": 64559774309,
    "97.00 percentile latency (ns)": 65930405754,
    "99.00 percentile latency (ns)": 67321799666,
    "99.90 percentile latency (ns)": 67944230283,
    "Max latency (ns)": 67985032263,
    "Mean latency (ns)": 33918062281,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 81057353,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 1677.55,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 2,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.mAP": 20.111,
    "characteristics.samples_per_second": 1677.55,
    "characteristics.samples_per_second.normalized_per_core": 838.775,
    "characteristics.samples_per_second.normalized_per_processor": 838.775,
    "ck_system": "A100-PCIex2_TRT_Triton",
    "ck_used": true,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 7.2, CUDA 11.0 Update 1",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 120,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 2,
    "normalize_processors": 2,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/A100-PCIex2_TRT_Triton",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)",
    "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0, Triton 20.09; GCC 7.5.0; Python 3.7.10",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 64,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 114048,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex2_TRT_Triton",
    "system_name": "Microsoft Corporation 7.0 (Virtual Machine)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 1728,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 240,
    "uid": "9469a89188011afa",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  }
]