[
  {
    "50.00 percentile latency (ns)": 565960002330,
    "90.00 percentile latency (ns)": 1019381439660,
    "95.00 percentile latency (ns)": 1076053434734,
    "97.00 percentile latency (ns)": 1098755481503,
    "99.00 percentile latency (ns)": 1121405966107,
    "99.90 percentile latency (ns)": 1131593332246,
    "Max latency (ns)": 1132713906381,
    "Mean latency (ns)": 565984510145,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 69772274,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 2450.72,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIE-40GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 3,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 2450.72,
    "characteristics.samples_per_second.normalized_per_core": 816.9066666666666,
    "characteristics.samples_per_second.normalized_per_processor": 816.9066666666666,
    "ck_system": "R7525_A100-PCIE-40GBx3_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_memory_capacity": "512 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 32,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7502",
    "host_processors_per_node": 2,
    "host_storage_capacity": "1.8 TB",
    "host_storage_type": "SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 3,
    "normalize_processors": 3,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/R7525_A100-PCIE-40GBx3_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "CentOS 8.2",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 2775960,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_A100-PCIE-40GBx3_TRT",
    "system_name": "Dell EMC PowerEdge R7525 (3x A100-PCIE-40GB, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 4206,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 64,
    "uid": "67284b6f7d9465f8",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 349678570448,
    "90.00 percentile latency (ns)": 627890682830,
    "95.00 percentile latency (ns)": 662653344431,
    "97.00 percentile latency (ns)": 676589843678,
    "99.00 percentile latency (ns)": 690515148784,
    "99.90 percentile latency (ns)": 696736981595,
    "Max latency (ns)": 697428480782,
    "Mean latency (ns)": 349296488333,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 164900421,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 3633.92,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-PCIE-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 3633.92,
    "characteristics.samples_per_second.normalized_per_core": 908.48,
    "characteristics.samples_per_second.normalized_per_processor": 908.48,
    "ck_system": "R750xa_A100-PCIE-80GBx4_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "3.5 TB",
    "host_storage_type": "SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/R750xa_A100-PCIE-80GBx4_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "CentOS 8.2",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 2534400,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R750xa_A100-PCIE-80GBx4_TRT",
    "system_name": "Dell EMC PowerEdge R750xa (4x A100-PCIE-80GB, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 3840,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "a5983930b0569124",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 356142413718,
    "90.00 percentile latency (ns)": 639924078287,
    "95.00 percentile latency (ns)": 675382824978,
    "97.00 percentile latency (ns)": 689534884751,
    "99.00 percentile latency (ns)": 703770961601,
    "99.90 percentile latency (ns)": 714176036311,
    "Max latency (ns)": 719056950521,
    "Mean latency (ns)": 355893971596,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 299868823,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_MultiMigServer",
    "Samples per second": 3396.11,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-PCIE-80GB (7x1g.10gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 3396.11,
    "characteristics.samples_per_second.normalized_per_core": 849.0275,
    "characteristics.samples_per_second.normalized_per_processor": 849.0275,
    "ck_system": "R750xa_A100-PCIE-80GB-MIG_28x1g.10gb_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "3.5 TB",
    "host_storage_type": "SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/R750xa_A100-PCIE-80GB-MIG_28x1g.10gb_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "CentOS 8.2",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 2442000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R750xa_A100-PCIE-80GB-MIG_28x1g.10gb_TRT_Triton",
    "system_name": "Dell EMC PowerEdge R750xa (4x A100-PCIE-80GB-MIG-7x1g.10gb, TensorRT, Triton)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 3700,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "7e81346c41b68262",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 527181857562,
    "90.00 percentile latency (ns)": 949930355468,
    "95.00 percentile latency (ns)": 1002767084838,
    "97.00 percentile latency (ns)": 1023893671674,
    "99.00 percentile latency (ns)": 1045023867918,
    "99.90 percentile latency (ns)": 1054550564553,
    "Max latency (ns)": 1055587081262,
    "Mean latency (ns)": 527298138986,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 108640232,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 625.244,
    "Scenario": "offline",
    "accelerator_cooling_type": "passive",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA A10",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 2,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "2.12.3",
    "characteristics.mAP": 20.113,
    "characteristics.power": 566.6303503787884,
    "characteristics.power.normalized_per_core": 283.3151751893942,
    "characteristics.power.normalized_per_processor": 283.3151751893942,
    "characteristics.samples_per_second": 625.244,
    "characteristics.samples_per_second.normalized_per_core": 312.622,
    "characteristics.samples_per_second.normalized_per_processor": 312.622,
    "ck_system": "XE2420_A10x2_TRT_MaxQ",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "filesystem": "ext3/ext4",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_cooling_type": "air",
    "host_memory_capacity": "384 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 24,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Gold 6252 CPU @ 2.10GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "ECC on",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "5.00.00.00",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "normalize_cores": 2,
    "normalize_processors": 2,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/XE2420_A10x2_TRT_MaxQ",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 20.04.2",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.57.02, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_settings": "closed/Dell/power/XE2420_A10x2_power_settings.md",
    "print_timestamps": 0,
    "problem": false,
    "psu_details": "2x2000W",
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 660000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_cooling_type": "air",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE2420_A10x2_TRT_MaxQ",
    "system_name": "Dell EMC PowerEdge XE2420 (2x A10, MaxQ, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 1000,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 48,
    "uid": "cfab14a0584c618e",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 345904238956,
    "90.00 percentile latency (ns)": 623464175305,
    "95.00 percentile latency (ns)": 658131113245,
    "97.00 percentile latency (ns)": 672012397355,
    "99.00 percentile latency (ns)": 685879714432,
    "99.90 percentile latency (ns)": 692128895183,
    "Max latency (ns)": 692802579508,
    "Mean latency (ns)": 346007132894,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 194637795,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 9145.46,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-PCIE-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 10,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 9145.46,
    "characteristics.samples_per_second.normalized_per_core": 914.5459999999999,
    "characteristics.samples_per_second.normalized_per_processor": 914.5459999999999,
    "ck_system": "DSS8440_A100-PCIE-80GBx10_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_memory_capacity": " 768GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 24,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Gold 6248R CPU @ 3.00GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "1.8 TB",
    "host_storage_type": "SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 10,
    "normalize_processors": 10,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/DSS8440_A100-PCIE-80GBx10_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "CentOS 8.2",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 6336000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DSS8440_A100-PCIE-80GBx10_TRT",
    "system_name": "Dell EMC DSS 8440 (10x NVIDIA A100-PCIE-80GB, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 9600,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 48,
    "uid": "847e8b7a4455b8fd",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 349186748820,
    "90.00 percentile latency (ns)": 627207254277,
    "95.00 percentile latency (ns)": 661944637224,
    "97.00 percentile latency (ns)": 675843572982,
    "99.00 percentile latency (ns)": 689752799752,
    "99.90 percentile latency (ns)": 695994306614,
    "Max latency (ns)": 696685624959,
    "Mean latency (ns)": 348844233934,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 77057576,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 3637.8,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-PCIE-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 3637.8,
    "characteristics.samples_per_second.normalized_per_core": 909.45,
    "characteristics.samples_per_second.normalized_per_processor": 909.45,
    "ck_system": "R750xa_A100-PCIE-80GBx4_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "3.5 TB",
    "host_storage_type": "SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/R750xa_A100-PCIE-80GBx4_TRT_Triton",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "CentOS 8.2",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 2534400,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R750xa_A100-PCIE-80GBx4_TRT_Triton",
    "system_name": "Dell EMC PowerEdge R750xa (4x A100-PCIE-80GB, TensorRT, Triton)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 3840,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "8812fc2ffd1700e8",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 325882557094,
    "90.00 percentile latency (ns)": 587032937965,
    "95.00 percentile latency (ns)": 619682065558,
    "97.00 percentile latency (ns)": 632730523414,
    "99.00 percentile latency (ns)": 645797964013,
    "99.90 percentile latency (ns)": 651663106549,
    "Max latency (ns)": 652270574894,
    "Mean latency (ns)": 325939863443,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 142033584,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 3804.56,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 3804.56,
    "characteristics.samples_per_second.normalized_per_core": 475.57,
    "characteristics.samples_per_second.normalized_per_processor": 475.57,
    "ck_system": "DSS8440_A30x8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 48,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Gold 6248R CPU @ 3.00GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "3 TB",
    "host_storage_type": "NVMe",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/DSS8440_A30x8_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "CentOS 8.2",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 2481600,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DSS8440_A30x8_TRT",
    "system_name": "Dell EMC DSS 8440 (8x A30, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 3760,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 96,
    "uid": "34a5b962445f0906",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 329520389873,
    "90.00 percentile latency (ns)": 594292672272,
    "95.00 percentile latency (ns)": 627371826336,
    "97.00 percentile latency (ns)": 640625155011,
    "99.00 percentile latency (ns)": 653858523535,
    "99.90 percentile latency (ns)": 659855683633,
    "Max latency (ns)": 660444223074,
    "Mean latency (ns)": 329701502344,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 145865464,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 1409.05,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 3,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 1409.05,
    "characteristics.samples_per_second.normalized_per_core": 469.68333333333334,
    "characteristics.samples_per_second.normalized_per_processor": 469.68333333333334,
    "ck_system": "R7525_A30x3_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7763",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 3,
    "normalize_processors": 3,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/R7525_A30x3_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "CentOS 8.4.2105",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 930600,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_A30x3_TRT",
    "system_name": "Dell EMC PowerEdge R7525 (3x A30, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 1410,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "dedb89a8416da3db",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 1039430066695,
    "90.00 percentile latency (ns)": 1039430066695,
    "95.00 percentile latency (ns)": 1039430066695,
    "97.00 percentile latency (ns)": 1039430066695,
    "99.00 percentile latency (ns)": 1039430066695,
    "99.90 percentile latency (ns)": 1039430066695,
    "Max latency (ns)": 1039430066695,
    "Mean latency (ns)": 1039430066695,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 1039430066695,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "SUT\u0000",
    "Samples per second": 57.1467,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "N/A",
    "accelerator_memory_configuration": "",
    "accelerator_model_name": "N/A",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 0,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.mAP": 19.876,
    "characteristics.samples_per_second": 57.1467,
    "characteristics.samples_per_second.normalized_per_core": 1.0204767857142858,
    "characteristics.samples_per_second.normalized_per_processor": 28.57335,
    "ck_system": "R750_Intel6330_openvino",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "OpenVINO",
    "host_memory_capacity": "2.16TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "2.0GHz",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Gold 6330 CPU @ 2.0GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "1.5TB",
    "host_storage_type": "NVMe",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "normalize_cores": 56,
    "normalize_processors": 2,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/R750_Intel6330_openvino",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 20.10 (GNU/Linux 5.8.0-45-generic x86_64)",
    "other_software_stack": "MKL-DNN; URL: github.com/intel/mkl-dnn",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "no",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 59400,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "ssd-resnet34_int8.xml",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R750_Intel6330_openvino",
    "system_name": "Dell EMC PowerEdge R750 ICX-6330(2S 28C)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 90,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 56,
    "uid": "1ab08056ed79662b",
    "use_accelerator": false,
    "weight_data_types": "int8",
    "weight_transformations": "ONNX -> OpenVINO (please refer to closed/Intel/calibration/OpenVINO)"
  },
  {
    "50.00 percentile latency (ns)": 386191051542,
    "90.00 percentile latency (ns)": 697166329756,
    "95.00 percentile latency (ns)": 736077507825,
    "97.00 percentile latency (ns)": 751608898297,
    "99.00 percentile latency (ns)": 767133995454,
    "99.90 percentile latency (ns)": 774175974853,
    "Max latency (ns)": 774938820021,
    "Mean latency (ns)": 386487862592,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 157930082,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 2452.84,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-PCIE-40GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 3,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 2452.84,
    "characteristics.samples_per_second.normalized_per_core": 817.6133333333333,
    "characteristics.samples_per_second.normalized_per_processor": 817.6133333333333,
    "ck_system": "R7525_vA100-PCIE-40GBx3_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7502",
    "host_processors_per_node": 2,
    "host_storage_capacity": "1.8 TB",
    "host_storage_type": "SSD",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 3,
    "normalize_processors": 3,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/R7525_vA100-PCIE-40GBx3_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "CentOS 8.2",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 1900800,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "VMware Submission",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_vA100-PCIE-40GBx3_TRT",
    "system_name": "Dell EMC PowerEdge R7525 (3x GRID A100-40C, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 2880,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "c4a14ab3aa509a67",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 489361245311,
    "90.00 percentile latency (ns)": 881615718959,
    "95.00 percentile latency (ns)": 930593020164,
    "97.00 percentile latency (ns)": 950225916340,
    "99.00 percentile latency (ns)": 969915803670,
    "99.90 percentile latency (ns)": 978656158561,
    "Max latency (ns)": 979646908127,
    "Mean latency (ns)": 489510768094,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 199769280,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 2587.05,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIE-40GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.117,
    "characteristics.power": 1448.12806122449,
    "characteristics.power.normalized_per_core": 362.0320153061225,
    "characteristics.power.normalized_per_processor": 362.0320153061225,
    "characteristics.samples_per_second": 2587.05,
    "characteristics.samples_per_second.normalized_per_core": 646.7625,
    "characteristics.samples_per_second.normalized_per_processor": 646.7625,
    "ck_system": "R750xa_A100-PCIE-40GBx4_TRT_MaxQ",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_memory_capacity": "256 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 32,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8368 CPU @ 2.40GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "543 GB",
    "host_storage_type": "SSD",
    "hw_notes": "Result Measured for Power; GPU Power Limited to 175W",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/R750xa_A100-PCIE-40GBx4_TRT_MaxQ",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "CentOS 8.2",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_settings": "closed/Dell/power/R750xa_A100-PCIE-40GBx4_TRT_MaxQ.md",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "1x2400W",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 2534400,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R750xa_A100-PCIE-40GBx4_TRT_MaxQ",
    "system_name": "Dell EMC PowerEdge R750xa (4x A100-PCIE-40GB, MaxQ, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 3840,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 64,
    "uid": "ecacfc1ec4c8f851",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 520900364026,
    "90.00 percentile latency (ns)": 937911895497,
    "95.00 percentile latency (ns)": 989984880218,
    "97.00 percentile latency (ns)": 1010836924378,
    "99.00 percentile latency (ns)": 1031688296469,
    "99.90 percentile latency (ns)": 1041062278412,
    "Max latency (ns)": 1042058561797,
    "Mean latency (ns)": 520723918033,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 104056813,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 633.362,
    "Scenario": "offline",
    "accelerator_cooling_type": "passive",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA A10",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 2,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "2.12.3",
    "characteristics.mAP": 20.113,
    "characteristics.samples_per_second": 633.362,
    "characteristics.samples_per_second.normalized_per_core": 316.681,
    "characteristics.samples_per_second.normalized_per_processor": 316.681,
    "ck_system": "XE2420_A10x2_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "filesystem": "ext3/ext4",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_cooling_type": "air",
    "host_memory_capacity": "384 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 24,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Gold 6252 CPU @ 2.10GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "ECC on",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "5.00.00.00",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "normalize_cores": 2,
    "normalize_processors": 2,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/XE2420_A10x2_TRT",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 20.04.2",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.57.02, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_settings": "closed/Dell/power/XE2420_A10x2_power_settings.md",
    "print_timestamps": 0,
    "problem": false,
    "psu_details": "2x2000W",
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 660000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_cooling_type": "air",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE2420_A10x2_TRT",
    "system_name": "Dell EMC PowerEdge XE2420 (2x A10, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 1000,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 48,
    "uid": "5b00f964190a5ebc",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 531919396589,
    "90.00 percentile latency (ns)": 957490345167,
    "95.00 percentile latency (ns)": 1010647593861,
    "97.00 percentile latency (ns)": 1031984407567,
    "99.00 percentile latency (ns)": 1053222992149,
    "99.90 percentile latency (ns)": 1062836665100,
    "Max latency (ns)": 1063859279902,
    "Mean latency (ns)": 531868708526,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 99469622,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 620.383,
    "Scenario": "offline",
    "accelerator_cooling_type": "",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA A10",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 2,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.mAP": 20.113,
    "characteristics.samples_per_second": 620.383,
    "characteristics.samples_per_second.normalized_per_core": 310.1915,
    "characteristics.samples_per_second.normalized_per_processor": 310.1915,
    "ck_system": "XR12_datacenter_A10x2_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "filesystem": "",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_cooling_type": "",
    "host_memory_capacity": "512 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Gold 6330 CPU @ 2.00GHz",
    "host_processors_per_node": 1,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "ECC on",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "normalize_cores": 2,
    "normalize_processors": 2,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/XR12_datacenter_A10x2_TRT",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 20.04.2",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.57.02, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_settings": "",
    "print_timestamps": 0,
    "problem": false,
    "psu_details": "",
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 660000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_cooling_type": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XR12_datacenter_A10x2_TRT",
    "system_name": "Dell EMC PowerEdge XR12 (2x A10, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 1000,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 28,
    "uid": "aa4d298d4e3da2ec",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 330698681840,
    "90.00 percentile latency (ns)": 595136057483,
    "95.00 percentile latency (ns)": 628164969082,
    "97.00 percentile latency (ns)": 641386353141,
    "99.00 percentile latency (ns)": 654607530676,
    "99.90 percentile latency (ns)": 660555208951,
    "Max latency (ns)": 662290943626,
    "Mean latency (ns)": 330698367391,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 264124007,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_MultiMigServer",
    "Samples per second": 3766.92,
    "Scenario": "offline",
    "accelerator_cooling_type": "",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB (7x1g.10gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 3766.92,
    "characteristics.samples_per_second.normalized_per_core": 941.73,
    "characteristics.samples_per_second.normalized_per_processor": 941.73,
    "ck_system": "XE8545_A100-SXM-80GB-MIG_28x1g.10gb_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_cooling_type": "",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7763",
    "host_processors_per_node": 2,
    "host_storage_capacity": "3 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "500W A100-SXM-80GB",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/XE8545_A100-SXM-80GB-MIG_28x1g.10gb_TRT_Triton",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 20.04.2",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.57.02, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 2494800,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE8545_A100-SXM-80GB-MIG_28x1g.10gb_TRT_Triton",
    "system_name": "Dell EMC PowerEdge XE8545 (4x A100-SXM-80GB-7x1g.10gb, TensorRT, Triton)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 3780,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "fc05146f3b1d177d",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 329749208289,
    "90.00 percentile latency (ns)": 593441374724,
    "95.00 percentile latency (ns)": 626390616074,
    "97.00 percentile latency (ns)": 639576635741,
    "99.00 percentile latency (ns)": 652763050744,
    "99.90 percentile latency (ns)": 658700244178,
    "Max latency (ns)": 659355969333,
    "Mean latency (ns)": 329739087859,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 91490111,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "Triton_Server",
    "Samples per second": 4096,
    "Scenario": "offline",
    "accelerator_cooling_type": "",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.mAP": 20.117,
    "characteristics.samples_per_second": 4096,
    "characteristics.samples_per_second.normalized_per_core": 1024.0,
    "characteristics.samples_per_second.normalized_per_processor": 1024.0,
    "ck_system": "XE8545_A100-SXM-80GBx4_TRT_Triton",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_cooling_type": "",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7763",
    "host_processors_per_node": 2,
    "host_storage_capacity": "3 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "500W A100-SXM-80GB",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/XE8545_A100-SXM-80GBx4_TRT_Triton",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 20.04.2",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.57.02, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 2700720,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE8545_A100-SXM-80GBx4_TRT_Triton",
    "system_name": "Dell EMC PowerEdge XE8545 (4x A100-SXM-80GB, TensorRT, Triton)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 4092,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "44af893c64c2399f",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 332158952429,
    "90.00 percentile latency (ns)": 597853835177,
    "95.00 percentile latency (ns)": 631039044887,
    "97.00 percentile latency (ns)": 644330988579,
    "99.00 percentile latency (ns)": 657591552585,
    "99.90 percentile latency (ns)": 663611279645,
    "Max latency (ns)": 664226764580,
    "Mean latency (ns)": 332162736070,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 141080814,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "LWIS_Server",
    "Samples per second": 4123.59,
    "Scenario": "offline",
    "accelerator_cooling_type": "",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "2.2.5",
    "characteristics.mAP": 20.117,
    "characteristics.power": 3031.5400602409622,
    "characteristics.power.normalized_per_core": 757.8850150602406,
    "characteristics.power.normalized_per_processor": 757.8850150602406,
    "characteristics.samples_per_second": 4123.59,
    "characteristics.samples_per_second.normalized_per_core": 1030.8975,
    "characteristics.samples_per_second.normalized_per_processor": 1030.8975,
    "ck_system": "XE8545_A100-SXM-80GBx4_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "filesystem": "ext3/ext4",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_cooling_type": "",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7763",
    "host_processors_per_node": 2,
    "host_storage_capacity": "3 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "500W A100-SXM-80GB",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "management_firmware_version": "4.40.40.151",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/XE8545_A100-SXM-80GBx4_TRT",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 20.04.2",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.57.02, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "power_management": "",
    "power_settings": "closed/Dell/power/XE8545_A100-SXM-80GBx4_power_settings.md",
    "print_timestamps": 0,
    "problem": false,
    "psu_details": "4x2400W",
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 2739000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "resnet34-ssd1200.pytorch",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_cooling_type": "air",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE8545_A100-SXM-80GBx4_TRT",
    "system_name": "Dell EMC PowerEdge XE8545 (4x A100-SXM-80GB, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 4150,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 128,
    "uid": "713a2204b9cbac2e",
    "use_accelerator": true,
    "weight_data_types": "int8",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 1025697321584,
    "90.00 percentile latency (ns)": 1025697321584,
    "95.00 percentile latency (ns)": 1025697321584,
    "97.00 percentile latency (ns)": 1025697321584,
    "99.00 percentile latency (ns)": 1025697321584,
    "99.90 percentile latency (ns)": 1025697321584,
    "Max latency (ns)": 1025697321584,
    "Mean latency (ns)": 1025697321584,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 1025697321584,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "SUT\u0000",
    "Samples per second": 57.9118,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "N/A",
    "accelerator_memory_configuration": "",
    "accelerator_model_name": "N/A",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 0,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.mAP": 19.876,
    "characteristics.samples_per_second": 57.9118,
    "characteristics.samples_per_second.normalized_per_core": 0.904871875,
    "characteristics.samples_per_second.normalized_per_processor": 28.9559,
    "ck_system": "R750_Intel8352M_2S_32C",
    "ck_used": false,
    "cooling": "",
    "dataset": "COCO 2017 (300x300)",
    "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/coco2017.md",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.mAP",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "ssd-mobilenet",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package",
    "framework": "OpenVINO",
    "host_memory_capacity": "512GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 32,
    "host_processor_frequency": "2.30GHz",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8352M CPU @ 2.30GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "2.9TB",
    "host_storage_type": "NVMe",
    "hw_notes": "",
    "informal_model": "ssd-resnet34",
    "input_data_types": "int8",
    "key.accuracy": "characteristics.mAP",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "normalize_cores": 64,
    "normalize_processors": 2,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/R750_Intel8352M_2S_32C",
    "number_of_nodes": 1,
    "operating_system": " Ubuntu 20.04.2 LTS (GNU/Linux 5.4.0-80-generic x86_64)",
    "other_software_stack": "MKL-DNN; URL: github.com/intel/mkl-dnn",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 64,
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "no",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 59400,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "ssd-resnet34_int8.xml",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R750_Intel8352M_2S_32C",
    "system_name": "Dell EMC PowerEdge R750 ICX-8352M(2S 32C)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 90,
    "task": "object detection",
    "task2": "object detection",
    "total_cores": 64,
    "uid": "e07e54a841b7beaa",
    "use_accelerator": false,
    "weight_data_types": "int8",
    "weight_transformations": "ONNX -> OpenVINO (please refer to closed/Intel/calibration/OpenVINO)"
  }
]