[ { "50.00 percentile latency (ns)": 7111073, "90.00 percentile latency (ns)": 8949368, "95.00 percentile latency (ns)": 9483819, "97.00 percentile latency (ns)": 9865746, "99.00 percentile latency (ns)": 12638654, "99.90 percentile latency (ns)": 24676270, "Completed samples per second": 83512.66, "Max latency (ns)": 177127146, "Mean latency (ns)": 7315051, "Min duration satisfied": "Yes", "Min latency (ns)": 2967994, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_Server", "Scenario": "server", "Scheduled samples per second": 83513.48, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA A10", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.scheduled_queries_per_second": 83513.48, "characteristics.scheduled_queries_per_second.normalized_per_core": 10439.185, "characteristics.scheduled_queries_per_second.normalized_per_processor": 10439.185, "characteristics.total": 50000, "ck_system": "A10x8_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A10x8_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x8_TRT_Triton", "system_name": "Supermicro 4029GP-TRT-OTO-28 (8x A10, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 83500, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "cda17bd62b914608", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 4424784, "90.00 percentile latency (ns)": 7126686, "95.00 percentile latency (ns)": 8232718, "97.00 percentile latency (ns)": 9213167, "99.00 percentile latency (ns)": 12627832, "99.90 percentile latency (ns)": 32313617, "Completed samples per second": 70006.15, "Max latency (ns)": 223174317, "Mean latency (ns)": 4683863, "Min duration satisfied": "Yes", "Min latency (ns)": 926499, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_MultiMigServer", "Scenario": "server", "Scheduled samples per second": 70006.83, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB (7x1g.10gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.038, "characteristics.good": 38019, "characteristics.scheduled_queries_per_second": 70006.83, "characteristics.scheduled_queries_per_second.normalized_per_core": 8750.85375, "characteristics.scheduled_queries_per_second.normalized_per_processor": 8750.85375, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GB-MIG_56x1g.10gb_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_56x1g.10gb_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_56x1g.10gb_TRT_Triton", "system_name": "NVIDIA DGX A100 (8x A100-SXM-80GB-MIG-7x1g.10gb, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 70000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "232b065e338d48d2", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5245659, "90.00 percentile latency (ns)": 6161851, "95.00 percentile latency (ns)": 6417561, "97.00 percentile latency (ns)": 6581446, "99.00 percentile latency (ns)": 6832392, "99.90 percentile latency (ns)": 429187059, "Completed samples per second": 106834.26, "Max latency (ns)": 2028764166, "Mean latency (ns)": 6584386, "Min duration satisfied": "Yes", "Min latency (ns)": 2789477, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Concurrent_Triton_Server", "Scenario": "server", "Scheduled samples per second": 107012.87, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.048, "characteristics.good": 38024, "characteristics.scheduled_queries_per_second": 107012.87, "characteristics.scheduled_queries_per_second.normalized_per_core": 26753.2175, "characteristics.scheduled_queries_per_second.normalized_per_processor": 26753.2175, "characteristics.total": 50000, "ck_system": "DGX-Station-A100_A100-SXM-80GBx4_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 1, "host_storage_capacity": "10 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-Station-A100_A100-SXM-80GBx4_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-Station-A100_A100-SXM-80GBx4_TRT_Triton", "system_name": "NVIDIA DGX Station A100 (4x A100-SXM-80GB, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 107000, "task": "image classification", "task2": "image classification", "total_cores": 64, "uid": "22483e92b135e888", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5265154, "90.00 percentile latency (ns)": 8975464, "95.00 percentile latency (ns)": 10532946, "97.00 percentile latency (ns)": 10908354, "99.00 percentile latency (ns)": 14343894, "99.90 percentile latency (ns)": 31599804, "Completed samples per second": 65003.43, "Max latency (ns)": 228597621, "Mean latency (ns)": 6055453, "Min duration satisfied": "Yes", "Min latency (ns)": 2175831, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_MultiMigServer", "Scenario": "server", "Scheduled samples per second": 65003.79, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30 (4x1g.6gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.038, "characteristics.good": 38019, "characteristics.scheduled_queries_per_second": 65003.79, "characteristics.scheduled_queries_per_second.normalized_per_core": 8125.47375, "characteristics.scheduled_queries_per_second.normalized_per_processor": 8125.47375, "characteristics.total": 50000, "ck_system": "A30-MIG_32x1g.6gb_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30-MIG_32x1g.6gb_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30-MIG_32x1g.6gb_TRT_Triton", "system_name": "Gigabyte G482-Z54 (8x A30-MIG-4x1g.6gb, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 65000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "82467da2ecadeb1e", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5776197, "90.00 percentile latency (ns)": 6278025, "95.00 percentile latency (ns)": 6363924, "97.00 percentile latency (ns)": 6413351, "99.00 percentile latency (ns)": 6499887, "99.90 percentile latency (ns)": 6641126, "Completed samples per second": 104011.04, "Max latency (ns)": 14399602, "Mean latency (ns)": 5781210, "Min duration satisfied": "Yes", "Min latency (ns)": 4265275, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 104011.92, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.034, "characteristics.good": 38017, "characteristics.scheduled_queries_per_second": 104011.92, "characteristics.scheduled_queries_per_second.normalized_per_core": 26002.98, "characteristics.scheduled_queries_per_second.normalized_per_processor": 26002.98, "characteristics.total": 50000, "ck_system": "A100-PCIe-80GB_aarch64x4_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.2, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 80, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Ampere Altra Q80-30", "host_processors_per_node": 1, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIe-80GB_aarch64x4_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIe-80GB_aarch64x4_TRT", "system_name": "Gigabyte G242-P31 (4x A100-PCIe-80GB_aarch64, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 104000, "task": "image classification", "task2": "image classification", "total_cores": 80, "uid": "2e6424c92a52bf05", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 4589017, "90.00 percentile latency (ns)": 4849745, "95.00 percentile latency (ns)": 4921475, "97.00 percentile latency (ns)": 4981383, "99.00 percentile latency (ns)": 9016731, "99.90 percentile latency (ns)": 25086708, "Completed samples per second": 116013.61, "Max latency (ns)": 52646211, "Mean latency (ns)": 4721884, "Min duration satisfied": "Yes", "Min latency (ns)": 3150703, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 116014.38, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.044, "characteristics.good": 38022, "characteristics.scheduled_queries_per_second": 116014.38, "characteristics.scheduled_queries_per_second.normalized_per_core": 14501.7975, "characteristics.scheduled_queries_per_second.normalized_per_processor": 14501.7975, "characteristics.total": 50000, "ck_system": "A30x8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30x8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x8_TRT", "system_name": "Gigabyte G482-Z54 (8x A30, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 116000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "5dad70444779c932", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5761462, "90.00 percentile latency (ns)": 7329154, "95.00 percentile latency (ns)": 7861069, "97.00 percentile latency (ns)": 8494558, "99.00 percentile latency (ns)": 13090655, "99.90 percentile latency (ns)": 30222503, "Completed samples per second": 110012.01, "Max latency (ns)": 314562319, "Mean latency (ns)": 6213938, "Min duration satisfied": "Yes", "Min latency (ns)": 3282376, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_Server", "Scenario": "server", "Scheduled samples per second": 110013.13, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.044, "characteristics.good": 38022, "characteristics.scheduled_queries_per_second": 110013.13, "characteristics.scheduled_queries_per_second.normalized_per_core": 13751.64125, "characteristics.scheduled_queries_per_second.normalized_per_processor": 13751.64125, "characteristics.total": 50000, "ck_system": "A30x8_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30x8_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x8_TRT_Triton", "system_name": "Gigabyte G482-Z54 (8x A30, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 110000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "44fd88046546fd3b", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5272070, "90.00 percentile latency (ns)": 5564570, "95.00 percentile latency (ns)": 5641916, "97.00 percentile latency (ns)": 5720945, "99.00 percentile latency (ns)": 14657031, "99.90 percentile latency (ns)": 33729871, "Completed samples per second": 200036.75, "Max latency (ns)": 59738634, "Mean latency (ns)": 5493836, "Min duration satisfied": "Yes", "Min latency (ns)": 2823836, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 200038.16, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.scheduled_queries_per_second": 200038.16, "characteristics.scheduled_queries_per_second.normalized_per_core": 25004.77, "characteristics.scheduled_queries_per_second.normalized_per_processor": 25004.77, "characteristics.total": 50000, "ck_system": "A100-PCIe-80GBx8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIe-80GBx8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIe-80GBx8_TRT", "system_name": "Gigabyte G482-Z54 (8x A100-PCIe-80GB, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 200000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "26f0a34419952f57", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 3182308, "90.00 percentile latency (ns)": 3435668, "95.00 percentile latency (ns)": 3527600, "97.00 percentile latency (ns)": 3688191, "99.00 percentile latency (ns)": 14810396, "99.90 percentile latency (ns)": 32460956, "Completed samples per second": 185033.16, "Max latency (ns)": 54749222, "Mean latency (ns)": 3479187, "Min duration satisfied": "Yes", "Min latency (ns)": 2671746, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 185034.18, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.046, "characteristics.good": 38023, "characteristics.power": 2108.4293333333317, "characteristics.power.normalized_per_core": 263.55366666666646, "characteristics.power.normalized_per_processor": 263.55366666666646, "characteristics.scheduled_queries_per_second": 185034.18, "characteristics.scheduled_queries_per_second.normalized_per_core": 23129.2725, "characteristics.scheduled_queries_per_second.normalized_per_processor": 23129.2725, "characteristics.total": 50000, "ck_system": "A100-PCIex8_TRT_MaxQ", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIex8_TRT_MaxQ", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex8_TRT_MaxQ", "system_name": "Gigabyte G482-Z54 (8x A100-PCIe, MaxQ, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 185000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "4bb38ec871769b13", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 4873883, "90.00 percentile latency (ns)": 5137807, "95.00 percentile latency (ns)": 5209991, "97.00 percentile latency (ns)": 5266287, "99.00 percentile latency (ns)": 5865791, "99.90 percentile latency (ns)": 17257217, "Completed samples per second": 260039.25, "Max latency (ns)": 39131179, "Mean latency (ns)": 4922247, "Min duration satisfied": "Yes", "Min latency (ns)": 4101156, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 260041.59, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.044, "characteristics.good": 38022, "characteristics.scheduled_queries_per_second": 260041.59, "characteristics.scheduled_queries_per_second.normalized_per_core": 32505.19875, "characteristics.scheduled_queries_per_second.normalized_per_processor": 32505.19875, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT", "system_name": "NVIDIA DGX A100 (8x A100-SXM-80GB, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 260000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "abb4e4d16a8ba073", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 3045510, "90.00 percentile latency (ns)": 3310663, "95.00 percentile latency (ns)": 3376691, "97.00 percentile latency (ns)": 3431435, "99.00 percentile latency (ns)": 3901437, "99.90 percentile latency (ns)": 17453807, "Completed samples per second": 107012.16, "Max latency (ns)": 47665019, "Mean latency (ns)": 3106407, "Min duration satisfied": "Yes", "Min latency (ns)": 2517185, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 107012.87, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.036, "characteristics.good": 38018, "characteristics.scheduled_queries_per_second": 107012.87, "characteristics.scheduled_queries_per_second.normalized_per_core": 26753.2175, "characteristics.scheduled_queries_per_second.normalized_per_processor": 26753.2175, "characteristics.total": 50000, "ck_system": "DGX-Station-A100_A100-SXM-80GBx4_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 1, "host_storage_capacity": "10 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-Station-A100_A100-SXM-80GBx4_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-Station-A100_A100-SXM-80GBx4_TRT", "system_name": "NVIDIA DGX Station A100 (4x A100-SXM-80GB, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 107000, "task": "image classification", "task2": "image classification", "total_cores": 64, "uid": "0624071808f590f1", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5165190, "90.00 percentile latency (ns)": 7981998, "95.00 percentile latency (ns)": 8982805, "97.00 percentile latency (ns)": 9670704, "99.00 percentile latency (ns)": 11031504, "99.90 percentile latency (ns)": 13437371, "Completed samples per second": 3102.73, "Max latency (ns)": 18339224, "Mean latency (ns)": 5426688, "Min duration satisfied": "Yes", "Min latency (ns)": 929518, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 3102.76, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30 (1x1g.6gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.046, "characteristics.good": 38023, "characteristics.scheduled_queries_per_second": 3102.76, "characteristics.scheduled_queries_per_second.normalized_per_core": 3102.76, "characteristics.scheduled_queries_per_second.normalized_per_processor": 3102.76, "characteristics.total": 50000, "ck_system": "A30-MIG_1x1g.6gb_TRT_HeteroMultiUse", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30-MIG_1x1g.6gb_TRT_HeteroMultiUse", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30-MIG_1x1g.6gb_TRT_HeteroMultiUse", "system_name": "Gigabyte G482-Z54 (1x A30-MIG-1x1g.6gb, TensorRT, HeteroMultiUse)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 3100, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "4b96bf4cfc82f696", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 3665758, "90.00 percentile latency (ns)": 5359471, "95.00 percentile latency (ns)": 6128805, "97.00 percentile latency (ns)": 6705984, "99.00 percentile latency (ns)": 7919665, "99.90 percentile latency (ns)": 10137577, "Completed samples per second": 3602.1, "Max latency (ns)": 14713366, "Mean latency (ns)": 3817459, "Min duration satisfied": "Yes", "Min latency (ns)": 858340, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 3602.11, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB (1x1g.10gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.scheduled_queries_per_second": 3602.11, "characteristics.scheduled_queries_per_second.normalized_per_core": 3602.11, "characteristics.scheduled_queries_per_second.normalized_per_processor": 3602.11, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse", "system_name": "NVIDIA DGX A100 (1x A100-SXM-80GB-MIG-1x1g.10gb, TensorRT, HeteroMultiUse)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 3600, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "8da826426a48d68f", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 3222451, "90.00 percentile latency (ns)": 3517186, "95.00 percentile latency (ns)": 3657649, "97.00 percentile latency (ns)": 3887058, "99.00 percentile latency (ns)": 7349420, "99.90 percentile latency (ns)": 22791457, "Completed samples per second": 107012.17, "Max latency (ns)": 54231200, "Mean latency (ns)": 3345634, "Min duration satisfied": "Yes", "Min latency (ns)": 2689325, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 107012.87, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.044, "characteristics.good": 38022, "characteristics.power": 1129.5533333333342, "characteristics.power.normalized_per_core": 282.38833333333355, "characteristics.power.normalized_per_processor": 282.38833333333355, "characteristics.scheduled_queries_per_second": 107012.87, "characteristics.scheduled_queries_per_second.normalized_per_core": 26753.2175, "characteristics.scheduled_queries_per_second.normalized_per_processor": 26753.2175, "characteristics.total": 50000, "ck_system": "DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 1, "host_storage_capacity": "10 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ", "system_name": "NVIDIA DGX Station A100 (4x A100-SXM-80GB, MaxQ, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 107000, "task": "image classification", "task2": "image classification", "total_cores": 64, "uid": "ac5457f891982c61", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5405320, "90.00 percentile latency (ns)": 5688601, "95.00 percentile latency (ns)": 5745107, "97.00 percentile latency (ns)": 5782099, "99.00 percentile latency (ns)": 5887384, "99.90 percentile latency (ns)": 26833084, "Completed samples per second": 190035.8, "Max latency (ns)": 58586901, "Mean latency (ns)": 5479423, "Min duration satisfied": "Yes", "Min latency (ns)": 3220987, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 190037.74, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.044, "characteristics.good": 38022, "characteristics.scheduled_queries_per_second": 190037.74, "characteristics.scheduled_queries_per_second.normalized_per_core": 23754.7175, "characteristics.scheduled_queries_per_second.normalized_per_processor": 23754.7175, "characteristics.total": 50000, "ck_system": "A100-PCIex8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIex8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex8_TRT", "system_name": "Gigabyte G482-Z54 (8x A100-PCIe, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 190000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "ca22dc41b4a82d73", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 3368298, "90.00 percentile latency (ns)": 4080371, "95.00 percentile latency (ns)": 4440482, "97.00 percentile latency (ns)": 4853409, "99.00 percentile latency (ns)": 7978689, "99.90 percentile latency (ns)": 118983197, "Completed samples per second": 175030.37, "Max latency (ns)": 244041089, "Mean latency (ns)": 3893902, "Min duration satisfied": "Yes", "Min latency (ns)": 2847481, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_Server", "Scenario": "server", "Scheduled samples per second": 175031.41, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.046, "characteristics.good": 38023, "characteristics.scheduled_queries_per_second": 175031.41, "characteristics.scheduled_queries_per_second.normalized_per_core": 21878.92625, "characteristics.scheduled_queries_per_second.normalized_per_processor": 21878.92625, "characteristics.total": 50000, "ck_system": "A100-PCIex8_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIex8_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex8_TRT_Triton", "system_name": "Gigabyte G482-Z54 (8x A100-PCIe, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 175000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "44435f94d95c6dfe", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 8524074, "90.00 percentile latency (ns)": 9293137, "95.00 percentile latency (ns)": 9481054, "97.00 percentile latency (ns)": 9632408, "99.00 percentile latency (ns)": 10126064, "99.90 percentile latency (ns)": 40832095, "Completed samples per second": 220037.72, "Max latency (ns)": 566736973, "Mean latency (ns)": 8542551, "Min duration satisfied": "Yes", "Min latency (ns)": 5185406, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Concurrent_Triton_Server", "Scenario": "server", "Scheduled samples per second": 220040.13, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.044, "characteristics.good": 38022, "characteristics.scheduled_queries_per_second": 220040.13, "characteristics.scheduled_queries_per_second.normalized_per_core": 27505.01625, "characteristics.scheduled_queries_per_second.normalized_per_processor": 27505.01625, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT_Triton", "system_name": "NVIDIA DGX A100 (8x A100-SXM-80GB, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 220000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "9c175dc646fdbedf", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 6041020, "90.00 percentile latency (ns)": 8219037, "95.00 percentile latency (ns)": 9416253, "97.00 percentile latency (ns)": 10884294, "99.00 percentile latency (ns)": 14532145, "99.90 percentile latency (ns)": 22050418, "Completed samples per second": 232033.16, "Max latency (ns)": 48126933, "Mean latency (ns)": 6580149, "Min duration satisfied": "Yes", "Min latency (ns)": 4430992, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 232035.55, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.046, "characteristics.good": 38023, "characteristics.power": 2932.081166666666, "characteristics.power.normalized_per_core": 366.51014583333324, "characteristics.power.normalized_per_processor": 366.51014583333324, "characteristics.scheduled_queries_per_second": 232035.55, "characteristics.scheduled_queries_per_second.normalized_per_core": 29004.44375, "characteristics.scheduled_queries_per_second.normalized_per_processor": 29004.44375, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT_MaxQ", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT_MaxQ", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT_MaxQ", "system_name": "NVIDIA DGX A100 (8x A100-SXM-80GB, MaxQ, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 232000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "14addf742a18857a", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 3487898, "90.00 percentile latency (ns)": 4284836, "95.00 percentile latency (ns)": 4945076, "97.00 percentile latency (ns)": 5701159, "99.00 percentile latency (ns)": 8541627, "99.90 percentile latency (ns)": 19785964, "Completed samples per second": 183034.38, "Max latency (ns)": 37136251, "Mean latency (ns)": 3699343, "Min duration satisfied": "Yes", "Min latency (ns)": 2765626, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_Server", "Scenario": "server", "Scheduled samples per second": 183035.92, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.scheduled_queries_per_second": 183035.92, "characteristics.scheduled_queries_per_second.normalized_per_core": 22879.49, "characteristics.scheduled_queries_per_second.normalized_per_processor": 22879.49, "characteristics.total": 50000, "ck_system": "A100-PCIe-80GBx8_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIe-80GBx8_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIe-80GBx8_TRT_Triton", "system_name": "Gigabyte G482-Z54 (8x A100-PCIe-80GB, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 183000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "ce5a82e7aeeedc3e", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 12200858, "90.00 percentile latency (ns)": 12841833, "95.00 percentile latency (ns)": 12983059, "97.00 percentile latency (ns)": 13070844, "99.00 percentile latency (ns)": 13240372, "99.90 percentile latency (ns)": 22248118, "Completed samples per second": 88011.84, "Max latency (ns)": 61290181, "Mean latency (ns)": 12228425, "Min duration satisfied": "Yes", "Min latency (ns)": 4264477, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 88013.5, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA A10", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.scheduled_queries_per_second": 88013.5, "characteristics.scheduled_queries_per_second.normalized_per_core": 11001.6875, "characteristics.scheduled_queries_per_second.normalized_per_processor": 11001.6875, "characteristics.total": 50000, "ck_system": "A10x8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A10x8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x8_TRT", "system_name": "Supermicro 4029GP-TRT-OTO-28 (8x A10, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 88000, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "d3c180b872b49a88", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5387140, "90.00 percentile latency (ns)": 9973403, "95.00 percentile latency (ns)": 10948739, "97.00 percentile latency (ns)": 11651856, "99.00 percentile latency (ns)": 13550106, "99.90 percentile latency (ns)": 20450564, "Completed samples per second": 4901.27, "Max latency (ns)": 202955290, "Mean latency (ns)": 6415398, "Min duration satisfied": "Yes", "Min latency (ns)": 3932392, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_Server", "Scenario": "server", "Scheduled samples per second": 4901.3, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "N/A", "accelerator_memory_configuration": "", "accelerator_model_name": "N/A", "accelerator_on-chip_memories": "", "accelerators_per_node": 0, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.326, "characteristics.good": 38163, "characteristics.scheduled_queries_per_second": 4901.3, "characteristics.scheduled_queries_per_second.normalized_per_core": 43.761607142857144, "characteristics.scheduled_queries_per_second.normalized_per_processor": 1225.325, "characteristics.total": 50000, "ck_system": "Triton_CPU_4S_8380Hx1", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "OpenVino 2021", "host_memory_capacity": "1536 GB", "host_memory_configuration": "12 slots / 32GB each / 3200 MT/s per socket", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Gold 8380H CPU @ 2.70GHz", "host_processors_per_node": 4, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "fp32", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 112, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/Triton_CPU_4S_8380Hx1", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "Tensorflow 2.4.0, OpenVino 2021, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "The original weight filename: https://zenodo.org/record/2535873/files/resnet50_v1.pb", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "CPU Inference on Triton Inference Server", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Triton_CPU_4S_8380Hx1", "system_name": "Supermicro SYS-240P-TNRT (Cooper Lake running Triton)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 4899.5, "task": "image classification", "task2": "image classification", "total_cores": 112, "uid": "3e485b8dabd9447a", "use_accelerator": false, "weight_data_types": "int8", "weight_transformations": "We transform the original fp32 weight to int8 weight using symmetric quantization." }, { "50.00 percentile latency (ns)": 7821413, "90.00 percentile latency (ns)": 9788320, "95.00 percentile latency (ns)": 10308168, "97.00 percentile latency (ns)": 10691868, "99.00 percentile latency (ns)": 13056998, "99.90 percentile latency (ns)": 22280331, "Completed samples per second": 83482.24, "Max latency (ns)": 40701178, "Mean latency (ns)": 7943116, "Min duration satisfied": "Yes", "Min latency (ns)": 2927232, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_Server", "Scenario": "server", "Scheduled samples per second": 83483.02, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA A10", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.scheduled_queries_per_second": 83483.02, "characteristics.scheduled_queries_per_second.normalized_per_core": 10435.3775, "characteristics.scheduled_queries_per_second.normalized_per_processor": 10435.3775, "characteristics.total": 50000, "ck_system": "A10x8_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A10x8_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0, Triton 21.02", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "preview", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x8_TRT_Triton", "system_name": "Supermicro 4029GP-TRT-OTO-28 (8x A10, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 83500, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "4f678e5b30e5d943", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5723864, "90.00 percentile latency (ns)": 7131460, "95.00 percentile latency (ns)": 7550754, "97.00 percentile latency (ns)": 7855513, "99.00 percentile latency (ns)": 8519052, "99.90 percentile latency (ns)": 10352869, "Completed samples per second": 30794.16, "Max latency (ns)": 23813165, "Mean latency (ns)": 5731434, "Min duration satisfied": "Yes", "Min latency (ns)": 1863949, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 30794.37, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.scheduled_queries_per_second": 30794.37, "characteristics.scheduled_queries_per_second.normalized_per_core": 30794.37, "characteristics.scheduled_queries_per_second.normalized_per_processor": 30794.37, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GBx1_TRT_datacenter", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx1_TRT_datacenter", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx1_TRT_datacenter", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 30800, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "dbdf6a9cf0401597", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 7350637, "90.00 percentile latency (ns)": 10276812, "95.00 percentile latency (ns)": 11488204, "97.00 percentile latency (ns)": 12419235, "99.00 percentile latency (ns)": 14648564, "99.90 percentile latency (ns)": 20810121, "Completed samples per second": 1989.22, "Max latency (ns)": 43127998, "Mean latency (ns)": 7971771, "Min duration satisfied": "Yes", "Min latency (ns)": 5500026, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_Server", "Scenario": "server", "Scheduled samples per second": 1989.24, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "N/A", "accelerator_memory_configuration": "", "accelerator_model_name": "N/A", "accelerator_on-chip_memories": "", "accelerators_per_node": 0, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.304, "characteristics.good": 38152, "characteristics.scheduled_queries_per_second": 1989.24, "characteristics.scheduled_queries_per_second.normalized_per_core": 8.2885, "characteristics.scheduled_queries_per_second.normalized_per_processor": 994.62, "characteristics.total": 50000, "ck_system": "Triton_CPU_2S_6258Rx1", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "OpenVino 2021.2.200", "host_memory_capacity": "768 GB", "host_memory_configuration": "6 slots / 32GB each / 2934 MT/s per socket", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "fp32", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 240, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/Triton_CPU_2S_6258Rx1", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "OpenVino 2021.2.200, Triton 21.02; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "No", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "The original weight filename: https://zenodo.org/record/2535873/files/resnet50_v1.pb", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "CPU Inference on Triton Inference ServerPowered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Triton_CPU_2S_6258Rx1", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 1989.5, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "5b22eefb264a558a", "use_accelerator": false, "weight_data_types": "int8", "weight_transformations": "We transform the original fp32 weight to int8 weight using symmetric quantization." }, { "50.00 percentile latency (ns)": 4211400, "90.00 percentile latency (ns)": 5290702, "95.00 percentile latency (ns)": 5574284, "97.00 percentile latency (ns)": 5818908, "99.00 percentile latency (ns)": 6915454, "99.90 percentile latency (ns)": 18433711, "Completed samples per second": 23951.67, "Max latency (ns)": 1002803020, "Mean latency (ns)": 4269896, "Min duration satisfied": "Yes", "Min latency (ns)": 2557285, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_Server", "Scenario": "server", "Scheduled samples per second": 23991.64, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.scheduled_queries_per_second": 23991.64, "characteristics.scheduled_queries_per_second.normalized_per_core": 23991.64, "characteristics.scheduled_queries_per_second.normalized_per_processor": 23991.64, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GBx1_TRT_Triton_datacenter", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx1_TRT_Triton_datacenter", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0, Triton 21.02; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx1_TRT_Triton_datacenter", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 24000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "f92777ed0a81e1cd", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 4825272, "90.00 percentile latency (ns)": 5077913, "95.00 percentile latency (ns)": 5141376, "97.00 percentile latency (ns)": 5185560, "99.00 percentile latency (ns)": 5292347, "99.90 percentile latency (ns)": 16113266, "Completed samples per second": 115986.28, "Max latency (ns)": 56530714, "Mean latency (ns)": 4864616, "Min duration satisfied": "Yes", "Min latency (ns)": 3133969, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 115987.32, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.scheduled_queries_per_second": 115987.32, "characteristics.scheduled_queries_per_second.normalized_per_core": 14498.415, "characteristics.scheduled_queries_per_second.normalized_per_processor": 14498.415, "characteristics.total": 50000, "ck_system": "A30x8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A30x8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.46, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "preview", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x8_TRT", "system_name": "Gigabyte G482-Z54 (8x A30, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 116000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "dcaa77bb8ac2470a", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 6348052, "90.00 percentile latency (ns)": 7746708, "95.00 percentile latency (ns)": 8040099, "97.00 percentile latency (ns)": 8462883, "99.00 percentile latency (ns)": 10055500, "99.90 percentile latency (ns)": 27765198, "Completed samples per second": 109984.68, "Max latency (ns)": 919850560, "Mean latency (ns)": 6743169, "Min duration satisfied": "Yes", "Min latency (ns)": 3048679, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_Server", "Scenario": "server", "Scheduled samples per second": 109985.66, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.scheduled_queries_per_second": 109985.66, "characteristics.scheduled_queries_per_second.normalized_per_core": 13748.2075, "characteristics.scheduled_queries_per_second.normalized_per_processor": 13748.2075, "characteristics.total": 50000, "ck_system": "A30x8_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A30x8_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.46, DALI 0.30.0, Triton 21.02", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "preview", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x8_TRT_Triton", "system_name": "Gigabyte G482-Z54 (8x A30, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 110000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "4bdd4506b51b70cc", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 3365195, "90.00 percentile latency (ns)": 3595847, "95.00 percentile latency (ns)": 3650589, "97.00 percentile latency (ns)": 3685561, "99.00 percentile latency (ns)": 3866535, "99.90 percentile latency (ns)": 20251342, "Completed samples per second": 184982.39, "Max latency (ns)": 49874221, "Mean latency (ns)": 3442722, "Min duration satisfied": "Yes", "Min latency (ns)": 2860467, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 184983.73, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.power": 2245.331000000001, "characteristics.power.normalized_per_core": 280.66637500000013, "characteristics.power.normalized_per_processor": 280.66637500000013, "characteristics.scheduled_queries_per_second": 184983.73, "characteristics.scheduled_queries_per_second.normalized_per_core": 23122.96625, "characteristics.scheduled_queries_per_second.normalized_per_processor": 23122.96625, "characteristics.total": 50000, "ck_system": "A100-PCIex8_TRT_MaxQ", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A100-PCIex8_TRT_MaxQ", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex8_TRT_MaxQ", "system_name": "Gigabyte G482-Z54 (8x A100-PCIe, MaxQ, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 185000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "9184e05ded485176", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5662792, "90.00 percentile latency (ns)": 5943649, "95.00 percentile latency (ns)": 6032348, "97.00 percentile latency (ns)": 6095849, "99.00 percentile latency (ns)": 6275949, "99.90 percentile latency (ns)": 12874867, "Completed samples per second": 259990.34, "Max latency (ns)": 33273326, "Mean latency (ns)": 5690926, "Min duration satisfied": "Yes", "Min latency (ns)": 4380699, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 259993.64, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.scheduled_queries_per_second": 259993.64, "characteristics.scheduled_queries_per_second.normalized_per_core": 32499.205, "characteristics.scheduled_queries_per_second.normalized_per_processor": 32499.205, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT", "system_name": "NVIDIA DGX-A100 (8x A100-SXM-80GB, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 260000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "fb342a8b2fffe9da", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 3198567, "90.00 percentile latency (ns)": 3479013, "95.00 percentile latency (ns)": 3569107, "97.00 percentile latency (ns)": 3680703, "99.00 percentile latency (ns)": 3993219, "99.90 percentile latency (ns)": 16651716, "Completed samples per second": 106987.92, "Max latency (ns)": 57195356, "Mean latency (ns)": 3251087, "Min duration satisfied": "Yes", "Min latency (ns)": 2689454, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 106988.47, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.scheduled_queries_per_second": 106988.47, "characteristics.scheduled_queries_per_second.normalized_per_core": 26747.1175, "characteristics.scheduled_queries_per_second.normalized_per_processor": 26747.1175, "characteristics.total": 50000, "ck_system": "DGX-Station-A100_A100-SXM-80GBx4_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 1, "host_storage_capacity": "10 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-Station-A100_A100-SXM-80GBx4_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-Station-A100_A100-SXM-80GBx4_TRT", "system_name": "NVIDIA DGX Station A100 (4x A100-SXM-80GB, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 107000, "task": "image classification", "task2": "image classification", "total_cores": 64, "uid": "48bf1ccf9eb3cffe", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 4284625, "90.00 percentile latency (ns)": 7510253, "95.00 percentile latency (ns)": 8933244, "97.00 percentile latency (ns)": 9962923, "99.00 percentile latency (ns)": 12318341, "99.90 percentile latency (ns)": 17285711, "Completed samples per second": 3601.76, "Max latency (ns)": 27573336, "Mean latency (ns)": 4794430, "Min duration satisfied": "Yes", "Min latency (ns)": 905465, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 3601.79, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB (1x1g.10gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.scheduled_queries_per_second": 3601.79, "characteristics.scheduled_queries_per_second.normalized_per_core": 3601.79, "characteristics.scheduled_queries_per_second.normalized_per_processor": 3601.79, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse", "system_name": "NVIDIA DGX-A100 (1x A100-SXM-80GB-MIG-1x1g.10gb, TensorRT, HeteroMultiUse)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 3600, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "a238fb4f8b9e5c28", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 3414895, "90.00 percentile latency (ns)": 3988083, "95.00 percentile latency (ns)": 4214534, "97.00 percentile latency (ns)": 4343775, "99.00 percentile latency (ns)": 4589041, "99.90 percentile latency (ns)": 31233150, "Completed samples per second": 106987.75, "Max latency (ns)": 62014870, "Mean latency (ns)": 3531955, "Min duration satisfied": "Yes", "Min latency (ns)": 2856319, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 106988.47, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.power": 1266.027833333334, "characteristics.power.normalized_per_core": 316.5069583333335, "characteristics.power.normalized_per_processor": 316.5069583333335, "characteristics.scheduled_queries_per_second": 106988.47, "characteristics.scheduled_queries_per_second.normalized_per_core": 26747.1175, "characteristics.scheduled_queries_per_second.normalized_per_processor": 26747.1175, "characteristics.total": 50000, "ck_system": "DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 1, "host_storage_capacity": "10 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ", "system_name": "NVIDIA DGX Station A100 (4x A100-SXM-80GB, MaxQ, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 107000, "task": "image classification", "task2": "image classification", "total_cores": 64, "uid": "ea85064fc2adf7cc", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 3215901, "90.00 percentile latency (ns)": 3479224, "95.00 percentile latency (ns)": 3547305, "97.00 percentile latency (ns)": 3602432, "99.00 percentile latency (ns)": 5727643, "99.90 percentile latency (ns)": 35077668, "Completed samples per second": 207974.24, "Max latency (ns)": 65420194, "Mean latency (ns)": 3368811, "Min duration satisfied": "Yes", "Min latency (ns)": 2754209, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 207975.79, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.scheduled_queries_per_second": 207975.79, "characteristics.scheduled_queries_per_second.normalized_per_core": 25996.97375, "characteristics.scheduled_queries_per_second.normalized_per_processor": 25996.97375, "characteristics.total": 50000, "ck_system": "A100-PCIex8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A100-PCIex8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex8_TRT", "system_name": "Gigabyte G482-Z54 (8x A100-PCIe, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 208000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "f98eea66646993e0", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 6465771, "90.00 percentile latency (ns)": 8086079, "95.00 percentile latency (ns)": 8715109, "97.00 percentile latency (ns)": 9480106, "99.00 percentile latency (ns)": 12386002, "99.90 percentile latency (ns)": 54864352, "Completed samples per second": 179984.41, "Max latency (ns)": 184585021, "Mean latency (ns)": 6890233, "Min duration satisfied": "Yes", "Min latency (ns)": 2087744, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_Server", "Scenario": "server", "Scheduled samples per second": 179985.89, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.scheduled_queries_per_second": 179985.89, "characteristics.scheduled_queries_per_second.normalized_per_core": 22498.23625, "characteristics.scheduled_queries_per_second.normalized_per_processor": 22498.23625, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0, Triton 21.02", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT_Triton", "system_name": "NVIDIA DGX-A100 (8x A100-SXM-80GB, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 180000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "2c5a9af59b48964a", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5914508, "90.00 percentile latency (ns)": 6334715, "95.00 percentile latency (ns)": 6458572, "97.00 percentile latency (ns)": 6549170, "99.00 percentile latency (ns)": 6802658, "99.90 percentile latency (ns)": 9653974, "Completed samples per second": 239988.82, "Max latency (ns)": 32143315, "Mean latency (ns)": 5934402, "Min duration satisfied": "Yes", "Min latency (ns)": 4417927, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 239991.46, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.power": 3451.6956666666674, "characteristics.power.normalized_per_core": 431.4619583333334, "characteristics.power.normalized_per_processor": 431.4619583333334, "characteristics.scheduled_queries_per_second": 239991.46, "characteristics.scheduled_queries_per_second.normalized_per_core": 29998.9325, "characteristics.scheduled_queries_per_second.normalized_per_processor": 29998.9325, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT_MaxQ", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT_MaxQ", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT_MaxQ", "system_name": "NVIDIA DGX-A100 (8x A100-SXM-80GB, MaxQ, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 240000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "f7a1dcf0a482f820", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 12785652, "90.00 percentile latency (ns)": 13438643, "95.00 percentile latency (ns)": 13582755, "97.00 percentile latency (ns)": 13668914, "99.00 percentile latency (ns)": 13818866, "99.90 percentile latency (ns)": 14080983, "Completed samples per second": 87982.52, "Max latency (ns)": 23931543, "Mean latency (ns)": 12788521, "Min duration satisfied": "Yes", "Min latency (ns)": 2830926, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 87983.67, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA A10", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.scheduled_queries_per_second": 87983.67, "characteristics.scheduled_queries_per_second.normalized_per_core": 10997.95875, "characteristics.scheduled_queries_per_second.normalized_per_processor": 10997.95875, "characteristics.total": 50000, "ck_system": "A10x8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A10x8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "preview", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x8_TRT", "system_name": "Supermicro 4029GP-TRT-OTO-28 (8x A10, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 88000, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "9a7d84aeabe12105", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5910274, "90.00 percentile latency (ns)": 8953940, "95.00 percentile latency (ns)": 10429680, "97.00 percentile latency (ns)": 11554089, "99.00 percentile latency (ns)": 13976501, "99.90 percentile latency (ns)": 19098376, "Completed samples per second": 4819.72, "Max latency (ns)": 113491508, "Mean latency (ns)": 6676096, "Min duration satisfied": "Yes", "Min latency (ns)": 4333795, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_Server", "Scenario": "server", "Scheduled samples per second": 4819.79, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "N/A", "accelerator_memory_configuration": "", "accelerator_model_name": "N/A", "accelerator_on-chip_memories": "", "accelerators_per_node": 0, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.304, "characteristics.good": 38152, "characteristics.scheduled_queries_per_second": 4819.79, "characteristics.scheduled_queries_per_second.normalized_per_core": 43.033839285714286, "characteristics.scheduled_queries_per_second.normalized_per_processor": 1204.9475, "characteristics.total": 50000, "ck_system": "Triton_CPU_4S_8380Hx1", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "OpenVino 2021.2.200", "host_memory_capacity": "1536 GB", "host_memory_configuration": "6 slots / 32GB each / 3200 MT/s per socket", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8380H CPU @ 2.90GHz", "host_processors_per_node": 4, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "fp32", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 112, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/Triton_CPU_4S_8380Hx1", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS", "other_hardware": "", "other_software_stack": "Tensorflow 2.4.0, OpenVino 2021.2.200, Triton 21.02", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "No", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "The original weight filename: https://zenodo.org/record/2535873/files/resnet50_v1.pb", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "CPU Inference on Triton Inference Server", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Triton_CPU_4S_8380Hx1", "system_name": "Supermicro SYS-240P-TNRT (Cooper Lake running Triton)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 4819.5, "task": "image classification", "task2": "image classification", "total_cores": 112, "uid": "c42d8b404fbb722b", "use_accelerator": false, "weight_data_types": "int8", "weight_transformations": "We transform the original fp32 weight to int8 weight using symmetric quantization." }, { "50.00 percentile latency (ns)": 11251028, "90.00 percentile latency (ns)": 12268910, "95.00 percentile latency (ns)": 12518496, "97.00 percentile latency (ns)": 12671419, "99.00 percentile latency (ns)": 12941389, "99.90 percentile latency (ns)": 13368364, "Completed samples per second": 60023.6, "Max latency (ns)": 18918140, "Mean latency (ns)": 11160264, "Min duration satisfied": "Yes", "Min latency (ns)": 6967892, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 60030.57, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA TITAN RTX", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "characteristics.accuracy": 76.034, "characteristics.good": 38017, "characteristics.scheduled_queries_per_second": 60030.57, "characteristics.scheduled_queries_per_second.normalized_per_core": 15007.6425, "characteristics.scheduled_queries_per_second.normalized_per_processor": 15007.6425, "characteristics.total": 50000, "ck_system": "TitanRTXx4", "ck_used": false, "cooling": "watercooled", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 6.0, CUDA 10.1, cuDNN 7.6.3, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 24, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) 8268", "host_processors_per_node": 2, "host_storage_capacity": "3.84 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.5, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/results/TitanRTXx4", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.3", "other_software_stack": "docker 18.09.2, python 3.6.8,gcc 5.5.0,onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 3133965575612453542, "retraining": "N", "sample_index_rng_seed": 665484352860916858, "samples_per_query": 1, "schedule_rng_seed": 3622009729038561421, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/TitanRTXx4", "system_name": "SCAN 3XS DBP T496X2 Fluid", "target_latency (ns)": 15000000, "target_qps": 60000, "task": "image classification", "task2": "image classification", "total_cores": 48, "uid": "74c85ac1669d9e14", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 6835149, "90.00 percentile latency (ns)": 7170695, "95.00 percentile latency (ns)": 7234032, "97.00 percentile latency (ns)": 7272895, "99.00 percentile latency (ns)": 7355068, "99.90 percentile latency (ns)": 8130357, "Completed samples per second": 199082.81, "Max latency (ns)": 15051467, "Mean latency (ns)": 6839800, "Min duration satisfied": "Yes", "Min latency (ns)": 4653648, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 199098.25, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA TITAN RTX", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "characteristics.accuracy": 70.61, "characteristics.good": 35305, "characteristics.scheduled_queries_per_second": 199098.25, "characteristics.scheduled_queries_per_second.normalized_per_core": 49774.5625, "characteristics.scheduled_queries_per_second.normalized_per_processor": 49774.5625, "characteristics.total": 50000, "ck_system": "TitanRTXx4", "ck_used": false, "cooling": "watercooled", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 6.0, CUDA 10.1, cuDNN 7.6.3, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 24, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) 8268", "host_processors_per_node": 2, "host_storage_capacity": "3.84 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "mobilenet", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.5, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/results/TitanRTXx4", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.3", "other_software_stack": "docker 18.09.2, python 3.6.8,gcc 5.5.0,onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 3133965575612453542, "retraining": "N", "sample_index_rng_seed": 665484352860916858, "samples_per_query": 1, "schedule_rng_seed": 3622009729038561421, "starting_weights_filename": "mobilenet_sym_no_bn.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/TitanRTXx4", "system_name": "SCAN 3XS DBP T496X2 Fluid", "target_latency (ns)": 10000000, "target_qps": 199000, "task": "image classification", "task2": "image classification", "total_cores": 48, "uid": "6e467ef04404dabd", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 11268090, "90.00 percentile latency (ns)": 12855334, "95.00 percentile latency (ns)": 13325519, "97.00 percentile latency (ns)": 13672436, "99.00 percentile latency (ns)": 14676235, "99.90 percentile latency (ns)": 20783745, "Completed samples per second": 103522.98, "Max latency (ns)": 41541687, "Mean latency (ns)": 11461182, "Min duration satisfied": "Yes", "Min latency (ns)": 5305919, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 103532.13, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Tesla T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 20, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "characteristics.accuracy": 76.034, "characteristics.good": 38017, "characteristics.scheduled_queries_per_second": 103532.13, "characteristics.scheduled_queries_per_second.normalized_per_core": 5176.6065, "characteristics.scheduled_queries_per_second.normalized_per_processor": 5176.6065, "characteristics.total": 50000, "ck_system": "T4x20", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 6.0, CUDA 10.1, cuDNN 7.6.3, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.5, "normalize_cores": 20, "normalize_processors": 20, "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/results/T4x20", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.3", "other_software_stack": "docker 18.09.2, python 3.6.8,gcc 5.5.0,onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 3133965575612453542, "retraining": "N", "sample_index_rng_seed": 665484352860916858, "samples_per_query": 1, "schedule_rng_seed": 3622009729038561421, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x20", "system_name": "Supermicro 6049GP-TRT-OTO-29 20xT4", "target_latency (ns)": 15000000, "target_qps": 103500, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "9b2d3318a4ffe55c", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 6207536, "90.00 percentile latency (ns)": 7991341, "95.00 percentile latency (ns)": 8605947, "97.00 percentile latency (ns)": 9059141, "99.00 percentile latency (ns)": 10159013, "99.90 percentile latency (ns)": 12561017, "Completed samples per second": 41542.28, "Max latency (ns)": 20699844, "Mean latency (ns)": 6545549, "Min duration satisfied": "Yes", "Min latency (ns)": 4404820, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 41546.64, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Tesla T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "characteristics.accuracy": 76.034, "characteristics.good": 38017, "characteristics.scheduled_queries_per_second": 41546.64, "characteristics.scheduled_queries_per_second.normalized_per_core": 5193.33, "characteristics.scheduled_queries_per_second.normalized_per_processor": 5193.33, "characteristics.total": 50000, "ck_system": "T4x8", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 6.0, CUDA 10.1, cuDNN 7.6.3, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.5, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/results/T4x8", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.3", "other_software_stack": "docker 18.09.2, python 3.6.8,gcc 5.5.0,onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 3133965575612453542, "retraining": "N", "sample_index_rng_seed": 665484352860916858, "samples_per_query": 1, "schedule_rng_seed": 3622009729038561421, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x8", "system_name": "Supermicro 4029GP-TRT-OTO-28 8xT4", "target_latency (ns)": 15000000, "target_qps": 41500, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "e9c11ccd1efb2949", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 3076947, "90.00 percentile latency (ns)": 3318039, "95.00 percentile latency (ns)": 3440724, "97.00 percentile latency (ns)": 3589866, "99.00 percentile latency (ns)": 4626978, "99.90 percentile latency (ns)": 11111734, "Completed samples per second": 135058.92, "Max latency (ns)": 23779840, "Mean latency (ns)": 3136351, "Min duration satisfied": "Yes", "Min latency (ns)": 2307992, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 135072.99, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Tesla T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "characteristics.accuracy": 70.61, "characteristics.good": 35305, "characteristics.scheduled_queries_per_second": 135072.99, "characteristics.scheduled_queries_per_second.normalized_per_core": 16884.12375, "characteristics.scheduled_queries_per_second.normalized_per_processor": 16884.12375, "characteristics.total": 50000, "ck_system": "T4x8", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 6.0, CUDA 10.1, cuDNN 7.6.3, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "mobilenet", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.5, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/results/T4x8", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.3", "other_software_stack": "docker 18.09.2, python 3.6.8,gcc 5.5.0,onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 3133965575612453542, "retraining": "N", "sample_index_rng_seed": 665484352860916858, "samples_per_query": 1, "schedule_rng_seed": 3622009729038561421, "starting_weights_filename": "mobilenet_sym_no_bn.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x8", "system_name": "Supermicro 4029GP-TRT-OTO-28 8xT4", "target_latency (ns)": 10000000, "target_qps": 135000, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "4e65ba0de0a1bd0a", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 7028442, "90.00 percentile latency (ns)": 7901882, "95.00 percentile latency (ns)": 8051201, "97.00 percentile latency (ns)": 8235057, "99.00 percentile latency (ns)": 9860840, "99.90 percentile latency (ns)": 18353408, "Completed samples per second": 206602.21, "Max latency (ns)": 1003768376, "Mean latency (ns)": 7146373, "Min duration satisfied": "Yes", "Min latency (ns)": 5649233, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_Server", "Scenario": "server", "Scheduled samples per second": 210057.67, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-SXM4", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.scheduled_queries_per_second": 210057.67, "characteristics.scheduled_queries_per_second.normalized_per_core": 26257.20875, "characteristics.scheduled_queries_per_second.normalized_per_processor": 26257.20875, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM4x8_TRT_Triton", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM4x8_TRT_Triton", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0, Triton 20.09; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM4x8_TRT_Triton", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 210000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "1205c1ca7de344ce", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 10799750, "90.00 percentile latency (ns)": 11494287, "95.00 percentile latency (ns)": 11737036, "97.00 percentile latency (ns)": 11910122, "99.00 percentile latency (ns)": 12558966, "99.90 percentile latency (ns)": 18052952, "Completed samples per second": 111478.93, "Max latency (ns)": 31660916, "Mean latency (ns)": 10813115, "Min duration satisfied": "Yes", "Min latency (ns)": 4505544, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 111495.51, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 20, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.scheduled_queries_per_second": 111495.51, "characteristics.scheduled_queries_per_second.normalized_per_core": 5574.7755, "characteristics.scheduled_queries_per_second.normalized_per_processor": 5574.7755, "characteristics.total": 50000, "ck_system": "T4x20_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 20, "normalize_processors": 20, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/T4x20_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.4", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x20_TRT", "system_name": "Supermicro 6049GP-TRT-OTO-29 (20x T4, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 111500, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "4cd1a76ad41dab2e", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 3941159, "90.00 percentile latency (ns)": 4631234, "95.00 percentile latency (ns)": 4849106, "97.00 percentile latency (ns)": 5039316, "99.00 percentile latency (ns)": 5492006, "99.90 percentile latency (ns)": 39215711, "Completed samples per second": 52421.23, "Max latency (ns)": 47588169, "Mean latency (ns)": 4103319, "Min duration satisfied": "Yes", "Min latency (ns)": 2862190, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 52425.14, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe", "accelerator_on-chip_memories": "", "accelerators_per_node": 2, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.scheduled_queries_per_second": 52425.14, "characteristics.scheduled_queries_per_second.normalized_per_core": 26212.57, "characteristics.scheduled_queries_per_second.normalized_per_processor": 26212.57, "characteristics.total": 50000, "ck_system": "A100-PCIex2_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 2, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/A100-PCIex2_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.4", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex2_TRT", "system_name": "Gigabyte G482-Z52 (2x A100-PCIe, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 52400, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "7a1445e9d6037ef3", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 7191978, "90.00 percentile latency (ns)": 8274173, "95.00 percentile latency (ns)": 8628672, "97.00 percentile latency (ns)": 8874070, "99.00 percentile latency (ns)": 9656161, "99.90 percentile latency (ns)": 27479994, "Completed samples per second": 255039.01, "Max latency (ns)": 31871628, "Mean latency (ns)": 7408144, "Min duration satisfied": "Yes", "Min latency (ns)": 5055653, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 255061.34, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-SXM4", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.scheduled_queries_per_second": 255061.34, "characteristics.scheduled_queries_per_second.normalized_per_core": 31882.6675, "characteristics.scheduled_queries_per_second.normalized_per_processor": 31882.6675, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM4x8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM4x8_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.4", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM4x8_TRT", "system_name": "NVIDIA DGX-A100 (8x A100-SXM4, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 255000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "eb498b84b0dae060", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5803951, "90.00 percentile latency (ns)": 7515173, "95.00 percentile latency (ns)": 8049572, "97.00 percentile latency (ns)": 8431562, "99.00 percentile latency (ns)": 9310944, "99.90 percentile latency (ns)": 11562729, "Completed samples per second": 44500.92, "Max latency (ns)": 15926637, "Mean latency (ns)": 6142260, "Min duration satisfied": "Yes", "Min latency (ns)": 4162166, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 44504.81, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.scheduled_queries_per_second": 44504.81, "characteristics.scheduled_queries_per_second.normalized_per_core": 5563.10125, "characteristics.scheduled_queries_per_second.normalized_per_processor": 5563.10125, "characteristics.total": 50000, "ck_system": "T4x8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/T4x8_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.4", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x8_TRT", "system_name": "Supermicro 4029GP-TRT-OTO-28 (8x T4, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 44500, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "f20a140a485eb1aa", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 9782141, "90.00 percentile latency (ns)": 10344827, "95.00 percentile latency (ns)": 10593350, "97.00 percentile latency (ns)": 10812166, "99.00 percentile latency (ns)": 11391532, "99.90 percentile latency (ns)": 13774327, "Completed samples per second": 102281.44, "Max latency (ns)": 1005341116, "Mean latency (ns)": 9830685, "Min duration satisfied": "Yes", "Min latency (ns)": 7778115, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_Server", "Scenario": "server", "Scheduled samples per second": 103995.07, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 20, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.scheduled_queries_per_second": 103995.07, "characteristics.scheduled_queries_per_second.normalized_per_core": 5199.753500000001, "characteristics.scheduled_queries_per_second.normalized_per_processor": 5199.753500000001, "characteristics.total": 50000, "ck_system": "T4x20_TRT_Triton", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 20, "normalize_processors": 20, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/T4x20_TRT_Triton", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0, Triton 20.09; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x20_TRT_Triton", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 104000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "04a7d485f9df9029", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 7578484, "90.00 percentile latency (ns)": 8879122, "95.00 percentile latency (ns)": 9170199, "97.00 percentile latency (ns)": 9362540, "99.00 percentile latency (ns)": 9682713, "99.90 percentile latency (ns)": 12498134, "Completed samples per second": 41316.18, "Max latency (ns)": 1003109372, "Mean latency (ns)": 7333655, "Min duration satisfied": "Yes", "Min latency (ns)": 4372487, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_Server", "Scenario": "server", "Scheduled samples per second": 42006.89, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.scheduled_queries_per_second": 42006.89, "characteristics.scheduled_queries_per_second.normalized_per_core": 5250.86125, "characteristics.scheduled_queries_per_second.normalized_per_processor": 5250.86125, "characteristics.total": 50000, "ck_system": "T4x8_TRT_Triton", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/T4x8_TRT_Triton", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0, Triton 20.09; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x8_TRT_Triton", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 42000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "7bffd52bb41bb927", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 4392352, "90.00 percentile latency (ns)": 5250650, "95.00 percentile latency (ns)": 5551151, "97.00 percentile latency (ns)": 5799406, "99.00 percentile latency (ns)": 8019673, "99.90 percentile latency (ns)": 33539162, "Completed samples per second": 39340.27, "Max latency (ns)": 1004231228, "Mean latency (ns)": 4598337, "Min duration satisfied": "Yes", "Min latency (ns)": 3085670, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_Server", "Scenario": "server", "Scheduled samples per second": 39997.81, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe", "accelerator_on-chip_memories": "", "accelerators_per_node": 2, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.scheduled_queries_per_second": 39997.81, "characteristics.scheduled_queries_per_second.normalized_per_core": 19998.905, "characteristics.scheduled_queries_per_second.normalized_per_processor": 19998.905, "characteristics.total": 50000, "ck_system": "A100-PCIex2_TRT_Triton", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 2, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/A100-PCIex2_TRT_Triton", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0, Triton 20.09; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex2_TRT_Triton", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 40000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "1082f4819b70fde1", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" } ]