[ { "50.00 percentile latency (ns)": 317439872011, "90.00 percentile latency (ns)": 572092833280, "95.00 percentile latency (ns)": 603911957301, "97.00 percentile latency (ns)": 616653041247, "99.00 percentile latency (ns)": 629370886461, "99.90 percentile latency (ns)": 635121120545, "Max latency (ns)": 635739422828, "Mean latency (ns)": 54579372604, "Min duration satisfied": "Yes", "Min latency (ns)": 85291261, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 110357, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA A10", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 110357, "characteristics.samples_per_second.normalized_per_core": 13794.625, "characteristics.samples_per_second.normalized_per_processor": 13794.625, "characteristics.total": 50000, "ck_system": "A10x8_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A10x8_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 70158000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x8_TRT_Triton", "system_name": "Supermicro 4029GP-TRT-OTO-28 (8x A10, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 106300, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "e98e532b288c891c", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 333617322810, "90.00 percentile latency (ns)": 596188366971, "95.00 percentile latency (ns)": 629000739229, "97.00 percentile latency (ns)": 642130545506, "99.00 percentile latency (ns)": 655257657257, "99.90 percentile latency (ns)": 661619711474, "Max latency (ns)": 664961981063, "Mean latency (ns)": 40024541996, "Min duration satisfied": "Yes", "Min latency (ns)": 4339980384, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_MultiMigServer", "Samples per second": 283469, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB (7x1g.10gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 283469, "characteristics.samples_per_second.normalized_per_core": 35433.625, "characteristics.samples_per_second.normalized_per_processor": 35433.625, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GB-MIG_56x1g.10gb_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_56x1g.10gb_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 188496000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_56x1g.10gb_TRT_Triton", "system_name": "NVIDIA DGX A100 (8x A100-SXM-80GB-MIG-7x1g.10gb, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 285600, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "e526fa48f4400ace", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 314469970616, "90.00 percentile latency (ns)": 567735688720, "95.00 percentile latency (ns)": 599485771589, "97.00 percentile latency (ns)": 612208467665, "99.00 percentile latency (ns)": 624914584447, "99.90 percentile latency (ns)": 630611957788, "Max latency (ns)": 631254279363, "Mean latency (ns)": 96450677290, "Min duration satisfied": "Yes", "Min latency (ns)": 115821836, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 133829, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 133829, "characteristics.samples_per_second.normalized_per_core": 33457.25, "characteristics.samples_per_second.normalized_per_processor": 33457.25, "characteristics.total": 50000, "ck_system": "DGX-Station-A100_A100-SXM-80GBx4_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 1, "host_storage_capacity": "10 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-Station-A100_A100-SXM-80GBx4_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 84480000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-Station-A100_A100-SXM-80GBx4_TRT_Triton", "system_name": "NVIDIA DGX Station A100 (4x A100-SXM-80GB, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 128000, "task": "image classification", "task2": "image classification", "total_cores": 64, "uid": "bcbdc3900411796c", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 339096780133, "90.00 percentile latency (ns)": 608010301288, "95.00 percentile latency (ns)": 641622447853, "97.00 percentile latency (ns)": 655060614392, "99.00 percentile latency (ns)": 668534844384, "99.90 percentile latency (ns)": 681607718638, "Max latency (ns)": 688473750628, "Mean latency (ns)": 160907854192, "Min duration satisfied": "Yes", "Min latency (ns)": 3151254142, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_MultiMigServer", "Samples per second": 150315, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30 (4x1g.6gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.04, "characteristics.good": 38020, "characteristics.samples_per_second": 150315, "characteristics.samples_per_second.normalized_per_core": 18789.375, "characteristics.samples_per_second.normalized_per_processor": 18789.375, "characteristics.total": 50000, "ck_system": "A30-MIG_32x1g.6gb_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30-MIG_32x1g.6gb_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 103488000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30-MIG_32x1g.6gb_TRT_Triton", "system_name": "Gigabyte G482-Z54 (8x A30-MIG-4x1g.6gb, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 156800, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "3af8c4f844944d6b", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 317067768905, "90.00 percentile latency (ns)": 570611615887, "95.00 percentile latency (ns)": 602303393464, "97.00 percentile latency (ns)": 614982435875, "99.00 percentile latency (ns)": 627683174605, "99.90 percentile latency (ns)": 633384248128, "Max latency (ns)": 633998391337, "Mean latency (ns)": 117449357554, "Min duration satisfied": "Yes", "Min latency (ns)": 854232791, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 145742, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 145742, "characteristics.samples_per_second.normalized_per_core": 36435.5, "characteristics.samples_per_second.normalized_per_processor": 36435.5, "characteristics.total": 50000, "ck_system": "A100-PCIe-80GB_aarch64x4_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.2, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 80, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Ampere Altra Q80-30", "host_processors_per_node": 1, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIe-80GB_aarch64x4_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 92400000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIe-80GB_aarch64x4_TRT", "system_name": "Gigabyte G242-P31 (4x A100-PCIe-80GB_aarch64, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 140000, "task": "image classification", "task2": "image classification", "total_cores": 80, "uid": "139106ec004d09f7", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 305586583160, "90.00 percentile latency (ns)": 549542412052, "95.00 percentile latency (ns)": 580106708587, "97.00 percentile latency (ns)": 592338187715, "99.00 percentile latency (ns)": 604542991015, "99.90 percentile latency (ns)": 610026508659, "Max latency (ns)": 610547607187, "Mean latency (ns)": 103086675709, "Min duration satisfied": "Yes", "Min latency (ns)": 1002337863, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 149178, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 149178, "characteristics.samples_per_second.normalized_per_core": 18647.25, "characteristics.samples_per_second.normalized_per_processor": 18647.25, "characteristics.total": 50000, "ck_system": "A30x8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30x8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 91080000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x8_TRT", "system_name": "Gigabyte G482-Z54 (8x A30, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 138000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "a89be363dd778e4b", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 324882309092, "90.00 percentile latency (ns)": 584826491304, "95.00 percentile latency (ns)": 617335837262, "97.00 percentile latency (ns)": 630321734835, "99.00 percentile latency (ns)": 643325708791, "99.90 percentile latency (ns)": 649173611762, "Max latency (ns)": 649808421710, "Mean latency (ns)": 140029087840, "Min duration satisfied": "Yes", "Min latency (ns)": 167111225, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 153571, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 153571, "characteristics.samples_per_second.normalized_per_core": 19196.375, "characteristics.samples_per_second.normalized_per_processor": 19196.375, "characteristics.total": 50000, "ck_system": "A30x8_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30x8_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 99792000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x8_TRT_Triton", "system_name": "Gigabyte G482-Z54 (8x A30, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 151200, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "2dd5534c67b189bc", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 327699006175, "90.00 percentile latency (ns)": 589560098219, "95.00 percentile latency (ns)": 622275914003, "97.00 percentile latency (ns)": 635371716947, "99.00 percentile latency (ns)": 648457386046, "99.90 percentile latency (ns)": 654348521136, "Max latency (ns)": 655000179166, "Mean latency (ns)": 41604900948, "Min duration satisfied": "Yes", "Min latency (ns)": 1286234081, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 295237, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 295237, "characteristics.samples_per_second.normalized_per_core": 36904.625, "characteristics.samples_per_second.normalized_per_processor": 36904.625, "characteristics.total": 50000, "ck_system": "A100-PCIe-80GBx8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIe-80GBx8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 193380000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIe-80GBx8_TRT", "system_name": "Gigabyte G482-Z54 (8x A100-PCIe-80GB, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 293000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "f3bb142c9b9fac93", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 375136912303, "90.00 percentile latency (ns)": 674387681113, "95.00 percentile latency (ns)": 711777930957, "97.00 percentile latency (ns)": 726740566868, "99.00 percentile latency (ns)": 741709918522, "99.90 percentile latency (ns)": 748431851073, "Max latency (ns)": 749161687595, "Mean latency (ns)": 25712299182, "Min duration satisfied": "Yes", "Min latency (ns)": 1156907057, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 211436, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.power": 1887.2759679572787, "characteristics.power.normalized_per_core": 235.90949599465984, "characteristics.power.normalized_per_processor": 235.90949599465984, "characteristics.samples_per_second": 211436, "characteristics.samples_per_second.normalized_per_core": 26429.5, "characteristics.samples_per_second.normalized_per_processor": 26429.5, "characteristics.total": 50000, "ck_system": "A100-PCIex8_TRT_MaxQ", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIex8_TRT_MaxQ", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 158400000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex8_TRT_MaxQ", "system_name": "Gigabyte G482-Z54 (8x A100-PCIe, MaxQ, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 240000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "6b491087be20ba8b", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 310258137150, "90.00 percentile latency (ns)": 557889018926, "95.00 percentile latency (ns)": 588822577613, "97.00 percentile latency (ns)": 601224818014, "99.00 percentile latency (ns)": 613595767067, "99.90 percentile latency (ns)": 619173733125, "Max latency (ns)": 619757064225, "Mean latency (ns)": 25522740826, "Min duration satisfied": "Yes", "Min latency (ns)": 1290039916, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 313516, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 313516, "characteristics.samples_per_second.normalized_per_core": 39189.5, "characteristics.samples_per_second.normalized_per_processor": 39189.5, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 194304000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT", "system_name": "NVIDIA DGX A100 (8x A100-SXM-80GB, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 294400, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "3fd967d99ac946d6", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 308387235006, "90.00 percentile latency (ns)": 556782736932, "95.00 percentile latency (ns)": 587879332720, "97.00 percentile latency (ns)": 600311148520, "99.00 percentile latency (ns)": 612750263460, "99.90 percentile latency (ns)": 618364018786, "Max latency (ns)": 618971983109, "Mean latency (ns)": 90476278563, "Min duration satisfied": "Yes", "Min latency (ns)": 702683588, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 136484, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 136484, "characteristics.samples_per_second.normalized_per_core": 34121.0, "characteristics.samples_per_second.normalized_per_processor": 34121.0, "characteristics.total": 50000, "ck_system": "DGX-Station-A100_A100-SXM-80GBx4_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 1, "host_storage_capacity": "10 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-Station-A100_A100-SXM-80GBx4_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 84480000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-Station-A100_A100-SXM-80GBx4_TRT", "system_name": "NVIDIA DGX Station A100 (4x A100-SXM-80GB, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 128000, "task": "image classification", "task2": "image classification", "total_cores": 64, "uid": "d4d934bc38002c51", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 330988574580, "90.00 percentile latency (ns)": 596902098593, "95.00 percentile latency (ns)": 630199693146, "97.00 percentile latency (ns)": 643467157986, "99.00 percentile latency (ns)": 656736890365, "99.90 percentile latency (ns)": 662743026184, "Max latency (ns)": 663360398111, "Mean latency (ns)": 331008684097, "Min duration satisfied": "Yes", "Min latency (ns)": 97337602, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 4551.82, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30 (1x1g.6gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 4551.82, "characteristics.samples_per_second.normalized_per_core": 4551.82, "characteristics.samples_per_second.normalized_per_processor": 4551.82, "characteristics.total": 50000, "ck_system": "A30-MIG_1x1g.6gb_TRT_HeteroMultiUse", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30-MIG_1x1g.6gb_TRT_HeteroMultiUse", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 3019500, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30-MIG_1x1g.6gb_TRT_HeteroMultiUse", "system_name": "Gigabyte G482-Z54 (1x A30-MIG-1x1g.6gb, TensorRT, HeteroMultiUse)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 4575, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "afeb0aa755314ced", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 319469034467, "90.00 percentile latency (ns)": 574851646875, "95.00 percentile latency (ns)": 606823120142, "97.00 percentile latency (ns)": 619542940866, "99.00 percentile latency (ns)": 632361079485, "99.90 percentile latency (ns)": 638093541257, "Max latency (ns)": 638700216488, "Mean latency (ns)": 319384830943, "Min duration satisfied": "Yes", "Min latency (ns)": 118201135, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 5270.08, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB (1x1g.10gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 5270.08, "characteristics.samples_per_second.normalized_per_core": 5270.08, "characteristics.samples_per_second.normalized_per_processor": 5270.08, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 3366000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse", "system_name": "NVIDIA DGX A100 (1x A100-SXM-80GB-MIG-1x1g.10gb, TensorRT, HeteroMultiUse)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 5100, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "cf21f4351bd23788", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 337030259267, "90.00 percentile latency (ns)": 607054023654, "95.00 percentile latency (ns)": 640834638916, "97.00 percentile latency (ns)": 654329379859, "99.00 percentile latency (ns)": 667842691017, "99.90 percentile latency (ns)": 673928172833, "Max latency (ns)": 674589988657, "Mean latency (ns)": 118859682851, "Min duration satisfied": "Yes", "Min latency (ns)": 647759385, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 125232, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.power": 1129.2031111111105, "characteristics.power.normalized_per_core": 282.3007777777776, "characteristics.power.normalized_per_processor": 282.3007777777776, "characteristics.samples_per_second": 125232, "characteristics.samples_per_second.normalized_per_core": 31308.0, "characteristics.samples_per_second.normalized_per_processor": 31308.0, "characteristics.total": 50000, "ck_system": "DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 1, "host_storage_capacity": "10 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 84480000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ", "system_name": "NVIDIA DGX Station A100 (4x A100-SXM-80GB, MaxQ, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 128000, "task": "image classification", "task2": "image classification", "total_cores": 64, "uid": "de3f3572b11f48d3", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 318827324660, "90.00 percentile latency (ns)": 573581422791, "95.00 percentile latency (ns)": 605415838214, "97.00 percentile latency (ns)": 618142810848, "99.00 percentile latency (ns)": 630899128499, "99.90 percentile latency (ns)": 636625345896, "Max latency (ns)": 637252585444, "Mean latency (ns)": 100499649758, "Min duration satisfied": "Yes", "Min latency (ns)": 1179903949, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 265138, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 265138, "characteristics.samples_per_second.normalized_per_core": 33142.25, "characteristics.samples_per_second.normalized_per_processor": 33142.25, "characteristics.total": 50000, "ck_system": "A100-PCIex8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIex8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 168960000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex8_TRT", "system_name": "Gigabyte G482-Z54 (8x A100-PCIe, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 256000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "05b545d4eb18b62f", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 319516784992, "90.00 percentile latency (ns)": 575549253973, "95.00 percentile latency (ns)": 607565854741, "97.00 percentile latency (ns)": 620360498782, "99.00 percentile latency (ns)": 633148246059, "99.90 percentile latency (ns)": 638920873733, "Max latency (ns)": 639553458253, "Mean latency (ns)": 101168149450, "Min duration satisfied": "Yes", "Min latency (ns)": 125581133, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 264184, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 264184, "characteristics.samples_per_second.normalized_per_core": 33023.0, "characteristics.samples_per_second.normalized_per_processor": 33023.0, "characteristics.total": 50000, "ck_system": "A100-PCIex8_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIex8_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 168960000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex8_TRT_Triton", "system_name": "Gigabyte G482-Z54 (8x A100-PCIe, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 256000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "1bc7d4e8bbc5169f", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 334476525326, "90.00 percentile latency (ns)": 601384675724, "95.00 percentile latency (ns)": 634714453349, "97.00 percentile latency (ns)": 648053583512, "99.00 percentile latency (ns)": 661424765103, "99.90 percentile latency (ns)": 667422748890, "Max latency (ns)": 668057842200, "Mean latency (ns)": 64043158635, "Min duration satisfied": "Yes", "Min latency (ns)": 1350814675, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Concurrent_Triton_Server", "Samples per second": 306261, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 306261, "characteristics.samples_per_second.normalized_per_core": 38282.625, "characteristics.samples_per_second.normalized_per_processor": 38282.625, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 204600000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT_Triton", "system_name": "NVIDIA DGX A100 (8x A100-SXM-80GB, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 310000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "99e53f444102869e", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 397393374363, "90.00 percentile latency (ns)": 715108797165, "95.00 percentile latency (ns)": 754838924992, "97.00 percentile latency (ns)": 770768957524, "99.00 percentile latency (ns)": 786660360489, "99.90 percentile latency (ns)": 793782527980, "Max latency (ns)": 794578131983, "Mean latency (ns)": 17662503772, "Min duration satisfied": "Yes", "Min latency (ns)": 1298248748, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 244537, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.power": 2946.1950881612097, "characteristics.power.normalized_per_core": 368.2743860201512, "characteristics.power.normalized_per_processor": 368.2743860201512, "characteristics.samples_per_second": 244537, "characteristics.samples_per_second.normalized_per_core": 30567.125, "characteristics.samples_per_second.normalized_per_processor": 30567.125, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT_MaxQ", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT_MaxQ", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 194304000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT_MaxQ", "system_name": "NVIDIA DGX A100 (8x A100-SXM-80GB, MaxQ, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 294400, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "3f414aff8c034bdf", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 342286480531, "90.00 percentile latency (ns)": 612282091452, "95.00 percentile latency (ns)": 645668388160, "97.00 percentile latency (ns)": 659089209004, "99.00 percentile latency (ns)": 672304338311, "99.90 percentile latency (ns)": 678266542458, "Max latency (ns)": 678929245759, "Mean latency (ns)": 55330295258, "Min duration satisfied": "Yes", "Min latency (ns)": 76997615, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 284831, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 284831, "characteristics.samples_per_second.normalized_per_core": 35603.875, "characteristics.samples_per_second.normalized_per_processor": 35603.875, "characteristics.total": 50000, "ck_system": "A100-PCIe-80GBx8_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIe-80GBx8_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 193380000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIe-80GBx8_TRT_Triton", "system_name": "Gigabyte G482-Z54 (8x A100-PCIe-80GB, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 293000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "ae64bdaba35168f4", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 311331602806, "90.00 percentile latency (ns)": 560572567515, "95.00 percentile latency (ns)": 591711868535, "97.00 percentile latency (ns)": 604190440652, "99.00 percentile latency (ns)": 616639215625, "99.90 percentile latency (ns)": 622265183355, "Max latency (ns)": 622884882410, "Mean latency (ns)": 42680902819, "Min duration satisfied": "Yes", "Min latency (ns)": 788424581, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 110197, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA A10", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 110197, "characteristics.samples_per_second.normalized_per_core": 13774.625, "characteristics.samples_per_second.normalized_per_processor": 13774.625, "characteristics.total": 50000, "ck_system": "A10x8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A10x8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 68640000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x8_TRT", "system_name": "Supermicro 4029GP-TRT-OTO-28 (8x A10, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 104000, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "5a9b6a96470a4290", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 326926116984, "90.00 percentile latency (ns)": 588550045228, "95.00 percentile latency (ns)": 621258263440, "97.00 percentile latency (ns)": 634344404914, "99.00 percentile latency (ns)": 647415008760, "99.90 percentile latency (ns)": 653302641424, "Max latency (ns)": 653954708648, "Mean latency (ns)": 326970606537, "Min duration satisfied": "Yes", "Min latency (ns)": 39349977, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 5741.59, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "N/A", "accelerator_memory_configuration": "", "accelerator_model_name": "N/A", "accelerator_on-chip_memories": "", "accelerators_per_node": 0, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.326, "characteristics.good": 38163, "characteristics.samples_per_second": 5741.59, "characteristics.samples_per_second.normalized_per_core": 51.26419642857143, "characteristics.samples_per_second.normalized_per_processor": 1435.3975, "characteristics.total": 50000, "ck_system": "Triton_CPU_4S_8380Hx1", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "OpenVino 2021", "host_memory_capacity": "1536 GB", "host_memory_configuration": "12 slots / 32GB each / 3200 MT/s per socket", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Gold 8380H CPU @ 2.70GHz", "host_processors_per_node": 4, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "fp32", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 112, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/Triton_CPU_4S_8380Hx1", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "Tensorflow 2.4.0, OpenVino 2021, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 3754740, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "The original weight filename: https://zenodo.org/record/2535873/files/resnet50_v1.pb", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "CPU Inference on Triton Inference Server", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Triton_CPU_4S_8380Hx1", "system_name": "Supermicro SYS-240P-TNRT (Cooper Lake running Triton)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 5689, "task": "image classification", "task2": "image classification", "total_cores": 112, "uid": "fbf3150531f42bd8", "use_accelerator": false, "weight_data_types": "int8", "weight_transformations": "We transform the original fp32 weight to int8 weight using symmetric quantization." }, { "50.00 percentile latency (ns)": 341359895004, "90.00 percentile latency (ns)": 609749671816, "95.00 percentile latency (ns)": 643295897730, "97.00 percentile latency (ns)": 656712626949, "99.00 percentile latency (ns)": 670137927579, "99.90 percentile latency (ns)": 678779893736, "Max latency (ns)": 683992641724, "Mean latency (ns)": 47792367177, "Min duration satisfied": "Yes", "Min latency (ns)": 4878523807, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_MultiMigServer", "Samples per second": 275582, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB (7x1g.10gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 275582, "characteristics.samples_per_second.normalized_per_core": 34447.75, "characteristics.samples_per_second.normalized_per_processor": 34447.75, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GB-MIG_56x1g.10gb_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "open", "filesystem": "", "formal_model": "resnet50", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/open/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/open/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_56x1g.10gb_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0, Triton 21.02", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 188496000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_56x1g.10gb_TRT_Triton", "system_name": "NVIDIA DGX-A100 (8x A100-SXM-80GB-MIG-7x1g.10gb, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 285600, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "f0ad17db446a5a0c", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 308751152368, "90.00 percentile latency (ns)": 555544885451, "95.00 percentile latency (ns)": 586404200665, "97.00 percentile latency (ns)": 598734183835, "99.00 percentile latency (ns)": 611065643526, "99.90 percentile latency (ns)": 616590554657, "Max latency (ns)": 617125438459, "Mean latency (ns)": 308730425588, "Min duration satisfied": "Yes", "Min latency (ns)": 397812643, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Int4_Server", "Samples per second": 19250.5, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA A10", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.104, "characteristics.good": 38052, "characteristics.samples_per_second": 19250.5, "characteristics.samples_per_second.normalized_per_core": 19250.5, "characteristics.samples_per_second.normalized_per_processor": 19250.5, "characteristics.total": 50000, "ck_system": "A10x1", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "open", "filesystem": "", "formal_model": "resnet50", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "CUDA 11.1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "fp32", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/open/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/open/NVIDIA/results/A10x1", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "CUDA 11.1, Driver 460.32.03; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 1024, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 11880000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "rdi", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x1", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 18000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "b80497670207f5a2", "use_accelerator": true, "weight_data_types": "int4", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 311318073287, "90.00 percentile latency (ns)": 560302259647, "95.00 percentile latency (ns)": 591430247701, "97.00 percentile latency (ns)": 603879580648, "99.00 percentile latency (ns)": 616345865990, "99.90 percentile latency (ns)": 621959139201, "Max latency (ns)": 622528958760, "Mean latency (ns)": 117416504658, "Min duration satisfied": "Yes", "Min latency (ns)": 1868871966, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Int4_Server", "Samples per second": 152668, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA A10", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.104, "characteristics.good": 38052, "characteristics.samples_per_second": 152668, "characteristics.samples_per_second.normalized_per_core": 19083.5, "characteristics.samples_per_second.normalized_per_processor": 19083.5, "characteristics.total": 50000, "ck_system": "A10x8", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "open", "filesystem": "", "formal_model": "resnet50", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "CUDA 11.1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "fp32", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/open/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/open/NVIDIA/results/A10x8", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "CUDA 11.1, Driver 460.32.03; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 1024, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 95040000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "rdi", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x8", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 144000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "3dfe414eb0fc15c0", "use_accelerator": true, "weight_data_types": "int4", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 323635345651, "90.00 percentile latency (ns)": 583197372232, "95.00 percentile latency (ns)": 615630252980, "97.00 percentile latency (ns)": 628623617686, "99.00 percentile latency (ns)": 641575498500, "99.90 percentile latency (ns)": 647449931866, "Max latency (ns)": 648076005097, "Mean latency (ns)": 54963890632, "Min duration satisfied": "Yes", "Min latency (ns)": 89249257, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 105914, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA A10", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 105914, "characteristics.samples_per_second.normalized_per_core": 13239.25, "characteristics.samples_per_second.normalized_per_processor": 13239.25, "characteristics.total": 50000, "ck_system": "A10x8_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A10x8_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0, Triton 21.02", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 68640000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "preview", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x8_TRT_Triton", "system_name": "Supermicro 4029GP-TRT-OTO-28 (8x A10, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 104000, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "edd98445417a57ae", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 319633640398, "90.00 percentile latency (ns)": 575242056333, "95.00 percentile latency (ns)": 607135182102, "97.00 percentile latency (ns)": 619844492931, "99.00 percentile latency (ns)": 632778921274, "99.90 percentile latency (ns)": 638382136860, "Max latency (ns)": 638995757223, "Mean latency (ns)": 319637175007, "Min duration satisfied": "Yes", "Min latency (ns)": 431233807, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 38009.6, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 38009.6, "characteristics.samples_per_second.normalized_per_core": 38009.6, "characteristics.samples_per_second.normalized_per_processor": 38009.6, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GBx1_TRT_datacenter", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx1_TRT_datacenter", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 24288000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx1_TRT_datacenter", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 36800, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "8da8729a092967b9", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 329456757250, "90.00 percentile latency (ns)": 593081960448, "95.00 percentile latency (ns)": 626118643537, "97.00 percentile latency (ns)": 639352495668, "99.00 percentile latency (ns)": 652600593395, "99.90 percentile latency (ns)": 658561191858, "Max latency (ns)": 659236464621, "Mean latency (ns)": 329520606481, "Min duration satisfied": "Yes", "Min latency (ns)": 46413120, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 2673.09, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "N/A", "accelerator_memory_configuration": "", "accelerator_model_name": "N/A", "accelerator_on-chip_memories": "", "accelerators_per_node": 0, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.304, "characteristics.good": 38152, "characteristics.samples_per_second": 2673.09, "characteristics.samples_per_second.normalized_per_core": 11.137875000000001, "characteristics.samples_per_second.normalized_per_processor": 1336.545, "characteristics.total": 50000, "ck_system": "Triton_CPU_2S_6258Rx1", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "OpenVino 2021.2.200", "host_memory_capacity": "768 GB", "host_memory_configuration": "6 slots / 32GB each / 2934 MT/s per socket", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "fp32", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 240, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/Triton_CPU_2S_6258Rx1", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "OpenVino 2021.2.200, Triton 21.02; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "No", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1762200, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "The original weight filename: https://zenodo.org/record/2535873/files/resnet50_v1.pb", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "CPU Inference on Triton Inference ServerPowered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Triton_CPU_2S_6258Rx1", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 2670, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "5621427e864b70e1", "use_accelerator": false, "weight_data_types": "int8", "weight_transformations": "We transform the original fp32 weight to int8 weight using symmetric quantization." }, { "50.00 percentile latency (ns)": 319453417540, "90.00 percentile latency (ns)": 575090323108, "95.00 percentile latency (ns)": 607050125337, "97.00 percentile latency (ns)": 619817296654, "99.00 percentile latency (ns)": 632585932327, "99.90 percentile latency (ns)": 638350012152, "Max latency (ns)": 638963355886, "Mean latency (ns)": 319466977522, "Min duration satisfied": "Yes", "Min latency (ns)": 90520472, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 38011.6, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 38011.6, "characteristics.samples_per_second.normalized_per_core": 38011.6, "characteristics.samples_per_second.normalized_per_processor": 38011.6, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GBx1_TRT_Triton_datacenter", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx1_TRT_Triton_datacenter", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0, Triton 21.02; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 24288000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx1_TRT_Triton_datacenter", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 36800, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "4e75544f7b840359", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 322343249440, "90.00 percentile latency (ns)": 579413827707, "95.00 percentile latency (ns)": 611587734342, "97.00 percentile latency (ns)": 624444851174, "99.00 percentile latency (ns)": 637230260596, "99.90 percentile latency (ns)": 643068606646, "Max latency (ns)": 643594006503, "Mean latency (ns)": 119805959351, "Min duration satisfied": "Yes", "Min latency (ns)": 1209605881, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 141518, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 141518, "characteristics.samples_per_second.normalized_per_core": 17689.75, "characteristics.samples_per_second.normalized_per_processor": 17689.75, "characteristics.total": 50000, "ck_system": "A30x8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A30x8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.46, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 91080000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "preview", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x8_TRT", "system_name": "Gigabyte G482-Z54 (8x A30, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 138000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "7f3ab7c5674d1094", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 308632092250, "90.00 percentile latency (ns)": 555685503049, "95.00 percentile latency (ns)": 586592500864, "97.00 percentile latency (ns)": 598936388408, "99.00 percentile latency (ns)": 611273312250, "99.90 percentile latency (ns)": 616851297552, "Max latency (ns)": 617483128564, "Mean latency (ns)": 106116231075, "Min duration satisfied": "Yes", "Min latency (ns)": 165306920, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 147502, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 147502, "characteristics.samples_per_second.normalized_per_core": 18437.75, "characteristics.samples_per_second.normalized_per_processor": 18437.75, "characteristics.total": 50000, "ck_system": "A30x8_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A30x8_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.46, DALI 0.30.0, Triton 21.02", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 91080000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "preview", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x8_TRT_Triton", "system_name": "Gigabyte G482-Z54 (8x A30, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 138000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "937e4b37bd6bacc1", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 340392387207, "90.00 percentile latency (ns)": 611993442324, "95.00 percentile latency (ns)": 645851106732, "97.00 percentile latency (ns)": 659450223266, "99.00 percentile latency (ns)": 673037719476, "99.90 percentile latency (ns)": 679104540277, "Max latency (ns)": 679779767099, "Mean latency (ns)": 86406014156, "Min duration satisfied": "Yes", "Min latency (ns)": 1338140057, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 213599, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.power": 2195.056764705881, "characteristics.power.normalized_per_core": 274.38209558823513, "characteristics.power.normalized_per_processor": 274.38209558823513, "characteristics.samples_per_second": 213599, "characteristics.samples_per_second.normalized_per_core": 26699.875, "characteristics.samples_per_second.normalized_per_processor": 26699.875, "characteristics.total": 50000, "ck_system": "A100-PCIex8_TRT_MaxQ", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A100-PCIex8_TRT_MaxQ", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 145200000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex8_TRT_MaxQ", "system_name": "Gigabyte G482-Z54 (8x A100-PCIe, MaxQ, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 220000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "534ed49baee2f9eb", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 319185506432, "90.00 percentile latency (ns)": 573670362718, "95.00 percentile latency (ns)": 605519779121, "97.00 percentile latency (ns)": 618240318881, "99.00 percentile latency (ns)": 630967501245, "99.90 percentile latency (ns)": 636686696840, "Max latency (ns)": 637322122222, "Mean latency (ns)": 34418071842, "Min duration satisfied": "Yes", "Min latency (ns)": 1580232544, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 304876, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 304876, "characteristics.samples_per_second.normalized_per_core": 38109.5, "characteristics.samples_per_second.normalized_per_processor": 38109.5, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 194304000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT", "system_name": "NVIDIA DGX-A100 (8x A100-SXM-80GB, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 294400, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "50a28d6b7d78f408", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 316403304983, "90.00 percentile latency (ns)": 571596591834, "95.00 percentile latency (ns)": 603530289600, "97.00 percentile latency (ns)": 616353860372, "99.00 percentile latency (ns)": 629135658149, "99.90 percentile latency (ns)": 634922254391, "Max latency (ns)": 635539494289, "Mean latency (ns)": 98619743759, "Min duration satisfied": "Yes", "Min latency (ns)": 843045792, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 132926, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 132926, "characteristics.samples_per_second.normalized_per_core": 33231.5, "characteristics.samples_per_second.normalized_per_processor": 33231.5, "characteristics.total": 50000, "ck_system": "DGX-Station-A100_A100-SXM-80GBx4_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 1, "host_storage_capacity": "10 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-Station-A100_A100-SXM-80GBx4_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 84480000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-Station-A100_A100-SXM-80GBx4_TRT", "system_name": "NVIDIA DGX Station A100 (4x A100-SXM-80GB, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 128000, "task": "image classification", "task2": "image classification", "total_cores": 64, "uid": "efe8cd8c32bb2e0e", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 329509474204, "90.00 percentile latency (ns)": 593062638700, "95.00 percentile latency (ns)": 626045487035, "97.00 percentile latency (ns)": 639185894504, "99.00 percentile latency (ns)": 652409557380, "99.90 percentile latency (ns)": 658320329207, "Max latency (ns)": 658943874401, "Mean latency (ns)": 329480914690, "Min duration satisfied": "Yes", "Min latency (ns)": 124678314, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 5108.17, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB (1x1g.10gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 5108.17, "characteristics.samples_per_second.normalized_per_core": 5108.17, "characteristics.samples_per_second.normalized_per_processor": 5108.17, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 3366000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse", "system_name": "NVIDIA DGX-A100 (1x A100-SXM-80GB-MIG-1x1g.10gb, TensorRT, HeteroMultiUse)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 5100, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "5034c52adaa51fec", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 338575173146, "90.00 percentile latency (ns)": 610417827641, "95.00 percentile latency (ns)": 644383746489, "97.00 percentile latency (ns)": 657973473965, "99.00 percentile latency (ns)": 671598246553, "99.90 percentile latency (ns)": 677739616440, "Max latency (ns)": 678395208829, "Mean latency (ns)": 120514607200, "Min duration satisfied": "Yes", "Min latency (ns)": 887399145, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 124529, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.power": 1268.9517699115058, "characteristics.power.normalized_per_core": 317.23794247787646, "characteristics.power.normalized_per_processor": 317.23794247787646, "characteristics.samples_per_second": 124529, "characteristics.samples_per_second.normalized_per_core": 31132.25, "characteristics.samples_per_second.normalized_per_processor": 31132.25, "characteristics.total": 50000, "ck_system": "DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 1, "host_storage_capacity": "10 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 84480000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ", "system_name": "NVIDIA DGX Station A100 (4x A100-SXM-80GB, MaxQ, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 128000, "task": "image classification", "task2": "image classification", "total_cores": 64, "uid": "ca643fb74f99823b", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 392039975724, "90.00 percentile latency (ns)": 704700258024, "95.00 percentile latency (ns)": 743848985971, "97.00 percentile latency (ns)": 759456419322, "99.00 percentile latency (ns)": 775110302608, "99.90 percentile latency (ns)": 782138675256, "Max latency (ns)": 782918211643, "Mean latency (ns)": 12282874070, "Min duration satisfied": "Yes", "Min latency (ns)": 1795346596, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 248179, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 248179, "characteristics.samples_per_second.normalized_per_core": 31022.375, "characteristics.samples_per_second.normalized_per_processor": 31022.375, "characteristics.total": 50000, "ck_system": "A100-PCIex8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A100-PCIex8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 194304000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex8_TRT", "system_name": "Gigabyte G482-Z54 (8x A100-PCIe, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 294400, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "e3884deaac7fee03", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 321541077112, "90.00 percentile latency (ns)": 578821418089, "95.00 percentile latency (ns)": 610985146367, "97.00 percentile latency (ns)": 623850563199, "99.00 percentile latency (ns)": 636719386260, "99.90 percentile latency (ns)": 642503414886, "Max latency (ns)": 643135145874, "Mean latency (ns)": 36738884994, "Min duration satisfied": "Yes", "Min latency (ns)": 65242440, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 302120, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.024, "characteristics.good": 38012, "characteristics.samples_per_second": 302120, "characteristics.samples_per_second.normalized_per_core": 37765.0, "characteristics.samples_per_second.normalized_per_processor": 37765.0, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0, Triton 21.02", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 194304000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT_Triton", "system_name": "NVIDIA DGX-A100 (8x A100-SXM-80GB, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 294400, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "9d7171f343adbcdb", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 359376393333, "90.00 percentile latency (ns)": 646084992984, "95.00 percentile latency (ns)": 681911747439, "97.00 percentile latency (ns)": 696262785882, "99.00 percentile latency (ns)": 710585275579, "99.90 percentile latency (ns)": 717049678551, "Max latency (ns)": 717766785353, "Mean latency (ns)": 74604015388, "Min duration satisfied": "Yes", "Min latency (ns)": 1588781651, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 270706, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.power": 3458.6854951185487, "characteristics.power.normalized_per_core": 432.3356868898186, "characteristics.power.normalized_per_processor": 432.3356868898186, "characteristics.samples_per_second": 270706, "characteristics.samples_per_second.normalized_per_core": 33838.25, "characteristics.samples_per_second.normalized_per_processor": 33838.25, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT_MaxQ", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT_MaxQ", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 194304000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT_MaxQ", "system_name": "NVIDIA DGX-A100 (8x A100-SXM-80GB, MaxQ, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 294400, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "28141e4f0ca9138e", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 324710978406, "90.00 percentile latency (ns)": 584557959147, "95.00 percentile latency (ns)": 617041545154, "97.00 percentile latency (ns)": 630034287796, "99.00 percentile latency (ns)": 643037840572, "99.90 percentile latency (ns)": 648886574494, "Max latency (ns)": 649525774238, "Mean latency (ns)": 56036795731, "Min duration satisfied": "Yes", "Min latency (ns)": 815298321, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 105677, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA A10", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 105677, "characteristics.samples_per_second.normalized_per_core": 13209.625, "characteristics.samples_per_second.normalized_per_processor": 13209.625, "characteristics.total": 50000, "ck_system": "A10x8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A10x8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 68640000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "preview", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x8_TRT", "system_name": "Supermicro 4029GP-TRT-OTO-28 (8x A10, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 104000, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "94ee0b6653828f38", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 325613893494, "90.00 percentile latency (ns)": 586333413881, "95.00 percentile latency (ns)": 618929274498, "97.00 percentile latency (ns)": 631910416977, "99.00 percentile latency (ns)": 644960757801, "99.90 percentile latency (ns)": 650840219724, "Max latency (ns)": 651485852218, "Mean latency (ns)": 325819216606, "Min duration satisfied": "Yes", "Min latency (ns)": 41491089, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 5763.35, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "N/A", "accelerator_memory_configuration": "", "accelerator_model_name": "N/A", "accelerator_on-chip_memories": "", "accelerators_per_node": 0, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.304, "characteristics.good": 38152, "characteristics.samples_per_second": 5763.35, "characteristics.samples_per_second.normalized_per_core": 51.45848214285714, "characteristics.samples_per_second.normalized_per_processor": 1440.8375, "characteristics.total": 50000, "ck_system": "Triton_CPU_4S_8380Hx1", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "OpenVino 2021.2.200", "host_memory_capacity": "1536 GB", "host_memory_configuration": "6 slots / 32GB each / 3200 MT/s per socket", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8380H CPU @ 2.90GHz", "host_processors_per_node": 4, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "fp32", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 112, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/Triton_CPU_4S_8380Hx1", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS", "other_hardware": "", "other_software_stack": "Tensorflow 2.4.0, OpenVino 2021.2.200, Triton 21.02", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "No", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 3754740, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "The original weight filename: https://zenodo.org/record/2535873/files/resnet50_v1.pb", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "CPU Inference on Triton Inference Server", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Triton_CPU_4S_8380Hx1", "system_name": "Supermicro SYS-240P-TNRT (Cooper Lake running Triton)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 5689, "task": "image classification", "task2": "image classification", "total_cores": 112, "uid": "489bbe17cbc3ae24", "use_accelerator": false, "weight_data_types": "int8", "weight_transformations": "We transform the original fp32 weight to int8 weight using symmetric quantization." }, { "50.00 percentile latency (ns)": 36104395638, "90.00 percentile latency (ns)": 64916472590, "95.00 percentile latency (ns)": 68533072466, "97.00 percentile latency (ns)": 69968083685, "99.00 percentile latency (ns)": 71383012095, "99.90 percentile latency (ns)": 72025982488, "Max latency (ns)": 72087190753, "Mean latency (ns)": 36143364451, "Min duration satisfied": "Yes", "Min latency (ns)": 259163380, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "Int4_Server", "Samples per second": 100711, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA TITAN RTX", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "characteristics.accuracy": 76.104, "characteristics.good": 38052, "characteristics.samples_per_second": 100711, "characteristics.samples_per_second.normalized_per_core": 25177.75, "characteristics.samples_per_second.normalized_per_processor": 25177.75, "characteristics.total": 50000, "ck_system": "TitanRTXx4", "ck_used": true, "cooling": "watercooled", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "open", "formal_model": "resnet", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "CUDA 10.1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3.84 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.5, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/open/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/open/NVIDIA/results/TitanRTXx4", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "docker 18.09.2, python 3.6.8,gcc 5.5.0,onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 3133965575612453542, "retraining": "Y", "sample_index_rng_seed": 665484352860916858, "samples_per_query": 7260000, "schedule_rng_seed": 3622009729038561421, "starting_weights_filename": "model/npdata/*", "status": "RDI", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/TitanRTXx4", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "target_latency (ns)": 0, "target_qps": 110000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "b1d88eabdca84ffd", "use_accelerator": true, "weight_data_types": "int8,int4", "weight_transformations": "quantization, affine fusion, fine-tuning" }, { "50.00 percentile latency (ns)": 33709075105, "90.00 percentile latency (ns)": 60674758173, "95.00 percentile latency (ns)": 64055156265, "97.00 percentile latency (ns)": 65400988797, "99.00 percentile latency (ns)": 66766114167, "99.90 percentile latency (ns)": 67324468943, "Max latency (ns)": 67370391504, "Mean latency (ns)": 33776649551, "Min duration satisfied": "Yes", "Min latency (ns)": 310342800, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "Int4_Server", "Samples per second": 181237, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Tesla T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 20, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "characteristics.accuracy": 76.104, "characteristics.good": 38052, "characteristics.samples_per_second": 181237, "characteristics.samples_per_second.normalized_per_core": 9061.85, "characteristics.samples_per_second.normalized_per_processor": 9061.85, "characteristics.total": 50000, "ck_system": "T4x20", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "open", "formal_model": "resnet", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "CUDA 10.1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.5, "normalize_cores": 20, "normalize_processors": 20, "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/open/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/open/NVIDIA/results/T4x20", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "docker 18.09.2, python 3.6.8,gcc 5.5.0,onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 3133965575612453542, "retraining": "Y", "sample_index_rng_seed": 665484352860916858, "samples_per_query": 12210000, "schedule_rng_seed": 3622009729038561421, "starting_weights_filename": "model/npdata/*", "status": "RDI", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x20", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "target_latency (ns)": 0, "target_qps": 185000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "48d6dbab04287881", "use_accelerator": true, "weight_data_types": "int8,int4", "weight_transformations": "quantization, affine fusion, fine-tuning" }, { "50.00 percentile latency (ns)": 36809545584, "90.00 percentile latency (ns)": 66436481033, "95.00 percentile latency (ns)": 70169130027, "97.00 percentile latency (ns)": 71625064754, "99.00 percentile latency (ns)": 73180146400, "99.90 percentile latency (ns)": 73751757645, "Max latency (ns)": 73828474499, "Mean latency (ns)": 36894956115, "Min duration satisfied": "Yes", "Min latency (ns)": 263665762, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "Int4_Server", "Samples per second": 71517.1, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Tesla T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "characteristics.accuracy": 76.104, "characteristics.good": 38052, "characteristics.samples_per_second": 71517.1, "characteristics.samples_per_second.normalized_per_core": 8939.6375, "characteristics.samples_per_second.normalized_per_processor": 8939.6375, "characteristics.total": 50000, "ck_system": "T4x8", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "open", "formal_model": "resnet", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "CUDA 10.1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.5, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/open/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/open/NVIDIA/results/T4x8", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "docker 18.09.2, python 3.6.8,gcc 5.5.0,onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 3133965575612453542, "retraining": "Y", "sample_index_rng_seed": 665484352860916858, "samples_per_query": 5280000, "schedule_rng_seed": 3622009729038561421, "starting_weights_filename": "model/npdata/*", "status": "RDI", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x8", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "target_latency (ns)": 0, "target_qps": 80000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "468e443a3b694b03", "use_accelerator": true, "weight_data_types": "int8,int4", "weight_transformations": "quantization, affine fusion, fine-tuning" }, { "50.00 percentile latency (ns)": 32566453285, "90.00 percentile latency (ns)": 58505996822, "95.00 percentile latency (ns)": 61722164165, "97.00 percentile latency (ns)": 63073502103, "99.00 percentile latency (ns)": 64345563811, "99.90 percentile latency (ns)": 64907067732, "Max latency (ns)": 64919672882, "Mean latency (ns)": 32561607598, "Min duration satisfied": "Yes", "Min latency (ns)": 269066016, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 66250.4, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA TITAN RTX", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "characteristics.accuracy": 76.034, "characteristics.good": 38017, "characteristics.samples_per_second": 66250.4, "characteristics.samples_per_second.normalized_per_core": 16562.6, "characteristics.samples_per_second.normalized_per_processor": 16562.6, "characteristics.total": 50000, "ck_system": "TitanRTXx4", "ck_used": false, "cooling": "watercooled", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 6.0, CUDA 10.1, cuDNN 7.6.3, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 24, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) 8268", "host_processors_per_node": 2, "host_storage_capacity": "3.84 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.5, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/results/TitanRTXx4", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.3", "other_software_stack": "docker 18.09.2, python 3.6.8,gcc 5.5.0,onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 3133965575612453542, "retraining": "N", "sample_index_rng_seed": 665484352860916858, "samples_per_query": 4300956, "schedule_rng_seed": 3622009729038561421, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/TitanRTXx4", "system_name": "SCAN 3XS DBP T496X2 Fluid", "target_latency (ns)": 0, "target_qps": 65166, "task": "image classification", "task2": "image classification", "total_cores": 48, "uid": "96038c6849be892c", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 32780356596, "90.00 percentile latency (ns)": 58903696096, "95.00 percentile latency (ns)": 62176644240, "97.00 percentile latency (ns)": 63476964675, "99.00 percentile latency (ns)": 64784798831, "99.90 percentile latency (ns)": 65375763856, "Max latency (ns)": 65435756288, "Mean latency (ns)": 32785828553, "Min duration satisfied": "Yes", "Min latency (ns)": 169618852, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 222388, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA TITAN RTX", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "characteristics.accuracy": 70.814, "characteristics.good": 35407, "characteristics.samples_per_second": 222388, "characteristics.samples_per_second.normalized_per_core": 55597.0, "characteristics.samples_per_second.normalized_per_processor": 55597.0, "characteristics.total": 50000, "ck_system": "TitanRTXx4", "ck_used": false, "cooling": "watercooled", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 6.0, CUDA 10.1, cuDNN 7.6.3, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 24, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) 8268", "host_processors_per_node": 2, "host_storage_capacity": "3.84 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "mobilenet", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.5, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/results/TitanRTXx4", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.3", "other_software_stack": "docker 18.09.2, python 3.6.8,gcc 5.5.0,onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 3133965575612453542, "retraining": "N", "sample_index_rng_seed": 665484352860916858, "samples_per_query": 14552142, "schedule_rng_seed": 3622009729038561421, "starting_weights_filename": "mobilenet_sym_no_bn.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/TitanRTXx4", "system_name": "SCAN 3XS DBP T496X2 Fluid", "target_latency (ns)": 0, "target_qps": 220487, "task": "image classification", "task2": "image classification", "total_cores": 48, "uid": "f114530bdb0c5784", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 33358214703, "90.00 percentile latency (ns)": 60028854755, "95.00 percentile latency (ns)": 63364165802, "97.00 percentile latency (ns)": 64691344238, "99.00 percentile latency (ns)": 66018546547, "99.90 percentile latency (ns)": 66616774917, "Max latency (ns)": 66674555686, "Mean latency (ns)": 33372978774, "Min duration satisfied": "Yes", "Min latency (ns)": 47555679, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 2158.93, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "Shared with host", "accelerator_memory_configuration": "SRAM", "accelerator_model_name": "NVIDIA Xavier", "accelerator_on-chip_memories": "1MB (128KB/SM) L1 + 512KB L2 + 4MB (DLA)", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "characteristics.accuracy": 76.01, "characteristics.good": 38005, "characteristics.samples_per_second": 2158.93, "characteristics.samples_per_second.normalized_per_core": 2158.93, "characteristics.samples_per_second.normalized_per_processor": 2158.93, "characteristics.total": 50000, "ck_system": "Xavier", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "JetPack 4.3 DP, TensorRT 6.0, cuDNN 7.6.3, CUDA 10.0, cub 1.8.0", "host_memory_capacity": "16 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "8MB L2 (2MB per dual cluster)/4 MB L3 (shared)", "host_processor_core_count": 8, "host_processor_frequency": "2265.5 MHz", "host_processor_interconnect": "", "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)", "host_processors_per_node": 1, "host_storage_capacity": "32 GB", "host_storage_type": "eMMC 5.1", "hw_notes": "GPU and both DLAs are used in Offline and MultiStream scenarios", "informal_model": "resnet", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.5, "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/results/Xavier", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.3", "other_software_stack": "pycuda 2019.1, pytorch 1.1, torchvision 0.2.2.post3", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 3133965575612453542, "retraining": "N", "sample_index_rng_seed": 665484352860916858, "samples_per_query": 143946, "schedule_rng_seed": 3622009729038561421, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Xavier", "system_name": "NVIDIA Jetson AGX Xavier", "target_latency (ns)": 0, "target_qps": 2181, "task": "image classification", "task2": "image classification", "total_cores": 8, "uid": "00cb1fa41649eb0a", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 33184465844, "90.00 percentile latency (ns)": 59609215568, "95.00 percentile latency (ns)": 62933931005, "97.00 percentile latency (ns)": 64227405521, "99.00 percentile latency (ns)": 65607347592, "99.90 percentile latency (ns)": 66155489853, "Max latency (ns)": 66215120184, "Mean latency (ns)": 33150991229, "Min duration satisfied": "Yes", "Min latency (ns)": 76896385, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 6520.75, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "Shared with host", "accelerator_memory_configuration": "SRAM", "accelerator_model_name": "NVIDIA Xavier", "accelerator_on-chip_memories": "1MB (128KB/SM) L1 + 512KB L2 + 4MB (DLA)", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "characteristics.accuracy": 70.778, "characteristics.good": 35389, "characteristics.samples_per_second": 6520.75, "characteristics.samples_per_second.normalized_per_core": 6520.75, "characteristics.samples_per_second.normalized_per_processor": 6520.75, "characteristics.total": 50000, "ck_system": "Xavier", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "JetPack 4.3 DP, TensorRT 6.0, cuDNN 7.6.3, CUDA 10.0, cub 1.8.0", "host_memory_capacity": "16 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "8MB L2 (2MB per dual cluster)/4 MB L3 (shared)", "host_processor_core_count": 8, "host_processor_frequency": "2265.5 MHz", "host_processor_interconnect": "", "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)", "host_processors_per_node": 1, "host_storage_capacity": "32 GB", "host_storage_type": "eMMC 5.1", "hw_notes": "GPU and both DLAs are used in Offline and MultiStream scenarios", "informal_model": "mobilenet", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.5, "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/results/Xavier", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.3", "other_software_stack": "pycuda 2019.1, pytorch 1.1, torchvision 0.2.2.post3", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 3133965575612453542, "retraining": "N", "sample_index_rng_seed": 665484352860916858, "samples_per_query": 431772, "schedule_rng_seed": 3622009729038561421, "starting_weights_filename": "mobilenet_sym_no_bn.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Xavier", "system_name": "NVIDIA Jetson AGX Xavier", "target_latency (ns)": 0, "target_qps": 6542, "task": "image classification", "task2": "image classification", "total_cores": 8, "uid": "d66a33146ee6e51c", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 32030167824, "90.00 percentile latency (ns)": 57783369351, "95.00 percentile latency (ns)": 61015505755, "97.00 percentile latency (ns)": 62307658057, "99.00 percentile latency (ns)": 63599572319, "99.90 percentile latency (ns)": 64188152627, "Max latency (ns)": 64238468149, "Mean latency (ns)": 32085623959, "Min duration satisfied": "Yes", "Min latency (ns)": 119151670, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 113592, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Tesla T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 20, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "characteristics.accuracy": 76.034, "characteristics.good": 38017, "characteristics.samples_per_second": 113592, "characteristics.samples_per_second.normalized_per_core": 5679.6, "characteristics.samples_per_second.normalized_per_processor": 5679.6, "characteristics.total": 50000, "ck_system": "T4x20", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 6.0, CUDA 10.1, cuDNN 7.6.3, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.5, "normalize_cores": 20, "normalize_processors": 20, "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/results/T4x20", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.3", "other_software_stack": "docker 18.09.2, python 3.6.8,gcc 5.5.0,onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 3133965575612453542, "retraining": "N", "sample_index_rng_seed": 665484352860916858, "samples_per_query": 7296960, "schedule_rng_seed": 3622009729038561421, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x20", "system_name": "Supermicro 6049GP-TRT-OTO-29 20xT4", "target_latency (ns)": 0, "target_qps": 110560, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "6f04b48f2a40a279", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 32317046238, "90.00 percentile latency (ns)": 58378672368, "95.00 percentile latency (ns)": 61628039839, "97.00 percentile latency (ns)": 62937953446, "99.00 percentile latency (ns)": 64244908377, "99.90 percentile latency (ns)": 64839469848, "Max latency (ns)": 64893819848, "Mean latency (ns)": 32375998088, "Min duration satisfied": "Yes", "Min latency (ns)": 66136712, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 44977.8, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Tesla T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "characteristics.accuracy": 76.034, "characteristics.good": 38017, "characteristics.samples_per_second": 44977.8, "characteristics.samples_per_second.normalized_per_core": 5622.225, "characteristics.samples_per_second.normalized_per_processor": 5622.225, "characteristics.total": 50000, "ck_system": "T4x8", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 6.0, CUDA 10.1, cuDNN 7.6.3, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.5, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/results/T4x8", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.3", "other_software_stack": "docker 18.09.2, python 3.6.8,gcc 5.5.0,onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 3133965575612453542, "retraining": "N", "sample_index_rng_seed": 665484352860916858, "samples_per_query": 2918784, "schedule_rng_seed": 3622009729038561421, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x8", "system_name": "Supermicro 4029GP-TRT-OTO-28 8xT4", "target_latency (ns)": 0, "target_qps": 44224, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "03aac7a4c2da5f7a", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 33427748204, "90.00 percentile latency (ns)": 60366951863, "95.00 percentile latency (ns)": 63741323071, "97.00 percentile latency (ns)": 65091044799, "99.00 percentile latency (ns)": 66439282383, "99.90 percentile latency (ns)": 67048115051, "Max latency (ns)": 67114768303, "Mean latency (ns)": 33491159140, "Min duration satisfied": "Yes", "Min latency (ns)": 110071160, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 141807, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Tesla T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "characteristics.accuracy": 70.814, "characteristics.good": 35407, "characteristics.samples_per_second": 141807, "characteristics.samples_per_second.normalized_per_core": 17725.875, "characteristics.samples_per_second.normalized_per_processor": 17725.875, "characteristics.total": 50000, "ck_system": "T4x8", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 6.0, CUDA 10.1, cuDNN 7.6.3, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "mobilenet", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.5, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/NVIDIA/results/T4x8", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.3", "other_software_stack": "docker 18.09.2, python 3.6.8,gcc 5.5.0,onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 3133965575612453542, "retraining": "N", "sample_index_rng_seed": 665484352860916858, "samples_per_query": 9517332, "schedule_rng_seed": 3622009729038561421, "starting_weights_filename": "mobilenet_sym_no_bn.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x8", "system_name": "Supermicro 4029GP-TRT-OTO-28 8xT4", "target_latency (ns)": 0, "target_qps": 144202, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "983f4e45753c015d", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 35943995043, "90.00 percentile latency (ns)": 64603926238, "95.00 percentile latency (ns)": 68186257895, "97.00 percentile latency (ns)": 69626927171, "99.00 percentile latency (ns)": 71042526104, "99.90 percentile latency (ns)": 71680898839, "Max latency (ns)": 71735251238, "Mean latency (ns)": 35972700864, "Min duration satisfied": "Yes", "Min latency (ns)": 260411693, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "Int4_Server", "Samples per second": 101205, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA TITAN RTX", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.104, "characteristics.good": 38052, "characteristics.samples_per_second": 101205, "characteristics.samples_per_second.normalized_per_core": 25301.25, "characteristics.samples_per_second.normalized_per_processor": 25301.25, "characteristics.total": 50000, "ck_system": "TitanRTXx4", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "open", "formal_model": "resnet50", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "CUDA 10.1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 24, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) 8268", "host_processors_per_node": 2, "host_storage_capacity": "3.84 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/open/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/open/NVIDIA/results/TitanRTXx4", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.4", "other_software_stack": "", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "Y", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 7260000, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "model/npdata/*", "status": "rdi", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/TitanRTXx4", "system_name": "SCAN 3XS DBP T496X2 Fluid (4x TitanRTX)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 110000, "task": "image classification", "task2": "image classification", "total_cores": 48, "uid": "0e542cdba396a62e", "use_accelerator": true, "weight_data_types": "int8,int4", "weight_transformations": "quantization, affine fusion, fine-tuning" }, { "50.00 percentile latency (ns)": 33891091422, "90.00 percentile latency (ns)": 61133784408, "95.00 percentile latency (ns)": 64576048557, "97.00 percentile latency (ns)": 65941887530, "99.00 percentile latency (ns)": 67324059018, "99.90 percentile latency (ns)": 67902986082, "Max latency (ns)": 67959640032, "Mean latency (ns)": 34003374693, "Min duration satisfied": "Yes", "Min latency (ns)": 465923645, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "Int4_Server", "Samples per second": 179665, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 20, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.104, "characteristics.good": 38052, "characteristics.samples_per_second": 179665, "characteristics.samples_per_second.normalized_per_core": 8983.25, "characteristics.samples_per_second.normalized_per_processor": 8983.25, "characteristics.total": 50000, "ck_system": "T4x20", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "open", "formal_model": "resnet50", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "CUDA 10.1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 20, "normalize_processors": 20, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/open/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/open/NVIDIA/results/T4x20", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.4", "other_software_stack": "", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "Y", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 12210000, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "model/npdata/*", "status": "rdi", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x20", "system_name": "Supermicro 6049GP-TRT-OTO-29 (20x T4)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 185000, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "b846fc3d430a98af", "use_accelerator": true, "weight_data_types": "int8,int4", "weight_transformations": "quantization, affine fusion, fine-tuning" }, { "50.00 percentile latency (ns)": 36553380772, "90.00 percentile latency (ns)": 65973463983, "95.00 percentile latency (ns)": 69662321192, "97.00 percentile latency (ns)": 71169596414, "99.00 percentile latency (ns)": 72621778720, "99.90 percentile latency (ns)": 73275137975, "Max latency (ns)": 73313681033, "Mean latency (ns)": 36643582613, "Min duration satisfied": "Yes", "Min latency (ns)": 378014777, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "Int4_Server", "Samples per second": 72019.3, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.104, "characteristics.good": 38052, "characteristics.samples_per_second": 72019.3, "characteristics.samples_per_second.normalized_per_core": 9002.4125, "characteristics.samples_per_second.normalized_per_processor": 9002.4125, "characteristics.total": 50000, "ck_system": "T4x8", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "open", "formal_model": "resnet50", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "CUDA 10.1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/open/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/open/NVIDIA/results/T4x8", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.4", "other_software_stack": "", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "Y", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 5280000, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "model/npdata/*", "status": "rdi", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x8", "system_name": "Supermicro 4029GP-TRT-OTO-28 (8x T4)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 80000, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "8bd5c3a4a04a48ac", "use_accelerator": true, "weight_data_types": "int8,int4", "weight_transformations": "quantization, affine fusion, fine-tuning" }, { "50.00 percentile latency (ns)": 35909777360, "90.00 percentile latency (ns)": 63916666788, "95.00 percentile latency (ns)": 67412691423, "97.00 percentile latency (ns)": 68803260875, "99.00 percentile latency (ns)": 70196473626, "99.90 percentile latency (ns)": 70821466054, "Max latency (ns)": 70877831752, "Mean latency (ns)": 35761838384, "Min duration satisfied": "Yes", "Min latency (ns)": 97840226, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 274139, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-SXM4", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.samples_per_second": 274139, "characteristics.samples_per_second.normalized_per_core": 34267.375, "characteristics.samples_per_second.normalized_per_processor": 34267.375, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM4x8_TRT_Triton", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM4x8_TRT_Triton", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0, Triton 20.09; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 19430400, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM4x8_TRT_Triton", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 294400, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "e8bd765f64d2b25d", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 32794076940, "90.00 percentile latency (ns)": 59288946283, "95.00 percentile latency (ns)": 62625964203, "97.00 percentile latency (ns)": 63960674591, "99.00 percentile latency (ns)": 65296785772, "99.90 percentile latency (ns)": 65895071012, "Max latency (ns)": 65965445562, "Mean latency (ns)": 32893076626, "Min duration satisfied": "Yes", "Min latency (ns)": 178341002, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 121564, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 20, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.samples_per_second": 121564, "characteristics.samples_per_second.normalized_per_core": 6078.2, "characteristics.samples_per_second.normalized_per_processor": 6078.2, "characteristics.total": 50000, "ck_system": "T4x20_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 20, "normalize_processors": 20, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/T4x20_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.4", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 8019000, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x20_TRT", "system_name": "Supermicro 6049GP-TRT-OTO-29 (20x T4, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 121500, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "2f4b83a93914195b", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 34212997506, "90.00 percentile latency (ns)": 61845569973, "95.00 percentile latency (ns)": 65199723947, "97.00 percentile latency (ns)": 66544210518, "99.00 percentile latency (ns)": 68023527287, "99.90 percentile latency (ns)": 68534400844, "Max latency (ns)": 68543668955, "Mean latency (ns)": 34334120275, "Min duration satisfied": "Yes", "Min latency (ns)": 356375569, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 63781.8, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe", "accelerator_on-chip_memories": "", "accelerators_per_node": 2, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.samples_per_second": 63781.8, "characteristics.samples_per_second.normalized_per_core": 31890.9, "characteristics.samples_per_second.normalized_per_processor": 31890.9, "characteristics.total": 50000, "ck_system": "A100-PCIex2_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 2, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/A100-PCIex2_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.4", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 4371840, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex2_TRT", "system_name": "Gigabyte G482-Z52 (2x A100-PCIe, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 66240, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "0e35a4e5b5e41209", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 32643833418, "90.00 percentile latency (ns)": 58565233762, "95.00 percentile latency (ns)": 61853768064, "97.00 percentile latency (ns)": 63123066464, "99.00 percentile latency (ns)": 64437637106, "99.90 percentile latency (ns)": 64982938126, "Max latency (ns)": 65061492721, "Mean latency (ns)": 32655880890, "Min duration satisfied": "Yes", "Min latency (ns)": 391815077, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 298647, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-SXM4", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.samples_per_second": 298647, "characteristics.samples_per_second.normalized_per_core": 37330.875, "characteristics.samples_per_second.normalized_per_processor": 37330.875, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM4x8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM4x8_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.4", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 19430400, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM4x8_TRT", "system_name": "NVIDIA DGX-A100 (8x A100-SXM4, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 294400, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "604cf585d826ea83", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 33552661249, "90.00 percentile latency (ns)": 60682857831, "95.00 percentile latency (ns)": 64097363496, "97.00 percentile latency (ns)": 65444177811, "99.00 percentile latency (ns)": 66815735478, "99.90 percentile latency (ns)": 67433941900, "Max latency (ns)": 67487759215, "Mean latency (ns)": 33652593201, "Min duration satisfied": "Yes", "Min latency (ns)": 115767434, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 48897.8, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.samples_per_second": 48897.8, "characteristics.samples_per_second.normalized_per_core": 6112.225, "characteristics.samples_per_second.normalized_per_processor": 6112.225, "characteristics.total": 50000, "ck_system": "T4x8_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/T4x8_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.4", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 3300000, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x8_TRT", "system_name": "Supermicro 4029GP-TRT-OTO-28 (8x T4, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 50000, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "f8821a060be26828", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 32741763952, "90.00 percentile latency (ns)": 59298779293, "95.00 percentile latency (ns)": 62634609662, "97.00 percentile latency (ns)": 63974391311, "99.00 percentile latency (ns)": 65313360948, "99.90 percentile latency (ns)": 65913309334, "Max latency (ns)": 65980583070, "Mean latency (ns)": 32845977674, "Min duration satisfied": "Yes", "Min latency (ns)": 76571718, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 121536, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 20, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.samples_per_second": 121536, "characteristics.samples_per_second.normalized_per_core": 6076.8, "characteristics.samples_per_second.normalized_per_processor": 6076.8, "characteristics.total": 50000, "ck_system": "T4x20_TRT_Triton", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 20, "normalize_processors": 20, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/T4x20_TRT_Triton", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0, Triton 20.09; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 8019000, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x20_TRT_Triton", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 121500, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "fe792d4a05478ab2", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 33606990427, "90.00 percentile latency (ns)": 60786243078, "95.00 percentile latency (ns)": 64201897475, "97.00 percentile latency (ns)": 65567760206, "99.00 percentile latency (ns)": 66940177544, "99.90 percentile latency (ns)": 67544422851, "Max latency (ns)": 67619536981, "Mean latency (ns)": 33694671052, "Min duration satisfied": "Yes", "Min latency (ns)": 77483073, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "TRTIS_Server", "Samples per second": 48802.5, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.samples_per_second": 48802.5, "characteristics.samples_per_second.normalized_per_core": 6100.3125, "characteristics.samples_per_second.normalized_per_processor": 6100.3125, "characteristics.total": 50000, "ck_system": "T4x8_TRT_Triton", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/T4x8_TRT_Triton", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0, Triton 20.09; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 3300000, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x8_TRT_Triton", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 50000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "59b1db33a0448148", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 34222594520, "90.00 percentile latency (ns)": 61661771257, "95.00 percentile latency (ns)": 65062975299, "97.00 percentile latency (ns)": 66443039213, "99.00 percentile latency (ns)": 67845054431, "99.90 percentile latency (ns)": 68449641377, "Max latency (ns)": 68493590229, "Mean latency (ns)": 34249439125, "Min duration satisfied": "Yes", "Min latency (ns)": 130457825, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 63828.5, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe", "accelerator_on-chip_memories": "", "accelerators_per_node": 2, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.samples_per_second": 63828.5, "characteristics.samples_per_second.normalized_per_core": 31914.25, "characteristics.samples_per_second.normalized_per_processor": 31914.25, "characteristics.total": 50000, "ck_system": "A100-PCIex2_TRT_Triton", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 2, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/A100-PCIex2_TRT_Triton", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0, Triton 20.09; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 4371840, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex2_TRT_Triton", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 66240, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "83584268916d3e24", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" } ]