[ { "50.00 percentile latency (ns)": 434010889975, "90.00 percentile latency (ns)": 781256381051, "95.00 percentile latency (ns)": 824740543238, "97.00 percentile latency (ns)": 842109990723, "99.00 percentile latency (ns)": 859509013589, "99.90 percentile latency (ns)": 867215818177, "Max latency (ns)": 868016965588, "Mean latency (ns)": 206498870097, "Min duration satisfied": "Yes", "Min latency (ns)": 883199682, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 93371.4, "Scenario": "offline", "accelerator_cooling_type": "passive", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 3, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "1.4.8", "characteristics.accuracy": 76.028, "characteristics.good": 38014, "characteristics.power": 1101.2828341013828, "characteristics.power.normalized_per_core": 367.0942780337943, "characteristics.power.normalized_per_processor": 367.0942780337943, "characteristics.samples_per_second": 93371.4, "characteristics.samples_per_second.normalized_per_core": 31123.8, "characteristics.samples_per_second.normalized_per_processor": 31123.8, "characteristics.total": 50000, "ck_system": "R7525_A100_PCIe-40GBx3_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "filesystem": "ext4", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_cooling_type": "passive", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "2.5 GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "4.20.20.20", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "normalize_cores": 3, "normalize_processors": 3, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/R7525_A100_PCIe-40GBx3_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_settings": "closed/DellEMC/measurements/R7525_A100_PCIe-40GBx3_TRT/R7525_A100-PCIe-40GBx3_power_settings.adoc", "print_timestamps": 0, "problem": false, "psu_details": "2x1100W", "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 81048000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_cooling_type": "air", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_A100_PCIe-40GBx3_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 122800, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "a104bf5d4391b481", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 707845185151, "90.00 percentile latency (ns)": 1274158217375, "95.00 percentile latency (ns)": 1344947469763, "97.00 percentile latency (ns)": 1373270804360, "99.00 percentile latency (ns)": 1401602653482, "99.90 percentile latency (ns)": 1414337647977, "Max latency (ns)": 1415744961340, "Mean latency (ns)": 148884519576, "Min duration satisfied": "Yes", "Min latency (ns)": 372580547, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 23309.3, "Scenario": "offline", "accelerator_cooling_type": "passive", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "1.1.3", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.power": 794.4642655367247, "characteristics.power.normalized_per_core": 198.61606638418118, "characteristics.power.normalized_per_processor": 198.61606638418118, "characteristics.samples_per_second": 23309.3, "characteristics.samples_per_second.normalized_per_core": 5827.325, "characteristics.samples_per_second.normalized_per_processor": 5827.325, "characteristics.total": 50000, "ck_system": "XE2420_datacenter_T4x4_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "filesystem": "ext3/ext4", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_cooling_type": "passive", "host_memory_capacity": "384 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC on", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "4.40.10.00", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/XE2420_datacenter_T4x4_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_settings": "closed/DellEMC/measurements/XE2420_datacenter_T4x4_TRT/XE2420_T4x4_power_settings.adoc", "print_timestamps": 0, "problem": false, "psu_details": "2x2000W", "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 33000000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_cooling_type": "air", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE2420_datacenter_T4x4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 50000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "5bad17c63d244b88", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 841063594554, "90.00 percentile latency (ns)": 841063594554, "95.00 percentile latency (ns)": 841063594554, "97.00 percentile latency (ns)": 841063594554, "99.00 percentile latency (ns)": 841063594554, "99.90 percentile latency (ns)": 841063594554, "Max latency (ns)": 841063594554, "Mean latency (ns)": 841063594554, "Min duration satisfied": "Yes", "Min latency (ns)": 841063594554, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "SUT", "Samples per second": 2746.52, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "N/A", "accelerator_memory_configuration": "", "accelerator_model_name": "N/A", "accelerator_on-chip_memories": "", "accelerators_per_node": 0, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.292, "characteristics.good": 38146, "characteristics.samples_per_second": 2746.52, "characteristics.samples_per_second.normalized_per_core": 49.045, "characteristics.samples_per_second.normalized_per_processor": 1373.26, "characteristics.total": 50000, "ck_system": "R750_Intel6330_openvino", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "OpenVINO", "host_memory_capacity": "1TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "2.0GHz", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Gold 6330 CPU @ 2.0GHz", "host_processors_per_node": 2, "host_storage_capacity": "1.5TB", "host_storage_type": "NVMe", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "normalize_cores": 56, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/R750_Intel6330_openvino", "number_of_nodes": 1, "operating_system": "Ubuntu 20.10 (GNU/Linux 5.8.0-45-generic x86_64)", "other_software_stack": "MKL-DNN; URL: github.com/intel/mkl-dnn", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 1024, "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "no", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 2310000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_int8.xml", "status": "preview", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R750_Intel6330_openvino", "system_name": "Dell EMC PowerEdge R750", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 3500, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "52a488d0e9abb22d", "use_accelerator": false, "weight_data_types": "int8", "weight_transformations": "TF -> OpenVINO (please refer to closed/Intel/calibration/OpenVINO)" }, { "50.00 percentile latency (ns)": 331389542492, "90.00 percentile latency (ns)": 594973326290, "95.00 percentile latency (ns)": 627918695655, "97.00 percentile latency (ns)": 641112337518, "99.00 percentile latency (ns)": 654289615365, "99.90 percentile latency (ns)": 660221091816, "Max latency (ns)": 660887701080, "Mean latency (ns)": 48245625088, "Min duration satisfied": "Yes", "Min latency (ns)": 2156954115, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 295724, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 10, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.028, "characteristics.good": 38014, "characteristics.samples_per_second": 295724, "characteristics.samples_per_second.normalized_per_core": 29572.4, "characteristics.samples_per_second.normalized_per_processor": 29572.4, "characteristics.total": 50000, "ck_system": "DSS8440_A100-PCIE-40GBx10_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3.84 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 10, "normalize_processors": 10, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/DSS8440_A100-PCIE-40GBx10_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 195440520, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DSS8440_A100-PCIE-40GBx10_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 296122, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "148446d8c7021aa0", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 359709497066, "90.00 percentile latency (ns)": 646707978482, "95.00 percentile latency (ns)": 682586373421, "97.00 percentile latency (ns)": 696916161199, "99.00 percentile latency (ns)": 711301706940, "99.90 percentile latency (ns)": 717766590884, "Max latency (ns)": 718447900874, "Mean latency (ns)": 30878880181, "Min duration satisfied": "Yes", "Min latency (ns)": 780621739, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 156170, "Scenario": "offline", "accelerator_cooling_type": "", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 156170, "characteristics.samples_per_second.normalized_per_core": 39042.5, "characteristics.samples_per_second.normalized_per_processor": 39042.5, "characteristics.total": 50000, "ck_system": "XE8545_7763_A100-SXM4-80GBx4_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_cooling_type": "", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3 TB", "host_storage_type": "NVMe SSD", "hw_notes": "500W A100-SXM-80GB", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/XE8545_7763_A100-SXM4-80GBx4_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 112200000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE8545_7763_A100-SXM4-80GBx4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 170000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "94324227aef08474", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 339567739577, "90.00 percentile latency (ns)": 610946408674, "95.00 percentile latency (ns)": 644855992948, "97.00 percentile latency (ns)": 658429024875, "99.00 percentile latency (ns)": 671992624635, "99.90 percentile latency (ns)": 678127982599, "Max latency (ns)": 678762570232, "Mean latency (ns)": 159888757634, "Min duration satisfied": "Yes", "Min latency (ns)": 797119306, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 151202, "Scenario": "offline", "accelerator_cooling_type": "", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-SXM-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.028, "characteristics.good": 38014, "characteristics.samples_per_second": 151202, "characteristics.samples_per_second.normalized_per_core": 37800.5, "characteristics.samples_per_second.normalized_per_processor": 37800.5, "characteristics.total": 50000, "ck_system": "XE8545_7713_A100-SXM4-40GBx4", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_cooling_type": "", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/XE8545_7713_A100-SXM4-40GBx4", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 102630000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE8545_7713_A100-SXM4-40GBx4", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 155500, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "4f2cbf24fad92fdb", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 341448853988, "90.00 percentile latency (ns)": 614103165838, "95.00 percentile latency (ns)": 648195203560, "97.00 percentile latency (ns)": 661821717001, "99.00 percentile latency (ns)": 675480303442, "99.90 percentile latency (ns)": 681602722034, "Max latency (ns)": 682268375489, "Mean latency (ns)": 161700727328, "Min duration satisfied": "Yes", "Min latency (ns)": 726577560, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 150425, "Scenario": "offline", "accelerator_cooling_type": "", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-SXM-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 150425, "characteristics.samples_per_second.normalized_per_core": 37606.25, "characteristics.samples_per_second.normalized_per_processor": 37606.25, "characteristics.total": 50000, "ck_system": "XE8545_7763_A100-SXM4-40GBx4_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_cooling_type": "", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/XE8545_7763_A100-SXM4-40GBx4_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 102630000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE8545_7763_A100-SXM4-40GBx4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 155500, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "a99416f312a333bb", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 329917385979, "90.00 percentile latency (ns)": 593542381566, "95.00 percentile latency (ns)": 626351983931, "97.00 percentile latency (ns)": 639559334950, "99.00 percentile latency (ns)": 652708560084, "99.90 percentile latency (ns)": 658598172858, "Max latency (ns)": 659289180418, "Mean latency (ns)": 110224587965, "Min duration satisfied": "Yes", "Min latency (ns)": 1080987710, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 127337, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.028, "characteristics.good": 38014, "characteristics.samples_per_second": 127337, "characteristics.samples_per_second.normalized_per_core": 31834.25, "characteristics.samples_per_second.normalized_per_processor": 31834.25, "characteristics.total": 50000, "ck_system": "R750xa_A100-PCIE-40GBx4_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "256 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/R750xa_A100-PCIE-40GBx4_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 83952000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "preview", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R750xa_A100-PCIE-40GBx4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 127200, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "ad301f6f5154edf9", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 331994875018, "90.00 percentile latency (ns)": 597176302235, "95.00 percentile latency (ns)": 630317382886, "97.00 percentile latency (ns)": 643584111618, "99.00 percentile latency (ns)": 656834233728, "99.90 percentile latency (ns)": 662799547763, "Max latency (ns)": 663438769435, "Mean latency (ns)": 42429864911, "Min duration satisfied": "Yes", "Min latency (ns)": 1418257185, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 192000, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "48 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA A40", "accelerator_on-chip_memories": "", "accelerators_per_node": 10, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.028, "characteristics.good": 38014, "characteristics.samples_per_second": 192000, "characteristics.samples_per_second.normalized_per_core": 19200.0, "characteristics.samples_per_second.normalized_per_processor": 19200.0, "characteristics.total": 50000, "ck_system": "DSS8440_A40x10_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3.84 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 10, "normalize_processors": 10, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/DSS8440_A40x10_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 127380000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DSS8440_A40x10_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 193000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "490ba81c04e8e06b", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 351134623723, "90.00 percentile latency (ns)": 633866273447, "95.00 percentile latency (ns)": 669084366215, "97.00 percentile latency (ns)": 683189534745, "99.00 percentile latency (ns)": 697442472373, "99.90 percentile latency (ns)": 703815128309, "Max latency (ns)": 704344270415, "Mean latency (ns)": 351447909819, "Min duration satisfied": "Yes", "Min latency (ns)": 618647647, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 62069.6, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 2, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.028, "characteristics.good": 38014, "characteristics.samples_per_second": 62069.6, "characteristics.samples_per_second.normalized_per_core": 31034.8, "characteristics.samples_per_second.normalized_per_processor": 31034.8, "characteristics.total": 50000, "ck_system": "R7525_A100-PCIe-40GBx2_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "2.5 GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "normalize_cores": 2, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/R7525_A100-PCIe-40GBx2_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 43718400, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_A100-PCIe-40GBx2_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 66240, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "80e45a8b25a6879b", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 420763396502, "90.00 percentile latency (ns)": 757598716810, "95.00 percentile latency (ns)": 799584338085, "97.00 percentile latency (ns)": 816421455409, "99.00 percentile latency (ns)": 833361577862, "99.90 percentile latency (ns)": 840852588710, "Max latency (ns)": 841631909283, "Mean latency (ns)": 193265236773, "Min duration satisfied": "Yes", "Min latency (ns)": 1112752756, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 96298.6, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 3, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.028, "characteristics.good": 38014, "characteristics.samples_per_second": 96298.6, "characteristics.samples_per_second.normalized_per_core": 32099.533333333336, "characteristics.samples_per_second.normalized_per_processor": 32099.533333333336, "characteristics.total": 50000, "ck_system": "R740_A100-PCIe-40GBx3_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "384 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1.8 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC on", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "normalize_cores": 3, "normalize_processors": 3, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/R740_A100-PCIe-40GBx3_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 81048000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R740_A100-PCIe-40GBx3_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 122800, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "4708ebc97dc4c61c", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 335710751557, "90.00 percentile latency (ns)": 604383131222, "95.00 percentile latency (ns)": 638029313146, "97.00 percentile latency (ns)": 651439291218, "99.00 percentile latency (ns)": 664862041536, "99.90 percentile latency (ns)": 671000663893, "Max latency (ns)": 671683088212, "Mean latency (ns)": 335796909807, "Min duration satisfied": "Yes", "Min latency (ns)": 444030304, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 40435.2, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "48 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Quadro RTX 8000", "accelerator_on-chip_memories": "", "accelerators_per_node": 3, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.028, "characteristics.good": 38014, "characteristics.samples_per_second": 40435.2, "characteristics.samples_per_second.normalized_per_core": 13478.4, "characteristics.samples_per_second.normalized_per_processor": 13478.4, "characteristics.total": 50000, "ck_system": "R7525_QuadroRTX8000x3_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1.8 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC on", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "normalize_cores": 3, "normalize_processors": 3, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/R7525_QuadroRTX8000x3_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 27159660, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_QuadroRTX8000x3_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 41151, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "044469b2b6f398e4", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 32538559742, "90.00 percentile latency (ns)": 58949570317, "95.00 percentile latency (ns)": 62276987558, "97.00 percentile latency (ns)": 63598674060, "99.00 percentile latency (ns)": 64941552006, "99.90 percentile latency (ns)": 65535918182, "Max latency (ns)": 65594167025, "Mean latency (ns)": 32632774954, "Min duration satisfied": "Yes", "Min latency (ns)": 49056132, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 22438, "Scenario": "offline", "accelerator_frequency": "-", "accelerator_host_interconnect": "PCIe 3.0", "accelerator_interconnect": "PCIe 3.0 ", "accelerator_interconnect_topology": "-", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "Tesla T4", "accelerator_on-chip_memories": 1, "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "characteristics.accuracy": 76.034, "characteristics.good": 38017, "characteristics.samples_per_second": 22438, "characteristics.samples_per_second.normalized_per_core": 5609.5, "characteristics.samples_per_second.normalized_per_processor": 5609.5, "characteristics.total": 50000, "ck_system": "R740_T4x4_tensorrt", "ck_used": true, "cooling": "-", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT NGC 19.09", "host_memory_capacity": "384 GB", "host_memory_configuration": "-", "host_networking": "-", "host_networking_topology": "-", "host_processor_caches": "-", "host_processor_core_count": 120, "host_processor_frequency": "3.00GHz", "host_processor_interconnect": "UPI", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "110 GB", "host_storage_type": "SSD", "hw_notes": "none", "informal_model": "resnet", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.5, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/DellEMC/results/R740_T4x4_tensorrt", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "Docker 19.03.2; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 3133965575612453542, "retraining": "N", "sample_index_rng_seed": 665484352860916858, "samples_per_query": 1471800, "schedule_rng_seed": 3622009729038561421, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "nonePowered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R740_T4x4_tensorrt", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "target_latency (ns)": 0, "target_qps": 22300, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "ab4ce59526f6b357", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 33168122488, "90.00 percentile latency (ns)": 60062547021, "95.00 percentile latency (ns)": 63444782367, "97.00 percentile latency (ns)": 64794752336, "99.00 percentile latency (ns)": 66147427677, "99.90 percentile latency (ns)": 66755954172, "Max latency (ns)": 66820665601, "Mean latency (ns)": 33258271761, "Min duration satisfied": "Yes", "Min latency (ns)": 78092424, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 71214.5, "Scenario": "offline", "accelerator_frequency": "-", "accelerator_host_interconnect": "PCIe 3.0", "accelerator_interconnect": "PCIe 3.0 ", "accelerator_interconnect_topology": "-", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "Tesla T4", "accelerator_on-chip_memories": 1, "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "characteristics.accuracy": 70.814, "characteristics.good": 35407, "characteristics.samples_per_second": 71214.5, "characteristics.samples_per_second.normalized_per_core": 17803.625, "characteristics.samples_per_second.normalized_per_processor": 17803.625, "characteristics.total": 50000, "ck_system": "R740_T4x4_tensorrt", "ck_used": true, "cooling": "-", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT NGC 19.09", "host_memory_capacity": "384 GB", "host_memory_configuration": "-", "host_networking": "-", "host_networking_topology": "-", "host_processor_caches": "-", "host_processor_core_count": 120, "host_processor_frequency": "3.00GHz", "host_processor_interconnect": "UPI", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "110 GB", "host_storage_type": "SSD", "hw_notes": "none", "informal_model": "mobilenet", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.5, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/DellEMC/results/R740_T4x4_tensorrt", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "Docker 19.03.2; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 3133965575612453542, "retraining": "N", "sample_index_rng_seed": 665484352860916858, "samples_per_query": 4758600, "schedule_rng_seed": 3622009729038561421, "starting_weights_filename": "mobilenet_sym_no_bn.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "nonePowered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R740_T4x4_tensorrt", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "target_latency (ns)": 0, "target_qps": 72100, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "b4d6e81354e228e3", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 32921454101, "90.00 percentile latency (ns)": 59246354310, "95.00 percentile latency (ns)": 62536914332, "97.00 percentile latency (ns)": 63853027915, "99.00 percentile latency (ns)": 65169358659, "99.90 percentile latency (ns)": 65761241084, "Max latency (ns)": 65826669598, "Mean latency (ns)": 32917170593, "Min duration satisfied": "Yes", "Min latency (ns)": 4316678, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "XLNX_AI", "Samples per second": 6015.8, "Scenario": "offline", "accelerator_memory_capacity": "16 GB per accelerator", "accelerator_memory_configuration": "DDR", "accelerator_model_name": "xilinx_vck5000_v4e", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.768, "characteristics.good": 37884, "characteristics.samples_per_second": 6015.8, "characteristics.samples_per_second.normalized_per_core": 6015.8, "characteristics.samples_per_second.normalized_per_processor": 6015.8, "characteristics.total": 50000, "ck_system": "R740xd_vck5000", "ck_used": true, "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "open", "formal_model": "resnet50", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TF", "host_memory_capacity": "196 GB", "host_processor_core_count": 120, "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "20 GB", "host_storage_type": "HDD", "informal_model": "resnet50", "input_data_types": "", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/open/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/open/DellEMC/results/R740xd_vck5000", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "Xilinx Vitis Runtime; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 24576, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 396000, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "", "status": "rdi", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R740xd_vck5000", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 6000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "4f84b9da27833497", "use_accelerator": true, "weight_data_types": "", "weight_transformations": "" }, { "50.00 percentile latency (ns)": 33356965430, "90.00 percentile latency (ns)": 60437892477, "95.00 percentile latency (ns)": 63854990307, "97.00 percentile latency (ns)": 65206660599, "99.00 percentile latency (ns)": 66575129540, "99.90 percentile latency (ns)": 67188021897, "Max latency (ns)": 67243671938, "Mean latency (ns)": 33462546870, "Min duration satisfied": "Yes", "Min latency (ns)": 60376298, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 49075.2, "Scenario": "offline", "accelerator_frequency": "1590MHz", "accelerator_host_interconnect": "PCI Express 3.0", "accelerator_interconnect": "PCI Express 3.0", "accelerator_interconnect_topology": "4 Accelerators per CPU", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Tesla T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.918, "characteristics.good": 37959, "characteristics.samples_per_second": 49075.2, "characteristics.samples_per_second.normalized_per_core": 6134.4, "characteristics.samples_per_second.normalized_per_processor": 6134.4, "characteristics.total": 50000, "ck_system": "R7525_T4x8_TRT", "ck_used": true, "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0, cuDNN 8.0.2, cuBLAS 11.2.0, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "1 TB", "host_memory_configuration": "8x64GB DDR4-3200 HMAA8GR7AJR4N-XN RDIMM ECC", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "2MB+16MB+128MB", "host_processor_core_count": 120, "host_processor_frequency": "2.35GHz", "host_processor_interconnect": "Infinity Fabric", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3 TB (5x800GB WUSTR6480ASS200 in RAID5)", "host_storage_type": "3D-TLC Solid State with 12Gbps SAS", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/R7525_T4x8_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "docker 19.03.12, python 3.6.8, gcc 5.5.0, onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 3300000, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_T4x8_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 50000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "282148aa9305f54a", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 90149970883, "90.00 percentile latency (ns)": 90149970883, "95.00 percentile latency (ns)": 90149970883, "97.00 percentile latency (ns)": 90149970883, "99.00 percentile latency (ns)": 90149970883, "99.90 percentile latency (ns)": 90149970883, "Max latency (ns)": 90149970883, "Mean latency (ns)": 90149970883, "Min duration satisfied": "Yes", "Min latency (ns)": 90149970883, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "SUT\u0000", "Samples per second": 2562.4, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "N/A", "accelerator_memory_configuration": "", "accelerator_model_name": "N/A", "accelerator_on-chip_memories": "", "accelerators_per_node": 0, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.292, "characteristics.good": 38146, "characteristics.samples_per_second": 2562.4, "characteristics.samples_per_second.normalized_per_core": 10.676666666666668, "characteristics.samples_per_second.normalized_per_processor": 1281.2, "characteristics.total": 50000, "ck_system": "R740xd8280M_openvino", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "OpenVINO", "host_memory_capacity": "376GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "2.70GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1.59TB", "host_storage_type": "SATA", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 240, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/R740xd8280M_openvino", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "MKL-DNN; URL: github.com/intel/mkl-dnn; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "no", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 231000, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "/home/dvdt/drop/Models/resnet50/resnet50_int8.xml", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R740xd8280M_openvino", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 3500, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "ee5e6a7fe40a8831", "use_accelerator": false, "weight_data_types": "int8", "weight_transformations": "TF -> OpenVINO (please refer to closed/Intel/calibration/OpenVINO)" }, { "50.00 percentile latency (ns)": 32964310621, "90.00 percentile latency (ns)": 59378911338, "95.00 percentile latency (ns)": 62683231695, "97.00 percentile latency (ns)": 63996546964, "99.00 percentile latency (ns)": 65323049586, "99.90 percentile latency (ns)": 65917214253, "Max latency (ns)": 65973229191, "Mean latency (ns)": 32984218216, "Min duration satisfied": "Yes", "Min latency (ns)": 52992043, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 25141.2, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.samples_per_second": 25141.2, "characteristics.samples_per_second.normalized_per_core": 6285.3, "characteristics.samples_per_second.normalized_per_processor": 6285.3, "characteristics.total": 50000, "ck_system": "XE2420_T4x4_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "384 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "2.10GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/XE2420_T4x4_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1658646, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE2420_T4x4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 25131, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "337a104d34a689dc", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 33759317093, "90.00 percentile latency (ns)": 60793778878, "95.00 percentile latency (ns)": 64140924868, "97.00 percentile latency (ns)": 65531980765, "99.00 percentile latency (ns)": 66892729588, "99.90 percentile latency (ns)": 67459038759, "Max latency (ns)": 67513194648, "Mean latency (ns)": 33799448284, "Min duration satisfied": "Yes", "Min latency (ns)": 248955713, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 119210, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "48 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Quadro RTX 8000", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.064, "characteristics.good": 38032, "characteristics.samples_per_second": 119210, "characteristics.samples_per_second.normalized_per_core": 14901.25, "characteristics.samples_per_second.normalized_per_processor": 14901.25, "characteristics.total": 50000, "ck_system": "DSS8440_QuadroRTX8000x8_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "384 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1.8 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/DSS8440_QuadroRTX8000x8_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 8048238, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DSS8440_QuadroRTX8000x8_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 121943, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "96e458f9253c93e9", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 274711858597, "90.00 percentile latency (ns)": 274711858597, "95.00 percentile latency (ns)": 274711858597, "97.00 percentile latency (ns)": 274711858597, "99.00 percentile latency (ns)": 274711858597, "99.90 percentile latency (ns)": 274711858597, "Max latency (ns)": 274711858597, "Mean latency (ns)": 274711858597, "Min duration satisfied": "Yes", "Min latency (ns)": 274711858597, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "SUT\u0000", "Samples per second": 2402.52, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "N/A", "accelerator_memory_configuration": "", "accelerator_model_name": "N/A", "accelerator_on-chip_memories": "", "accelerators_per_node": 0, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.292, "characteristics.good": 38146, "characteristics.samples_per_second": 2402.52, "characteristics.samples_per_second.normalized_per_core": 10.0105, "characteristics.samples_per_second.normalized_per_processor": 1201.26, "characteristics.total": 50000, "ck_system": "R640_6248R_VMwarevSphere7.0_openvino", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "OpenVINO", "host_memory_capacity": "192GB", "host_memory_configuration": "12 X 16 GB 2933 MT/s", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "21TB", "host_storage_type": "VSAN", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 240, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/R640_6248R_VMwarevSphere7.0_openvino", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "VMware vSphere 7.0.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "no", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 660000, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "/home/dvdt/drop/Models/resnet50/resnet50_int8.xml", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Tests are run inside 1 virtual machine with 96 vCPU, 192GB memory, 100GB storagePowered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R640_6248R_VMwarevSphere7.0_openvino", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 10000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "c05487a930659bc9", "use_accelerator": false, "weight_data_types": "int8", "weight_transformations": "TF -> OpenVINO (please refer to closed/Intel/calibration/OpenVINO)" }, { "50.00 percentile latency (ns)": 33049936350, "90.00 percentile latency (ns)": 59489112966, "95.00 percentile latency (ns)": 62802173025, "97.00 percentile latency (ns)": 64135755338, "99.00 percentile latency (ns)": 65441980453, "99.90 percentile latency (ns)": 66012792283, "Max latency (ns)": 66093161420, "Mean latency (ns)": 33093965056, "Min duration satisfied": "Yes", "Min latency (ns)": 220758163, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 150388, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Quadro RTX 6000", "accelerator_on-chip_memories": "", "accelerators_per_node": 10, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.064, "characteristics.good": 38032, "characteristics.samples_per_second": 150388, "characteristics.samples_per_second.normalized_per_core": 15038.8, "characteristics.samples_per_second.normalized_per_processor": 15038.8, "characteristics.total": 50000, "ck_system": "DSS8440_QuadroRTX6000x10_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "384 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1.8 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 10, "normalize_processors": 10, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/DSS8440_QuadroRTX6000x10_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 9939600, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DSS8440_QuadroRTX6000x10_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 150600, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "cc9c24577babb769", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 34771357922, "90.00 percentile latency (ns)": 62693506386, "95.00 percentile latency (ns)": 66109360860, "97.00 percentile latency (ns)": 67490581955, "99.00 percentile latency (ns)": 69001987303, "99.90 percentile latency (ns)": 69521480459, "Max latency (ns)": 69521678932, "Mean latency (ns)": 34791227895, "Min duration satisfied": "Yes", "Min latency (ns)": 287868075, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 62884.6, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIE-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 2, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.918, "characteristics.good": 37959, "characteristics.samples_per_second": 62884.6, "characteristics.samples_per_second.normalized_per_core": 31442.3, "characteristics.samples_per_second.normalized_per_processor": 31442.3, "characteristics.total": 50000, "ck_system": "R7525_A100x2_TRT", "ck_used": true, "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0, cuDNN 8.0.2, cuBLAS 11.2.0, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "2.50GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1.84 TB", "host_storage_type": "NVMe", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 2, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/R7525_A100x2_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "docker 19.03.12, python 3.6.8, gcc 5.5.0, onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 4371840, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_A100x2_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 66240, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "d632e5ca0954450a", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 37233666071, "90.00 percentile latency (ns)": 67765826421, "95.00 percentile latency (ns)": 71628116364, "97.00 percentile latency (ns)": 73190807448, "99.00 percentile latency (ns)": 74741990437, "99.90 percentile latency (ns)": 75431244531, "Max latency (ns)": 75509582235, "Mean latency (ns)": 37412618214, "Min duration satisfied": "Yes", "Min latency (ns)": 56007628, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 22725.6, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA GRID T4-16Q", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.918, "characteristics.good": 37959, "characteristics.samples_per_second": 22725.6, "characteristics.samples_per_second.normalized_per_core": 5681.4, "characteristics.samples_per_second.normalized_per_processor": 5681.4, "characteristics.total": 50000, "ck_system": "R740_vT4x4_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "382 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1 TB", "host_storage_type": "Hard Disk vSAN", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/R740_vT4x4_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1716000, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "VMWarePowered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R740_vT4x4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 26000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "67884179bfcd91f7", "use_accelerator": true, "vmt_memory_capacity": "300 GB", "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 31271403063, "90.00 percentile latency (ns)": 56336082721, "95.00 percentile latency (ns)": 59469267301, "97.00 percentile latency (ns)": 60740373348, "99.00 percentile latency (ns)": 62024491848, "99.90 percentile latency (ns)": 62541673455, "Max latency (ns)": 62542943339, "Mean latency (ns)": 31321563726, "Min duration satisfied": "Yes", "Min latency (ns)": 274925016, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 95228, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIE-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 3, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.samples_per_second": 95228, "characteristics.samples_per_second.normalized_per_core": 31742.666666666668, "characteristics.samples_per_second.normalized_per_processor": 31742.666666666668, "characteristics.total": 50000, "ck_system": "R7525_A100x3_TRT", "ck_used": true, "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0, cuDNN 8.0.2, cuBLAS 11.2.0, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "2.40GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1.5 TB", "host_storage_type": "NVMe", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 3, "normalize_processors": 3, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/R7525_A100x3_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "docker 19.03.12, python 3.6.8, gcc 5.5.0, onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 5955840, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "preview", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_A100x3_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 90240, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "d3824884a532b466", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 33363332267, "90.00 percentile latency (ns)": 59999651357, "95.00 percentile latency (ns)": 63320695972, "97.00 percentile latency (ns)": 64662046838, "99.00 percentile latency (ns)": 65985843228, "99.90 percentile latency (ns)": 66556493772, "Max latency (ns)": 66626884232, "Mean latency (ns)": 33380843509, "Min duration satisfied": "Yes", "Min latency (ns)": 219011829, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 149124, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "48 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Quadro RTX 8000", "accelerator_on-chip_memories": "", "accelerators_per_node": 10, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.064, "characteristics.good": 38032, "characteristics.samples_per_second": 149124, "characteristics.samples_per_second.normalized_per_core": 14912.4, "characteristics.samples_per_second.normalized_per_processor": 14912.4, "characteristics.total": 50000, "ck_system": "DSS8440_QuadroRTX8000x10_TRT", "ck_used": true, "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0, cuDNN 8.0.2, cuBLAS 11.2.0, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "2.10GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1.84 TB", "host_storage_type": "NVMe", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 10, "normalize_processors": 10, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/DSS8440_QuadroRTX8000x10_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "docker 19.03.12, python 3.6.8, gcc 5.5.0, onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 9935640, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DSS8440_QuadroRTX8000x10_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 150540, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "a81cf3af33487aad", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 33258354691, "90.00 percentile latency (ns)": 60065184899, "95.00 percentile latency (ns)": 63423365764, "97.00 percentile latency (ns)": 64774392622, "99.00 percentile latency (ns)": 66118358809, "99.90 percentile latency (ns)": 66719697251, "Max latency (ns)": 66790010848, "Mean latency (ns)": 33317168035, "Min duration satisfied": "Yes", "Min latency (ns)": 104640447, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 96610.6, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 16, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.samples_per_second": 96610.6, "characteristics.samples_per_second.normalized_per_core": 6038.1625, "characteristics.samples_per_second.normalized_per_processor": 6038.1625, "characteristics.total": 50000, "ck_system": "DSS8440_T4x16_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "384 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1.84 TB", "host_storage_type": "NVMe", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 16, "normalize_processors": 16, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/DSS8440_T4x16_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 6452622, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DSS8440_T4x16_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 97767, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "cacea4c7cac1e2fc", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 69551908830, "90.00 percentile latency (ns)": 127220193536, "95.00 percentile latency (ns)": 134467879275, "97.00 percentile latency (ns)": 137348014017, "99.00 percentile latency (ns)": 140256640487, "99.90 percentile latency (ns)": 141552371881, "Max latency (ns)": 141691202054, "Mean latency (ns)": 70027598811, "Min duration satisfied": "Yes", "Min latency (ns)": 59744690, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 23290.1, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.918, "characteristics.good": 37959, "characteristics.samples_per_second": 23290.1, "characteristics.samples_per_second.normalized_per_core": 5822.525, "characteristics.samples_per_second.normalized_per_processor": 5822.525, "characteristics.total": 50000, "ck_system": "R7515_T4x4_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.0.14, CUDA 11.0.207", "host_memory_capacity": "256 GB", "host_memory_configuration": "DDR-4", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "2.0GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 1, "host_storage_capacity": "3.2 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC on", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/R7515_T4x4_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.0.14, CUDA 11.0.27, cuDNN 8.0.2, DALI 0.25.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 3300000, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7515_T4x4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 50000, "task": "image classification", "task2": "image classification", "total_cores": 120, "uid": "28fcf21f3ba7a6cf", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 66858066695, "90.00 percentile latency (ns)": 66858066695, "95.00 percentile latency (ns)": 66858066695, "97.00 percentile latency (ns)": 66858066695, "99.00 percentile latency (ns)": 66858066695, "99.90 percentile latency (ns)": 66858066695, "Max latency (ns)": 66858066695, "Mean latency (ns)": 66858066695, "Min duration satisfied": "Yes", "Min latency (ns)": 66858066695, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "SUT\u0000", "Samples per second": 2467.91, "Scenario": "offline", "accelerator_frequency": "-", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "-", "accelerator_memory_configuration": "-", "accelerator_model_name": "-", "accelerator_on-chip_memories": "-", "accelerators_per_node": 0, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.292, "characteristics.good": 38146, "characteristics.samples_per_second": 2467.91, "characteristics.samples_per_second.normalized_per_core": 10.282958333333333, "characteristics.samples_per_second.normalized_per_processor": 1233.955, "characteristics.total": 50000, "ck_system": "R640_6248R-openvino-2020", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "OpenVINO 2020", "host_memory_capacity": "188GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "3.00GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "200GB", "host_storage_type": "SATA", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 240, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/R640_6248R-openvino-2020", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "no", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 165000, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "/home/dividiti/mlperf_ext_ov_cpp_v0.7-master/Models/resnet50/resnet50_int8.xml", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R640_6248R-openvino-2020", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 2500, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "b472e8c74d3f9aea", "use_accelerator": false, "weight_data_types": "int8", "weight_transformations": "TF -> OpenVINO (please refer to closed/Intel/calibration/OpenVINO)" }, { "50.00 percentile latency (ns)": 32924513772, "90.00 percentile latency (ns)": 59452551706, "95.00 percentile latency (ns)": 62797794725, "97.00 percentile latency (ns)": 64135457908, "99.00 percentile latency (ns)": 65473299907, "99.90 percentile latency (ns)": 66076419993, "Max latency (ns)": 66137214295, "Mean latency (ns)": 32998316772, "Min duration satisfied": "Yes", "Min latency (ns)": 125333281, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 73901.4, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 12, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.samples_per_second": 73901.4, "characteristics.samples_per_second.normalized_per_core": 6158.45, "characteristics.samples_per_second.normalized_per_processor": 6158.45, "characteristics.total": 50000, "ck_system": "DSS8440_T4x12_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 12, "normalize_processors": 12, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/DSS8440_T4x12_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 4887630, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DSS8440_T4x12_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 74055, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "24b004c88a11c726", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 36404805408, "90.00 percentile latency (ns)": 65472702159, "95.00 percentile latency (ns)": 69178481266, "97.00 percentile latency (ns)": 70622484004, "99.00 percentile latency (ns)": 72066945952, "99.90 percentile latency (ns)": 72692344968, "Max latency (ns)": 72737207737, "Mean latency (ns)": 36432626629, "Min duration satisfied": "Yes", "Min latency (ns)": 369897683, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 59130.1, "Scenario": "offline", "accelerator_frequency": "1770MHz", "accelerator_host_interconnect": "PCI Express 3.0", "accelerator_interconnect": "PCI Express 3.0", "accelerator_interconnect_topology": "2 Accelerators per CPU", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Quadro RTX 6000", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.064, "characteristics.good": 38032, "characteristics.samples_per_second": 59130.1, "characteristics.samples_per_second.normalized_per_core": 14782.525, "characteristics.samples_per_second.normalized_per_processor": 14782.525, "characteristics.total": 50000, "ck_system": "C4140_QuadroRTX6000x4_TRT", "ck_used": true, "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0, cuDNN 8.0.2, cuBLAS 11.2.0, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "384 GB", "host_memory_configuration": "6x16GB DDR4-2666 HMA82GR7AFR8N-VK RDIMM ECC", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "1.25MB+20MB+27.5MB", "host_processor_core_count": 120, "host_processor_frequency": "2.40GHz", "host_processor_interconnect": "Ultra Path Interconnect", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1.6 TB (1x1.6TB Dell Express Flash PM1725a 1.6TB AIC)", "host_storage_type": "3D-TLC Solid State with PCIe NVME x8 Interface", "hw_notes": "ECC off. RTX6000 is available as a special config thru Dell DSS or OEM for PowerEdge C4140", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/C4140_QuadroRTX6000x4_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "docker 19.03.12, python 3.6.8, gcc 5.5.0, onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 4300956, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/C4140_QuadroRTX6000x4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 65166, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "0c2af704420b9d5f", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 30377040753, "90.00 percentile latency (ns)": 54587239609, "95.00 percentile latency (ns)": 57640012358, "97.00 percentile latency (ns)": 58869981154, "99.00 percentile latency (ns)": 60093016598, "99.90 percentile latency (ns)": 60675436392, "Max latency (ns)": 60691606388, "Mean latency (ns)": 30354149838, "Min duration satisfied": "Yes", "Min latency (ns)": 222368223, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 44750.3, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "48 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Quadro RTX 8000", "accelerator_on-chip_memories": "", "accelerators_per_node": 3, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.064, "characteristics.good": 38032, "characteristics.samples_per_second": 44750.3, "characteristics.samples_per_second.normalized_per_core": 14916.766666666668, "characteristics.samples_per_second.normalized_per_processor": 14916.766666666668, "characteristics.total": 50000, "ck_system": "R7525_QuadroRTX8000x3_TRT", "ck_used": false, "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0, cuDNN 8.0.2, cuBLAS 11.2.0, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 32, "host_processor_frequency": "2.50GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7502", "host_processors_per_node": 2, "host_storage_capacity": "1.84 TB", "host_storage_type": "NVMe", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 3, "normalize_processors": 3, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/R7525_QuadroRTX8000x3_TRT", "number_of_nodes": 1, "operating_system": "CentOS Linux release 8.1.1911", "other_software_stack": "docker 19.03.12, python 3.6.8, gcc 5.5.0, onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 2715966, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_QuadroRTX8000x3_TRT", "system_name": "Dell EMC PowerEdge R7525 (3x Quadro RTX 8000)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 41151, "task": "image classification", "task2": "image classification", "total_cores": 64, "uid": "3490f847244612e9", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 68374519380, "90.00 percentile latency (ns)": 124314639006, "95.00 percentile latency (ns)": 131363743451, "97.00 percentile latency (ns)": 134172965451, "99.00 percentile latency (ns)": 137000712431, "99.90 percentile latency (ns)": 138267333707, "Max latency (ns)": 138398064752, "Mean latency (ns)": 68654000455, "Min duration satisfied": "Yes", "Min latency (ns)": 113297148, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 23844.3, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.918, "characteristics.good": 37959, "characteristics.samples_per_second": 23844.3, "characteristics.samples_per_second.normalized_per_core": 5961.075, "characteristics.samples_per_second.normalized_per_processor": 5961.075, "characteristics.total": 50000, "ck_system": "R740_T4x4_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.0.14, CUDA 11.0.207", "host_memory_capacity": "384 GB", "host_memory_configuration": "DDR-4", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "3.0GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3.84 TB", "host_storage_type": "SSD", "hw_notes": "ECC on", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/R740_T4x4_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.0.14, CUDA 11.0.207, cuDNN 8.0.2, DALI 0.25.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 3300000, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R740_T4x4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 0, "target_qps": 50000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "dd8a0b7dacc60f19", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" } ]