[ { "50.00 percentile latency (ns)": 3339882, "90.00 percentile latency (ns)": 3717742, "95.00 percentile latency (ns)": 3843592, "97.00 percentile latency (ns)": 3992515, "99.00 percentile latency (ns)": 4293186, "99.90 percentile latency (ns)": 4673668, "Completed samples per second": 77976.25, "Max latency (ns)": 12256687, "Mean latency (ns)": 3358800, "Min duration satisfied": "Yes", "Min latency (ns)": 2734218, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 77976.67, "accelerator_cooling_type": "passive", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 3, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "1.4.8", "characteristics.accuracy": 76.028, "characteristics.good": 38014, "characteristics.power": 1097.377166666667, "characteristics.power.normalized_per_core": 365.79238888888904, "characteristics.power.normalized_per_processor": 365.79238888888904, "characteristics.scheduled_queries_per_second": 77976.67, "characteristics.scheduled_queries_per_second.normalized_per_core": 25992.22333333333, "characteristics.scheduled_queries_per_second.normalized_per_processor": 25992.22333333333, "characteristics.total": 50000, "ck_system": "R7525_A100_PCIe-40GBx3_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": 
true, "division": "closed", "filesystem": "ext4", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_cooling_type": "passive", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "2.5 GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "4.20.20.20", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "normalize_cores": 3, "normalize_processors": 3, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/R7525_A100_PCIe-40GBx3_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_settings": "closed/DellEMC/measurements/R7525_A100_PCIe-40GBx3_TRT/R7525_A100-PCIe-40GBx3_power_settings.adoc", "print_timestamps": 0, "problem": false, "psu_details": "2x1100W", "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 
3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_cooling_type": "air", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_A100_PCIe-40GBx3_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 78000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "1b971a092af5fd3f", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 7123746, "90.00 percentile latency (ns)": 10467098, "95.00 percentile latency (ns)": 11436907, "97.00 percentile latency (ns)": 12144344, "99.00 percentile latency (ns)": 13789209, "99.90 percentile latency (ns)": 16598081, "Completed samples per second": 21691.05, "Max latency (ns)": 20180773, "Mean latency (ns)": 7799654, "Min duration satisfied": "Yes", "Min latency (ns)": 4135111, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 21691.28, "accelerator_cooling_type": "passive", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "1.1.3", "characteristics.accuracy": 76.03, 
"characteristics.good": 38015, "characteristics.power": 792.6940000000002, "characteristics.power.normalized_per_core": 198.17350000000005, "characteristics.power.normalized_per_processor": 198.17350000000005, "characteristics.scheduled_queries_per_second": 21691.28, "characteristics.scheduled_queries_per_second.normalized_per_core": 5422.82, "characteristics.scheduled_queries_per_second.normalized_per_processor": 5422.82, "characteristics.total": 50000, "ck_system": "XE2420_datacenter_T4x4_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "filesystem": "ext3/ext4", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_cooling_type": "passive", "host_memory_capacity": "384 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC on", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "4.40.10.00", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": 
"https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/XE2420_datacenter_T4x4_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_settings": "closed/DellEMC/measurements/XE2420_datacenter_T4x4_TRT/XE2420_T4x4_power_settings.adoc", "print_timestamps": 0, "problem": false, "psu_details": "2x2000W", "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_cooling_type": "air", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE2420_datacenter_T4x4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 21700, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "b1225418d8279935", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 6401544, "90.00 percentile latency (ns)": 9409049, "95.00 percentile latency (ns)": 10725572, "97.00 percentile latency (ns)": 11745188, "99.00 percentile latency (ns)": 14104902, "99.90 percentile latency (ns)": 21663555, "Completed samples per second": 1969.92, "Max latency (ns)": 53318235, "Mean latency (ns)": 7137914, "Min 
duration satisfied": "Yes", "Min latency (ns)": 5203541, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "SUT", "Scenario": "server", "Scheduled samples per second": 1969.95, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "N/A", "accelerator_memory_configuration": "", "accelerator_model_name": "N/A", "accelerator_on-chip_memories": "", "accelerators_per_node": 0, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.292, "characteristics.good": 38146, "characteristics.scheduled_queries_per_second": 1969.95, "characteristics.scheduled_queries_per_second.normalized_per_core": 35.17767857142857, "characteristics.scheduled_queries_per_second.normalized_per_processor": 984.975, "characteristics.total": 50000, "ck_system": "R750_Intel6330_openvino", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "OpenVINO", "host_memory_capacity": "1TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "2.0GHz", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Gold 6330 CPU @ 2.0GHz", "host_processors_per_node": 2, "host_storage_capacity": "1.5TB", "host_storage_type": "NVMe", "hw_notes": "", "informal_model": "resnet50", 
"input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "normalize_cores": 56, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/R750_Intel6330_openvino", "number_of_nodes": 1, "operating_system": "Ubuntu 20.10 (GNU/Linux 5.8.0-45-generic x86_64)", "other_software_stack": "MKL-DNN; URL: github.com/intel/mkl-dnn", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 1024, "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "no", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_int8.xml", "status": "preview", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R750_Intel6330_openvino", "system_name": "Dell EMC PowerEdge R750", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 1970, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "68988b4b45a380a5", "use_accelerator": false, "weight_data_types": "int8", "weight_transformations": "TF -> OpenVINO (please refer to closed/Intel/calibration/OpenVINO)" }, { "50.00 percentile latency (ns)": 8592974, "90.00 percentile latency (ns)": 9991062, "95.00 percentile latency (ns)": 10166934, "97.00 percentile latency (ns)": 10277605, "99.00 percentile latency (ns)": 10914124, "99.90 percentile latency (ns)": 13039466, "Completed 
samples per second": 263994.8, "Max latency (ns)": 17698236, "Mean latency (ns)": 8035230, "Min duration satisfied": "Yes", "Min latency (ns)": 3504051, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 263998.22, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 10, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.028, "characteristics.good": 38014, "characteristics.scheduled_queries_per_second": 263998.22, "characteristics.scheduled_queries_per_second.normalized_per_core": 26399.821999999996, "characteristics.scheduled_queries_per_second.normalized_per_processor": 26399.821999999996, "characteristics.total": 50000, "ck_system": "DSS8440_A100-PCIE-40GBx10_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, 
"host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3.84 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 10, "normalize_processors": 10, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/DSS8440_A100-PCIE-40GBx10_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": 
"https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DSS8440_A100-PCIE-40GBx10_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 264000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "b4e80a0e2f0da5b3", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 10009317, "90.00 percentile latency (ns)": 13168615, "95.00 percentile latency (ns)": 13405042, "97.00 percentile latency (ns)": 13550545, "99.00 percentile latency (ns)": 13883143, "99.90 percentile latency (ns)": 17383552, "Completed samples per second": 137728.36, "Max latency (ns)": 22957340, "Mean latency (ns)": 10719853, "Min duration satisfied": "Yes", "Min latency (ns)": 6112464, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 137730.65, "accelerator_cooling_type": "", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.scheduled_queries_per_second": 137730.65, "characteristics.scheduled_queries_per_second.normalized_per_core": 34432.6625, "characteristics.scheduled_queries_per_second.normalized_per_processor": 34432.6625, "characteristics.total": 50000, "ck_system": "XE8545_7763_A100-SXM4-80GBx4_TRT", "ck_used": true, "cooling": "", "dataset": 
"ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_cooling_type": "", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3 TB", "host_storage_type": "NVMe SSD", "hw_notes": "500W A100-SXM-80GB", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/XE8545_7763_A100-SXM4-80GBx4_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 
1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE8545_7763_A100-SXM4-80GBx4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 137750, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "511bf08ae8e665ee", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 10423242, "90.00 percentile latency (ns)": 13904285, "95.00 percentile latency (ns)": 14082860, "97.00 percentile latency (ns)": 14187900, "99.00 percentile latency (ns)": 14388858, "99.90 percentile latency (ns)": 14799085, "Completed samples per second": 133480.55, "Max latency (ns)": 19108942, "Mean latency (ns)": 11378386, "Min duration satisfied": "Yes", "Min latency (ns)": 5985611, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 133482.36, "accelerator_cooling_type": "", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-SXM-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.028, 
"characteristics.good": 38014, "characteristics.scheduled_queries_per_second": 133482.36, "characteristics.scheduled_queries_per_second.normalized_per_core": 33370.59, "characteristics.scheduled_queries_per_second.normalized_per_processor": 33370.59, "characteristics.total": 50000, "ck_system": "XE8545_7713_A100-SXM4-40GBx4", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_cooling_type": "", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/XE8545_7713_A100-SXM4-40GBx4", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, 
cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE8545_7713_A100-SXM4-40GBx4", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 133500, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "3441a31d7545fd97", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 10448582, "90.00 percentile latency (ns)": 13939351, "95.00 percentile latency (ns)": 14115546, "97.00 percentile latency (ns)": 14218863, "99.00 percentile latency (ns)": 14418234, "99.90 percentile latency (ns)": 14827054, "Completed samples per second": 133480.12, "Max latency (ns)": 17946972, "Mean latency (ns)": 11398077, "Min duration satisfied": "Yes", "Min latency (ns)": 6240243, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 133482.36, "accelerator_cooling_type": "", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", 
"accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-SXM-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.scheduled_queries_per_second": 133482.36, "characteristics.scheduled_queries_per_second.normalized_per_core": 33370.59, "characteristics.scheduled_queries_per_second.normalized_per_processor": 33370.59, "characteristics.total": 50000, "ck_system": "XE8545_7763_A100-SXM4-40GBx4_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_cooling_type": "", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "normalize_cores": 4, "normalize_processors": 4, "note_code": 
"https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/XE8545_7763_A100-SXM4-40GBx4_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE8545_7763_A100-SXM4-40GBx4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 133500, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "a7829a40aeac3a97", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 10236179, "90.00 percentile latency (ns)": 11286415, "95.00 percentile latency (ns)": 11567454, "97.00 percentile latency (ns)": 11746499, "99.00 percentile latency (ns)": 12067100, "99.90 percentile latency (ns)": 12627766, "Completed samples per second": 112986.6, "Max latency (ns)": 29336688, "Mean latency (ns)": 10235382, "Min duration satisfied": "Yes", 
"Min latency (ns)": 5028957, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 112987.66, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.028, "characteristics.good": 38014, "characteristics.scheduled_queries_per_second": 112987.66, "characteristics.scheduled_queries_per_second.normalized_per_core": 28246.915, "characteristics.scheduled_queries_per_second.normalized_per_processor": 28246.915, "characteristics.total": 50000, "ck_system": "R750xa_A100-PCIE-40GBx4_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "256 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, 
"host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/R750xa_A100-PCIE-40GBx4_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "preview", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R750xa_A100-PCIE-40GBx4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency 
(ns)": 15000000, "target_qps": 113000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "d4579b3d710874bd", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 13750584, "90.00 percentile latency (ns)": 14546407, "95.00 percentile latency (ns)": 14717814, "97.00 percentile latency (ns)": 14809842, "99.00 percentile latency (ns)": 14938621, "99.90 percentile latency (ns)": 15108134, "Completed samples per second": 176979.71, "Max latency (ns)": 25765638, "Mean latency (ns)": 13671655, "Min duration satisfied": "Yes", "Min latency (ns)": 11345827, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 176983.35, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "48 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA A40", "accelerator_on-chip_memories": "", "accelerators_per_node": 10, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.028, "characteristics.good": 38014, "characteristics.scheduled_queries_per_second": 176983.35, "characteristics.scheduled_queries_per_second.normalized_per_core": 17698.335, "characteristics.scheduled_queries_per_second.normalized_per_processor": 17698.335, "characteristics.total": 50000, "ck_system": "DSS8440_A40x10_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", 
"dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3.84 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 10, "normalize_processors": 10, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/DSS8440_A40x10_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 
7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DSS8440_A40x10_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 177000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "4665e601f94aa881", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 3620706, "90.00 percentile latency (ns)": 4201807, "95.00 percentile latency (ns)": 4425481, "97.00 percentile latency (ns)": 4606071, "99.00 percentile latency (ns)": 4890863, "99.90 percentile latency (ns)": 5336766, "Completed samples per second": 51985.64, "Max latency (ns)": 12913131, "Mean latency (ns)": 3641250, "Min duration satisfied": "Yes", "Min latency (ns)": 2708722, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 51985.94, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 2, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, 
"characteristics.accuracy": 76.028, "characteristics.good": 38014, "characteristics.scheduled_queries_per_second": 51985.94, "characteristics.scheduled_queries_per_second.normalized_per_core": 25992.97, "characteristics.scheduled_queries_per_second.normalized_per_processor": 25992.97, "characteristics.total": 50000, "ck_system": "R7525_A100-PCIe-40GBx2_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "2.5 GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "normalize_cores": 2, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/R7525_A100-PCIe-40GBx2_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 
7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_A100-PCIe-40GBx2_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 52000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "d577143a4f1da8cc", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 3968861, "90.00 percentile latency (ns)": 4339983, "95.00 percentile latency (ns)": 4435887, "97.00 percentile latency (ns)": 4517359, "99.00 percentile latency (ns)": 4797873, "99.90 percentile latency (ns)": 19419770, "Completed samples per second": 77976.07, "Max latency (ns)": 69329043, "Mean latency (ns)": 4025291, "Min duration satisfied": "Yes", "Min latency (ns)": 1916009, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 77976.67, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": 
"40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 3, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.028, "characteristics.good": 38014, "characteristics.scheduled_queries_per_second": 77976.67, "characteristics.scheduled_queries_per_second.normalized_per_core": 25992.22333333333, "characteristics.scheduled_queries_per_second.normalized_per_processor": 25992.22333333333, "characteristics.total": 50000, "ck_system": "R740_A100-PCIe-40GBx3_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "384 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1.8 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC on", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "normalize_cores": 3, "normalize_processors": 3, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", 
"note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/R740_A100-PCIe-40GBx3_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R740_A100-PCIe-40GBx3_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 78000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "6ba686f530c64815", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 6975257, "90.00 percentile latency (ns)": 7740388, "95.00 percentile latency (ns)": 7893468, "97.00 percentile latency (ns)": 7993609, "99.00 percentile latency (ns)": 8271718, "99.90 percentile latency (ns)": 9564331, "Completed samples per second": 34991.88, "Max latency (ns)": 16656333, "Mean latency (ns)": 6987013, "Min duration satisfied": "Yes", "Min latency (ns)": 2846436, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", 
"Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 34992.18, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "48 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Quadro RTX 8000", "accelerator_on-chip_memories": "", "accelerators_per_node": 3, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.028, "characteristics.good": 38014, "characteristics.scheduled_queries_per_second": 34992.18, "characteristics.scheduled_queries_per_second.normalized_per_core": 11664.06, "characteristics.scheduled_queries_per_second.normalized_per_processor": 11664.06, "characteristics.total": 50000, "ck_system": "R7525_QuadroRTX8000x3_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1.8 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC on", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration 
(ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "normalize_cores": 3, "normalize_processors": 3, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/DellEMC/results/R7525_QuadroRTX8000x3_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_QuadroRTX8000x3_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 35000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "26f4d9cabc5c323e", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 6700087, "90.00 percentile latency (ns)": 9225732, "95.00 percentile latency (ns)": 10498155, "97.00 percentile latency (ns)": 11419557, "99.00 percentile latency (ns)": 13556798, "99.90 percentile latency (ns)": 18948817, 
"Completed samples per second": 20740.82, "Max latency (ns)": 26201589, "Mean latency (ns)": 7217901, "Min duration satisfied": "Yes", "Min latency (ns)": 4408490, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 20742.83, "accelerator_frequency": "-", "accelerator_host_interconnect": "PCIe 3.0", "accelerator_interconnect": "PCIe 3.0 ", "accelerator_interconnect_topology": "-", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "Tesla T4", "accelerator_on-chip_memories": 1, "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "characteristics.accuracy": 76.034, "characteristics.good": 38017, "characteristics.scheduled_queries_per_second": 20742.83, "characteristics.scheduled_queries_per_second.normalized_per_core": 5185.7075, "characteristics.scheduled_queries_per_second.normalized_per_processor": 5185.7075, "characteristics.total": 50000, "ck_system": "R740_T4x4_tensorrt", "ck_used": true, "cooling": "-", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT NGC 19.09", "host_memory_capacity": "384 GB", "host_memory_configuration": "-", "host_networking": "-", "host_networking_topology": "-", "host_processor_caches": "-", "host_processor_core_count": 120, "host_processor_frequency": "3.00GHz", "host_processor_interconnect": "UPI", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, 
"host_storage_capacity": "110 GB", "host_storage_type": "SSD", "hw_notes": "none", "informal_model": "resnet", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.5, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/DellEMC/results/R740_T4x4_tensorrt", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "Docker 19.03.2; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 3133965575612453542, "retraining": "N", "sample_index_rng_seed": 665484352860916858, "samples_per_query": 1, "schedule_rng_seed": 3622009729038561421, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "nonePowered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R740_T4x4_tensorrt", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "target_latency (ns)": 15000000, "target_qps": 20710, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "53f8f28456e9e884", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 2920125, "90.00 percentile latency (ns)": 3367123, "95.00 percentile latency (ns)": 4053907, 
"97.00 percentile latency (ns)": 5031175, "99.00 percentile latency (ns)": 8191984, "99.90 percentile latency (ns)": 11700201, "Completed samples per second": 67119.63, "Max latency (ns)": 15642988, "Mean latency (ns)": 3102634, "Min duration satisfied": "Yes", "Min latency (ns)": 2069578, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 67124.18, "accelerator_frequency": "-", "accelerator_host_interconnect": "PCIe 3.0", "accelerator_interconnect": "PCIe 3.0 ", "accelerator_interconnect_topology": "-", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "Tesla T4", "accelerator_on-chip_memories": 1, "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "characteristics.accuracy": 70.61, "characteristics.good": 35305, "characteristics.scheduled_queries_per_second": 67124.18, "characteristics.scheduled_queries_per_second.normalized_per_core": 16781.045, "characteristics.scheduled_queries_per_second.normalized_per_processor": 16781.045, "characteristics.total": 50000, "ck_system": "R740_T4x4_tensorrt", "ck_used": true, "cooling": "-", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT NGC 19.09", "host_memory_capacity": "384 GB", "host_memory_configuration": "-", "host_networking": "-", "host_networking_topology": "-", "host_processor_caches": "-", "host_processor_core_count": 120, "host_processor_frequency": "3.00GHz", 
"host_processor_interconnect": "UPI", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "110 GB", "host_storage_type": "SSD", "hw_notes": "none", "informal_model": "mobilenet", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.5, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.5/tree/master/closed/DellEMC/results/R740_T4x4_tensorrt", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "Docker 19.03.2; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 3133965575612453542, "retraining": "N", "sample_index_rng_seed": 665484352860916858, "samples_per_query": 1, "schedule_rng_seed": 3622009729038561421, "starting_weights_filename": "mobilenet_sym_no_bn.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "nonePowered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R740_T4x4_tensorrt", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "target_latency (ns)": 10000000, "target_qps": 67100, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "094300b3bf9cf550", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, 
affine fusion" }, { "50.00 percentile latency (ns)": 5114914, "90.00 percentile latency (ns)": 9289844, "95.00 percentile latency (ns)": 11274720, "97.00 percentile latency (ns)": 12418852, "99.00 percentile latency (ns)": 14720943, "99.90 percentile latency (ns)": 17429410, "Completed samples per second": 5801.31, "Max latency (ns)": 19602140, "Mean latency (ns)": 5930705, "Min duration satisfied": "Yes", "Min latency (ns)": 3003162, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "XLNX_AI", "Scenario": "server", "Scheduled samples per second": 5802.01, "accelerator_memory_capacity": "16 GB per accelerator", "accelerator_memory_configuration": "DDR", "accelerator_model_name": "xilinx_vck5000_v4e", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.768, "characteristics.good": 37884, "characteristics.scheduled_queries_per_second": 5802.01, "characteristics.scheduled_queries_per_second.normalized_per_core": 5802.01, "characteristics.scheduled_queries_per_second.normalized_per_processor": 5802.01, "characteristics.total": 50000, "ck_system": "R740xd_vck5000", "ck_used": true, "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "open", "formal_model": "resnet50", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TF", "host_memory_capacity": "196 GB", "host_processor_core_count": 120, "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "20 GB", "host_storage_type": "HDD", "informal_model": "resnet50", 
"input_data_types": "", "key.accuracy": "characteristics.accuracy", "max_async_queries": 200, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/open/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/open/DellEMC/results/R740xd_vck5000", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "Xilinx Vitis Runtime; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "", "status": "rdi", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R740xd_vck5000", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 5800, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "e6a41b2e416986c4", "use_accelerator": true, "weight_data_types": "", "weight_transformations": "" }, { "50.00 percentile latency (ns)": 5328532, "90.00 percentile latency (ns)": 6967841, "95.00 percentile latency (ns)": 7501096, "97.00 percentile latency (ns)": 7856225, "99.00 percentile latency (ns)": 8645202, "99.90 percentile latency (ns)": 10155034, "Completed samples 
per second": 44803.89, "Max latency (ns)": 15673138, "Mean latency (ns)": 5631478, "Min duration satisfied": "Yes", "Min latency (ns)": 2100274, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 44807.19, "accelerator_frequency": "1590MHz", "accelerator_host_interconnect": "PCI Express 3.0", "accelerator_interconnect": "PCI Express 3.0", "accelerator_interconnect_topology": "4 Accelerators per CPU", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Tesla T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.918, "characteristics.good": 37959, "characteristics.scheduled_queries_per_second": 44807.19, "characteristics.scheduled_queries_per_second.normalized_per_core": 5600.89875, "characteristics.scheduled_queries_per_second.normalized_per_processor": 5600.89875, "characteristics.total": 50000, "ck_system": "R7525_T4x8_TRT", "ck_used": true, "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0, cuDNN 8.0.2, cuBLAS 11.2.0, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "1 TB", "host_memory_configuration": "8x64GB DDR4-3200 HMAA8GR7AJR4N-XN RDIMM ECC", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "2MB+16MB+128MB", 
"host_processor_core_count": 120, "host_processor_frequency": "2.35GHz", "host_processor_interconnect": "Infinity Fabric", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3 TB (5x800GB WUSTR6480ASS200 in RAID5)", "host_storage_type": "3D-TLC Solid State with 12Gbps SAS", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/R7525_T4x8_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "docker 19.03.12, python 3.6.8, gcc 5.5.0, onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_T4x8_TRT", "system_name": "Microsoft Corporation 7.0 
(Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 44800, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "ba04c1d956d3f961", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 7741530, "90.00 percentile latency (ns)": 10487473, "95.00 percentile latency (ns)": 11678369, "97.00 percentile latency (ns)": 12594974, "99.00 percentile latency (ns)": 14556097, "99.90 percentile latency (ns)": 18345713, "Completed samples per second": 1523.56, "Max latency (ns)": 45867423, "Mean latency (ns)": 8361415, "Min duration satisfied": "Yes", "Min latency (ns)": 6449396, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "SUT\u0000", "Scenario": "server", "Scheduled samples per second": 1523.63, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "N/A", "accelerator_memory_configuration": "", "accelerator_model_name": "N/A", "accelerator_on-chip_memories": "", "accelerators_per_node": 0, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.292, "characteristics.good": 38146, "characteristics.scheduled_queries_per_second": 1523.63, "characteristics.scheduled_queries_per_second.normalized_per_core": 6.348458333333334, "characteristics.scheduled_queries_per_second.normalized_per_processor": 761.815, "characteristics.total": 50000, "ck_system": "R740xd8280M_openvino", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": 
"characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "OpenVINO", "host_memory_capacity": "376GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "2.70GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1.59TB", "host_storage_type": "SATA", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 240, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/R740xd8280M_openvino", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "MKL-DNN; URL: github.com/intel/mkl-dnn; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "no", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "/home/dvdt/drop/Models/resnet50/resnet50_int8.xml", "status": "available", "submitter": "DellEMC", "submitter_link": 
"https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R740xd8280M_openvino", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 1525, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "c9da3aea23e117c9", "use_accelerator": false, "weight_data_types": "int8", "weight_transformations": "TF -> OpenVINO (please refer to closed/Intel/calibration/OpenVINO)" }, { "50.00 percentile latency (ns)": 5842082, "90.00 percentile latency (ns)": 6437596, "95.00 percentile latency (ns)": 6596881, "97.00 percentile latency (ns)": 6705957, "99.00 percentile latency (ns)": 6977506, "99.90 percentile latency (ns)": 8573432, "Completed samples per second": 21000.8, "Max latency (ns)": 13976834, "Mean latency (ns)": 5873948, "Min duration satisfied": "Yes", "Min latency (ns)": 4476840, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 21002.93, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.scheduled_queries_per_second": 21002.93, "characteristics.scheduled_queries_per_second.normalized_per_core": 5250.7325, 
"characteristics.scheduled_queries_per_second.normalized_per_processor": 5250.7325, "characteristics.total": 50000, "ck_system": "XE2420_T4x4_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "384 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "2.10GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/XE2420_T4x4_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, 
"print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE2420_T4x4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 21000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "76599250f1751c02", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 8661175, "90.00 percentile latency (ns)": 9148440, "95.00 percentile latency (ns)": 9331282, "97.00 percentile latency (ns)": 9508485, "99.00 percentile latency (ns)": 10443705, "99.90 percentile latency (ns)": 18638230, "Completed samples per second": 98988.87, "Max latency (ns)": 21706830, "Mean latency (ns)": 8727174, "Min duration satisfied": "Yes", "Min latency (ns)": 5493609, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 99003.75, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "48 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Quadro RTX 8000", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, 
"accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.064, "characteristics.good": 38032, "characteristics.scheduled_queries_per_second": 99003.75, "characteristics.scheduled_queries_per_second.normalized_per_core": 12375.46875, "characteristics.scheduled_queries_per_second.normalized_per_processor": 12375.46875, "characteristics.total": 50000, "ck_system": "DSS8440_QuadroRTX8000x8_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "384 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1.8 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/DSS8440_QuadroRTX8000x8_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS 
(Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DSS8440_QuadroRTX8000x8_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 99000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "40eb17f6cab245a3", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 10359959, "90.00 percentile latency (ns)": 11641576, "95.00 percentile latency (ns)": 12593958, "97.00 percentile latency (ns)": 13306661, "99.00 percentile latency (ns)": 14950915, "99.90 percentile latency (ns)": 28221465, "Completed samples per second": 1798.27, "Max latency (ns)": 372464456, "Mean latency (ns)": 10658408, "Min duration satisfied": "Yes", "Min latency (ns)": 7175758, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "SUT\u0000", "Scenario": "server", "Scheduled samples per second": 1798.38, "accelerator_frequency": "", "accelerator_host_interconnect": "", 
"accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "N/A", "accelerator_memory_configuration": "", "accelerator_model_name": "N/A", "accelerator_on-chip_memories": "", "accelerators_per_node": 0, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.292, "characteristics.good": 38146, "characteristics.scheduled_queries_per_second": 1798.38, "characteristics.scheduled_queries_per_second.normalized_per_core": 7.493250000000001, "characteristics.scheduled_queries_per_second.normalized_per_processor": 899.19, "characteristics.total": 50000, "ck_system": "R640_6248R_VMwarevSphere7.0_openvino", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "OpenVINO", "host_memory_capacity": "192GB", "host_memory_configuration": "12 X 16 GB 2933 MT/s", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "21TB", "host_storage_type": "VSAN", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 240, "normalize_processors": 2, "note_code": 
"https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/R640_6248R_VMwarevSphere7.0_openvino", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "VMware vSphere 7.0.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "no", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "/home/dvdt/drop/Models/resnet50/resnet50_int8.xml", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Tests are run inside 1 virtual machine with 96 vCPU, 192GB memory, 100GB storagePowered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R640_6248R_VMwarevSphere7.0_openvino", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 1800, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "d1554b1914023863", "use_accelerator": false, "weight_data_types": "int8", "weight_transformations": "TF -> OpenVINO (please refer to closed/Intel/calibration/OpenVINO)" }, { "50.00 percentile latency (ns)": 9509193, "90.00 percentile latency (ns)": 10865156, "95.00 percentile latency (ns)": 11434344, "97.00 percentile latency (ns)": 11983207, "99.00 percentile latency (ns)": 14123097, "99.90 percentile latency (ns)": 18987650, "Completed samples per second": 
123180.62, "Max latency (ns)": 20570226, "Mean latency (ns)": 9599801, "Min duration satisfied": "Yes", "Min latency (ns)": 5523648, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 123199.63, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Quadro RTX 6000", "accelerator_on-chip_memories": "", "accelerators_per_node": 10, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.064, "characteristics.good": 38032, "characteristics.scheduled_queries_per_second": 123199.63, "characteristics.scheduled_queries_per_second.normalized_per_core": 12319.963, "characteristics.scheduled_queries_per_second.normalized_per_processor": 12319.963, "characteristics.total": 50000, "ck_system": "DSS8440_QuadroRTX6000x10_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "384 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", 
"host_processors_per_node": 2, "host_storage_capacity": "1.8 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 10, "normalize_processors": 10, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/DSS8440_QuadroRTX6000x10_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DSS8440_QuadroRTX6000x10_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 123200, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "ad84a4d6f9aec935", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, 
affine fusion" }, { "50.00 percentile latency (ns)": 3864544, "90.00 percentile latency (ns)": 4577537, "95.00 percentile latency (ns)": 4805130, "97.00 percentile latency (ns)": 4931925, "99.00 percentile latency (ns)": 5146206, "99.90 percentile latency (ns)": 5848290, "Completed samples per second": 52421.74, "Max latency (ns)": 11894416, "Mean latency (ns)": 3884451, "Min duration satisfied": "Yes", "Min latency (ns)": 1936045, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 52425.14, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIE-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 2, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.918, "characteristics.good": 37959, "characteristics.scheduled_queries_per_second": 52425.14, "characteristics.scheduled_queries_per_second.normalized_per_core": 26212.57, "characteristics.scheduled_queries_per_second.normalized_per_processor": 26212.57, "characteristics.total": 50000, "ck_system": "R7525_A100x2_TRT", "ck_used": true, "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0, cuDNN 8.0.2, cuBLAS 11.2.0, libjemalloc2, cub 1.8.0, tensorrt-laboratory 
mlperf branch", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "2.50GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1.84 TB", "host_storage_type": "NVMe", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 2, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/R7525_A100x2_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "docker 19.03.12, python 3.6.8, gcc 5.5.0, onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": 
"https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_A100x2_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 52400, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "b365455ea116a216", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5981458, "90.00 percentile latency (ns)": 6724449, "95.00 percentile latency (ns)": 7036059, "97.00 percentile latency (ns)": 7354331, "99.00 percentile latency (ns)": 7937909, "99.90 percentile latency (ns)": 8794331, "Completed samples per second": 20399.19, "Max latency (ns)": 12174421, "Mean latency (ns)": 6045807, "Min duration satisfied": "Yes", "Min latency (ns)": 4039405, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 20400.56, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA GRID T4-16Q", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.918, "characteristics.good": 37959, "characteristics.scheduled_queries_per_second": 20400.56, "characteristics.scheduled_queries_per_second.normalized_per_core": 5100.14, "characteristics.scheduled_queries_per_second.normalized_per_processor": 5100.14, "characteristics.total": 50000, "ck_system": "R740_vT4x4_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": 
"https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "382 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1 TB", "host_storage_type": "Hard Disk vSAN", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/R740_vT4x4_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, 
"starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "VMWarePowered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R740_vT4x4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 20400, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "17d40629e5f702ab", "use_accelerator": true, "vmt_memory_capacity": "300 GB", "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5038486, "90.00 percentile latency (ns)": 6566560, "95.00 percentile latency (ns)": 7333830, "97.00 percentile latency (ns)": 7707294, "99.00 percentile latency (ns)": 8406563, "99.90 percentile latency (ns)": 15846396, "Completed samples per second": 90009.52, "Max latency (ns)": 21076912, "Mean latency (ns)": 5321885, "Min duration satisfied": "Yes", "Min latency (ns)": 2044582, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 90015.51, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIE-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 3, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.scheduled_queries_per_second": 90015.51, 
"characteristics.scheduled_queries_per_second.normalized_per_core": 30005.17, "characteristics.scheduled_queries_per_second.normalized_per_processor": 30005.17, "characteristics.total": 50000, "ck_system": "R7525_A100x3_TRT", "ck_used": true, "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0, cuDNN 8.0.2, cuBLAS 11.2.0, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "2.40GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1.5 TB", "host_storage_type": "NVMe", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 3, "normalize_processors": 3, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/R7525_A100x3_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "docker 19.03.12, python 3.6.8, gcc 5.5.0, onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, 
torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "preview", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_A100x3_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 90000, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "519d941b18e28f45", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 10689435, "90.00 percentile latency (ns)": 13003607, "95.00 percentile latency (ns)": 13400976, "97.00 percentile latency (ns)": 13630854, "99.00 percentile latency (ns)": 14201668, "99.90 percentile latency (ns)": 15517832, "Completed samples per second": 134288.67, "Max latency (ns)": 21287821, "Mean latency (ns)": 11037606, "Min duration satisfied": "Yes", "Min latency (ns)": 2977168, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 134316.28, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", 
"accelerator_memory_capacity": "48 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Quadro RTX 8000", "accelerator_on-chip_memories": "", "accelerators_per_node": 10, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.064, "characteristics.good": 38032, "characteristics.scheduled_queries_per_second": 134316.28, "characteristics.scheduled_queries_per_second.normalized_per_core": 13431.628, "characteristics.scheduled_queries_per_second.normalized_per_processor": 13431.628, "characteristics.total": 50000, "ck_system": "DSS8440_QuadroRTX8000x10_TRT", "ck_used": true, "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0, cuDNN 8.0.2, cuBLAS 11.2.0, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "2.10GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1.84 TB", "host_storage_type": "NVMe", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 10, "normalize_processors": 10, "note_code": 
"https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/DSS8440_QuadroRTX8000x10_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "docker 19.03.12, python 3.6.8, gcc 5.5.0, onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DSS8440_QuadroRTX8000x10_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 134300, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "c51a4e7db3a04dbc", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 10276002, "90.00 percentile latency (ns)": 10782118, "95.00 percentile latency (ns)": 10936214, "97.00 percentile latency (ns)": 11053470, "99.00 percentile latency (ns)": 11491089, "99.90 percentile latency (ns)": 18408870, "Completed samples per second": 90600.09, "Max latency 
(ns)": 60200268, "Mean latency (ns)": 10320533, "Min duration satisfied": "Yes", "Min latency (ns)": 7641272, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 90615.61, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 16, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.scheduled_queries_per_second": 90615.61, "characteristics.scheduled_queries_per_second.normalized_per_core": 5663.475625, "characteristics.scheduled_queries_per_second.normalized_per_processor": 5663.475625, "characteristics.total": 50000, "ck_system": "DSS8440_T4x16_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "384 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1.84 
TB", "host_storage_type": "NVMe", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 16, "normalize_processors": 16, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/DSS8440_T4x16_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DSS8440_T4x16_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 90600, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "ce45169eaf0e09b7", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5623964, "90.00 percentile latency 
(ns)": 6917569, "95.00 percentile latency (ns)": 7529585, "97.00 percentile latency (ns)": 7891975, "99.00 percentile latency (ns)": 8723773, "99.90 percentile latency (ns)": 13803422, "Completed samples per second": 21504.08, "Max latency (ns)": 24428437, "Mean latency (ns)": 5811442, "Min duration satisfied": "Yes", "Min latency (ns)": 1877045, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 21506.31, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.918, "characteristics.good": 37959, "characteristics.scheduled_queries_per_second": 21506.31, "characteristics.scheduled_queries_per_second.normalized_per_core": 5376.5775, "characteristics.scheduled_queries_per_second.normalized_per_processor": 5376.5775, "characteristics.total": 50000, "ck_system": "R7515_T4x4_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.0.14, CUDA 11.0.207", "host_memory_capacity": "256 GB", "host_memory_configuration": "DDR-4", "host_networking": "", "host_networking_topology": "", 
"host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "2.0GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 1, "host_storage_capacity": "3.2 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC on", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/R7515_T4x4_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.0.14, CUDA 11.0.27, cuDNN 8.0.2, DALI 0.25.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7515_T4x4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 21500, "task": "image classification", "task2": "image 
classification", "total_cores": 120, "uid": "d9b07204f3ae9fa9", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 7016099, "90.00 percentile latency (ns)": 10251013, "95.00 percentile latency (ns)": 11572295, "97.00 percentile latency (ns)": 12614338, "99.00 percentile latency (ns)": 14962950, "99.90 percentile latency (ns)": 19371802, "Completed samples per second": 1497.6, "Max latency (ns)": 29990844, "Mean latency (ns)": 7874130, "Min duration satisfied": "Yes", "Min latency (ns)": 5934484, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "SUT", "Scenario": "server", "Scheduled samples per second": 1497.65, "accelerator_frequency": "-", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "-", "accelerator_memory_configuration": "-", "accelerator_model_name": "-", "accelerator_on-chip_memories": "-", "accelerators_per_node": 0, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.292, "characteristics.good": 38146, "characteristics.scheduled_queries_per_second": 1497.65, "characteristics.scheduled_queries_per_second.normalized_per_core": 6.240208333333333, "characteristics.scheduled_queries_per_second.normalized_per_processor": 748.825, "characteristics.total": 50000, "ck_system": "R640_6248R-openvino-2020", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": 
"https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "OpenVINO 2020", "host_memory_capacity": "188GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "3.00GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "200GB", "host_storage_type": "SATA", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 240, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/R640_6248R-openvino-2020", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 1024, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "no", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "/home/dividiti/mlperf_ext_ov_cpp_v0.7-master/Models/resnet50/resnet50_int8.xml", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": 
"https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R640_6248R-openvino-2020", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 1499, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "4fe5f9b34ef4bceb", "use_accelerator": false, "weight_data_types": "int8", "weight_transformations": "TF -> OpenVINO (please refer to closed/Intel/calibration/OpenVINO)" }, { "50.00 percentile latency (ns)": 9260556, "90.00 percentile latency (ns)": 9885094, "95.00 percentile latency (ns)": 10066277, "97.00 percentile latency (ns)": 10198034, "99.00 percentile latency (ns)": 11420593, "99.90 percentile latency (ns)": 69461830, "Completed samples per second": 62306.36, "Max latency (ns)": 99789548, "Mean latency (ns)": 9615534, "Min duration satisfied": "Yes", "Min latency (ns)": 5309850, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 62311.87, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 12, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.scheduled_queries_per_second": 62311.87, "characteristics.scheduled_queries_per_second.normalized_per_core": 5192.655833333333, "characteristics.scheduled_queries_per_second.normalized_per_processor": 5192.655833333333, "characteristics.total": 50000, "ck_system": "DSS8440_T4x12_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", 
"dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 12, "normalize_processors": 12, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/DSS8440_T4x12_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 
3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DSS8440_T4x12_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 62300, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "d4dd3ba013400923", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 9227122, "90.00 percentile latency (ns)": 9980210, "95.00 percentile latency (ns)": 10176375, "97.00 percentile latency (ns)": 10348499, "99.00 percentile latency (ns)": 10944173, "99.90 percentile latency (ns)": 12997423, "Completed samples per second": 52518.98, "Max latency (ns)": 21194949, "Mean latency (ns)": 9251775, "Min duration satisfied": "Yes", "Min latency (ns)": 6426142, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 52527.02, "accelerator_frequency": "1770MHz", "accelerator_host_interconnect": "PCI Express 3.0", "accelerator_interconnect": "PCI Express 3.0", "accelerator_interconnect_topology": "2 Accelerators per CPU", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Quadro RTX 6000", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.064, "characteristics.good": 38032, 
"characteristics.scheduled_queries_per_second": 52527.02, "characteristics.scheduled_queries_per_second.normalized_per_core": 13131.755, "characteristics.scheduled_queries_per_second.normalized_per_processor": 13131.755, "characteristics.total": 50000, "ck_system": "C4140_QuadroRTX6000x4_TRT", "ck_used": true, "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0, cuDNN 8.0.2, cuBLAS 11.2.0, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "384 GB", "host_memory_configuration": "6x16GB DDR4-2666 HMA82GR7AFR8N-VK RDIMM ECC", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "1.25MB+20MB+27.5MB", "host_processor_core_count": 120, "host_processor_frequency": "2.40GHz", "host_processor_interconnect": "Ultra Path Interconnect", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "1.6 TB (1x1.6TB Dell Express Flash PM1725a 1.6TB AIC)", "host_storage_type": "3D-TLC Solid State with PCIe NVME x8 Interface", "hw_notes": "ECC off. 
RTX6000 is available as a special config thru Dell DSS or OEM for PowerEdge C4140", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/C4140_QuadroRTX6000x4_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "docker 19.03.12, python 3.6.8, gcc 5.5.0, onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/C4140_QuadroRTX6000x4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 52500, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "3f7d48410e4e5597", "use_accelerator": true, "weight_data_types": 
"int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 11109820, "90.00 percentile latency (ns)": 12434634, "95.00 percentile latency (ns)": 12940522, "97.00 percentile latency (ns)": 13319811, "99.00 percentile latency (ns)": 14135479, "99.90 percentile latency (ns)": 16144956, "Completed samples per second": 41481.84, "Max latency (ns)": 20866592, "Mean latency (ns)": 11180967, "Min duration satisfied": "Yes", "Min latency (ns)": 2106952, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 41488.85, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "48 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA Quadro RTX 8000", "accelerator_on-chip_memories": "", "accelerators_per_node": 3, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.064, "characteristics.good": 38032, "characteristics.scheduled_queries_per_second": 41488.85, "characteristics.scheduled_queries_per_second.normalized_per_core": 13829.616666666667, "characteristics.scheduled_queries_per_second.normalized_per_processor": 13829.616666666667, "characteristics.total": 50000, "ck_system": "R7525_QuadroRTX8000x3_TRT", "ck_used": false, "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": 
"TensorRT 7.2, CUDA 11.0, cuDNN 8.0.2, cuBLAS 11.2.0, libjemalloc2, cub 1.8.0, tensorrt-laboratory mlperf branch", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 32, "host_processor_frequency": "2.50GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7502", "host_processors_per_node": 2, "host_storage_capacity": "1.84 TB", "host_storage_type": "NVMe", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 3, "normalize_processors": 3, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/R7525_QuadroRTX8000x3_TRT", "number_of_nodes": 1, "operating_system": "CentOS Linux release 8.1.1911", "other_software_stack": "docker 19.03.12, python 3.6.8, gcc 5.5.0, onnx 1.3.0, tensorflow 1.13.1, pytorch 1.1.0, torchvision 0.3.0, pycuda 2019.1, sacrebleu 1.3.3, SimpleJSON, OpenCV 4.1.1", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "", "system_link": 
"https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_QuadroRTX8000x3_TRT", "system_name": "Dell EMC PowerEdge R7525 (3x Quadro RTX 8000)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 41481, "task": "image classification", "task2": "image classification", "total_cores": 64, "uid": "2b574e2783e65456", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 6507451, "90.00 percentile latency (ns)": 8799585, "95.00 percentile latency (ns)": 9683614, "97.00 percentile latency (ns)": 10407448, "99.00 percentile latency (ns)": 12031863, "99.90 percentile latency (ns)": 15084976, "Completed samples per second": 21803.57, "Max latency (ns)": 17737363, "Mean latency (ns)": 6946347, "Min duration satisfied": "Yes", "Min latency (ns)": 4088351, "Min queries satisfied": "Yes", "Mode": "Performance", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "LWIS_Server", "Scenario": "server", "Scheduled samples per second": 21805.14, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.918, "characteristics.good": 37959, "characteristics.scheduled_queries_per_second": 21805.14, "characteristics.scheduled_queries_per_second.normalized_per_core": 5451.285, "characteristics.scheduled_queries_per_second.normalized_per_processor": 5451.285, "characteristics.total": 50000, "ck_system": "R740_T4x4_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": 
"https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.0.14, CUDA 11.0.207", "host_memory_capacity": "384 GB", "host_memory_configuration": "DDR-4", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "3.0GHz", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3.84 TB", "host_storage_type": "SSD", "hw_notes": "ECC on", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 270336, "mlperf_version": 0.7, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/DellEMC/results/R740_T4x4_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.0.14, CUDA 11.0.207, cuDNN 8.0.2, DALI 0.25.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 1, "schedule_rng_seed": 3135815929913719677, 
"starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "DellEMC", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/DellEMC", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R740_T4x4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 15000000, "target_qps": 21800, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "ea74e0da4dea9560", "use_accelerator": true, "weight_data_types": "int8,fp16", "weight_transformations": "quantization, affine fusion" } ]