[ { "50.00 percentile latency (ns)": 2353492, "90.00 percentile latency (ns)": 4034462, "95.00 percentile latency (ns)": 4637787, "97.00 percentile latency (ns)": 5076538, "99.00 percentile latency (ns)": 6080931, "99.90 percentile latency (ns)": 12656349, "Completed samples per second": 200037.31, "Max latency (ns)": 22995471, "Mean latency (ns)": 2563848, "Min duration satisfied": "Yes", "Min latency (ns)": 171696, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 200038.16, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIE-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 200038.16, "characteristics.scheduled_queries_per_second.normalized_per_core": 50009.54, "characteristics.scheduled_queries_per_second.normalized_per_processor": 50009.54, "ck_system": "SYS-120GQ-TNRT_TRT", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.9, "formal_model_link": "", "framework": "TensorRT 8.0.1, CUDA 11.4", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 18, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Gold 6354", "host_processors_per_node": 2, "host_storage_capacity": "3.5 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "dlrm-99.9", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Supermicro/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Supermicro/results/SYS-120GQ-TNRT_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.1", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.4, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Supermicro", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Supermicro", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/SYS-120GQ-TNRT_TRT", "system_name": "Supermicro SYS-120GQ-TNRT (4x A100-PCIe-80GB, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 200000, "task": "recommendation", "task2": "recommendation", "total_cores": 36, "uid": "40861d9996f45edb", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 2353492, "90.00 percentile latency (ns)": 4034462, "95.00 percentile latency (ns)": 4637787, "97.00 percentile latency (ns)": 5076538, "99.00 percentile latency (ns)": 6080931, "99.90 percentile latency (ns)": 12656349, "Completed samples per second": 200037.31, "Max latency (ns)": 22995471, "Mean latency (ns)": 2563848, "Min duration satisfied": "Yes", "Min latency (ns)": 171696, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 200038.16, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIE-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 200038.16, "characteristics.scheduled_queries_per_second.normalized_per_core": 50009.54, "characteristics.scheduled_queries_per_second.normalized_per_processor": 50009.54, "ck_system": "SYS-120GQ-TNRT_TRT", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.0, "formal_model_link": "", "framework": "TensorRT 8.0.1, CUDA 11.4", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 18, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Gold 6354", "host_processors_per_node": 2, "host_storage_capacity": "3.5 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "dlrm-99", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Supermicro/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Supermicro/results/SYS-120GQ-TNRT_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.1", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.4, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Supermicro", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Supermicro", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/SYS-120GQ-TNRT_TRT", "system_name": "Supermicro SYS-120GQ-TNRT (4x A100-PCIe-80GB, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 200000, "task": "recommendation", "task2": "recommendation", "total_cores": 36, "uid": "2d48fda15913aaac", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 459567, "90.00 percentile latency (ns)": 884940, "95.00 percentile latency (ns)": 1171389, "97.00 percentile latency (ns)": 1395475, "99.00 percentile latency (ns)": 7880189, "99.90 percentile latency (ns)": 374328355, "Completed samples per second": 160035.67, "Max latency (ns)": 580431813, "Mean latency (ns)": 2142940, "Min duration satisfied": "Yes", "Min latency (ns)": 158139, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 160035.75, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 160035.75, "characteristics.scheduled_queries_per_second.normalized_per_core": 20004.46875, "characteristics.scheduled_queries_per_second.normalized_per_processor": 20004.46875, "ck_system": "A30x8_TRT", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.9, "formal_model_link": "", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "7 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "dlrm-99.9", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Supermicro/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Supermicro/results/A30x8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "SUPERMICRO", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Supermicro", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x8_TRT", "system_name": "Supermicro AS-4124GS-TNR (8x A30, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 160000, "task": "recommendation", "task2": "recommendation", "total_cores": 128, "uid": "98fb34a9dcc3a495", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 459567, "90.00 percentile latency (ns)": 884940, "95.00 percentile latency (ns)": 1171389, "97.00 percentile latency (ns)": 1395475, "99.00 percentile latency (ns)": 7880189, "99.90 percentile latency (ns)": 374328355, "Completed samples per second": 160035.67, "Max latency (ns)": 580431813, "Mean latency (ns)": 2142940, "Min duration satisfied": "Yes", "Min latency (ns)": 158139, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 160035.75, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 160035.75, "characteristics.scheduled_queries_per_second.normalized_per_core": 20004.46875, "characteristics.scheduled_queries_per_second.normalized_per_processor": 20004.46875, "ck_system": "A30x8_TRT", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.0, "formal_model_link": "", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "7 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "dlrm-99", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Supermicro/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Supermicro/results/A30x8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "SUPERMICRO", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Supermicro", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x8_TRT", "system_name": "Supermicro AS-4124GS-TNR (8x A30, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 160000, "task": "recommendation", "task2": "recommendation", "total_cores": 128, "uid": "280d4702af35b2d3", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5497872, "90.00 percentile latency (ns)": 6166755, "95.00 percentile latency (ns)": 6490519, "97.00 percentile latency (ns)": 6890314, "99.00 percentile latency (ns)": 10073627, "99.90 percentile latency (ns)": 15796045, "Completed samples per second": 599197.82, "Max latency (ns)": 1001760543, "Mean latency (ns)": 5578032, "Min duration satisfied": "Yes", "Min latency (ns)": 4109492, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_Server", "Scenario": "server", "Scheduled samples per second": 600198.05, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 600198.05, "characteristics.scheduled_queries_per_second.normalized_per_core": 75024.75625, "characteristics.scheduled_queries_per_second.normalized_per_processor": 75024.75625, "ck_system": "A30x8_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.9, "formal_model_link": "", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "7 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "dlrm-99.9", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Supermicro/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Supermicro/results/A30x8_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "SUPERMICRO", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Supermicro", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x8_TRT_Triton", "system_name": "Supermicro AS-4124GS-TNR (8x A30, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 600000, "task": "recommendation", "task2": "recommendation", "total_cores": 128, "uid": "92b249d788863ca1", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5497872, "90.00 percentile latency (ns)": 6166755, "95.00 percentile latency (ns)": 6490519, "97.00 percentile latency (ns)": 6890314, "99.00 percentile latency (ns)": 10073627, "99.90 percentile latency (ns)": 15796045, "Completed samples per second": 599197.82, "Max latency (ns)": 1001760543, "Mean latency (ns)": 5578032, "Min duration satisfied": "Yes", "Min latency (ns)": 4109492, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_Server", "Scenario": "server", "Scheduled samples per second": 600198.05, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 600198.05, "characteristics.scheduled_queries_per_second.normalized_per_core": 75024.75625, "characteristics.scheduled_queries_per_second.normalized_per_processor": 75024.75625, "ck_system": "A30x8_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.0, "formal_model_link": "", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "7 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "dlrm-99", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Supermicro/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Supermicro/results/A30x8_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "SUPERMICRO", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Supermicro", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x8_TRT_Triton", "system_name": "Supermicro AS-4124GS-TNR (8x A30, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 600000, "task": "recommendation", "task2": "recommendation", "total_cores": 128, "uid": "42eb7b784acbb7a1", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5136820, "90.00 percentile latency (ns)": 6854132, "95.00 percentile latency (ns)": 9408762, "97.00 percentile latency (ns)": 11481893, "99.00 percentile latency (ns)": 15291787, "99.90 percentile latency (ns)": 24042653, "Completed samples per second": 330058.5, "Max latency (ns)": 41177041, "Mean latency (ns)": 5453536, "Min duration satisfied": "Yes", "Min latency (ns)": 313429, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 330060.98, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 10, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 330060.98, "characteristics.scheduled_queries_per_second.normalized_per_core": 33006.098, "characteristics.scheduled_queries_per_second.normalized_per_processor": 33006.098, "ck_system": "SYS-420GP-TNR_TRT-1", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.9, "formal_model_link": "", "framework": "CUDA 11.4", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 36, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8360Y CPU @ 2.40GHz", "host_processors_per_node": 2, "host_storage_capacity": "3.84 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "dlrm-99.9", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 10, "normalize_processors": 10, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Supermicro/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Supermicro/results/SYS-420GP-TNR_TRT-1", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "CUDA 11.4, Driver 470.57.02", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Supermicro", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Supermicro", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/SYS-420GP-TNR_TRT-1", "system_name": "Supermicro SYS-420GP-TNR [10xA100-PCIe-40GB]", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 330000, "task": "recommendation", "task2": "recommendation", "total_cores": 72, "uid": "f4777abc7cc6e9fe", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5136820, "90.00 percentile latency (ns)": 6854132, "95.00 percentile latency (ns)": 9408762, "97.00 percentile latency (ns)": 11481893, "99.00 percentile latency (ns)": 15291787, "99.90 percentile latency (ns)": 24042653, "Completed samples per second": 330058.5, "Max latency (ns)": 41177041, "Mean latency (ns)": 5453536, "Min duration satisfied": "Yes", "Min latency (ns)": 313429, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 330060.98, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 10, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 330060.98, "characteristics.scheduled_queries_per_second.normalized_per_core": 33006.098, "characteristics.scheduled_queries_per_second.normalized_per_processor": 33006.098, "ck_system": "SYS-420GP-TNR_TRT-1", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.0, "formal_model_link": "", "framework": "CUDA 11.4", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 36, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8360Y CPU @ 2.40GHz", "host_processors_per_node": 2, "host_storage_capacity": "3.84 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "dlrm-99", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 10, "normalize_processors": 10, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Supermicro/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Supermicro/results/SYS-420GP-TNR_TRT-1", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "CUDA 11.4, Driver 470.57.02", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Supermicro", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Supermicro", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/SYS-420GP-TNR_TRT-1", "system_name": "Supermicro SYS-420GP-TNR [10xA100-PCIe-40GB]", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 330000, "task": "recommendation", "task2": "recommendation", "total_cores": 72, "uid": "e857496f3394482a", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 1494831, "90.00 percentile latency (ns)": 2593477, "95.00 percentile latency (ns)": 3098824, "97.00 percentile latency (ns)": 3571827, "99.00 percentile latency (ns)": 5147144, "99.90 percentile latency (ns)": 23593742, "Completed samples per second": 2102136.07, "Max latency (ns)": 39795178, "Mean latency (ns)": 1727789, "Min duration satisfied": "Yes", "Min latency (ns)": 158134, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 2102141.95, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 2102141.95, "characteristics.scheduled_queries_per_second.normalized_per_core": 262767.74375, "characteristics.scheduled_queries_per_second.normalized_per_processor": 262767.74375, "ck_system": "AS-4124GO-NART_A100-SXM4-40GBx8_TRT", "ck_used": true, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.9, "formal_model_link": "", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "dlrm-99.9", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/Supermicro/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/Supermicro/results/AS-4124GO-NART_A100-SXM4-40GBx8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Supermicro", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Supermicro", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/AS-4124GO-NART_A100-SXM4-40GBx8_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 2100000.0, "task": "recommendation", "task2": "recommendation", "total_cores": 240, "uid": "ed31df228543d7b1", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 1494831, "90.00 percentile latency (ns)": 2593477, "95.00 percentile latency (ns)": 3098824, "97.00 percentile latency (ns)": 3571827, "99.00 percentile latency (ns)": 5147144, "99.90 percentile latency (ns)": 23593742, "Completed samples per second": 2102136.07, "Max latency (ns)": 39795178, "Mean latency (ns)": 1727789, "Min duration satisfied": "Yes", "Min latency (ns)": 158134, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 2102141.95, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 2102141.95, "characteristics.scheduled_queries_per_second.normalized_per_core": 262767.74375, "characteristics.scheduled_queries_per_second.normalized_per_processor": 262767.74375, "ck_system": "AS-4124GO-NART_A100-SXM4-40GBx8_TRT", "ck_used": true, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.0, "formal_model_link": "", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "dlrm-99", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/Supermicro/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/Supermicro/results/AS-4124GO-NART_A100-SXM4-40GBx8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Supermicro", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Supermicro", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/AS-4124GO-NART_A100-SXM4-40GBx8_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 2100000.0, "task": "recommendation", "task2": "recommendation", "total_cores": 240, "uid": "45307829fd432886", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 444878, "90.00 percentile latency (ns)": 639249, "95.00 percentile latency (ns)": 730429, "97.00 percentile latency (ns)": 816293, "99.00 percentile latency (ns)": 1586206, "99.90 percentile latency (ns)": 13576344, "Completed samples per second": 300000.74, "Max latency (ns)": 45714033, "Mean latency (ns)": 519370, "Min duration satisfied": "Yes", "Min latency (ns)": 136870, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 300000.87, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 300000.87, "characteristics.scheduled_queries_per_second.normalized_per_core": 75000.2175, "characteristics.scheduled_queries_per_second.normalized_per_processor": 75000.2175, "ck_system": "AS-2124GQ-NART_A100-SXM4-40GBx4_TRT", "ck_used": true, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.9, "formal_model_link": "", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "dlrm-99.9", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/Supermicro/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/Supermicro/results/AS-2124GQ-NART_A100-SXM4-40GBx4_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Supermicro", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Supermicro", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/AS-2124GQ-NART_A100-SXM4-40GBx4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 300000, "task": "recommendation", "task2": "recommendation", "total_cores": 240, "uid": "407d9c4e40014fa4", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 444878, "90.00 percentile latency (ns)": 639249, "95.00 percentile latency (ns)": 730429, "97.00 percentile latency (ns)": 816293, "99.00 percentile latency (ns)": 1586206, "99.90 percentile latency (ns)": 13576344, "Completed samples per second": 300000.74, "Max latency (ns)": 45714033, "Mean latency (ns)": 519370, "Min duration satisfied": "Yes", "Min latency (ns)": 136870, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 300000.87, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 300000.87, "characteristics.scheduled_queries_per_second.normalized_per_core": 75000.2175, "characteristics.scheduled_queries_per_second.normalized_per_processor": 75000.2175, "ck_system": "AS-2124GQ-NART_A100-SXM4-40GBx4_TRT", "ck_used": true, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.0, "formal_model_link": "", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "dlrm-99", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/Supermicro/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/Supermicro/results/AS-2124GQ-NART_A100-SXM4-40GBx4_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Supermicro", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Supermicro", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/AS-2124GQ-NART_A100-SXM4-40GBx4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 300000, "task": "recommendation", "task2": "recommendation", "total_cores": 240, "uid": "db65b5e37e360d49", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 1143010, "90.00 percentile latency (ns)": 1865747, "95.00 percentile latency (ns)": 3487964, "97.00 percentile latency (ns)": 4924655, "99.00 percentile latency (ns)": 14746243, "99.90 percentile latency (ns)": 40910603, "Completed samples per second": 1002471.7, "Max latency (ns)": 64347763, "Mean latency (ns)": 1614076, "Min duration satisfied": "Yes", "Min latency (ns)": 151895, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 1002472.78, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 1002472.78, "characteristics.scheduled_queries_per_second.normalized_per_core": 250618.195, "characteristics.scheduled_queries_per_second.normalized_per_processor": 250618.195, "ck_system": "AS-2124GQ-NART_A100-SXM-80GBx4_TRT", "ck_used": true, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.9, "formal_model_link": "", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3.5 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "dlrm-99.9", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/Supermicro/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/Supermicro/results/AS-2124GQ-NART_A100-SXM-80GBx4_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Supermicro", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Supermicro", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/AS-2124GQ-NART_A100-SXM-80GBx4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 1002000.0, "task": "recommendation", "task2": "recommendation", "total_cores": 240, "uid": "7c147347161427f8", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 1143010, "90.00 percentile latency (ns)": 1865747, "95.00 percentile latency (ns)": 3487964, "97.00 percentile latency (ns)": 4924655, "99.00 percentile latency (ns)": 14746243, "99.90 percentile latency (ns)": 40910603, "Completed samples per second": 1002471.7, "Max latency (ns)": 64347763, "Mean latency (ns)": 1614076, "Min duration satisfied": "Yes", "Min latency (ns)": 151895, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 1002472.78, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 1002472.78, "characteristics.scheduled_queries_per_second.normalized_per_core": 250618.195, "characteristics.scheduled_queries_per_second.normalized_per_processor": 250618.195, "ck_system": "AS-2124GQ-NART_A100-SXM-80GBx4_TRT", "ck_used": true, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.0, "formal_model_link": "", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3.5 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "dlrm-99", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/Supermicro/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/Supermicro/results/AS-2124GQ-NART_A100-SXM-80GBx4_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Supermicro", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Supermicro", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/AS-2124GQ-NART_A100-SXM-80GBx4_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 1002000.0, "task": "recommendation", "task2": "recommendation", "total_cores": 240, "uid": "9fa800444b74fc08", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 3351507, "90.00 percentile latency (ns)": 5313315, "95.00 percentile latency (ns)": 5924038, "97.00 percentile latency (ns)": 6336616, "99.00 percentile latency (ns)": 6951821, "99.90 percentile latency (ns)": 8044735, "Completed samples per second": 2302561.52, "Max latency (ns)": 48194373, "Mean latency (ns)": 3441274, "Min duration satisfied": "Yes", "Min latency (ns)": 213398, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 2302572.02, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.scheduled_queries_per_second": 2302572.02, "characteristics.scheduled_queries_per_second.normalized_per_core": 287821.5025, "characteristics.scheduled_queries_per_second.normalized_per_processor": 287821.5025, "ck_system": "AS-4214GO-NART_A100-SXM-80GBx8_TRT", "ck_used": true, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "division": "closed", "formal_model": "dlrm", "formal_model_accuracy": 99.9, "formal_model_link": "", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3.5 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "dlrm-99.9", "input_data_types": "int8", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/Supermicro/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/Supermicro/results/AS-4214GO-NART_A100-SXM-80GBx8_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Supermicro", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Supermicro", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/AS-4214GO-NART_A100-SXM-80GBx8_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 2300000.0, "task": "recommendation", "task2": "recommendation", "total_cores": 240, "uid": "b13b06d53d4e0385", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 3351507, "90.00 percentile latency (ns)": 5313315, "95.00 percentile latency (ns)": 5924038, "97.00 percentile latency (ns)": 6336616, "99.00 percentile latency (ns)": 6951821, "99.90 percentile latency (ns)": 8044735, "Completed samples per second": 2302561.52, "Max latency (ns)": 48194373, "Mean latency (ns)": 3441274, "Min duration satisfied": "Yes", "Min latency (ns)": 213398, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 2302572.02, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.scheduled_queries_per_second": 2302572.02, "characteristics.scheduled_queries_per_second.normalized_per_core": 287821.5025, "characteristics.scheduled_queries_per_second.normalized_per_processor": 287821.5025, "ck_system": "AS-4214GO-NART_A100-SXM-80GBx8_TRT", "ck_used": true, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "division": "closed", "formal_model": "dlrm", "formal_model_accuracy": 99.0, "formal_model_link": "", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3.5 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "dlrm-99", "input_data_types": "int8", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/Supermicro/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/Supermicro/results/AS-4214GO-NART_A100-SXM-80GBx8_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Supermicro", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Supermicro", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/AS-4214GO-NART_A100-SXM-80GBx8_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 2300000.0, "task": "recommendation", "task2": "recommendation", "total_cores": 240, "uid": "04825e4fbdaca6d7", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 3249145, "90.00 percentile latency (ns)": 5284143, "95.00 percentile latency (ns)": 6007232, "97.00 percentile latency (ns)": 6581966, "99.00 percentile latency (ns)": 7474992, "99.90 percentile latency (ns)": 11179342, "Completed samples per second": 2292546.75, "Max latency (ns)": 33397711, "Mean latency (ns)": 3468091, "Min duration satisfied": "Yes", "Min latency (ns)": 191915, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 2292557.86, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.scheduled_queries_per_second": 2292557.86, "characteristics.scheduled_queries_per_second.normalized_per_core": 286569.7325, "characteristics.scheduled_queries_per_second.normalized_per_processor": 286569.7325, "ck_system": "SYS-420GP-TNAR_A100-SXM-80GBx8_TRT", "ck_used": true, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "division": "closed", "formal_model": "dlrm", "formal_model_accuracy": 99.9, "formal_model_link": "", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3.5 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "dlrm-99.9", "input_data_types": "int8", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/Supermicro/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/Supermicro/results/SYS-420GP-TNAR_A100-SXM-80GBx8_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "tb00_40M.pt", "status": "preview", "submitter": "Supermicro", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Supermicro", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/SYS-420GP-TNAR_A100-SXM-80GBx8_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 2290000.0, "task": "recommendation", "task2": "recommendation", "total_cores": 240, "uid": "4d75e4645bbb80da", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 3249145, "90.00 percentile latency (ns)": 5284143, "95.00 percentile latency (ns)": 6007232, "97.00 percentile latency (ns)": 6581966, "99.00 percentile latency (ns)": 7474992, "99.90 percentile latency (ns)": 11179342, "Completed samples per second": 2292546.75, "Max latency (ns)": 33397711, "Mean latency (ns)": 3468091, "Min duration satisfied": "Yes", "Min latency (ns)": 191915, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 2292557.86, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.scheduled_queries_per_second": 2292557.86, "characteristics.scheduled_queries_per_second.normalized_per_core": 286569.7325, "characteristics.scheduled_queries_per_second.normalized_per_processor": 286569.7325, "ck_system": "SYS-420GP-TNAR_A100-SXM-80GBx8_TRT", "ck_used": true, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "division": "closed", "formal_model": "dlrm", "formal_model_accuracy": 99.0, "formal_model_link": "", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "3.5 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "dlrm-99", "input_data_types": "int8", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.0, "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/Supermicro/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/Supermicro/results/SYS-420GP-TNAR_A100-SXM-80GBx8_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "tb00_40M.pt", "status": "preview", "submitter": "Supermicro", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Supermicro", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/SYS-420GP-TNAR_A100-SXM-80GBx8_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 2290000.0, "task": "recommendation", "task2": "recommendation", "total_cores": 240, "uid": "3f301843047179b9", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" } ]