[ { "50.00 percentile latency (ns)": 8416907, "90.00 percentile latency (ns)": 13041290, "95.00 percentile latency (ns)": 16059948, "97.00 percentile latency (ns)": 17846438, "99.00 percentile latency (ns)": 20798726, "99.90 percentile latency (ns)": 27147578, "Completed samples per second": 764555.94, "Max latency (ns)": 43334440, "Mean latency (ns)": 8869521, "Min duration satisfied": "Yes", "Min latency (ns)": 437650, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 764569.05, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIE-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 3, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 764569.05, "characteristics.scheduled_queries_per_second.normalized_per_core": 254856.35, "characteristics.scheduled_queries_per_second.normalized_per_processor": 254856.35, "ck_system": "R7525_A100-PCIE-40GBx3_TRT", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.9, "formal_model_link": "", "framework": "TensorRT 8.0.2, CUDA 11.3", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 32, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7502", "host_processors_per_node": 2, "host_storage_capacity": "1.8 TB", "host_storage_type": "SSD", "hw_notes": "", "informal_model": "dlrm-99.9", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 3, "normalize_processors": 3, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/R7525_A100-PCIE-40GBx3_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "CentOS 8.2", "other_hardware": "", "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Dell", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_A100-PCIE-40GBx3_TRT", "system_name": "Dell EMC PowerEdge R7525 (3x A100-PCIE-40GB, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 764250, "task": "recommendation", "task2": "recommendation", "total_cores": 64, "uid": "1de1b4a1fb3213ea", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 8416907, "90.00 percentile latency (ns)": 13041290, "95.00 percentile latency (ns)": 16059948, "97.00 percentile latency (ns)": 17846438, "99.00 percentile latency (ns)": 20798726, "99.90 percentile latency (ns)": 27147578, "Completed samples per second": 764555.94, "Max latency (ns)": 43334440, "Mean latency (ns)": 8869521, "Min duration satisfied": "Yes", "Min latency (ns)": 437650, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 764569.05, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIE-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 3, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 764569.05, "characteristics.scheduled_queries_per_second.normalized_per_core": 254856.35, "characteristics.scheduled_queries_per_second.normalized_per_processor": 254856.35, "ck_system": "R7525_A100-PCIE-40GBx3_TRT", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.0, "formal_model_link": "", "framework": "TensorRT 8.0.2, CUDA 11.3", "host_memory_capacity": "512 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 32, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7502", "host_processors_per_node": 2, "host_storage_capacity": "1.8 TB", "host_storage_type": "SSD", "hw_notes": "", "informal_model": "dlrm-99", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 3, "normalize_processors": 3, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/R7525_A100-PCIE-40GBx3_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "CentOS 8.2", "other_hardware": "", "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Dell", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_A100-PCIE-40GBx3_TRT", "system_name": "Dell EMC PowerEdge R7525 (3x A100-PCIE-40GB, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 764250, "task": "recommendation", "task2": "recommendation", "total_cores": 64, "uid": "15c84438d931bf30", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5393531, "90.00 percentile latency (ns)": 7464572, "95.00 percentile latency (ns)": 7877351, "97.00 percentile latency (ns)": 8080759, "99.00 percentile latency (ns)": 8426276, "99.90 percentile latency (ns)": 18489429, "Completed samples per second": 1130657.92, "Max latency (ns)": 47569345, "Mean latency (ns)": 5393383, "Min duration satisfied": "Yes", "Min latency (ns)": 345278, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 1130667.56, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-PCIE-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 1130667.56, "characteristics.scheduled_queries_per_second.normalized_per_core": 282666.89, "characteristics.scheduled_queries_per_second.normalized_per_processor": 282666.89, "ck_system": "R750xa_A100-PCIE-80GBx4_TRT", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.9, "formal_model_link": "", "framework": "TensorRT 8.0.2, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz", "host_processors_per_node": 2, "host_storage_capacity": "3.5 TB", "host_storage_type": "SSD", "hw_notes": "", "informal_model": "dlrm-99.9", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/R750xa_A100-PCIE-80GBx4_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "CentOS 8.2", "other_hardware": "", "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Dell", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R750xa_A100-PCIE-80GBx4_TRT", "system_name": "Dell EMC PowerEdge R750xa (4x A100-PCIE-80GB, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 1130000.0, "task": "recommendation", "task2": "recommendation", "total_cores": 128, "uid": "87b3e9e8401b3ad8", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5393531, "90.00 percentile latency (ns)": 7464572, "95.00 percentile latency (ns)": 7877351, "97.00 percentile latency (ns)": 8080759, "99.00 percentile latency (ns)": 8426276, "99.90 percentile latency (ns)": 18489429, "Completed samples per second": 1130657.92, "Max latency (ns)": 47569345, "Mean latency (ns)": 5393383, "Min duration satisfied": "Yes", "Min latency (ns)": 345278, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 1130667.56, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-PCIE-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 1130667.56, "characteristics.scheduled_queries_per_second.normalized_per_core": 282666.89, "characteristics.scheduled_queries_per_second.normalized_per_processor": 282666.89, "ck_system": "R750xa_A100-PCIE-80GBx4_TRT", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.0, "formal_model_link": "", "framework": "TensorRT 8.0.2, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz", "host_processors_per_node": 2, "host_storage_capacity": "3.5 TB", "host_storage_type": "SSD", "hw_notes": "", "informal_model": "dlrm-99", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/R750xa_A100-PCIE-80GBx4_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "CentOS 8.2", "other_hardware": "", "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Dell", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R750xa_A100-PCIE-80GBx4_TRT", "system_name": "Dell EMC PowerEdge R750xa (4x A100-PCIE-80GB, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 1130000.0, "task": "recommendation", "task2": "recommendation", "total_cores": 128, "uid": "ca10a92e34b6a953", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 1534962, "90.00 percentile latency (ns)": 2418350, "95.00 percentile latency (ns)": 2742197, "97.00 percentile latency (ns)": 3028254, "99.00 percentile latency (ns)": 3903655, "99.90 percentile latency (ns)": 7219455, "Completed samples per second": 125518.22, "Max latency (ns)": 14349874, "Mean latency (ns)": 1607610, "Min duration satisfied": "Yes", "Min latency (ns)": 221583, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 125518.34, "accelerator_cooling_type": "passive", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA A10", "accelerator_on-chip_memories": "", "accelerators_per_node": 2, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "2.12.3", "characteristics.power": 573.8927166666659, "characteristics.power.normalized_per_core": 286.94635833333297, "characteristics.power.normalized_per_processor": 286.94635833333297, "characteristics.scheduled_queries_per_second": 125518.34, "characteristics.scheduled_queries_per_second.normalized_per_core": 62759.17, "characteristics.scheduled_queries_per_second.normalized_per_processor": 62759.17, "ck_system": "XE2420_A10x2_TRT_MaxQ", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "division": "closed", "filesystem": "ext3/ext4", "formal_model": "dlrm", "formal_model_accuracy": 99.9, "formal_model_link": "", "framework": "TensorRT 8.0.2, CUDA 11.3", "host_cooling_type": "air", "host_memory_capacity": "384 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 24, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Gold 6252 CPU @ 2.10GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC on", "informal_model": "dlrm-99.9", "input_data_types": "int8", "management_firmware_version": "5.00.00.00", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "normalize_cores": 2, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/XE2420_A10x2_TRT_MaxQ", "number_of_nodes": 1, "operating_system": "Ubuntu 20.04.2", "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.57.02, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_settings": "closed/Dell/power/XE2420_A10x2_power_settings.md", "print_timestamps": 0, "problem": false, "psu_details": "2x2000W", "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Dell", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell", "sw_notes": "", "system_cooling_type": "air", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE2420_A10x2_TRT_MaxQ", "system_name": "Dell EMC PowerEdge XE2420 (2x A10, MaxQ, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 125500, "task": "recommendation", "task2": "recommendation", "total_cores": 48, "uid": "2404d89abadc2d63", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 1534962, "90.00 percentile latency (ns)": 2418350, "95.00 percentile latency (ns)": 2742197, "97.00 percentile latency (ns)": 3028254, "99.00 percentile latency (ns)": 3903655, "99.90 percentile latency (ns)": 7219455, "Completed samples per second": 125518.22, "Max latency (ns)": 14349874, "Mean latency (ns)": 1607610, "Min duration satisfied": "Yes", "Min latency (ns)": 221583, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 125518.34, "accelerator_cooling_type": "passive", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA A10", "accelerator_on-chip_memories": "", "accelerators_per_node": 2, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "2.12.3", "characteristics.power": 573.8927166666659, "characteristics.power.normalized_per_core": 286.94635833333297, "characteristics.power.normalized_per_processor": 286.94635833333297, "characteristics.scheduled_queries_per_second": 125518.34, "characteristics.scheduled_queries_per_second.normalized_per_core": 62759.17, "characteristics.scheduled_queries_per_second.normalized_per_processor": 62759.17, "ck_system": "XE2420_A10x2_TRT_MaxQ", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "division": "closed", "filesystem": "ext3/ext4", "formal_model": "dlrm", "formal_model_accuracy": 99.0, "formal_model_link": "", "framework": "TensorRT 8.0.2, CUDA 11.3", "host_cooling_type": "air", "host_memory_capacity": "384 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 24, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Gold 6252 CPU @ 2.10GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC on", "informal_model": "dlrm-99", "input_data_types": "int8", "management_firmware_version": "5.00.00.00", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "normalize_cores": 2, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/XE2420_A10x2_TRT_MaxQ", "number_of_nodes": 1, "operating_system": "Ubuntu 20.04.2", "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.57.02, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_settings": "closed/Dell/power/XE2420_A10x2_power_settings.md", "print_timestamps": 0, "problem": false, "psu_details": "2x2000W", "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Dell", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell", "sw_notes": "", "system_cooling_type": "air", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE2420_A10x2_TRT_MaxQ", "system_name": "Dell EMC PowerEdge XE2420 (2x A10, MaxQ, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 125500, "task": "recommendation", "task2": "recommendation", "total_cores": 48, "uid": "91df6b4637b29fb3", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5591352, "90.00 percentile latency (ns)": 6400770, "95.00 percentile latency (ns)": 6539085, "97.00 percentile latency (ns)": 6628359, "99.00 percentile latency (ns)": 7235654, "99.90 percentile latency (ns)": 11232527, "Completed samples per second": 500134.38, "Max latency (ns)": 14224619, "Mean latency (ns)": 5618709, "Min duration satisfied": "Yes", "Min latency (ns)": 4318994, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_Server", "Scenario": "server", "Scheduled samples per second": 500138.17, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-PCIE-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 500138.17, "characteristics.scheduled_queries_per_second.normalized_per_core": 125034.5425, "characteristics.scheduled_queries_per_second.normalized_per_processor": 125034.5425, "ck_system": "R750xa_A100-PCIE-80GBx4_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.9, "formal_model_link": "", "framework": "TensorRT 8.0.2, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz", "host_processors_per_node": 2, "host_storage_capacity": "3.5 TB", "host_storage_type": "SSD", "hw_notes": "", "informal_model": "dlrm-99.9", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/R750xa_A100-PCIE-80GBx4_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "CentOS 8.2", "other_hardware": "", "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Dell", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R750xa_A100-PCIE-80GBx4_TRT_Triton", "system_name": "Dell EMC PowerEdge R750xa (4x A100-PCIE-80GB, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 500000, "task": "recommendation", "task2": "recommendation", "total_cores": 128, "uid": "d704b70875a0a17e", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 5591352, "90.00 percentile latency (ns)": 6400770, "95.00 percentile latency (ns)": 6539085, "97.00 percentile latency (ns)": 6628359, "99.00 percentile latency (ns)": 7235654, "99.90 percentile latency (ns)": 11232527, "Completed samples per second": 500134.38, "Max latency (ns)": 14224619, "Mean latency (ns)": 5618709, "Min duration satisfied": "Yes", "Min latency (ns)": 4318994, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "Triton_Server", "Scenario": "server", "Scheduled samples per second": 500138.17, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-PCIE-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 500138.17, "characteristics.scheduled_queries_per_second.normalized_per_core": 125034.5425, "characteristics.scheduled_queries_per_second.normalized_per_processor": 125034.5425, "ck_system": "R750xa_A100-PCIE-80GBx4_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.0, "formal_model_link": "", "framework": "TensorRT 8.0.2, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz", "host_processors_per_node": 2, "host_storage_capacity": "3.5 TB", "host_storage_type": "SSD", "hw_notes": "", "informal_model": "dlrm-99", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/R750xa_A100-PCIE-80GBx4_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "CentOS 8.2", "other_hardware": "", "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Dell", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R750xa_A100-PCIE-80GBx4_TRT_Triton", "system_name": "Dell EMC PowerEdge R750xa (4x A100-PCIE-80GB, TensorRT, Triton)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 500000, "task": "recommendation", "task2": "recommendation", "total_cores": 128, "uid": "d74cefa336380fb5", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 8244448, "90.00 percentile latency (ns)": 13244852, "95.00 percentile latency (ns)": 14133677, "97.00 percentile latency (ns)": 14651863, "99.00 percentile latency (ns)": 15613836, "99.90 percentile latency (ns)": 17143635, "Completed samples per second": 800333.32, "Max latency (ns)": 23282420, "Mean latency (ns)": 7472351, "Min duration satisfied": "Yes", "Min latency (ns)": 142483, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 800340.76, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 800340.76, "characteristics.scheduled_queries_per_second.normalized_per_core": 100042.595, "characteristics.scheduled_queries_per_second.normalized_per_processor": 100042.595, "ck_system": "DSS8440_A30x8_TRT", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.9, "formal_model_link": "", "framework": "TensorRT 8.0.2, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 48, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Gold 6248R CPU @ 3.00GHz", "host_processors_per_node": 2, "host_storage_capacity": "3 TB", "host_storage_type": "NVMe", "hw_notes": "", "informal_model": "dlrm-99.9", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/DSS8440_A30x8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "CentOS 8.2", "other_hardware": "", "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Dell", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DSS8440_A30x8_TRT", "system_name": "Dell EMC DSS 8440 (8x A30, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 800000, "task": "recommendation", "task2": "recommendation", "total_cores": 96, "uid": "a34ea2b1817e6f5a", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 8244448, "90.00 percentile latency (ns)": 13244852, "95.00 percentile latency (ns)": 14133677, "97.00 percentile latency (ns)": 14651863, "99.00 percentile latency (ns)": 15613836, "99.90 percentile latency (ns)": 17143635, "Completed samples per second": 800333.32, "Max latency (ns)": 23282420, "Mean latency (ns)": 7472351, "Min duration satisfied": "Yes", "Min latency (ns)": 142483, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 800340.76, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30", "accelerator_on-chip_memories": "", "accelerators_per_node": 8, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 800340.76, "characteristics.scheduled_queries_per_second.normalized_per_core": 100042.595, "characteristics.scheduled_queries_per_second.normalized_per_processor": 100042.595, "ck_system": "DSS8440_A30x8_TRT", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.0, "formal_model_link": "", "framework": "TensorRT 8.0.2, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 48, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Gold 6248R CPU @ 3.00GHz", "host_processors_per_node": 2, "host_storage_capacity": "3 TB", "host_storage_type": "NVMe", "hw_notes": "", "informal_model": "dlrm-99", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 8, "normalize_processors": 8, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/DSS8440_A30x8_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "CentOS 8.2", "other_hardware": "", "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Dell", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DSS8440_A30x8_TRT", "system_name": "Dell EMC DSS 8440 (8x A30, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 800000, "task": "recommendation", "task2": "recommendation", "total_cores": 96, "uid": "f9bd504dc39468a8", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 734219, "90.00 percentile latency (ns)": 1270546, "95.00 percentile latency (ns)": 1626229, "97.00 percentile latency (ns)": 2005750, "99.00 percentile latency (ns)": 3410111, "99.90 percentile latency (ns)": 6082072, "Completed samples per second": 220963.28, "Max latency (ns)": 19544230, "Mean latency (ns)": 849565, "Min duration satisfied": "Yes", "Min latency (ns)": 150693, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 220963.44, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30", "accelerator_on-chip_memories": "", "accelerators_per_node": 3, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 220963.44, "characteristics.scheduled_queries_per_second.normalized_per_core": 73654.48, "characteristics.scheduled_queries_per_second.normalized_per_processor": 73654.48, "ck_system": "R7525_A30x3_TRT", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.9, "formal_model_link": "", "framework": "TensorRT 8.0.2, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7763", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "dlrm-99.9", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 3, "normalize_processors": 3, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/R7525_A30x3_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "CentOS 8.4.2105", "other_hardware": "", "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Dell", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_A30x3_TRT", "system_name": "Dell EMC PowerEdge R7525 (3x A30, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 220925, "task": "recommendation", "task2": "recommendation", "total_cores": 128, "uid": "9301ccc544038b33", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 734219, "90.00 percentile latency (ns)": 1270546, "95.00 percentile latency (ns)": 1626229, "97.00 percentile latency (ns)": 2005750, "99.00 percentile latency (ns)": 3410111, "99.90 percentile latency (ns)": 6082072, "Completed samples per second": 220963.28, "Max latency (ns)": 19544230, "Mean latency (ns)": 849565, "Min duration satisfied": "Yes", "Min latency (ns)": 150693, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 220963.44, "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30", "accelerator_on-chip_memories": "", "accelerators_per_node": 3, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.scheduled_queries_per_second": 220963.44, "characteristics.scheduled_queries_per_second.normalized_per_core": 73654.48, "characteristics.scheduled_queries_per_second.normalized_per_processor": 73654.48, "ck_system": "R7525_A30x3_TRT", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "dlrm", "formal_model_accuracy": 99.0, "formal_model_link": "", "framework": "TensorRT 8.0.2, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7763", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "dlrm-99", "input_data_types": "int8", "management_firmware_version": "", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 3, "normalize_processors": 3, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/R7525_A30x3_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "CentOS 8.4.2105", "other_hardware": "", "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Dell", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_A30x3_TRT", "system_name": "Dell EMC PowerEdge R7525 (3x A30, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 220925, "task": "recommendation", "task2": "recommendation", "total_cores": 128, "uid": "4b2840cea18177bf", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 4396316, "90.00 percentile latency (ns)": 6209967, "95.00 percentile latency (ns)": 7796823, "97.00 percentile latency (ns)": 10080727, "99.00 percentile latency (ns)": 15351913, "99.90 percentile latency (ns)": 26672313, "Completed samples per second": 190535.31, "Max latency (ns)": 39071746, "Mean latency (ns)": 4718733, "Min duration satisfied": "Yes", "Min latency (ns)": 274766, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 190536.07, "accelerator_cooling_type": "passive", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA A10", "accelerator_on-chip_memories": "", "accelerators_per_node": 2, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "2.12.3", "characteristics.scheduled_queries_per_second": 190536.07, "characteristics.scheduled_queries_per_second.normalized_per_core": 95268.035, "characteristics.scheduled_queries_per_second.normalized_per_processor": 95268.035, "ck_system": "XE2420_A10x2_TRT", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "division": "closed", "filesystem": "ext3/ext4", "formal_model": "dlrm", "formal_model_accuracy": 99.9, "formal_model_link": "", "framework": "TensorRT 8.0.2, CUDA 11.3", "host_cooling_type": "air", "host_memory_capacity": "384 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 24, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Gold 6252 CPU @ 2.10GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC on", "informal_model": "dlrm-99.9", "input_data_types": "int8", "management_firmware_version": "5.00.00.00", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "normalize_cores": 2, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/XE2420_A10x2_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 20.04.2", "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.57.02, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_settings": "closed/Dell/power/XE2420_A10x2_power_settings.md", "print_timestamps": 0, "problem": false, "psu_details": "2x2000W", "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Dell", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell", "sw_notes": "", "system_cooling_type": "air", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE2420_A10x2_TRT", "system_name": "Dell EMC PowerEdge XE2420 (2x A10, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 190500, "task": "recommendation", "task2": "recommendation", "total_cores": 48, "uid": "68b19e5f5049ee62", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 4396316, "90.00 percentile latency (ns)": 6209967, "95.00 percentile latency (ns)": 7796823, "97.00 percentile latency (ns)": 10080727, "99.00 percentile latency (ns)": 15351913, "99.90 percentile latency (ns)": 26672313, "Completed samples per second": 190535.31, "Max latency (ns)": 39071746, "Mean latency (ns)": 4718733, "Min duration satisfied": "Yes", "Min latency (ns)": 274766, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 190536.07, "accelerator_cooling_type": "passive", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA A10", "accelerator_on-chip_memories": "", "accelerators_per_node": 2, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "2.12.3", "characteristics.scheduled_queries_per_second": 190536.07, "characteristics.scheduled_queries_per_second.normalized_per_core": 95268.035, "characteristics.scheduled_queries_per_second.normalized_per_processor": 95268.035, "ck_system": "XE2420_A10x2_TRT", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "division": "closed", "filesystem": "ext3/ext4", "formal_model": "dlrm", "formal_model_accuracy": 99.0, "formal_model_link": "", "framework": "TensorRT 8.0.2, CUDA 11.3", "host_cooling_type": "air", "host_memory_capacity": "384 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 24, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Gold 6252 CPU @ 2.10GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC on", "informal_model": "dlrm-99", "input_data_types": "int8", "management_firmware_version": "5.00.00.00", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "normalize_cores": 2, "normalize_processors": 2, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/XE2420_A10x2_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 20.04.2", "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.57.02, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_settings": "closed/Dell/power/XE2420_A10x2_power_settings.md", "print_timestamps": 0, "problem": false, "psu_details": "2x2000W", "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Dell", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell", "sw_notes": "", "system_cooling_type": "air", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE2420_A10x2_TRT", "system_name": "Dell EMC PowerEdge XE2420 (2x A10, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 190500, "task": "recommendation", "task2": "recommendation", "total_cores": 48, "uid": "b094afeb15b7ff4a", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 2489545, "90.00 percentile latency (ns)": 4143542, "95.00 percentile latency (ns)": 4812286, "97.00 percentile latency (ns)": 5949147, "99.00 percentile latency (ns)": 9269024, "99.90 percentile latency (ns)": 21037171, "Completed samples per second": 1200745.4, "Max latency (ns)": 60454310, "Mean latency (ns)": 2763587, "Min duration satisfied": "Yes", "Min latency (ns)": 221299, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 1200747.55, "accelerator_cooling_type": "", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "2.2.5", "characteristics.power": 2796.3493333333363, "characteristics.power.normalized_per_core": 699.0873333333341, "characteristics.power.normalized_per_processor": 699.0873333333341, "characteristics.scheduled_queries_per_second": 1200747.55, "characteristics.scheduled_queries_per_second.normalized_per_core": 300186.8875, "characteristics.scheduled_queries_per_second.normalized_per_processor": 300186.8875, "ck_system": "XE8545_A100-SXM-80GBx4_TRT", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "division": "closed", "filesystem": "ext3/ext4", "formal_model": "dlrm", "formal_model_accuracy": 99.9, "formal_model_link": "", "framework": "TensorRT 8.0.2, CUDA 11.3", "host_cooling_type": "", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7763", "host_processors_per_node": 2, "host_storage_capacity": "3 TB", "host_storage_type": "NVMe SSD", "hw_notes": "500W A100-SXM-80GB", "informal_model": "dlrm-99.9", "input_data_types": "int8", "management_firmware_version": "4.40.40.151", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/XE8545_A100-SXM-80GBx4_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 20.04.2", "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.57.02, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_settings": "closed/Dell/power/XE8545_A100-SXM-80GBx4_power_settings.md", "print_timestamps": 0, "problem": false, "psu_details": "4x2400W", "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Dell", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell", "sw_notes": "", "system_cooling_type": "air", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE8545_A100-SXM-80GBx4_TRT", "system_name": "Dell EMC PowerEdge XE8545 (4x A100-SXM-80GB, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 1200000.0, "task": "recommendation", "task2": "recommendation", "total_cores": 128, "uid": "d906c4a76bf8d8ee", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 2489545, "90.00 percentile latency (ns)": 4143542, "95.00 percentile latency (ns)": 4812286, "97.00 percentile latency (ns)": 5949147, "99.00 percentile latency (ns)": 9269024, "99.90 percentile latency (ns)": 21037171, "Completed samples per second": 1200745.4, "Max latency (ns)": 60454310, "Mean latency (ns)": 2763587, "Min duration satisfied": "Yes", "Min latency (ns)": 221299, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Performance constraints satisfied": "Yes", "Result is": "VALID", "SUT name": "DLRM SERVER", "Scenario": "server", "Scheduled samples per second": 1200747.55, "accelerator_cooling_type": "", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 4, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "2.2.5", "characteristics.power": 2796.3493333333363, "characteristics.power.normalized_per_core": 699.0873333333341, "characteristics.power.normalized_per_processor": 699.0873333333341, "characteristics.scheduled_queries_per_second": 1200747.55, "characteristics.scheduled_queries_per_second.normalized_per_core": 300186.8875, "characteristics.scheduled_queries_per_second.normalized_per_processor": 300186.8875, "ck_system": "XE8545_A100-SXM-80GBx4_TRT", "ck_used": false, "cooling": "", "dataset": "1TB Click Logs", "dataset_link": "", "dim_x_default": "characteristics.scheduled_queries_per_second", "dim_y_default": "characteristics.AUC", "dim_y_maximize": true, "division": "closed", "filesystem": "ext3/ext4", "formal_model": "dlrm", "formal_model_accuracy": 99.0, "formal_model_link": "", "framework": "TensorRT 8.0.2, CUDA 11.3", "host_cooling_type": "", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7763", "host_processors_per_node": 2, "host_storage_capacity": "3 TB", "host_storage_type": "NVMe SSD", "hw_notes": "500W A100-SXM-80GB", "informal_model": "dlrm-99", "input_data_types": "int8", "management_firmware_version": "4.40.40.151", "max_async_queries": 0, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 270336, "mlperf_version": 1.1, "normalize_cores": 4, "normalize_processors": 4, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/XE8545_A100-SXM-80GBx4_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 20.04.2", "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.57.02, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 204800, "power_management": "", "power_settings": "closed/Dell/power/XE8545_A100-SXM-80GBx4_power_settings.md", "print_timestamps": 0, "problem": false, "psu_details": "4x2400W", "qsl_rng_seed": 1624344308455410291, "retraining": "N", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "tb00_40M.pt", "status": "available", "submitter": "Dell", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell", "sw_notes": "", "system_cooling_type": "air", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE8545_A100-SXM-80GBx4_TRT", "system_name": "Dell EMC PowerEdge XE8545 (4x A100-SXM-80GB, TensorRT)", "system_type": "datacenter", "target_latency (ns)": 30000000, "target_qps": 1200000.0, "task": "recommendation", "task2": "recommendation", "total_cores": 128, "uid": "5b42fd95b021f1e2", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" } ]