[
  {
    "50.00 percentile latency (ns)": 443127891,
    "90.00 percentile latency (ns)": 607817880,
    "95.00 percentile latency (ns)": 630240160,
    "97.00 percentile latency (ns)": 688589289,
    "99.00 percentile latency (ns)": 707406774,
    "99.90 percentile latency (ns)": 790473453,
    "Completed samples per second": 32556.39,
    "Max latency (ns)": 898314000,
    "Mean latency (ns)": 468202295,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 57535297,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Performance constraints satisfied": "Yes",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Scenario": "server",
    "Scheduled samples per second": 32577.05,
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIE-40GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 3,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.54549144287357,
    "characteristics.scheduled_queries_per_second": 32577.05,
    "characteristics.scheduled_queries_per_second.normalized_per_core": 10859.016666666666,
    "characteristics.scheduled_queries_per_second.normalized_per_processor": 10859.016666666666,
    "characteristics.word error rate": 7.45450855712643,
    "ck_system": "R7525_A100-PCIE-40GBx3_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.scheduled_queries_per_second",
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_memory_capacity": "512 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 32,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7502",
    "host_processors_per_node": 2,
    "host_storage_capacity": "1.8 TB",
    "host_storage_type": "SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 0,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 270336,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 3,
    "normalize_processors": 3,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/R7525_A100-PCIE-40GBx3_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "CentOS 8.2",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 1,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_A100-PCIE-40GBx3_TRT",
    "system_name": "Dell EMC PowerEdge R7525 (3x A100-PCIE-40GB, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 1000000000,
    "target_qps": 32580,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 64,
    "uid": "441885c019cf0e1d",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 398023774,
    "90.00 percentile latency (ns)": 540956588,
    "95.00 percentile latency (ns)": 587238260,
    "97.00 percentile latency (ns)": 612792945,
    "99.00 percentile latency (ns)": 629150271,
    "99.90 percentile latency (ns)": 704284571,
    "Completed samples per second": 48483.57,
    "Max latency (ns)": 803727106,
    "Mean latency (ns)": 420617026,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 39980330,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Performance constraints satisfied": "Yes",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Scenario": "server",
    "Scheduled samples per second": 48504.44,
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-PCIE-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.55000112742113,
    "characteristics.scheduled_queries_per_second": 48504.44,
    "characteristics.scheduled_queries_per_second.normalized_per_core": 12126.11,
    "characteristics.scheduled_queries_per_second.normalized_per_processor": 12126.11,
    "characteristics.word error rate": 7.449998872578862,
    "ck_system": "R750xa_A100-PCIE-80GBx4_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.scheduled_queries_per_second",
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "3.5 TB",
    "host_storage_type": "SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 0,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 270336,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/R750xa_A100-PCIE-80GBx4_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "CentOS 8.2",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 1,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R750xa_A100-PCIE-80GBx4_TRT",
    "system_name": "Dell EMC PowerEdge R750xa (4x A100-PCIE-80GB, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 1000000000,
    "target_qps": 48500,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 128,
    "uid": "181459829e47b497",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 153915871,
    "90.00 percentile latency (ns)": 216466463,
    "95.00 percentile latency (ns)": 252229324,
    "97.00 percentile latency (ns)": 259722743,
    "99.00 percentile latency (ns)": 267813246,
    "99.90 percentile latency (ns)": 281790720,
    "Completed samples per second": 3301.67,
    "Max latency (ns)": 334832425,
    "Mean latency (ns)": 158293487,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 50453863,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Performance constraints satisfied": "Yes",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Scenario": "server",
    "Scheduled samples per second": 3302.19,
    "accelerator_cooling_type": "passive",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA A10",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 2,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "2.12.3",
    "characteristics.accuracy": 92.54549144287357,
    "characteristics.power": 540.312066666667,
    "characteristics.power.normalized_per_core": 270.1560333333335,
    "characteristics.power.normalized_per_processor": 270.1560333333335,
    "characteristics.scheduled_queries_per_second": 3302.19,
    "characteristics.scheduled_queries_per_second.normalized_per_core": 1651.095,
    "characteristics.scheduled_queries_per_second.normalized_per_processor": 1651.095,
    "characteristics.word error rate": 7.45450855712643,
    "ck_system": "XE2420_A10x2_TRT_MaxQ",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.scheduled_queries_per_second",
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "division": "closed",
    "filesystem": "ext3/ext4",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_cooling_type": "air",
    "host_memory_capacity": "384 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 24,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Gold 6252 CPU @ 2.10GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "ECC on",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "5.00.00.00",
    "max_async_queries": 0,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 270336,
    "mlperf_version": 1.1,
    "normalize_cores": 2,
    "normalize_processors": 2,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/XE2420_A10x2_TRT_MaxQ",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 20.04.2",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.57.02, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_settings": "closed/Dell/power/XE2420_A10x2_power_settings.md",
    "print_timestamps": 0,
    "problem": false,
    "psu_details": "2x2000W",
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 1,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_cooling_type": "air",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE2420_A10x2_TRT_MaxQ",
    "system_name": "Dell EMC PowerEdge XE2420 (2x A10, MaxQ, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 1000000000,
    "target_qps": 3300,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 48,
    "uid": "0a2cc03c6d4139b0",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 537416303,
    "90.00 percentile latency (ns)": 742691063,
    "95.00 percentile latency (ns)": 790249792,
    "97.00 percentile latency (ns)": 826606213,
    "99.00 percentile latency (ns)": 880105485,
    "99.90 percentile latency (ns)": 965841808,
    "Completed samples per second": 117746.1,
    "Max latency (ns)": 1172543151,
    "Mean latency (ns)": 548747507,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 45704365,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Performance constraints satisfied": "Yes",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Scenario": "server",
    "Scheduled samples per second": 117865.66,
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-PCIE-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 10,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.5545108119687,
    "characteristics.scheduled_queries_per_second": 117865.66,
    "characteristics.scheduled_queries_per_second.normalized_per_core": 11786.566,
    "characteristics.scheduled_queries_per_second.normalized_per_processor": 11786.566,
    "characteristics.word error rate": 7.4454891880312974,
    "ck_system": "DSS8440_A100-PCIE-80GBx10_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.scheduled_queries_per_second",
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_memory_capacity": " 768GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 24,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Gold 6248R CPU @ 3.00GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "1.8 TB",
    "host_storage_type": "SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 0,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 270336,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 10,
    "normalize_processors": 10,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/DSS8440_A100-PCIE-80GBx10_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "CentOS 8.2",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 1,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DSS8440_A100-PCIE-80GBx10_TRT",
    "system_name": "Dell EMC DSS 8440 (10x NVIDIA A100-PCIE-80GB, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 1000000000,
    "target_qps": 117851,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 48,
    "uid": "cc8bef9498e31b52",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 466073592,
    "90.00 percentile latency (ns)": 727422219,
    "95.00 percentile latency (ns)": 810936575,
    "97.00 percentile latency (ns)": 872434414,
    "99.00 percentile latency (ns)": 991519818,
    "99.90 percentile latency (ns)": 1265565419,
    "Completed samples per second": 35284.26,
    "Max latency (ns)": 1742274157,
    "Mean latency (ns)": 490005325,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 58953113,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Performance constraints satisfied": "Yes",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Scenario": "server",
    "Scheduled samples per second": 35299.01,
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.55000112742113,
    "characteristics.scheduled_queries_per_second": 35299.01,
    "characteristics.scheduled_queries_per_second.normalized_per_core": 4412.37625,
    "characteristics.scheduled_queries_per_second.normalized_per_processor": 4412.37625,
    "characteristics.word error rate": 7.449998872578862,
    "ck_system": "DSS8440_A30x8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.scheduled_queries_per_second",
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 48,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Gold 6248R CPU @ 3.00GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "3 TB",
    "host_storage_type": "NVMe",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 0,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 270336,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/DSS8440_A30x8_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "CentOS 8.2",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 1,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DSS8440_A30x8_TRT",
    "system_name": "Dell EMC DSS 8440 (8x A30, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 1000000000,
    "target_qps": 35300,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 96,
    "uid": "eb4a43a9d4869064",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 351581473,
    "90.00 percentile latency (ns)": 509820992,
    "95.00 percentile latency (ns)": 557795370,
    "97.00 percentile latency (ns)": 586769610,
    "99.00 percentile latency (ns)": 644434079,
    "99.90 percentile latency (ns)": 739150474,
    "Completed samples per second": 10887.37,
    "Max latency (ns)": 899944851,
    "Mean latency (ns)": 362262747,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 82373007,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Performance constraints satisfied": "Yes",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Scenario": "server",
    "Scheduled samples per second": 10891.59,
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 3,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.55000112742113,
    "characteristics.scheduled_queries_per_second": 10891.59,
    "characteristics.scheduled_queries_per_second.normalized_per_core": 3630.53,
    "characteristics.scheduled_queries_per_second.normalized_per_processor": 3630.53,
    "characteristics.word error rate": 7.449998872578862,
    "ck_system": "R7525_A30x3_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.scheduled_queries_per_second",
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7763",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 0,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 270336,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 3,
    "normalize_processors": 3,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/R7525_A30x3_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "CentOS 8.4.2105",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 1,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/R7525_A30x3_TRT",
    "system_name": "Dell EMC PowerEdge R7525 (3x A30, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 1000000000,
    "target_qps": 10890,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 128,
    "uid": "e0acd593682a43f1",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 463738474,
    "90.00 percentile latency (ns)": 655749992,
    "95.00 percentile latency (ns)": 716523594,
    "97.00 percentile latency (ns)": 751083842,
    "99.00 percentile latency (ns)": 794691169,
    "99.90 percentile latency (ns)": 882905550,
    "Completed samples per second": 6899.69,
    "Max latency (ns)": 1026928934,
    "Mean latency (ns)": 471241208,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 43378328,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Performance constraints satisfied": "Yes",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Scenario": "server",
    "Scheduled samples per second": 6902.14,
    "accelerator_cooling_type": "passive",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA A10",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 2,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "2.12.3",
    "characteristics.accuracy": 92.5567656542425,
    "characteristics.scheduled_queries_per_second": 6902.14,
    "characteristics.scheduled_queries_per_second.normalized_per_core": 3451.07,
    "characteristics.scheduled_queries_per_second.normalized_per_processor": 3451.07,
    "characteristics.word error rate": 7.4432343457575145,
    "ck_system": "XE2420_A10x2_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.scheduled_queries_per_second",
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "division": "closed",
    "filesystem": "ext3/ext4",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_cooling_type": "air",
    "host_memory_capacity": "384 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 24,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Gold 6252 CPU @ 2.10GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "ECC on",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "5.00.00.00",
    "max_async_queries": 0,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 270336,
    "mlperf_version": 1.1,
    "normalize_cores": 2,
    "normalize_processors": 2,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/XE2420_A10x2_TRT",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 20.04.2",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.57.02, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_settings": "closed/Dell/power/XE2420_A10x2_power_settings.md",
    "print_timestamps": 0,
    "problem": false,
    "psu_details": "2x2000W",
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 1,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_cooling_type": "air",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE2420_A10x2_TRT",
    "system_name": "Dell EMC PowerEdge XE2420 (2x A10, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 1000000000,
    "target_qps": 6900,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 48,
    "uid": "4861adfd0e8285ac",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 422344057,
    "90.00 percentile latency (ns)": 529303234,
    "95.00 percentile latency (ns)": 566506657,
    "97.00 percentile latency (ns)": 582514961,
    "99.00 percentile latency (ns)": 623719903,
    "99.90 percentile latency (ns)": 682243563,
    "Completed samples per second": 55172.96,
    "Max latency (ns)": 792076350,
    "Mean latency (ns)": 423713592,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 36806111,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Performance constraints satisfied": "Yes",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Scenario": "server",
    "Scheduled samples per second": 55204.1,
    "accelerator_cooling_type": "",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "2.2.5",
    "characteristics.accuracy": 92.55000112742113,
    "characteristics.power": 2629.9176372712145,
    "characteristics.power.normalized_per_core": 657.4794093178036,
    "characteristics.power.normalized_per_processor": 657.4794093178036,
    "characteristics.scheduled_queries_per_second": 55204.1,
    "characteristics.scheduled_queries_per_second.normalized_per_core": 13801.025,
    "characteristics.scheduled_queries_per_second.normalized_per_processor": 13801.025,
    "characteristics.word error rate": 7.449998872578862,
    "ck_system": "XE8545_A100-SXM-80GBx4_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.scheduled_queries_per_second",
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "division": "closed",
    "filesystem": "ext3/ext4",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_cooling_type": "",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7763",
    "host_processors_per_node": 2,
    "host_storage_capacity": "3 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "500W A100-SXM-80GB",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "4.40.40.151",
    "max_async_queries": 0,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 270336,
    "mlperf_version": 1.1,
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/Dell/results/XE8545_A100-SXM-80GBx4_TRT",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 20.04.2",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.57.02, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_settings": "closed/Dell/power/XE8545_A100-SXM-80GBx4_power_settings.md",
    "print_timestamps": 0,
    "problem": false,
    "psu_details": "4x2400W",
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "N",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 1,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "Dell",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/Dell",
    "sw_notes": "",
    "system_cooling_type": "air",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/XE8545_A100-SXM-80GBx4_TRT",
    "system_name": "Dell EMC PowerEdge XE8545 (4x A100-SXM-80GB, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 1000000000,
    "target_qps": 55200,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 128,
    "uid": "46904a5ba7e9b193",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  }
]