[
  {
    "50.00 percentile latency (ns)": 433515733982,
    "90.00 percentile latency (ns)": 601914314814,
    "95.00 percentile latency (ns)": 614718476331,
    "97.00 percentile latency (ns)": 619550590012,
    "99.00 percentile latency (ns)": 624266749682,
    "99.90 percentile latency (ns)": 626351304147,
    "Max latency (ns)": 626568551922,
    "Mean latency (ns)": 397355626677,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 4922087724,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 50561.1,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.55000112742113,
    "characteristics.samples_per_second": 50561.1,
    "characteristics.samples_per_second.normalized_per_core": 12640.275,
    "characteristics.samples_per_second.normalized_per_processor": 12640.275,
    "characteristics.word error rate": 7.449998872578862,
    "ck_system": "A100-PCIe-80GB_aarch64x4_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 8.0.2, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 80,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Ampere Altra Q80-30",
    "host_processors_per_node": 1,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIe-80GB_aarch64x4_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.2, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 31680000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIe-80GB_aarch64x4_TRT",
    "system_name": "Gigabyte G242-P31 (4x A100-PCIe-80GB_aarch64, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 48000,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 80,
    "uid": "212ad7dc984890a1",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 484808210206,
    "90.00 percentile latency (ns)": 672981056642,
    "95.00 percentile latency (ns)": 687035391584,
    "97.00 percentile latency (ns)": 692089370952,
    "99.00 percentile latency (ns)": 696775766019,
    "99.90 percentile latency (ns)": 698580091383,
    "Max latency (ns)": 698705746953,
    "Mean latency (ns)": 445245928095,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 6561196295,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 52595.5,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.55000112742113,
    "characteristics.samples_per_second": 52595.5,
    "characteristics.samples_per_second.normalized_per_core": 6574.4375,
    "characteristics.samples_per_second.normalized_per_processor": 6574.4375,
    "characteristics.word error rate": 7.449998872578862,
    "ck_system": "A30x8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30x8_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 36748799,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x8_TRT",
    "system_name": "Gigabyte G482-Z54 (8x A30, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 55680,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 128,
    "uid": "f147c6a0107bbd0b",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 423052182619,
    "90.00 percentile latency (ns)": 582061063699,
    "95.00 percentile latency (ns)": 594684954323,
    "97.00 percentile latency (ns)": 599481258646,
    "99.00 percentile latency (ns)": 604229926833,
    "99.90 percentile latency (ns)": 606263733650,
    "Max latency (ns)": 606468588012,
    "Mean latency (ns)": 97604192689,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 14379875243,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 104474,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.5545108119687,
    "characteristics.samples_per_second": 104474,
    "characteristics.samples_per_second.normalized_per_core": 13059.25,
    "characteristics.samples_per_second.normalized_per_processor": 13059.25,
    "characteristics.word error rate": 7.4454891880312974,
    "ck_system": "A100-PCIe-80GBx8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIe-80GBx8_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 63360000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIe-80GBx8_TRT",
    "system_name": "Gigabyte G482-Z54 (8x A100-PCIe-80GB, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 96000,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 128,
    "uid": "01dad14811b092e4",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 488298084165,
    "90.00 percentile latency (ns)": 673922358938,
    "95.00 percentile latency (ns)": 688196092927,
    "97.00 percentile latency (ns)": 693553043693,
    "99.00 percentile latency (ns)": 698710213587,
    "99.90 percentile latency (ns)": 700856874601,
    "Max latency (ns)": 701072882037,
    "Mean latency (ns)": 137093897302,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 10089615109,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 84727.3,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe-40GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.5545108119687,
    "characteristics.power": 2203.948644793153,
    "characteristics.power.normalized_per_core": 275.49358059914414,
    "characteristics.power.normalized_per_processor": 275.49358059914414,
    "characteristics.samples_per_second": 84727.3,
    "characteristics.samples_per_second.normalized_per_core": 10590.9125,
    "characteristics.samples_per_second.normalized_per_processor": 10590.9125,
    "characteristics.word error rate": 7.4454891880312974,
    "ck_system": "A100-PCIex8_TRT_MaxQ",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIex8_TRT_MaxQ",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 59400000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex8_TRT_MaxQ",
    "system_name": "Gigabyte G482-Z54 (8x A100-PCIe, MaxQ, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 90000,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 128,
    "uid": "674f778845f035d0",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 321353145857,
    "90.00 percentile latency (ns)": 577885004658,
    "95.00 percentile latency (ns)": 609947580852,
    "97.00 percentile latency (ns)": 622772627798,
    "99.00 percentile latency (ns)": 635596685498,
    "99.90 percentile latency (ns)": 641376622741,
    "Max latency (ns)": 641986723878,
    "Mean latency (ns)": 52604417892,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 585598552,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 106918,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.5545108119687,
    "characteristics.samples_per_second": 106918,
    "characteristics.samples_per_second.normalized_per_core": 13364.75,
    "characteristics.samples_per_second.normalized_per_processor": 13364.75,
    "characteristics.word error rate": 7.4454891880312974,
    "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 68640000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT",
    "system_name": "NVIDIA DGX A100 (8x A100-SXM-80GB, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 104000,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 128,
    "uid": "da533488cb62d448",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 439964453749,
    "90.00 percentile latency (ns)": 610049806459,
    "95.00 percentile latency (ns)": 622893745425,
    "97.00 percentile latency (ns)": 627522616430,
    "99.00 percentile latency (ns)": 631804582881,
    "99.90 percentile latency (ns)": 633590193899,
    "Max latency (ns)": 633770932653,
    "Mean latency (ns)": 403234212426,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 5350910201,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 49986.5,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.55225596969493,
    "characteristics.samples_per_second": 49986.5,
    "characteristics.samples_per_second.normalized_per_core": 12496.625,
    "characteristics.samples_per_second.normalized_per_processor": 12496.625,
    "characteristics.word error rate": 7.4477440303050795,
    "ck_system": "DGX-Station-A100_A100-SXM-80GBx4_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "512 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 1,
    "host_storage_capacity": "10 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-Station-A100_A100-SXM-80GBx4_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 31680000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-Station-A100_A100-SXM-80GBx4_TRT",
    "system_name": "NVIDIA DGX Station A100 (4x A100-SXM-80GB, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 48000,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 64,
    "uid": "eea96addf5b0ac74",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 443217302254,
    "90.00 percentile latency (ns)": 626650080857,
    "95.00 percentile latency (ns)": 641734323466,
    "97.00 percentile latency (ns)": 647018489153,
    "99.00 percentile latency (ns)": 652193911014,
    "99.90 percentile latency (ns)": 654196181826,
    "Max latency (ns)": 654370433042,
    "Mean latency (ns)": 408277584156,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 835265898,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 1348.5,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30 (1x1g.6gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.55000112742113,
    "characteristics.samples_per_second": 1348.5,
    "characteristics.samples_per_second.normalized_per_core": 1348.5,
    "characteristics.samples_per_second.normalized_per_processor": 1348.5,
    "characteristics.word error rate": 7.449998872578862,
    "ck_system": "A30-MIG_1x1g.6gb_TRT_HeteroMultiUse",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30-MIG_1x1g.6gb_TRT_HeteroMultiUse",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 882420,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30-MIG_1x1g.6gb_TRT_HeteroMultiUse",
    "system_name": "Gigabyte G482-Z54 (1x A30-MIG-1x1g.6gb, TensorRT, HeteroMultiUse)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 1337,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 128,
    "uid": "a42f3b7bd42dcc2c",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 442100335605,
    "90.00 percentile latency (ns)": 615398569542,
    "95.00 percentile latency (ns)": 630502961508,
    "97.00 percentile latency (ns)": 636060908245,
    "99.00 percentile latency (ns)": 641203658255,
    "99.90 percentile latency (ns)": 643154003331,
    "Max latency (ns)": 643280971271,
    "Mean latency (ns)": 404641597769,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 792638037,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 1590.28,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB (1x1g.10gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.54549144287357,
    "characteristics.samples_per_second": 1590.28,
    "characteristics.samples_per_second.normalized_per_core": 1590.28,
    "characteristics.samples_per_second.normalized_per_processor": 1590.28,
    "characteristics.word error rate": 7.45450855712643,
    "ck_system": "DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 1023000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse",
    "system_name": "NVIDIA DGX A100 (1x A100-SXM-80GB-MIG-1x1g.10gb, TensorRT, HeteroMultiUse)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 1550,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 128,
    "uid": "4deeb7b906c1d95e",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 495738023943,
    "90.00 percentile latency (ns)": 686182450520,
    "95.00 percentile latency (ns)": 700301260633,
    "97.00 percentile latency (ns)": 705304284189,
    "99.00 percentile latency (ns)": 709946396047,
    "99.90 percentile latency (ns)": 711819534269,
    "Max latency (ns)": 711992040584,
    "Mean latency (ns)": 454195113255,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 5281159616,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 44494.9,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.5545108119687,
    "characteristics.power": 1180.3205056179763,
    "characteristics.power.normalized_per_core": 295.0801264044941,
    "characteristics.power.normalized_per_processor": 295.0801264044941,
    "characteristics.samples_per_second": 44494.9,
    "characteristics.samples_per_second.normalized_per_core": 11123.725,
    "characteristics.samples_per_second.normalized_per_processor": 11123.725,
    "characteristics.word error rate": 7.4454891880312974,
    "ck_system": "DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "512 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 1,
    "host_storage_capacity": "10 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 31680000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ",
    "system_name": "NVIDIA DGX Station A100 (4x A100-SXM-80GB, MaxQ, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 48000,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 64,
    "uid": "8f463f433c9e086a",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 481527814909,
    "90.00 percentile latency (ns)": 661010921869,
    "95.00 percentile latency (ns)": 675421615514,
    "97.00 percentile latency (ns)": 680837394976,
    "99.00 percentile latency (ns)": 686019412492,
    "99.90 percentile latency (ns)": 688195339886,
    "Max latency (ns)": 688397565139,
    "Mean latency (ns)": 151063472163,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 17930385993,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 92039.8,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe-40GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.5545108119687,
    "characteristics.samples_per_second": 92039.8,
    "characteristics.samples_per_second.normalized_per_core": 11504.975,
    "characteristics.samples_per_second.normalized_per_processor": 11504.975,
    "characteristics.word error rate": 7.4454891880312974,
    "ck_system": "A100-PCIex8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIex8_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 63360000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex8_TRT",
    "system_name": "Gigabyte G482-Z54 (8x A100-PCIe, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 96000,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 128,
    "uid": "04be1a357ae31938",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 379704867826,
    "90.00 percentile latency (ns)": 684416962383,
    "95.00 percentile latency (ns)": 722543508862,
    "97.00 percentile latency (ns)": 737792712717,
    "99.00 percentile latency (ns)": 753033774671,
    "99.90 percentile latency (ns)": 759903287234,
    "Max latency (ns)": 760613636224,
    "Mean latency (ns)": 111163205282,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 551511028,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 90242.9,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.5545108119687,
    "characteristics.power": 3253.9064388961915,
    "characteristics.power.normalized_per_core": 406.73830486202394,
    "characteristics.power.normalized_per_processor": 406.73830486202394,
    "characteristics.samples_per_second": 90242.9,
    "characteristics.samples_per_second.normalized_per_core": 11280.3625,
    "characteristics.samples_per_second.normalized_per_processor": 11280.3625,
    "characteristics.word error rate": 7.4454891880312974,
    "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT_MaxQ",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT_MaxQ",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 68640000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT_MaxQ",
    "system_name": "NVIDIA DGX A100 (8x A100-SXM-80GB, MaxQ, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 104000,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 128,
    "uid": "ddd428c83231c044",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 457821311949,
    "90.00 percentile latency (ns)": 628005611173,
    "95.00 percentile latency (ns)": 640963026223,
    "97.00 percentile latency (ns)": 645654503542,
    "99.00 percentile latency (ns)": 649942270753,
    "99.90 percentile latency (ns)": 651573862936,
    "Max latency (ns)": 651658741163,
    "Mean latency (ns)": 418407394805,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 6933691382,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 36460.8,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA A10",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.5545108119687,
    "characteristics.samples_per_second": 36460.8,
    "characteristics.samples_per_second.normalized_per_core": 4557.6,
    "characteristics.samples_per_second.normalized_per_processor": 4557.6,
    "characteristics.word error rate": 7.4454891880312974,
    "ck_system": "A10x8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 8.0.1, CUDA 11.3",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.1,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A10x8_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 20.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 1624344308455410291,
    "retraining": "No",
    "sample_index_rng_seed": 517984244576520566,
    "samples_per_query": 23760000,
    "schedule_rng_seed": 10051496985653635065,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x8_TRT",
    "system_name": "Supermicro 4029GP-TRT-OTO-28 (8x A10, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 36000,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 56,
    "uid": "47799f8dd8f41fa7",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 449235480855,
    "90.00 percentile latency (ns)": 625883545463,
    "95.00 percentile latency (ns)": 639125948267,
    "97.00 percentile latency (ns)": 643927450692,
    "99.00 percentile latency (ns)": 648358580416,
    "99.90 percentile latency (ns)": 650087936940,
    "Max latency (ns)": 650216204779,
    "Mean latency (ns)": 412177827144,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 1834118722,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 14007.6,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.54549144287357,
    "characteristics.samples_per_second": 14007.6,
    "characteristics.samples_per_second.normalized_per_core": 14007.6,
    "characteristics.samples_per_second.normalized_per_processor": 14007.6,
    "characteristics.word error rate": 7.45450855712643,
    "ck_system": "DGX-A100_A100-SXM-80GBx1_TRT_datacenter",
    "ck_used": true,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 120,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx1_TRT_datacenter",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 9108000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx1_TRT_datacenter",
    "system_name": "Microsoft Corporation 7.0 (Virtual Machine)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 13800,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 240,
    "uid": "32bab74bb35f423d",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 495240979740,
    "90.00 percentile latency (ns)": 684166600662,
    "95.00 percentile latency (ns)": 698594056001,
    "97.00 percentile latency (ns)": 703794140790,
    "99.00 percentile latency (ns)": 708715493774,
    "99.90 percentile latency (ns)": 710771013604,
    "Max latency (ns)": 710947835922,
    "Mean latency (ns)": 453761831423,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 6876479314,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 51689.9,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "24 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A30",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.5545108119687,
    "characteristics.samples_per_second": 51689.9,
    "characteristics.samples_per_second.normalized_per_core": 6461.2375,
    "characteristics.samples_per_second.normalized_per_processor": 6461.2375,
    "characteristics.word error rate": 7.4454891880312974,
    "ck_system": "A30x8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A30x8_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.46, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 36748799,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "preview",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x8_TRT",
    "system_name": "Gigabyte G482-Z54 (8x A30, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 55680,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 128,
    "uid": "3aa914a5b92cd8fb",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 442198578944,
    "90.00 percentile latency (ns)": 608172689367,
    "95.00 percentile latency (ns)": 623937005810,
    "97.00 percentile latency (ns)": 630656392888,
    "99.00 percentile latency (ns)": 636758403925,
    "99.90 percentile latency (ns)": 639433200831,
    "Max latency (ns)": 639693721434,
    "Mean latency (ns)": 55103018211,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 9574613028,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 82539.5,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe-40GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.5545108119687,
    "characteristics.power": 2278.421562499999,
    "characteristics.power.normalized_per_core": 284.80269531249985,
    "characteristics.power.normalized_per_processor": 284.80269531249985,
    "characteristics.samples_per_second": 82539.5,
    "characteristics.samples_per_second.normalized_per_core": 10317.4375,
    "characteristics.samples_per_second.normalized_per_processor": 10317.4375,
    "characteristics.word error rate": 7.4454891880312974,
    "ck_system": "A100-PCIex8_TRT_MaxQ",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A100-PCIex8_TRT_MaxQ",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 52800000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex8_TRT_MaxQ",
    "system_name": "Gigabyte G482-Z54 (8x A100-PCIe, MaxQ, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 80000,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 128,
    "uid": "1e524e88b300c34c",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 325179117909,
    "90.00 percentile latency (ns)": 584663309167,
    "95.00 percentile latency (ns)": 617123050345,
    "97.00 percentile latency (ns)": 630092573844,
    "99.00 percentile latency (ns)": 643085405264,
    "99.90 percentile latency (ns)": 648919844208,
    "Max latency (ns)": 649527330494,
    "Mean latency (ns)": 56475882802,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 797665695,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 105677,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.5545108119687,
    "characteristics.samples_per_second": 105677,
    "characteristics.samples_per_second.normalized_per_core": 13209.625,
    "characteristics.samples_per_second.normalized_per_processor": 13209.625,
    "characteristics.word error rate": 7.4454891880312974,
    "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 68640000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT",
    "system_name": "NVIDIA DGX-A100 (8x A100-SXM-80GB, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 104000,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 128,
    "uid": "2c3d8d31de4da7ab",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 446476935070,
    "90.00 percentile latency (ns)": 621339537492,
    "95.00 percentile latency (ns)": 635214265056,
    "97.00 percentile latency (ns)": 640402310074,
    "99.00 percentile latency (ns)": 645572365163,
    "99.90 percentile latency (ns)": 647804716796,
    "Max latency (ns)": 648040076957,
    "Mean latency (ns)": 409968302913,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 6305752753,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 48885.9,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.5545108119687,
    "characteristics.samples_per_second": 48885.9,
    "characteristics.samples_per_second.normalized_per_core": 12221.475,
    "characteristics.samples_per_second.normalized_per_processor": 12221.475,
    "characteristics.word error rate": 7.4454891880312974,
    "ck_system": "DGX-Station-A100_A100-SXM-80GBx4_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "512 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 1,
    "host_storage_capacity": "10 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-Station-A100_A100-SXM-80GBx4_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 31680000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-Station-A100_A100-SXM-80GBx4_TRT",
    "system_name": "NVIDIA DGX Station A100 (4x A100-SXM-80GB, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 48000,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 64,
    "uid": "522beb67ce49a984",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 453228416956,
    "90.00 percentile latency (ns)": 630349747678,
    "95.00 percentile latency (ns)": 645758921110,
    "97.00 percentile latency (ns)": 651402488884,
    "99.00 percentile latency (ns)": 656624133961,
    "99.90 percentile latency (ns)": 658617723692,
    "Max latency (ns)": 658745232353,
    "Mean latency (ns)": 414712575341,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 806976719,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 1552.95,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB (1x1g.10gb MIG)",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 1,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.54549144287357,
    "characteristics.samples_per_second": 1552.95,
    "characteristics.samples_per_second.normalized_per_core": 1552.95,
    "characteristics.samples_per_second.normalized_per_processor": 1552.95,
    "characteristics.word error rate": 7.45450855712643,
    "ck_system": "DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 1,
    "normalize_processors": 1,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 1023000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_HeteroMultiUse",
    "system_name": "NVIDIA DGX-A100 (1x A100-SXM-80GB-MIG-1x1g.10gb, TensorRT, HeteroMultiUse)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 1550,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 128,
    "uid": "0cbb4c2fa40638d6",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 466497886842,
    "90.00 percentile latency (ns)": 643281091098,
    "95.00 percentile latency (ns)": 657372621102,
    "97.00 percentile latency (ns)": 662693028718,
    "99.00 percentile latency (ns)": 667971626947,
    "99.90 percentile latency (ns)": 670204701579,
    "Max latency (ns)": 670406882594,
    "Mean latency (ns)": 426867378049,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 5612295572,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 47254.9,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 4,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.5545108119687,
    "characteristics.power": 1306.8096870342777,
    "characteristics.power.normalized_per_core": 326.7024217585694,
    "characteristics.power.normalized_per_processor": 326.7024217585694,
    "characteristics.samples_per_second": 47254.9,
    "characteristics.samples_per_second.normalized_per_core": 11813.725,
    "characteristics.samples_per_second.normalized_per_processor": 11813.725,
    "characteristics.word error rate": 7.4454891880312974,
    "ck_system": "DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "512 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 1,
    "host_storage_capacity": "10 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 4,
    "normalize_processors": 4,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 31680000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-Station-A100_A100-SXM-80GBx4_TRT_MaxQ",
    "system_name": "NVIDIA DGX Station A100 (4x A100-SXM-80GB, MaxQ, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 48000,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 64,
    "uid": "7b4660bffa8eebb0",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 429178094825,
    "90.00 percentile latency (ns)": 594876233867,
    "95.00 percentile latency (ns)": 611476616662,
    "97.00 percentile latency (ns)": 618086576798,
    "99.00 percentile latency (ns)": 624676598565,
    "99.90 percentile latency (ns)": 627379654541,
    "Max latency (ns)": 627654751630,
    "Mean latency (ns)": 70457737099,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 9949844185,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 90852.5,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40 GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe-40GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.55000112742113,
    "characteristics.samples_per_second": 90852.5,
    "characteristics.samples_per_second.normalized_per_core": 11356.5625,
    "characteristics.samples_per_second.normalized_per_processor": 11356.5625,
    "characteristics.word error rate": 7.449998872578862,
    "ck_system": "A100-PCIex8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A100-PCIex8_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 57024000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex8_TRT",
    "system_name": "Gigabyte G482-Z54 (8x A100-PCIe, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 86400,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 128,
    "uid": "b17fa40aeba35e70",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 365844564032,
    "90.00 percentile latency (ns)": 658599394046,
    "95.00 percentile latency (ns)": 695187221453,
    "97.00 percentile latency (ns)": 709804182149,
    "99.00 percentile latency (ns)": 724472507824,
    "99.90 percentile latency (ns)": 731053697244,
    "Max latency (ns)": 731744535808,
    "Mean latency (ns)": 97192289035,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 588701989,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 93803.2,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "80 GB",
    "accelerator_memory_configuration": "HBM2e",
    "accelerator_model_name": "NVIDIA A100-SXM-80GB",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.55000112742113,
    "characteristics.power": 3555.000136798903,
    "characteristics.power.normalized_per_core": 444.3750170998629,
    "characteristics.power.normalized_per_processor": 444.3750170998629,
    "characteristics.samples_per_second": 93803.2,
    "characteristics.samples_per_second.normalized_per_core": 11725.4,
    "characteristics.samples_per_second.normalized_per_processor": 11725.4,
    "characteristics.word error rate": 7.449998872578862,
    "ck_system": "DGX-A100_A100-SXM-80GBx8_TRT_MaxQ",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "2 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx8_TRT_MaxQ",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 68640000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx8_TRT_MaxQ",
    "system_name": "NVIDIA DGX-A100 (8x A100-SXM-80GB, MaxQ, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 104000,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 128,
    "uid": "4a0598695a7f8528",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 461254118419,
    "90.00 percentile latency (ns)": 634085566551,
    "95.00 percentile latency (ns)": 647141953293,
    "97.00 percentile latency (ns)": 651839210611,
    "99.00 percentile latency (ns)": 656132460722,
    "99.90 percentile latency (ns)": 657760382957,
    "Max latency (ns)": 657888731212,
    "Mean latency (ns)": 421681559177,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 5823528762,
    "Min queries satisfied": "Yes",
    "Mode": "PerformanceOnly",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 36115.5,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA A10",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "boot_firmware_version": "",
    "characteristics.accuracy": 92.5545108119687,
    "characteristics.samples_per_second": 36115.5,
    "characteristics.samples_per_second.normalized_per_core": 4514.4375,
    "characteristics.samples_per_second.normalized_per_processor": 4514.4375,
    "characteristics.word error rate": 7.4454891880312974,
    "ck_system": "A10x8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "disk_controllers": "",
    "disk_drives": "",
    "division": "closed",
    "filesystem": "",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 7.2.3, CUDA 11.1",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "management_firmware_version": "",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 600000,
    "min_query_count": 1,
    "mlperf_version": 1.0,
    "network_speed_mbit": "",
    "nics_enabled_connected": "",
    "nics_enabled_firmware": "",
    "nics_enabled_os": "",
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A10x8_TRT",
    "number_of_nodes": 1,
    "number_of_type_nics_installed": "",
    "operating_system": "Ubuntu 18.04.4",
    "other_hardware": "",
    "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0",
    "performance_issue_same": 0,
    "performance_issue_same_index": 0,
    "performance_issue_unique": 0,
    "performance_sample_count": 2513,
    "power_management": "",
    "power_supply_details": "",
    "power_supply_quantity_and_rating_watts": "",
    "print_timestamps": 0,
    "problem": false,
    "qsl_rng_seed": 7322528924094909334,
    "retraining": "N",
    "sample_index_rng_seed": 1570999273408051088,
    "samples_per_query": 23760000,
    "schedule_rng_seed": 3507442325620259414,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "preview",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x8_TRT",
    "system_name": "Supermicro 4029GP-TRT-OTO-28 (8x A10, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 36000,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 56,
    "uid": "629f7cc44494ea36",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 47933267074,
    "90.00 percentile latency (ns)": 66530490233,
    "95.00 percentile latency (ns)": 68130250556,
    "97.00 percentile latency (ns)": 68687476725,
    "99.00 percentile latency (ns)": 69205096975,
    "99.90 percentile latency (ns)": 69361332128,
    "Max latency (ns)": 69396954946,
    "Mean latency (ns)": 43997179476,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 927325099,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 28531.5,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA T4",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 20,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.accuracy": 92.56127533879005,
    "characteristics.samples_per_second": 28531.5,
    "characteristics.samples_per_second.normalized_per_core": 1426.575,
    "characteristics.samples_per_second.normalized_per_processor": 1426.575,
    "characteristics.word error rate": 7.438724661209949,
    "ck_system": "T4x20_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 7.2, CUDA 11.0 Update 1",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "ECC off",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 20,
    "normalize_processors": 20,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/T4x20_TRT",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.4",
    "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 2513,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 1980000,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x20_TRT",
    "system_name": "Supermicro 6049GP-TRT-OTO-29 (20x T4, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 30000,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 56,
    "uid": "4993b0c14e47b967",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 51618258655,
    "90.00 percentile latency (ns)": 71501146706,
    "95.00 percentile latency (ns)": 73158191240,
    "97.00 percentile latency (ns)": 73778904610,
    "99.00 percentile latency (ns)": 74353161196,
    "99.90 percentile latency (ns)": 74564351801,
    "Max latency (ns)": 74565208027,
    "Mean latency (ns)": 47307115342,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 525925429,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 17207,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-PCIe",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 2,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.accuracy": 92.5545108119687,
    "characteristics.samples_per_second": 17207,
    "characteristics.samples_per_second.normalized_per_core": 8603.5,
    "characteristics.samples_per_second.normalized_per_processor": 8603.5,
    "characteristics.word error rate": 7.4454891880312974,
    "ck_system": "A100-PCIex2_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 7.2, CUDA 11.0 Update 1",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 2,
    "normalize_processors": 2,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/A100-PCIex2_TRT",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.4",
    "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 2513,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 1283040,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex2_TRT",
    "system_name": "Gigabyte G482-Z52 (2x A100-PCIe, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 19440,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 128,
    "uid": "6c849a384eb4a02c",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 44953368063,
    "90.00 percentile latency (ns)": 61644139901,
    "95.00 percentile latency (ns)": 62995795748,
    "97.00 percentile latency (ns)": 63471035935,
    "99.00 percentile latency (ns)": 63923967631,
    "99.90 percentile latency (ns)": 64067072074,
    "Max latency (ns)": 64076962033,
    "Mean latency (ns)": 41117218633,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 1336672715,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 82400.9,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "40GB",
    "accelerator_memory_configuration": "HBM2",
    "accelerator_model_name": "NVIDIA A100-SXM4",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.accuracy": 92.55225596969493,
    "characteristics.samples_per_second": 82400.9,
    "characteristics.samples_per_second.normalized_per_core": 10300.1125,
    "characteristics.samples_per_second.normalized_per_processor": 10300.1125,
    "characteristics.word error rate": 7.4477440303050795,
    "ck_system": "DGX-A100_A100-SXM4x8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 7.2, CUDA 11.0 Update 1",
    "host_memory_capacity": "1 TB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 64,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "AMD EPYC 7742",
    "host_processors_per_node": 2,
    "host_storage_capacity": "15 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM4x8_TRT",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.4",
    "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 2513,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 5280000,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM4x8_TRT",
    "system_name": "NVIDIA DGX-A100 (8x A100-SXM4, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 80000,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 128,
    "uid": "2e76c2cb9456ba26",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  },
  {
    "50.00 percentile latency (ns)": 43595393278,
    "90.00 percentile latency (ns)": 60614607813,
    "95.00 percentile latency (ns)": 61872821916,
    "97.00 percentile latency (ns)": 62366912912,
    "99.00 percentile latency (ns)": 62770158336,
    "99.90 percentile latency (ns)": 62882235729,
    "Max latency (ns)": 62892139601,
    "Mean latency (ns)": 40360979736,
    "Min duration satisfied": "Yes",
    "Min latency (ns)": 1163835971,
    "Min queries satisfied": "Yes",
    "Mode": "Performance",
    "Result is": "VALID",
    "SUT name": "RNNT SERVER",
    "Samples per second": 11963.3,
    "Scenario": "offline",
    "accelerator_frequency": "",
    "accelerator_host_interconnect": "",
    "accelerator_interconnect": "",
    "accelerator_interconnect_topology": "",
    "accelerator_memory_capacity": "16 GB",
    "accelerator_memory_configuration": "GDDR6",
    "accelerator_model_name": "NVIDIA T4",
    "accelerator_on-chip_memories": "",
    "accelerators_per_node": 8,
    "accuracy_log_probability": 0,
    "accuracy_log_rng_seed": 0,
    "accuracy_log_sampling_target": 0,
    "characteristics.accuracy": 92.56127533879005,
    "characteristics.samples_per_second": 11963.3,
    "characteristics.samples_per_second.normalized_per_core": 1495.4125,
    "characteristics.samples_per_second.normalized_per_processor": 1495.4125,
    "characteristics.word error rate": 7.438724661209949,
    "ck_system": "T4x8_TRT",
    "ck_used": false,
    "cooling": "",
    "dataset": "LibriSpeech",
    "dataset_link": "",
    "dim_x_default": "characteristics.samples_per_second",
    "dim_x_maximize": true,
    "dim_y_default": "characteristics.accuracy",
    "dim_y_maximize": true,
    "division": "closed",
    "formal_model": "rnn-t",
    "formal_model_accuracy": 99.0,
    "formal_model_link": "",
    "framework": "TensorRT 7.2, CUDA 11.0 Update 1",
    "host_memory_capacity": "768 GB",
    "host_memory_configuration": "",
    "host_networking": "",
    "host_networking_topology": "",
    "host_processor_caches": "",
    "host_processor_core_count": 28,
    "host_processor_frequency": "",
    "host_processor_interconnect": "",
    "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz",
    "host_processors_per_node": 2,
    "host_storage_capacity": "4 TB",
    "host_storage_type": "NVMe SSD",
    "hw_notes": "ECC off",
    "informal_model": "rnnt",
    "input_data_types": "fp16",
    "key.accuracy": "characteristics.accuracy",
    "max_async_queries": 1,
    "max_duration (ms)": 0,
    "max_query_count": 0,
    "min_duration (ms)": 60000,
    "min_query_count": 1,
    "mlperf_version": 0.7,
    "normalize_cores": 8,
    "normalize_processors": 8,
    "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code",
    "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/T4x8_TRT",
    "number_of_nodes": 1,
    "operating_system": "Ubuntu 18.04.4",
    "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0",
    "performance_issue_same": true,
    "performance_issue_same_index": 0,
    "performance_issue_unique": true,
    "performance_sample_count": 2513,
    "print_timestamps": true,
    "problem": false,
    "qsl_rng_seed": 12786827339337101903,
    "retraining": "N",
    "sample_index_rng_seed": 12640797754436136668,
    "samples_per_query": 752400,
    "schedule_rng_seed": 3135815929913719677,
    "starting_weights_filename": "DistributedDataParallel_1576581068.9962234-epoch-100.pt",
    "status": "available",
    "submitter": "NVIDIA",
    "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA",
    "sw_notes": "",
    "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x8_TRT",
    "system_name": "Supermicro 4029GP-TRT-OTO-28 (8x T4, TensorRT)",
    "system_type": "datacenter",
    "target_latency (ns)": 0,
    "target_qps": 11400,
    "task": "speech recognition",
    "task2": "speech recognition",
    "total_cores": 56,
    "uid": "df28724474a42771",
    "use_accelerator": true,
    "weight_data_types": "fp16",
    "weight_transformations": "quantization, affine fusion"
  }
]