[ { "50.00 percentile latency (ns)": 322967819756, "90.00 percentile latency (ns)": 581611144071, "95.00 percentile latency (ns)": 613957395552, "97.00 percentile latency (ns)": 626848549972, "99.00 percentile latency (ns)": 639740402823, "99.90 percentile latency (ns)": 645549075667, "Max latency (ns)": 646191094997, "Mean latency (ns)": 323004989827, "Min duration satisfied": "Yes", "Min latency (ns)": 127041716, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 19303.9, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 19303.9, "characteristics.samples_per_second.normalized_per_core": 19303.9, "characteristics.samples_per_second.normalized_per_processor": 19303.9, "characteristics.total": 50000, "ck_system": "A30x1_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30x1_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 12474000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x1_TRT_Triton", "system_name": "Gigabyte G482-Z54 (1x A30, TensorRT, Triton)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 18900, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "cc86494d9d539d11", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 349468277162, "90.00 percentile latency (ns)": 629729967119, "95.00 percentile latency (ns)": 664741887480, "97.00 percentile latency (ns)": 678797571622, "99.00 percentile latency (ns)": 692823334296, "99.90 percentile latency (ns)": 699083218914, "Max latency (ns)": 699836421702, "Mean latency (ns)": 349556832786, "Min duration satisfied": "Yes", "Min latency (ns)": 174203087, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 1092.08, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "Shared with host", "accelerator_memory_configuration": "SRAM", "accelerator_model_name": "NVIDIA Xavier NX", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.062, "characteristics.good": 38031, "characteristics.power": 19.967452857142877, "characteristics.power.normalized_per_core": 19.967452857142877, "characteristics.power.normalized_per_processor": 19.967452857142877, "characteristics.samples_per_second": 1092.08, "characteristics.samples_per_second.normalized_per_core": 1092.08, "characteristics.samples_per_second.normalized_per_processor": 1092.08, "characteristics.total": 50000, "ck_system": "Xavier_NX_TRT_MaxQ", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2", "host_memory_capacity": "8 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 6, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)", "host_processors_per_node": 1, "host_storage_capacity": "32 GB", "host_storage_type": "Micro SD Card", "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline scenario", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/Xavier_NX_TRT_MaxQ", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04", "other_hardware": "", "other_software_stack": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2, cuDNN 8.2.3, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 764280, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Xavier_NX_TRT_MaxQ", "system_name": "Auvidea JNX30 Xavier NX (MaxQ, TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 1158, "task": "image classification", "task2": "image classification", "total_cores": 6, "uid": "48dfcc163355ef63", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 326288470460, "90.00 percentile latency (ns)": 588914614587, "95.00 percentile latency (ns)": 631267055296, "97.00 percentile latency (ns)": 648217611579, "99.00 percentile latency (ns)": 665145599978, "99.90 percentile latency (ns)": 672762621373, "Max latency (ns)": 673583332665, "Mean latency (ns)": 327412630896, "Min duration satisfied": "Yes", "Min latency (ns)": 103262149, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "DLA_Triton_Server", "Samples per second": 1959.67, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "Shared with host", "accelerator_memory_configuration": "SRAM", "accelerator_model_name": "NVIDIA AGX Xavier", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.05, "characteristics.good": 38025, "characteristics.samples_per_second": 1959.67, "characteristics.samples_per_second.normalized_per_core": 1959.67, "characteristics.samples_per_second.normalized_per_processor": 1959.67, "characteristics.total": 50000, "ck_system": "AGX_Xavier_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2", "host_memory_capacity": "32 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 8, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)", "host_processors_per_node": 1, "host_storage_capacity": "32 GB", "host_storage_type": "eMMC 5.1", "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline scenario", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/AGX_Xavier_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04", "other_hardware": "", "other_software_stack": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2, cuDNN 8.2.3, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1320000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/AGX_Xavier_TRT_Triton", "system_name": "NVIDIA Jetson AGX Xavier 32GB (TensorRT, Triton)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 2000, "task": "image classification", "task2": "image classification", "total_cores": 8, "uid": "8f7a636346641298", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 328284581222, "90.00 percentile latency (ns)": 591252771414, "95.00 percentile latency (ns)": 624087996103, "97.00 percentile latency (ns)": 637283799410, "99.00 percentile latency (ns)": 650417411957, "99.90 percentile latency (ns)": 656347525913, "Max latency (ns)": 656953993541, "Mean latency (ns)": 328290258942, "Min duration satisfied": "Yes", "Min latency (ns)": 123367292, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 32148.4, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 32148.4, "characteristics.samples_per_second.normalized_per_core": 32148.4, "characteristics.samples_per_second.normalized_per_processor": 32148.4, "characteristics.total": 50000, "ck_system": "A100-PCIex1_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIex1_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 21120000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex1_TRT_Triton", "system_name": "Gigabyte G482-Z54 (1x A100-PCIe, TensorRT, Triton)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 32000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "8054b694e71860bd", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 353258637116, "90.00 percentile latency (ns)": 635460962183, "95.00 percentile latency (ns)": 670678476430, "97.00 percentile latency (ns)": 684760042819, "99.00 percentile latency (ns)": 698855858777, "99.90 percentile latency (ns)": 705177525431, "Max latency (ns)": 705924832390, "Mean latency (ns)": 353178813958, "Min duration satisfied": "Yes", "Min latency (ns)": 65878989, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 2039.11, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "Shared with host", "accelerator_memory_configuration": "SRAM", "accelerator_model_name": "NVIDIA AGX Xavier", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 2039.11, "characteristics.samples_per_second.normalized_per_core": 2039.11, "characteristics.samples_per_second.normalized_per_processor": 2039.11, "characteristics.total": 50000, "ck_system": "AGX_Xavier_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2", "host_memory_capacity": "32 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 8, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)", "host_processors_per_node": 1, "host_storage_capacity": "32 GB", "host_storage_type": "eMMC 5.1", "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline scenario", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/AGX_Xavier_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04", "other_hardware": "", "other_software_stack": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2, cuDNN 8.2.3, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1439460, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/AGX_Xavier_TRT", "system_name": "NVIDIA Jetson AGX Xavier 32GB (TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 2181, "task": "image classification", "task2": "image classification", "total_cores": 8, "uid": "b45f1bffd5baca08", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 327872759360, "90.00 percentile latency (ns)": 590099425137, "95.00 percentile latency (ns)": 622907204589, "97.00 percentile latency (ns)": 635969997780, "99.00 percentile latency (ns)": 649081918183, "99.90 percentile latency (ns)": 655014590489, "Max latency (ns)": 655634610719, "Mean latency (ns)": 327854903878, "Min duration satisfied": "Yes", "Min latency (ns)": 52498870, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 5133.96, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB (1x1g.10gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 5133.96, "characteristics.samples_per_second.normalized_per_core": 5133.96, "characteristics.samples_per_second.normalized_per_processor": 5133.96, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 3366000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_Triton", "system_name": "NVIDIA DGX A100 (1x A100-SXM-80GB-MIG-1x1g.10gb, TensorRT, Triton)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 5100, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "d9e32156740e9a25", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 330496946058, "90.00 percentile latency (ns)": 595496193193, "95.00 percentile latency (ns)": 628610482111, "97.00 percentile latency (ns)": 641883721845, "99.00 percentile latency (ns)": 655099294866, "99.90 percentile latency (ns)": 661042222001, "Max latency (ns)": 661696977026, "Mean latency (ns)": 330569082075, "Min duration satisfied": "Yes", "Min latency (ns)": 217468238, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 36905.1, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 36905.1, "characteristics.samples_per_second.normalized_per_core": 36905.1, "characteristics.samples_per_second.normalized_per_processor": 36905.1, "characteristics.total": 50000, "ck_system": "A100-PCIe-80GBx1_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIe-80GBx1_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 24420000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIe-80GBx1_TRT", "system_name": "Gigabyte G482-Z54 (1x A100-PCIe-80GB, TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 37000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "74d17296b24b30b2", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 305016996243, "90.00 percentile latency (ns)": 548933228747, "95.00 percentile latency (ns)": 579375238446, "97.00 percentile latency (ns)": 591513352082, "99.00 percentile latency (ns)": 603854658781, "99.90 percentile latency (ns)": 609204386915, "Max latency (ns)": 609790033692, "Mean latency (ns)": 305002523442, "Min duration satisfied": "Yes", "Min latency (ns)": 384763347, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 39830.1, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 39830.1, "characteristics.samples_per_second.normalized_per_core": 39830.1, "characteristics.samples_per_second.normalized_per_processor": 39830.1, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GBx1_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx1_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 24288000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx1_TRT", "system_name": "NVIDIA DGX A100 (1x A100-SXM-80GB, TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 36800, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "854422c66d74f59a", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 291440132056, "90.00 percentile latency (ns)": 553528013308, "95.00 percentile latency (ns)": 590594098771, "97.00 percentile latency (ns)": 605428964925, "99.00 percentile latency (ns)": 620263278138, "99.90 percentile latency (ns)": 626929982400, "Max latency (ns)": 627648946065, "Mean latency (ns)": 297869154774, "Min duration satisfied": "Yes", "Min latency (ns)": 335371536, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "DLA_Triton_Server", "Samples per second": 1156.7, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "Shared with host", "accelerator_memory_configuration": "SRAM", "accelerator_model_name": "NVIDIA Xavier NX", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.05, "characteristics.good": 38025, "characteristics.samples_per_second": 1156.7, "characteristics.samples_per_second.normalized_per_core": 1156.7, "characteristics.samples_per_second.normalized_per_processor": 1156.7, "characteristics.total": 50000, "ck_system": "Xavier_NX_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2", "host_memory_capacity": "8 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 6, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)", "host_processors_per_node": 1, "host_storage_capacity": "32 GB", "host_storage_type": "Micro SD Card", "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline scenario", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/Xavier_NX_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04", "other_hardware": "", "other_software_stack": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2, cuDNN 8.2.3, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 726000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Xavier_NX_TRT_Triton", "system_name": "NVIDIA Jetson Xavier NX (TensorRT, Triton)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 1100, "task": "image classification", "task2": "image classification", "total_cores": 6, "uid": "3479da77377bc69b", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 325641686834, "90.00 percentile latency (ns)": 586166374659, "95.00 percentile latency (ns)": 618718734709, "97.00 percentile latency (ns)": 631784218685, "99.00 percentile latency (ns)": 644765108714, "99.90 percentile latency (ns)": 650634681158, "Max latency (ns)": 651243386660, "Mean latency (ns)": 325688808224, "Min duration satisfied": "Yes", "Min latency (ns)": 131489983, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 4965.89, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30 (1x1g.6gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 4965.89, "characteristics.samples_per_second.normalized_per_core": 4965.89, "characteristics.samples_per_second.normalized_per_processor": 4965.89, "characteristics.total": 50000, "ck_system": "A30-MIG_1x1g.6gb_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30-MIG_1x1g.6gb_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 3234000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30-MIG_1x1g.6gb_TRT", "system_name": "Gigabyte G482-Z54 (1x A30-MIG-1x1g.6gb, TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 4900, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "f0416fa8eaaa72e0", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 335346945065, "90.00 percentile latency (ns)": 603305056926, "95.00 percentile latency (ns)": 636810784375, "97.00 percentile latency (ns)": 650206943937, "99.00 percentile latency (ns)": 663611451324, "99.90 percentile latency (ns)": 669648558129, "Max latency (ns)": 670282144159, "Mean latency (ns)": 335333693261, "Min duration satisfied": "Yes", "Min latency (ns)": 157765404, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 1506.53, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "Shared with host", "accelerator_memory_configuration": "SRAM", "accelerator_model_name": "NVIDIA AGX Xavier", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 75.99, "characteristics.good": 37995, "characteristics.power": 25.240244776119393, "characteristics.power.normalized_per_core": 25.240244776119393, "characteristics.power.normalized_per_processor": 25.240244776119393, "characteristics.samples_per_second": 1506.53, "characteristics.samples_per_second.normalized_per_core": 1506.53, "characteristics.samples_per_second.normalized_per_processor": 1506.53, "characteristics.total": 50000, "ck_system": "AGX_Xavier_TRT_MaxQ", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2", "host_memory_capacity": "32 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 8, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)", "host_processors_per_node": 1, "host_storage_capacity": "32 GB", "host_storage_type": "eMMC 5.1", "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline scenario", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/AGX_Xavier_TRT_MaxQ", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04", "other_hardware": "", "other_software_stack": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2, cuDNN 8.2.3, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 1009800, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/AGX_Xavier_TRT_MaxQ", "system_name": "Auvidea X220-LC AGX Xavier 32GB (MaxQ, TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 1530, "task": "image classification", "task2": "image classification", "total_cores": 8, "uid": "bf428cb0fdda363e", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 330468785751, "90.00 percentile latency (ns)": 595352871940, "95.00 percentile latency (ns)": 628455726712, "97.00 percentile latency (ns)": 641727318295, "99.00 percentile latency (ns)": 654947293450, "99.90 percentile latency (ns)": 660889549630, "Max latency (ns)": 661587711846, "Mean latency (ns)": 330537834723, "Min duration satisfied": "Yes", "Min latency (ns)": 74421072, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 36911.2, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 36911.2, "characteristics.samples_per_second.normalized_per_core": 36911.2, "characteristics.samples_per_second.normalized_per_processor": 36911.2, "characteristics.total": 50000, "ck_system": "A100-PCIe-80GBx1_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIe-80GBx1_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 24420000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIe-80GBx1_TRT_Triton", "system_name": "Gigabyte G482-Z54 (1x A100-PCIe-80GB, TensorRT, Triton)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 37000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "d5cc484889286ae6", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 310635832982, "90.00 percentile latency (ns)": 558943385836, "95.00 percentile latency (ns)": 589912904050, "97.00 percentile latency (ns)": 602355906049, "99.00 percentile latency (ns)": 614800502270, "99.90 percentile latency (ns)": 620358270719, "Max latency (ns)": 620945049877, "Mean latency (ns)": 310615449448, "Min duration satisfied": "Yes", "Min latency (ns)": 82494137, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 13817.6, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA A10", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 13817.6, "characteristics.samples_per_second.normalized_per_core": 13817.6, "characteristics.samples_per_second.normalized_per_processor": 13817.6, "characteristics.total": 50000, "ck_system": "A10x1_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A10x1_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 8580000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x1_TRT_Triton", "system_name": "Supermicro 4029GP-TRT-OTO-28 (1x A10, TensorRT, Triton)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 13000, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "4c7abc4f55026aaf", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 305416316387, "90.00 percentile latency (ns)": 549788000721, "95.00 percentile latency (ns)": 580335217881, "97.00 percentile latency (ns)": 592542799302, "99.00 percentile latency (ns)": 604752061414, "99.90 percentile latency (ns)": 610263600972, "Max latency (ns)": 610850202110, "Mean latency (ns)": 305428390107, "Min duration satisfied": "Yes", "Min latency (ns)": 87278663, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 39761, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 39761, "characteristics.samples_per_second.normalized_per_core": 39761.0, "characteristics.samples_per_second.normalized_per_processor": 39761.0, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GBx1_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx1_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 24288000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx1_TRT_Triton", "system_name": "NVIDIA DGX A100 (1x A100-SXM-80GB, TensorRT, Triton)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 36800, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "fb6319d962d41443", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 310813514942, "90.00 percentile latency (ns)": 559428727743, "95.00 percentile latency (ns)": 590434364590, "97.00 percentile latency (ns)": 602893100358, "99.00 percentile latency (ns)": 615354502280, "99.90 percentile latency (ns)": 620916724785, "Max latency (ns)": 621503988450, "Mean latency (ns)": 310830312799, "Min duration satisfied": "Yes", "Min latency (ns)": 186420392, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 13805.2, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA A10", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 13805.2, "characteristics.samples_per_second.normalized_per_core": 13805.2, "characteristics.samples_per_second.normalized_per_processor": 13805.2, "characteristics.total": 50000, "ck_system": "A10x1_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A10x1_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 8580000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x1_TRT", "system_name": "Supermicro 4029GP-TRT-OTO-28 (1x A10, TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 13000, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "2fd910e581c1f45b", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 328974940263, "90.00 percentile latency (ns)": 592026562371, "95.00 percentile latency (ns)": 624811427781, "97.00 percentile latency (ns)": 637932953264, "99.00 percentile latency (ns)": 651226267188, "99.90 percentile latency (ns)": 657113460441, "Max latency (ns)": 657650657247, "Mean latency (ns)": 328902513748, "Min duration satisfied": "Yes", "Min latency (ns)": 301240215, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 18265, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 18265, "characteristics.samples_per_second.normalized_per_core": 18265.0, "characteristics.samples_per_second.normalized_per_processor": 18265.0, "characteristics.total": 50000, "ck_system": "A30x1_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30x1_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 12012000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x1_TRT", "system_name": "Gigabyte G482-Z54 (1x A30, TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 18200, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "bc921a82c3ee4bd7", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 316686070606, "90.00 percentile latency (ns)": 569861695580, "95.00 percentile latency (ns)": 601547107136, "97.00 percentile latency (ns)": 614162450789, "99.00 percentile latency (ns)": 626874650552, "99.90 percentile latency (ns)": 632557726894, "Max latency (ns)": 633159375455, "Mean latency (ns)": 316642232718, "Min duration satisfied": "Yes", "Min latency (ns)": 116792421, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 5316.2, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB (1x1g.10gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 5316.2, "characteristics.samples_per_second.normalized_per_core": 5316.2, "characteristics.samples_per_second.normalized_per_processor": 5316.2, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 3366000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT", "system_name": "NVIDIA DGX A100 (1x A100-SXM-80GB-MIG-1x1g.10gb, TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 5100, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "d2e3b060f4f0292b", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 313026382610, "90.00 percentile latency (ns)": 563391048535, "95.00 percentile latency (ns)": 594659721149, "97.00 percentile latency (ns)": 607211681184, "99.00 percentile latency (ns)": 619697849775, "99.90 percentile latency (ns)": 625345383246, "Max latency (ns)": 625931508388, "Mean latency (ns)": 313057351521, "Min duration satisfied": "Yes", "Min latency (ns)": 113948517, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 5166.7, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30 (1x1g.6gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 5166.7, "characteristics.samples_per_second.normalized_per_core": 5166.7, "characteristics.samples_per_second.normalized_per_processor": 5166.7, "characteristics.total": 50000, "ck_system": "A30-MIG_1x1g.6gb_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A30-MIG_1x1g.6gb_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0, Triton 21.07", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 3234000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30-MIG_1x1g.6gb_TRT_Triton", "system_name": "Gigabyte G482-Z54 (1x A30-MIG-1x1g.6gb, TensorRT, Triton)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 4900, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "ab3fb3493781ee60", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 322064794338, "90.00 percentile latency (ns)": 579908438152, "95.00 percentile latency (ns)": 612096897437, "97.00 percentile latency (ns)": 625032248202, "99.00 percentile latency (ns)": 637910885494, "99.90 percentile latency (ns)": 643724337808, "Max latency (ns)": 644319062549, "Mean latency (ns)": 322068308495, "Min duration satisfied": "Yes", "Min latency (ns)": 244408380, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 32778.8, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.042, "characteristics.good": 38021, "characteristics.samples_per_second": 32778.8, "characteristics.samples_per_second.normalized_per_core": 32778.8, "characteristics.samples_per_second.normalized_per_processor": 32778.8, "characteristics.total": 50000, "ck_system": "A100-PCIex1_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 8.0.1, CUDA 11.3", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/A100-PCIex1_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 20.04.4", "other_hardware": "", "other_software_stack": "TensorRT 8.0.1, CUDA 11.3, cuDNN 8.2.1, Driver 470.42.01, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 21120000, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex1_TRT", "system_name": "Gigabyte G482-Z54 (1x A100-PCIe, TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 32000, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "f5d2323114c22889", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 306930141746, "90.00 percentile latency (ns)": 552464208708, "95.00 percentile latency (ns)": 583131704588, "97.00 percentile latency (ns)": 595424284378, "99.00 percentile latency (ns)": 607677499518, "99.90 percentile latency (ns)": 613244782121, "Max latency (ns)": 613830033056, "Mean latency (ns)": 306972158930, "Min duration satisfied": "Yes", "Min latency (ns)": 153883401, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 1245.1, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "Shared with host", "accelerator_memory_configuration": "SRAM", "accelerator_model_name": "NVIDIA Xavier NX", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.002, "characteristics.good": 38001, "characteristics.samples_per_second": 1245.1, "characteristics.samples_per_second.normalized_per_core": 1245.1, "characteristics.samples_per_second.normalized_per_processor": 1245.1, "characteristics.total": 50000, "ck_system": "Xavier_NX_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2", "host_memory_capacity": "8 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 6, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)", "host_processors_per_node": 1, "host_storage_capacity": "32 GB", "host_storage_type": "Micro SD Card", "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline scenario", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.1, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.1/tree/master/closed/NVIDIA/results/Xavier_NX_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04", "other_hardware": "", "other_software_stack": "JetPack 4.6, TensorRT 8.0.1, CUDA 10.2, cuDNN 8.2.3, DALI 0.31.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 1624344308455410291, "retraining": "No", "sample_index_rng_seed": 517984244576520566, "samples_per_query": 764280, "schedule_rng_seed": 10051496985653635065, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Xavier_NX_TRT", "system_name": "NVIDIA Jetson Xavier NX (TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 1158, "task": "image classification", "task2": "image classification", "total_cores": 6, "uid": "9242f2142474d65b", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 351462896747, "90.00 percentile latency (ns)": 632711750174, "95.00 percentile latency (ns)": 667950963087, "97.00 percentile latency (ns)": 681981011585, "99.00 percentile latency (ns)": 696015262434, "99.90 percentile latency (ns)": 702334702893, "Max latency (ns)": 703035660372, "Mean latency (ns)": 351466255336, "Min duration satisfied": "Yes", "Min latency (ns)": 137572234, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 17743.1, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 17743.1, "characteristics.samples_per_second.normalized_per_core": 17743.1, "characteristics.samples_per_second.normalized_per_processor": 17743.1, "characteristics.total": 50000, "ck_system": "A30x1_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A30x1_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.46, DALI 0.30.0, Triton 21.02", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 12474000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "preview", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x1_TRT_Triton", "system_name": "Gigabyte G482-Z54 (1x A30, TensorRT, Triton)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 18900, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "088957a728bd443c", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 351474152001, "90.00 percentile latency (ns)": 632512736535, "95.00 percentile latency (ns)": 667623637550, "97.00 percentile latency (ns)": 681660495379, "99.00 percentile latency (ns)": 695670043790, "99.90 percentile latency (ns)": 702012637829, "Max latency (ns)": 702671319689, "Mean latency (ns)": 351446696742, "Min duration satisfied": "Yes", "Min latency (ns)": 201415526, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 1087.68, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "Shared with host", "accelerator_memory_configuration": "SRAM", "accelerator_model_name": "NVIDIA Xavier NX", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.056, "characteristics.good": 38028, "characteristics.power": 19.697395448079668, "characteristics.power.normalized_per_core": 19.697395448079668, "characteristics.power.normalized_per_processor": 19.697395448079668, "characteristics.samples_per_second": 1087.68, "characteristics.samples_per_second.normalized_per_core": 1087.68, "characteristics.samples_per_second.normalized_per_processor": 1087.68, "characteristics.total": 50000, "ck_system": "Xavier_NX_TRT_MaxQ", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "21.03 Jetson CUDA-X AI Developer Preview, TensorRT 7.2.3, CUDA 10.2", "host_memory_capacity": "8 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 6, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)", "host_processors_per_node": 1, "host_storage_capacity": "32 GB", "host_storage_type": "Micro SD Card", "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline and MultiStream scenarios", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/Xavier_NX_TRT_MaxQ", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "21.03 Jetson CUDA-X AI Developer Preview, TensorRT 7.2.3, CUDA 10.2, cuDNN 8.0.0, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 764280, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Xavier_NX_TRT_MaxQ", "system_name": "NVIDIA Jetson Xavier NX (MaxQ, TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 1158, "task": "image classification", "task2": "image classification", "total_cores": 6, "uid": "0ae50e22414c6a72", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 385387293599, "90.00 percentile latency (ns)": 694620171331, "95.00 percentile latency (ns)": 733282194189, "97.00 percentile latency (ns)": 748737047233, "99.00 percentile latency (ns)": 764184379349, "99.90 percentile latency (ns)": 771163887138, "Max latency (ns)": 771904835642, "Mean latency (ns)": 385490698525, "Min duration satisfied": "Yes", "Min latency (ns)": 131809238, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 31465, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 31465, "characteristics.samples_per_second.normalized_per_core": 31465.0, "characteristics.samples_per_second.normalized_per_processor": 31465.0, "characteristics.total": 50000, "ck_system": "A100-PCIex1_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A100-PCIex1_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0, Triton 21.02", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 24288000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex1_TRT_Triton", "system_name": "Gigabyte G482-Z54 (1x A100-PCIe, TensorRT, Triton)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 36800, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "7ccdf8d35d40420f", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 347633525564, "90.00 percentile latency (ns)": 625493747051, "95.00 percentile latency (ns)": 660132023394, "97.00 percentile latency (ns)": 673971062890, "99.00 percentile latency (ns)": 687925655654, "99.90 percentile latency (ns)": 694191665306, "Max latency (ns)": 694885933720, "Mean latency (ns)": 347619529025, "Min duration satisfied": "Yes", "Min latency (ns)": 69029256, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 2071.51, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "Shared with host", "accelerator_memory_configuration": "SRAM", "accelerator_model_name": "NVIDIA AGX Xavier", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.074, "characteristics.good": 38037, "characteristics.samples_per_second": 2071.51, "characteristics.samples_per_second.normalized_per_core": 2071.51, "characteristics.samples_per_second.normalized_per_processor": 2071.51, "characteristics.total": 50000, "ck_system": "AGX_Xavier_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "21.03 Jetson CUDA-X AI Developer Preview, TensorRT 7.2.3, CUDA 10.2", "host_memory_capacity": "32 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 8, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)", "host_processors_per_node": 1, "host_storage_capacity": "32 GB", "host_storage_type": "eMMC 5.1", "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline and MultiStream scenarios", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/AGX_Xavier_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "21.03 Jetson CUDA-X AI Developer Preview, TensorRT 7.2.3, CUDA 10.2, cuDNN 8.0.0, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1439460, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/AGX_Xavier_TRT", "system_name": "NVIDIA Jetson AGX Xavier 32GB (TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 2181, "task": "image classification", "task2": "image classification", "total_cores": 8, "uid": "5026ec8d0a7f44d2", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 319633640398, "90.00 percentile latency (ns)": 575242056333, "95.00 percentile latency (ns)": 607135182102, "97.00 percentile latency (ns)": 619844492931, "99.00 percentile latency (ns)": 632778921274, "99.90 percentile latency (ns)": 638382136860, "Max latency (ns)": 638995757223, "Mean latency (ns)": 319637175007, "Min duration satisfied": "Yes", "Min latency (ns)": 431233807, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 38009.6, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 38009.6, "characteristics.samples_per_second.normalized_per_core": 38009.6, "characteristics.samples_per_second.normalized_per_processor": 38009.6, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GBx1_TRT_edge", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx1_TRT_edge", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 24288000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx1_TRT_edge", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 36800, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "78aba7f8f73bbc56", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 333259113598, "90.00 percentile latency (ns)": 599783376511, "95.00 percentile latency (ns)": 633129884119, "97.00 percentile latency (ns)": 646409518593, "99.00 percentile latency (ns)": 659738273915, "99.90 percentile latency (ns)": 665769341267, "Max latency (ns)": 666399739872, "Mean latency (ns)": 333238077054, "Min duration satisfied": "Yes", "Min latency (ns)": 52996588, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 5051.02, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB (1x1g.10gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 5051.02, "characteristics.samples_per_second.normalized_per_core": 5051.02, "characteristics.samples_per_second.normalized_per_processor": 5051.02, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0, Triton 21.02", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 3366000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT_Triton", "system_name": "NVIDIA DGX-A100 (1x A100-SXM-80GB-MIG-1x1g.10gb, TensorRT, Triton)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 5100, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "fb1d93b26450e99a", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 359795986750, "90.00 percentile latency (ns)": 647400324820, "95.00 percentile latency (ns)": 683379334067, "97.00 percentile latency (ns)": 697778700990, "99.00 percentile latency (ns)": 712163619656, "99.90 percentile latency (ns)": 718640631114, "Max latency (ns)": 719325342693, "Mean latency (ns)": 359748305745, "Min duration satisfied": "Yes", "Min latency (ns)": 141929075, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 1403.82, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "Shared with host", "accelerator_memory_configuration": "SRAM", "accelerator_model_name": "NVIDIA AGX Xavier", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 75.942, "characteristics.good": 37971, "characteristics.power": 25.204840055632843, "characteristics.power.normalized_per_core": 25.204840055632843, "characteristics.power.normalized_per_processor": 25.204840055632843, "characteristics.samples_per_second": 1403.82, "characteristics.samples_per_second.normalized_per_core": 1403.82, "characteristics.samples_per_second.normalized_per_processor": 1403.82, "characteristics.total": 50000, "ck_system": "AGX_Xavier_TRT_MaxQ", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "21.03 Jetson CUDA-X AI Developer Preview, TensorRT 7.2.3, CUDA 10.2", "host_memory_capacity": "32 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 8, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)", "host_processors_per_node": 1, "host_storage_capacity": "32 GB", "host_storage_type": "eMMC 5.1", "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline and MultiStream scenarios", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/AGX_Xavier_TRT_MaxQ", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "21.03 Jetson CUDA-X AI Developer Preview, TensorRT 7.2.3, CUDA 10.2, cuDNN 8.0.0, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 1009800, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/AGX_Xavier_TRT_MaxQ", "system_name": "NVIDIA Jetson AGX Xavier 32GB (MaxQ, TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 1530, "task": "image classification", "task2": "image classification", "total_cores": 8, "uid": "b159e0d5aa356b82", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 330103498059, "90.00 percentile latency (ns)": 594138200769, "95.00 percentile latency (ns)": 627148064306, "97.00 percentile latency (ns)": 640362990917, "99.00 percentile latency (ns)": 653578286906, "99.90 percentile latency (ns)": 659532311093, "Max latency (ns)": 660150780953, "Mean latency (ns)": 330119998291, "Min duration satisfied": "Yes", "Min latency (ns)": 70909323, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 4049.07, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30 (1x1g.3gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 4049.07, "characteristics.samples_per_second.normalized_per_core": 4049.07, "characteristics.samples_per_second.normalized_per_processor": 4049.07, "characteristics.total": 50000, "ck_system": "A30-MIG_1x1g.3gb_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A30-MIG_1x1g.3gb_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.46, DALI 0.30.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 2673000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "preview", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30-MIG_1x1g.3gb_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 4050, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "588bb461d3ec9c54", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 322358998321, "90.00 percentile latency (ns)": 581112417823, "95.00 percentile latency (ns)": 613386079132, "97.00 percentile latency (ns)": 626353383600, "99.00 percentile latency (ns)": 639321866318, "99.90 percentile latency (ns)": 645112939687, "Max latency (ns)": 645723538232, "Mean latency (ns)": 322466184189, "Min duration satisfied": "Yes", "Min latency (ns)": 84497360, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 13287.4, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA A10", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 13287.4, "characteristics.samples_per_second.normalized_per_core": 13287.4, "characteristics.samples_per_second.normalized_per_processor": 13287.4, "characteristics.total": 50000, "ck_system": "A10x1_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A10x1_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0, Triton 21.02", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 8580000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "preview", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x1_TRT_Triton", "system_name": "Supermicro 4029GP-TRT-OTO-28 (1x A10, TensorRT, Triton)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 13000, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "d9d881f604683875", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 319453417540, "90.00 percentile latency (ns)": 575090323108, "95.00 percentile latency (ns)": 607050125337, "97.00 percentile latency (ns)": 619817296654, "99.00 percentile latency (ns)": 632585932327, "99.90 percentile latency (ns)": 638350012152, "Max latency (ns)": 638963355886, "Mean latency (ns)": 319466977522, "Min duration satisfied": "Yes", "Min latency (ns)": 90520472, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 38011.6, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 38011.6, "characteristics.samples_per_second.normalized_per_core": 38011.6, "characteristics.samples_per_second.normalized_per_processor": 38011.6, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GBx1_TRT_Triton_edge", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GBx1_TRT_Triton_edge", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0, Triton 21.02; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 24288000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GBx1_TRT_Triton_edge", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 36800, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "c7542058d1628a0e", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 322651684294, "90.00 percentile latency (ns)": 581618445085, "95.00 percentile latency (ns)": 613917015909, "97.00 percentile latency (ns)": 626897189615, "99.00 percentile latency (ns)": 639873975224, "99.90 percentile latency (ns)": 645668715420, "Max latency (ns)": 646281200290, "Mean latency (ns)": 322772677752, "Min duration satisfied": "Yes", "Min latency (ns)": 192028133, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 13276, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA A10", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 13276, "characteristics.samples_per_second.normalized_per_core": 13276.0, "characteristics.samples_per_second.normalized_per_processor": 13276.0, "characteristics.total": 50000, "ck_system": "A10x1_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A10x1_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 8580000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "preview", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A10x1_TRT", "system_name": "Supermicro 4029GP-TRT-OTO-28 (1x A10, TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 13000, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "1a8e5524e47e18f6", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 343713652714, "90.00 percentile latency (ns)": 618487370341, "95.00 percentile latency (ns)": 652750871925, "97.00 percentile latency (ns)": 666454812223, "99.00 percentile latency (ns)": 680334144311, "99.90 percentile latency (ns)": 686482783588, "Max latency (ns)": 687042061363, "Mean latency (ns)": 343612980214, "Min duration satisfied": "Yes", "Min latency (ns)": 329693970, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 17483.6, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 17483.6, "characteristics.samples_per_second.normalized_per_core": 17483.6, "characteristics.samples_per_second.normalized_per_processor": 17483.6, "characteristics.total": 50000, "ck_system": "A30x1_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A30x1_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.46, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 12012000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "preview", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30x1_TRT", "system_name": "Gigabyte G482-Z54 (1x A30, TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 18200, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "d45d69cc3d1d2428", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 330818236190, "90.00 percentile latency (ns)": 595448633336, "95.00 percentile latency (ns)": 628533907593, "97.00 percentile latency (ns)": 641776416499, "99.00 percentile latency (ns)": 655018803476, "99.90 percentile latency (ns)": 660985766011, "Max latency (ns)": 661605457357, "Mean latency (ns)": 330839596872, "Min duration satisfied": "Yes", "Min latency (ns)": 68592672, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 4040.17, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "24 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A30 (1x1g.3gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 4040.17, "characteristics.samples_per_second.normalized_per_core": 4040.17, "characteristics.samples_per_second.normalized_per_processor": 4040.17, "characteristics.total": 50000, "ck_system": "A30-MIG_1x1g.3gb_TRT_Triton", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A30-MIG_1x1g.3gb_TRT_Triton", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.46, DALI 0.30.0, Triton 21.02; GCC 7.5.0; Python 3.7.10", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 2673000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "preview", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A30-MIG_1x1g.3gb_TRT_Triton", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 4050, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "c91f645a879d733f", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 325544107864, "90.00 percentile latency (ns)": 585854500469, "95.00 percentile latency (ns)": 618436246369, "97.00 percentile latency (ns)": 631407670675, "99.00 percentile latency (ns)": 644478740895, "99.90 percentile latency (ns)": 650322197744, "Max latency (ns)": 650939350702, "Mean latency (ns)": 325514934710, "Min duration satisfied": "Yes", "Min latency (ns)": 123821126, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 5170.99, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "80 GB", "accelerator_memory_configuration": "HBM2e", "accelerator_model_name": "NVIDIA A100-SXM-80GB (1x1g.10gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 5170.99, "characteristics.samples_per_second.normalized_per_core": 5170.99, "characteristics.samples_per_second.normalized_per_processor": 5170.99, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "2 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 3366000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM-80GB-MIG_1x1g.10gb_TRT", "system_name": "NVIDIA DGX-A100 (1x A100-SXM-80GB-MIG-1x1g.10gb, TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 5100, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "787114f61be67af2", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 390704595246, "90.00 percentile latency (ns)": 703745866595, "95.00 percentile latency (ns)": 742806909382, "97.00 percentile latency (ns)": 758417278824, "99.00 percentile latency (ns)": 774272925773, "99.90 percentile latency (ns)": 781131661243, "Max latency (ns)": 781881456198, "Mean latency (ns)": 390777143303, "Min duration satisfied": "Yes", "Min latency (ns)": 515177038, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 31063.5, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40 GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe-40GB", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.03, "characteristics.good": 38015, "characteristics.samples_per_second": 31063.5, "characteristics.samples_per_second.normalized_per_core": 31063.5, "characteristics.samples_per_second.normalized_per_processor": 31063.5, "characteristics.total": 50000, "ck_system": "A100-PCIex1_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2.3, CUDA 11.1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/A100-PCIex1_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "TensorRT 7.2.3, CUDA 11.1, cuDNN 8.1.1, Driver 460.32.03, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 24288000, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex1_TRT", "system_name": "Gigabyte G482-Z54 (1x A100-PCIe, TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 36800, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "338fc3a40008e246", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 328123766723, "90.00 percentile latency (ns)": 590607789167, "95.00 percentile latency (ns)": 623393529243, "97.00 percentile latency (ns)": 636533771976, "99.00 percentile latency (ns)": 649604984238, "99.90 percentile latency (ns)": 655551053365, "Max latency (ns)": 656182507951, "Mean latency (ns)": 328134807863, "Min duration satisfied": "Yes", "Min latency (ns)": 100736136, "Min queries satisfied": "Yes", "Mode": "PerformanceOnly", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 1164.74, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "Shared with host", "accelerator_memory_configuration": "SRAM", "accelerator_model_name": "NVIDIA Xavier NX", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "boot_firmware_version": "", "characteristics.accuracy": 76.008, "characteristics.good": 38004, "characteristics.samples_per_second": 1164.74, "characteristics.samples_per_second.normalized_per_core": 1164.74, "characteristics.samples_per_second.normalized_per_processor": 1164.74, "characteristics.total": 50000, "ck_system": "Xavier_NX_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "disk_controllers": "", "disk_drives": "", "division": "closed", "filesystem": "", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "21.03 Jetson CUDA-X AI Developer Preview, TensorRT 7.2.3, CUDA 10.2", "host_memory_capacity": "8 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 6, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)", "host_processors_per_node": 1, "host_storage_capacity": "32 GB", "host_storage_type": "Micro SD Card", "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline and MultiStream scenarios", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "management_firmware_version": "", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 600000, "min_query_count": 1, "mlperf_version": 1.0, "network_speed_mbit": "", "nics_enabled_connected": "", "nics_enabled_firmware": "", "nics_enabled_os": "", "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v1.0/tree/master/closed/NVIDIA/results/Xavier_NX_TRT", "number_of_nodes": 1, "number_of_type_nics_installed": "", "operating_system": "Ubuntu 18.04.4", "other_hardware": "", "other_software_stack": "21.03 Jetson CUDA-X AI Developer Preview, TensorRT 7.2.3, CUDA 10.2, cuDNN 8.0.0, DALI 0.30.0", "performance_issue_same": 0, "performance_issue_same_index": 0, "performance_issue_unique": 0, "performance_sample_count": 2048, "power_management": "", "power_supply_details": "", "power_supply_quantity_and_rating_watts": "", "print_timestamps": 0, "problem": false, "qsl_rng_seed": 7322528924094909334, "retraining": "N", "sample_index_rng_seed": 1570999273408051088, "samples_per_query": 764280, "schedule_rng_seed": 3507442325620259414, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Xavier_NX_TRT", "system_name": "NVIDIA Jetson Xavier NX (TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 1158, "task": "image classification", "task2": "image classification", "total_cores": 6, "uid": "940ccb4ed707f8e3", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 32528506943, "90.00 percentile latency (ns)": 58363474181, "95.00 percentile latency (ns)": 61640343107, "97.00 percentile latency (ns)": 62952835737, "99.00 percentile latency (ns)": 64262605329, "99.90 percentile latency (ns)": 64803192636, "Max latency (ns)": 64803451601, "Mean latency (ns)": 32488938767, "Min duration satisfied": "Yes", "Min latency (ns)": 231712947, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 37479.5, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-SXM4", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.samples_per_second": 37479.5, "characteristics.samples_per_second.normalized_per_core": 37479.5, "characteristics.samples_per_second.normalized_per_processor": 37479.5, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM4x1_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM4x1_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.4", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 2428800, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM4x1_TRT", "system_name": "NVIDIA DGX-A100 (1x A100-SXM4, TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 36800, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "f5b9b74896a048bc", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 247516698022, "90.00 percentile latency (ns)": 445525416180, "95.00 percentile latency (ns)": 470307505775, "97.00 percentile latency (ns)": 480168307095, "99.00 percentile latency (ns)": 490081844695, "99.90 percentile latency (ns)": 494568340229, "Max latency (ns)": 495012769998, "Mean latency (ns)": 247531985092, "Min duration satisfied": "Yes", "Min latency (ns)": 56202692, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 4906.54, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "5GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-SXM4 (1x1g.5gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.samples_per_second": 4906.54, "characteristics.samples_per_second.normalized_per_core": 4906.54, "characteristics.samples_per_second.normalized_per_processor": 4906.54, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM4x1-MIG_1x1g.5gb_TRT_Triton", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM4x1-MIG_1x1g.5gb_TRT_Triton", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0, Triton 20.09; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 2428800, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM4x1-MIG_1x1g.5gb_TRT_Triton", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 36800, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "0bf791407481db12", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 36212344586, "90.00 percentile latency (ns)": 65392056636, "95.00 percentile latency (ns)": 69027857523, "97.00 percentile latency (ns)": 70508313208, "99.00 percentile latency (ns)": 71987853538, "99.90 percentile latency (ns)": 72610744892, "Max latency (ns)": 72669258464, "Mean latency (ns)": 36275886656, "Min duration satisfied": "Yes", "Min latency (ns)": 81792095, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 33422.7, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.samples_per_second": 33422.7, "characteristics.samples_per_second.normalized_per_core": 33422.7, "characteristics.samples_per_second.normalized_per_processor": 33422.7, "characteristics.total": 50000, "ck_system": "A100-PCIex1_TRT_Triton", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/A100-PCIex1_TRT_Triton", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.4", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0, Triton 20.09", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 2428800, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex1_TRT_Triton", "system_name": "Gigabyte G482-Z52 (1x A100-PCIe, TensorRT, Triton)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 36800, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "ad8b3af49a53ef89", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 34680455964, "90.00 percentile latency (ns)": 62399971442, "95.00 percentile latency (ns)": 65896640134, "97.00 percentile latency (ns)": 67286382889, "99.00 percentile latency (ns)": 68684653619, "99.90 percentile latency (ns)": 69289858533, "Max latency (ns)": 69387274072, "Mean latency (ns)": 34678919143, "Min duration satisfied": "Yes", "Min latency (ns)": 47291628, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 2074.53, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "Shared with host", "accelerator_memory_configuration": "SRAM", "accelerator_model_name": "NVIDIA AGX Xavier", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 76.004, "characteristics.good": 38002, "characteristics.samples_per_second": 2074.53, "characteristics.samples_per_second.normalized_per_core": 2074.53, "characteristics.samples_per_second.normalized_per_processor": 2074.53, "characteristics.total": 50000, "ck_system": "AGX_Xavier_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "20.09 Jetson CUDA-X AI Developer Preview, TensorRT 7.2, CUDA 10.2", "host_memory_capacity": "32GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 8, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)", "host_processors_per_node": 1, "host_storage_capacity": "32GB", "host_storage_type": "eMMC 5.1", "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline and MultiStream scenarios", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/AGX_Xavier_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.4", "other_software_stack": "20.09 Jetson CUDA-X AI Developer Preview, TensorRT 7.2, CUDA 10.2, cuDNN 8.0.2, DALI 0.25.0", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 143946, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/AGX_Xavier_TRT", "system_name": "NVIDIA Jetson AGX Xavier 32GB (TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 2181, "task": "image classification", "task2": "image classification", "total_cores": 8, "uid": "472b7c32d244e3ba", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 32302534797, "90.00 percentile latency (ns)": 58230343632, "95.00 percentile latency (ns)": 61451645596, "97.00 percentile latency (ns)": 62759933097, "99.00 percentile latency (ns)": 64068780359, "99.90 percentile latency (ns)": 64615804441, "Max latency (ns)": 64667616920, "Mean latency (ns)": 32341275416, "Min duration satisfied": "Yes", "Min latency (ns)": 77394751, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 37558.2, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-SXM4", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.samples_per_second": 37558.2, "characteristics.samples_per_second.normalized_per_core": 37558.2, "characteristics.samples_per_second.normalized_per_processor": 37558.2, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM4x1_TRT_Triton", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM4x1_TRT_Triton", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0, Triton 20.09; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 2428800, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM4x1_TRT_Triton", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 36800, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "7d23cf4a83a93fdf", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 244544780257, "90.00 percentile latency (ns)": 440205887834, "95.00 percentile latency (ns)": 464635770058, "97.00 percentile latency (ns)": 474427615236, "99.00 percentile latency (ns)": 484222988670, "99.90 percentile latency (ns)": 488654901721, "Max latency (ns)": 489042139072, "Mean latency (ns)": 244581352269, "Min duration satisfied": "Yes", "Min latency (ns)": 121269449, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 4966.44, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "5GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-SXM4 (1x1g.5gb MIG)", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.samples_per_second": 4966.44, "characteristics.samples_per_second.normalized_per_core": 4966.44, "characteristics.samples_per_second.normalized_per_processor": 4966.44, "characteristics.total": 50000, "ck_system": "DGX-A100_A100-SXM4x1-MIG_1x1g.5gb_TRT", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "1 TB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "15 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/DGX-A100_A100-SXM4x1-MIG_1x1g.5gb_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 2428800, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/DGX-A100_A100-SXM4x1-MIG_1x1g.5gb_TRT", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 36800, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "aa930f2fd469cb1e", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 33457832988, "90.00 percentile latency (ns)": 60278769159, "95.00 percentile latency (ns)": 63658462115, "97.00 percentile latency (ns)": 64983405720, "99.00 percentile latency (ns)": 66311664833, "99.90 percentile latency (ns)": 66953511856, "Max latency (ns)": 66982998730, "Mean latency (ns)": 33479817621, "Min duration satisfied": "Yes", "Min latency (ns)": 76266622, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "Triton_Server", "Samples per second": 6010.48, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.samples_per_second": 6010.48, "characteristics.samples_per_second.normalized_per_core": 6010.48, "characteristics.samples_per_second.normalized_per_processor": 6010.48, "characteristics.total": 50000, "ck_system": "T4x1_TRT_Triton", "ck_used": true, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 120, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7V13 64-Core Processor", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/T4x1_TRT_Triton", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.5 LTS (Linux-5.4.0-1055-azure-x86_64-with-Ubuntu-18.04-bionic)", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0, Triton 20.09; GCC 7.5.0; Python 3.7.10", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 402600, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "Powered by CK v2.5.8 (https://github.com/ctuning/ck)", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x1_TRT_Triton", "system_name": "Microsoft Corporation 7.0 (Virtual Machine)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 6100, "task": "image classification", "task2": "image classification", "total_cores": 240, "uid": "b5862fac7ff4b29b", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 33255380983, "90.00 percentile latency (ns)": 60015237975, "95.00 percentile latency (ns)": 63390155083, "97.00 percentile latency (ns)": 64712811909, "99.00 percentile latency (ns)": 66035478769, "99.90 percentile latency (ns)": 66675066906, "Max latency (ns)": 66704545042, "Mean latency (ns)": 33292417809, "Min duration satisfied": "Yes", "Min latency (ns)": 41964764, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 6035.57, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "16 GB", "accelerator_memory_configuration": "GDDR6", "accelerator_model_name": "NVIDIA T4", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.samples_per_second": 6035.57, "characteristics.samples_per_second.normalized_per_core": 6035.57, "characteristics.samples_per_second.normalized_per_processor": 6035.57, "characteristics.total": 50000, "ck_system": "T4x1_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 28, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "ECC off", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/T4x1_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.4", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 402600, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/T4x1_TRT", "system_name": "Supermicro 4029GP-TRT-OTO-28 (1x T4, TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 6100, "task": "image classification", "task2": "image classification", "total_cores": 56, "uid": "a4ac1b12c069d3e1", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 37995670884, "90.00 percentile latency (ns)": 68294246754, "95.00 percentile latency (ns)": 72155735537, "97.00 percentile latency (ns)": 73701068003, "99.00 percentile latency (ns)": 75238359606, "99.90 percentile latency (ns)": 75880185133, "Max latency (ns)": 75880666664, "Mean latency (ns)": 37982048262, "Min duration satisfied": "Yes", "Min latency (ns)": 268158113, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 32008.2, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "40GB", "accelerator_memory_configuration": "HBM2", "accelerator_model_name": "NVIDIA A100-PCIe", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.92, "characteristics.good": 37960, "characteristics.samples_per_second": 32008.2, "characteristics.samples_per_second.normalized_per_core": 32008.2, "characteristics.samples_per_second.normalized_per_processor": 32008.2, "characteristics.total": 50000, "ck_system": "A100-PCIex1_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "TensorRT 7.2, CUDA 11.0 Update 1", "host_memory_capacity": "768 GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 64, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "AMD EPYC 7742", "host_processors_per_node": 2, "host_storage_capacity": "4 TB", "host_storage_type": "NVMe SSD", "hw_notes": "", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/A100-PCIex1_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.4", "other_software_stack": "TensorRT 7.2, CUDA 11.0 Update 1, cuDNN 8.0.2, DALI 0.25.0", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 2428800, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/A100-PCIex1_TRT", "system_name": "Gigabyte G482-Z52 (1x A100-PCIe, TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 36800, "task": "image classification", "task2": "image classification", "total_cores": 128, "uid": "9a4aed34a419e3b9", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" }, { "50.00 percentile latency (ns)": 32774724408, "90.00 percentile latency (ns)": 59214271801, "95.00 percentile latency (ns)": 62508961339, "97.00 percentile latency (ns)": 63833898753, "99.00 percentile latency (ns)": 65205994231, "99.90 percentile latency (ns)": 65749030801, "Max latency (ns)": 65855841356, "Mean latency (ns)": 32797227936, "Min duration satisfied": "Yes", "Min latency (ns)": 93143984, "Min queries satisfied": "Yes", "Mode": "Performance", "Result is": "VALID", "SUT name": "LWIS_Server", "Samples per second": 1032.25, "Scenario": "offline", "accelerator_frequency": "", "accelerator_host_interconnect": "", "accelerator_interconnect": "", "accelerator_interconnect_topology": "", "accelerator_memory_capacity": "Shared with host", "accelerator_memory_configuration": "SRAM", "accelerator_model_name": "NVIDIA Xavier NX", "accelerator_on-chip_memories": "", "accelerators_per_node": 1, "accuracy_log_probability": 0, "accuracy_log_rng_seed": 0, "accuracy_log_sampling_target": 0, "characteristics.accuracy": 75.994, "characteristics.good": 37997, "characteristics.samples_per_second": 1032.25, "characteristics.samples_per_second.normalized_per_core": 1032.25, "characteristics.samples_per_second.normalized_per_processor": 1032.25, "characteristics.total": 50000, "ck_system": "Xavier_NX_TRT", "ck_used": false, "cooling": "", "dataset": "ImageNet 2012", "dataset_link": "https://github.com/ctuning/ck/blob/master/docs/mlperf-automation/datasets/imagenet2012.md", "dim_x_default": "characteristics.samples_per_second", "dim_x_maximize": true, "dim_y_default": "characteristics.accuracy", "dim_y_maximize": true, "division": "closed", "formal_model": "resnet50-v1.5", "formal_model_accuracy": 99.0, "formal_model_link": "https://github.com/mlcommons/ck-mlops/tree/main/package", "framework": "20.09 Jetson CUDA-X AI Developer Preview, TensorRT 7.2, CUDA 10.2", "host_memory_capacity": "8GB", "host_memory_configuration": "", "host_networking": "", "host_networking_topology": "", "host_processor_caches": "", "host_processor_core_count": 6, "host_processor_frequency": "", "host_processor_interconnect": "", "host_processor_model_name": "NVIDIA Carmel (ARMv8.2)", "host_processors_per_node": 1, "host_storage_capacity": "32GB", "host_storage_type": "Micro SD Card", "hw_notes": "GPU and both DLAs are used in resnet50, ssd-mobilenet, and ssd-resnet34, in Offline and MultiStream scenarios", "informal_model": "resnet50", "input_data_types": "int8", "key.accuracy": "characteristics.accuracy", "max_async_queries": 1, "max_duration (ms)": 0, "max_query_count": 0, "min_duration (ms)": 60000, "min_query_count": 1, "mlperf_version": 0.7, "normalize_cores": 1, "normalize_processors": 1, "note_code": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/code", "note_details": "https://github.com/mlcommons/inference_results_v0.7/tree/master/closed/NVIDIA/results/Xavier_NX_TRT", "number_of_nodes": 1, "operating_system": "Ubuntu 18.04.4", "other_software_stack": "20.09 Jetson CUDA-X AI Developer Preview, TensorRT 7.2, CUDA 10.2, cuDNN 8.0.2, DALI 0.25.0", "performance_issue_same": true, "performance_issue_same_index": 0, "performance_issue_unique": true, "performance_sample_count": 2048, "print_timestamps": true, "problem": false, "qsl_rng_seed": 12786827339337101903, "retraining": "N", "sample_index_rng_seed": 12640797754436136668, "samples_per_query": 67980, "schedule_rng_seed": 3135815929913719677, "starting_weights_filename": "resnet50_v1.onnx", "status": "available", "submitter": "NVIDIA", "submitter_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.submitter/NVIDIA", "sw_notes": "", "system_link": "https://github.com/ctuning/ck-mlperf-inference/tree/main/bench.mlperf.system/Xavier_NX_TRT", "system_name": "NVIDIA Jetson Xavier NX (TensorRT)", "system_type": "edge", "target_latency (ns)": 0, "target_qps": 1030, "task": "image classification", "task2": "image classification", "total_cores": 6, "uid": "efec8f424f548a0b", "use_accelerator": true, "weight_data_types": "int8", "weight_transformations": "quantization, affine fusion" } ]