#
# Copyright (c) 2020 cTuning foundation.
# See CK COPYRIGHT.txt for copyright details.
#
# SPDX-License-Identifier: BSD-3-Clause.
# See CK LICENSE.txt for licensing details.
#

import os
import json
import re
from subprocess import check_output

MLPERF_LOG_ACCURACY_JSON = 'mlperf_log_accuracy.json'
MLPERF_LOG_DETAIL_TXT = 'mlperf_log_detail.txt'
MLPERF_LOG_SUMMARY_TXT = 'mlperf_log_summary.txt'
MLPERF_LOG_TRACE_JSON = 'mlperf_log_trace.json'
MLPERF_USER_CONF = 'user.conf'
MLPERF_AUDIT_CONF = 'audit.config'
ACCURACY_TXT = 'accuracy.txt'
RNNT_TIMING_INSTRUMENTATION_JSON = 'instr_timing.json'
RNNT_ACC_INSTRUMENTATION_JSON = 'instr_accuracy.json'

LABELS = [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l",
          "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y",
          "z", "'"]

# Values of CK_LOADGEN_INCLUDE_TRACE that enable saving the trace log.
_TRUTHY_VALUES = ('YES', 'Yes', 'yes', 'TRUE', 'True', 'true',
                  'ON', 'On', 'on', '1')

# Name of the (nested) input entry holding the preprocessed audio directory.
_PREPROCESSED_DIR_KEY = 'CK_ENV_DATASET_AUDIO_PREPROCESSED_DIR'

# Name of the manifest file expected inside the preprocessed audio directory.
_MANIFEST_FILENAME = 'wav-list.json'

# Per-sample timing fields copied from the timing instrumentation file.
_TIMING_FIELDS = ('total_time', 'pre_time', 'post_time', 'dec_time')


def logits_to_string(logits, labels):
    """Spell out a sequence of label indices as a string.

    Args:
        logits: iterable of indices into ``labels``.
        labels: sequence of strings, e.g. ``LABELS``.

    Returns:
        The concatenation of ``labels[c]`` for every ``c`` in ``logits``.

    Raises:
        KeyError: if an index is not in ``0 .. len(labels) - 1`` (negative
            indices are deliberately not treated as "from the end").
    """
    labels_map = dict(enumerate(labels))
    return ''.join(labels_map[c] for c in logits)


def keyval(d, key):
    """Search a nested dictionary depth-first for ``key``.

    Values that are themselves dictionaries are always descended into and
    are never returned, even when their own key equals ``key``.

    Args:
        d: dictionary to search (may contain nested dictionaries).
        key: key to look for.

    Returns:
        The first matching non-dict value found in iteration order, or
        ``None`` when there is no match.
    """
    for k, v in d.items():
        if isinstance(v, dict):
            result = keyval(v, key)
            if result is not None:
                return result
        elif k == key:
            return v
    return None


def _preprocessed_audio_dir(i):
    """Return the preprocessed audio directory found anywhere inside ``i``.

    Raises:
        KeyError: if the entry is absent, instead of letting ``None`` reach
            ``os.path.join`` and fail with an unrelated ``TypeError``.
    """
    path = keyval(i, _PREPROCESSED_DIR_KEY)
    if path is None:
        raise KeyError(_PREPROCESSED_DIR_KEY)
    return path


def _parse_summary(lines):
    """Turn ``name: value`` summary lines into a dict; skip other lines."""
    parsed = {}
    for line in lines:
        pair = line.strip().split(': ', 1)
        if len(pair) == 2:
            parsed[pair[0].strip()] = pair[1].strip()
    return parsed


def _evaluate_accuracy(i, save_dict):
    """Run the RNN-T accuracy script and record its WER and accuracy.

    The script output is printed, written to ``ACCURACY_TXT`` and parsed
    for the ``Word Error Rate: X%, accuracy=Y%`` line; ``X`` and ``Y`` are
    stored in ``save_dict`` under ``'wer'`` and ``'accuracy'``.

    Raises:
        ValueError: if the output does not contain the expected line.
    """
    inference_dir = (i['deps']['lib-python-loadgen']['dict']['deps']
                     ['mlperf-inference']['dict']['env']
                     ['CK_ENV_MLPERF_INFERENCE'])
    accuracy_script = os.path.join(inference_dir, 'speech_recognition',
                                   'rnnt', 'accuracy_eval.py')

    preprocessed_dir = _preprocessed_audio_dir(i)
    dataset_dir = os.path.join(preprocessed_dir, '..')
    manifest = os.path.join(preprocessed_dir, _MANIFEST_FILENAME)

    command = [
        i['deps']['python']['dict']['env']['CK_ENV_COMPILER_PYTHON_FILE'],
        accuracy_script,
        '--log_dir', '.',
        '--dataset_dir', dataset_dir,
        '--manifest', manifest,
    ]
    output = check_output(command).decode('ascii')
    print(output)

    with open(ACCURACY_TXT, 'w') as accuracy_file:
        accuracy_file.write(output)

    match = re.search(r'Word Error Rate: (.+)%, accuracy=(.+)%', output)
    if match is None:
        raise ValueError(
            'Could not find the word error rate in the output of '
            + accuracy_script)
    save_dict['wer'] = float(match.group(1))
    save_dict['accuracy'] = float(match.group(2))


def _collect_instrumentation(i, save_dict):
    """Merge the per-sample instrumentation files into ``save_dict``.

    Reads the manifest plus the timing and accuracy instrumentation files,
    builds one record per accuracy sample and stores the result under
    ``'instrumentation'``; the overall ``'execution_time'`` is copied from
    the timing file.
    """
    man_path = os.path.join(_preprocessed_audio_dir(i), _MANIFEST_FILENAME)
    with open(man_path, 'r') as manifest_file:
        manifest = json.load(manifest_file)
    with open(RNNT_TIMING_INSTRUMENTATION_JSON, 'r') as instr_file:
        timings = json.load(instr_file)
    with open(RNNT_ACC_INSTRUMENTATION_JSON, 'r') as instr_file:
        accuracy = json.load(instr_file)

    # Index once instead of rescanning both lists for every sample.  Later
    # duplicates overwrite earlier ones, which matches a full scan in which
    # the last matching entry wins.
    timing_by_idx = {t['qsl_idx']: t for t in timings['samples']}
    manifest_by_transcript = {m['transcript']: m for m in manifest}

    instrumentation = []
    for s in accuracy['samples']:
        qsl_idx = s['result']['qsl_idx']
        sample = {
            'hypothesis': logits_to_string(s['hypothesis'], LABELS),
            'reference': logits_to_string(s['reference'], LABELS),
            # NOTE: per-sample WER statistics are not computed here; the
            # fields are kept as zero placeholders.
            'wer': 0,
            'scores': 0,
            'num_words': 0,
            'qsl_idx': qsl_idx,
        }

        timing = timing_by_idx.get(qsl_idx)
        if timing is not None:
            for field in _TIMING_FIELDS:
                sample[field] = timing[field]

        # Manifest entries are matched by transcript text.
        # NOTE(review): assumed to run per sample, independently of whether
        # a timing entry was found -- confirm against the upstream file.
        entry = manifest_by_transcript.get(sample['reference'])
        if entry is not None:
            sample['duration'] = entry['original_duration']
            sample['audio_filepath'] = entry['files'][0]['fname']

        instrumentation.append(sample)

    save_dict['instrumentation'] = {'wer': accuracy['wer'],
                                    'samples': instrumentation}
    save_dict['execution_time'] = timings['execution_time']


def ck_postprocess(i):
    """Collect the loadgen logs of a run into ``tmp-ck-timer.json``.

    All log and instrumentation files are read from the current working
    directory.  The function:

    * stores the accuracy, summary and detail logs (and the trace log when
      ``CK_LOADGEN_INCLUDE_TRACE`` is enabled and the trace is non-empty);
    * parses ``name: value`` lines of the summary into ``parsed_summary``;
    * stores the contents of whichever configuration files exist;
    * when the accuracy log is non-empty, runs the accuracy script and
      records ``wer`` and ``accuracy``;
    * when both instrumentation files exist, records per-sample data and
      the overall execution time.

    Args:
        i: input dictionary; ``i['env']`` and ``i['deps']`` are read here.

    Returns:
        ``{'return': 0}``.

    Raises:
        ValueError: if the accuracy script output cannot be parsed.
        KeyError: if a required entry of ``i`` is missing.
    """
    print('\n--------------------------------')

    env = i['env']
    deps = i['deps']
    include_trace = env.get('CK_LOADGEN_INCLUDE_TRACE', '') in _TRUTHY_VALUES

    inference_src_env = deps['mlperf-inference-src']['dict']['env']
    main_conf = inference_src_env['CK_ENV_MLPERF_INFERENCE_MLPERF_CONF']

    save_dict = {}

    # Save logs.
    mlperf_log_dict = save_dict['mlperf_log'] = {}
    mlperf_conf_dict = save_dict['mlperf_conf'] = {}

    with open(MLPERF_LOG_ACCURACY_JSON, 'r') as accuracy_file:
        mlperf_log_dict['accuracy'] = json.load(accuracy_file)

    with open(MLPERF_LOG_SUMMARY_TXT, 'r') as summary_file:
        summary_lines = summary_file.readlines()
    mlperf_log_dict['summary'] = summary_lines
    save_dict['parsed_summary'] = _parse_summary(summary_lines)

    with open(MLPERF_LOG_DETAIL_TXT, 'r') as detail_file:
        mlperf_log_dict['detail'] = detail_file.readlines()

    if include_trace and os.stat(MLPERF_LOG_TRACE_JSON).st_size != 0:
        with open(MLPERF_LOG_TRACE_JSON, 'r') as trace_file:
            mlperf_log_dict['trace'] = json.load(trace_file)

    for conf_path in (main_conf, MLPERF_USER_CONF, MLPERF_AUDIT_CONF):
        if os.path.exists(conf_path):
            with open(conf_path, 'r') as conf_fd:
                mlperf_conf_dict[os.path.basename(conf_path)] = \
                    conf_fd.readlines()

    # Check accuracy in accuracy mode.
    # NOTE(review): the accuracy script is assumed to run only when the
    # accuracy log is non-empty -- confirm against the upstream file.
    accuracy_mode = mlperf_log_dict['accuracy'] != []
    if accuracy_mode:
        _evaluate_accuracy(i, save_dict)

    if os.path.isfile(RNNT_TIMING_INSTRUMENTATION_JSON) and \
            os.path.isfile(RNNT_ACC_INSTRUMENTATION_JSON):
        _collect_instrumentation(i, save_dict)

    with open('tmp-ck-timer.json', 'w') as save_file:
        json.dump(save_dict, save_file, indent=2, sort_keys=True)

    print('--------------------------------\n')
    return {'return': 0}