# # Copyright (c) 2020 cTuning foundation. # See CK COPYRIGHT.txt for copyright details. # # SPDX-License-Identifier: BSD-3-Clause. # See CK LICENSE.txt for licensing details. # import os import json import re MLPERF_LOG_ACCURACY_JSON = 'mlperf_log_accuracy.json' MLPERF_LOG_DETAIL_TXT = 'mlperf_log_detail.txt' MLPERF_LOG_SUMMARY_TXT = 'mlperf_log_summary.txt' MLPERF_LOG_TRACE_JSON = 'mlperf_log_trace.json' RNNT_TIMING_INSTRUMENTATION_JSON = 'instr_timing.json' RNNT_ACC_INSTRUMENTATION_JSON = 'instr_accuracy.json' LABELS = [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"] def logits_to_string(logits, labels): labels_map = dict([(i, labels[i]) for i in range(len(labels))]) return ''.join([labels_map[c] for c in logits]) def keyval(d, key): for k,v in d.items(): if isinstance(v, dict): result = keyval(v, key) if result != None: return result else: if(k == key): return v def ck_postprocess(i): print('\n--------------------------------') save_dict = {} # Save logs. save_dict['mlperf_log'] = {} mlperf_log_dict = save_dict['mlperf_log'] with open(MLPERF_LOG_ACCURACY_JSON, 'r') as accuracy_file: mlperf_log_dict['accuracy'] = json.load(accuracy_file) with open(MLPERF_LOG_SUMMARY_TXT, 'r') as summary_file: unstripped_summary_lines = summary_file.readlines() mlperf_log_dict['summary'] = unstripped_summary_lines save_dict['parsed_summary'] = {} parsed_summary = save_dict['parsed_summary'] for line in unstripped_summary_lines: pair = line.strip().split(': ', 1) if len(pair)==2: parsed_summary[ pair[0].strip() ] = pair[1].strip() with open(MLPERF_LOG_DETAIL_TXT, 'r') as detail_file: mlperf_log_dict['detail'] = detail_file.readlines() import os if os.stat(MLPERF_LOG_TRACE_JSON).st_size==0: mlperf_log_dict['trace'] = {} else: with open(MLPERF_LOG_TRACE_JSON, 'r') as trace_file: mlperf_log_dict['trace'] = json.load(trace_file) # Check accuracy in accuracy mode. accuracy_mode = False if mlperf_log_dict['accuracy'] != []: accuracy_mode = True if os.path.isfile(RNNT_TIMING_INSTRUMENTATION_JSON) and \ os.path.isfile(RNNT_ACC_INSTRUMENTATION_JSON): instrumentation = [] # Open the json manifest man_path = keyval(i, 'CK_ENV_DATASET_AUDIO_PREPROCESSED_DIR') man_path = os.path.join(man_path, 'wav-list.json') with open(man_path, 'r') as manifest_file: manifest = json.load(manifest_file) with open(RNNT_TIMING_INSTRUMENTATION_JSON, 'r') as instr_file: timings = json.load(instr_file) with open(RNNT_ACC_INSTRUMENTATION_JSON, 'r') as instr_file: accuracy = json.load(instr_file) for s in accuracy['samples']: sample = {} sample['hypothesis']=logits_to_string(s['hypothesis'], LABELS) sample['reference']=logits_to_string(s['reference'], LABELS) #from metrics import word_error_rate #wer, scores, num_words = word_error_rate( # [sample['hypothesis']], references=[sample['reference']]) wer, scores, num_words = 0,0,0 for t in timings['samples']: if s['result']['qsl_idx'] == t['qsl_idx']: sample['total_time'] = t['total_time'] sample['pre_time'] = t['pre_time'] sample['post_time'] = t['post_time'] sample['dec_time'] = t['dec_time'] for m in manifest: if sample['reference'] == m['transcript']: sample['duration']=m['original_duration'] sample['audio_filepath']=m['files'][0]['fname'] sample['wer'] = wer sample['scores'] = scores sample['num_words'] = num_words sample['qsl_idx'] = s['result']['qsl_idx'] instrumentation.append(sample) save_dict['instrumentation'] = { 'wer': accuracy['wer'], \ 'samples': instrumentation} save_dict['execution_time'] = timings['execution_time'] with open('tmp-ck-timer.json', 'w') as save_file: json.dump(save_dict, save_file, indent=2, sort_keys=True) print('--------------------------------\n') return {'return': 0}