"""Run the RNN-T decoder LSTM plugin on sample or random inputs and dump results.

The script is configured through ``CK_*`` environment variables. It feeds
``logit_count`` inputs through ``PluginLstmRnntDec`` one at a time, then writes
the collected outputs as raw float32 values and as JSON, plus a JSON file with
timing and shape information.
"""
import json
import os
import struct
from pprint import pprint
from time import perf_counter

import numpy as np
import torch
import torch.nn as nn

from lstm_rnnt_dec import PluginLstmRnntDec

start_setup_time = perf_counter()

# Setup.

# Values of a CK_PRINT_* variable that switch printing on.
TRUTHY_VALUES = ['yes', 'YES', 'ON', 'on', '1']

output_bin = os.environ.get('CK_OUT_RAW_DATA', 'tmp-ck-output.bin')
# Swap only the file extension: a plain str.replace('bin', 'json') would also
# rewrite directory names such as '/usr/bin/', and would leave the name
# unchanged (so the JSON dump overwrote the binary dump) if 'bin' was absent.
output_json = os.path.splitext(output_bin)[0] + '.json'

dataset_path = os.environ.get('CK_DATASET_PATH', '')
dataset_prefix = os.environ.get('CK_LSTM_DATASET_PREFIX', 'sample')
# op_id, sample_id and dropout are read but not used further in this script.
op_id = os.environ.get('CK_LSTM_OP_ID', '')
sample_id = os.environ.get('CK_LSTM_SAMPLE_ID', '0').zfill(6)
layers = int(os.environ.get('CK_LSTM_LAYERS', '2'))
hidden_width = int(os.environ.get('CK_LSTM_HIDDEN_WIDTH', '320'))
input_width = int(os.environ.get('CK_LSTM_INPUT_WIDTH', '320'))
logit_count = int(os.environ.get('CK_LSTM_LOGIT_COUNT', '128'))
batch_size = int(os.environ.get('CK_LSTM_BATCH_SIZE', '1'))
dropout = float(os.environ.get('CK_LSTM_DROPOUT', '0.0'))
rnd_seed = int(os.environ.get('CK_SEED', '42'))
rng = np.random.RandomState(rnd_seed)
print_in_tensor = os.environ.get('CK_PRINT_IN_TENSOR', 'no') in TRUTHY_VALUES
print_out_tensor = os.environ.get('CK_PRINT_OUT_TENSOR', 'no') in TRUTHY_VALUES

sizeof_float32 = 4


def _sample_path(index):
    """Return the path of the ``index``-th decoder sample file."""
    # The directory is joined exactly once; formatting dataset_path into the
    # file name as well duplicated it for relative paths.
    file_name = '{}-DEC{}.pt'.format(dataset_prefix, str(index).zfill(4))
    return os.path.join(dataset_path, file_name)


sample_file = _sample_path(0)

# LOAD LSTM
lstm = PluginLstmRnntDec()

# LOAD DATA
if os.path.exists(sample_file):
    # Only the first sample is checked for existence; the remaining ones are
    # expected to sit next to it.
    # NOTE(review): torch.load unpickles the file, so dataset files must come
    # from a trusted source.
    input_data = [torch.load(_sample_path(i)) for i in range(logit_count)]
else:
    # Generate random input data. The draw order (x, h, c per step) is kept
    # so that a given seed reproduces the same tensors.
    input_data = []
    for _ in range(logit_count):
        input_x = torch.from_numpy(
            rng.randn(1, batch_size, input_width).astype(np.float32))
        # NOTE(review): the leading 2 is hard-coded rather than taken from
        # `layers`; presumably it matches the plugin's layer count - confirm.
        input_h = torch.from_numpy(
            rng.randn(2, batch_size, hidden_width).astype(np.float32))
        input_c = torch.from_numpy(
            rng.randn(2, batch_size, hidden_width).astype(np.float32))
        input_data.append([input_x, (input_h, input_c)])

if print_in_tensor:
    print("Input:")
    pprint(input_data)
    print("")

finish_setup_time = perf_counter()

# RUN THE TEST
# One slot per step, sized by batch_size (previously fixed to 1, which is
# identical for the default batch size).
# NOTE(review): assumes each step's output is (1, batch_size, hidden_width) -
# confirm against the plugin.
output = torch.zeros([logit_count, batch_size, hidden_width])
for i in range(logit_count):
    step_input, step_state = input_data[i][0], input_data[i][1]
    outx, _ = lstm(step_input, step_state)
    output[i:i + 1] = outx

finish_lstm_time = perf_counter()

# Print output as tensor.
if print_out_tensor:
    print("LSTM Output:")
    pprint(output)

# Convert output to flat list.
output_list = output.flatten().tolist()

# Dump output as binary.
with open(output_bin, 'wb') as output_file:
    output_file.write(struct.pack('f' * len(output_list), *output_list))

# Dump output as JSON.
with open(output_json, 'w') as output_file:
    output_file.write(json.dumps(output_list, indent=2))

# Dump timing and misc info.
height, batch, width = output.size()
timer_json = 'tmp-ck-timer.json'
timer = {
    "execution_time": (finish_lstm_time - start_setup_time),
    "run_time_state": {
        "input_width": input_width,
        "hidden_width": hidden_width,
        "num_layers": layers,
        "logit_count": logit_count,
        "out_shape_N": batch,
        "out_shape_C": 1,
        "out_shape_H": height,
        "out_shape_W": width,
        "rnd_seed": rnd_seed,
        "data_bits": sizeof_float32 * 8,
        "time_setup": (finish_setup_time - start_setup_time),
        "time_test": (finish_lstm_time - finish_setup_time),
    },
}
with open(timer_json, 'w') as output_file:
    output_file.write(json.dumps(timer, indent=2))