#! /usr/bin/python import ck.kernel as ck import copy import re import argparse,json import os # ReQuEST description. request_dict={ 'report_uid':'08da9685582866a0', # unique UID for a given ReQuEST submission generated manually by user (ck uid) # the same UID will be for the report (in the same repo) 'repo_uoa':'ck-request-asplos18-mobilenets-armcl-opencl', 'repo_uid':'7698eaf859b79f2b', 'repo_cmd':'ck pull repo --url=https://github.com/dividiti/ck-request-asplos18-mobilenets-armcl-opencl', 'farm':'', # if farm of machines 'algorithm_species':'4b8bbc192ec57f63' # image classification } # Some tested experimental platforms. platform_config={ "HiKey960\x00": { "name": "Linaro HiKey960", "id": "linaro-hikey960", "gpu": "Mali-G71 MP8", "gpu_mhz": "807 MHz" }, "Rockchip RK3399 Firefly Board (Linux Opensource)\x00": { "name": "Firefly RK3399", "id": "firefly-rk3399", "gpu": "Mali-T860 MP4", "gpu_mhz": "800 MHz" }, "BLA-L09": { "name": "Huawei Mate 10 Pro", "id": "huawei-mate10pro", "gpu": "Mali-G72 MP12", "gpu_mhz": "767 MHz" } } # ArmCL-specific choices: # WINOGRAD does not support 1x1 convolutions used in MobileNets. convolution_methods = ['DEFAULT','GEMM','DIRECT'] default_convolution_method = 'DEFAULT' # DEFAULT is the library CLTuner which is disabled by default. kernel_tuners = ['NONE','DEFAULT'] default_kernel_tuner = 'NONE' # NHWC is only supported from v18.08. data_layouts = ['NCHW','NHWC'] default_data_layout ='NCHW' def get_ImageNet_path(dataset_env): return dataset_env['meta']['env']['CK_ENV_DATASET_IMAGENET_VAL'] def select_ImageNet(): res = ck.access({'action':'show', 'module_uoa':'env', 'tags':'dataset,imagenet,raw,val'}) if res['return'] > 0: return res datasets = res.get('lst',[]) if datasets: if len(datasets) == 1: return {'return': 0, 'dataset': datasets[0]} ck.out('') ck.out('More than one ImageNet dataset is found suitable for this script:') ck.out('') dataset_choices = [] for d in datasets: dataset_choices.append({ 'data_uid': d['data_uid'], 'data_uoa': get_ImageNet_path(d) }) res = ck.access({'action': 'select_uoa', 'module_uoa': 'choice', 'choices': dataset_choices}) if res['return'] > 0: return res for d in datasets: if d['data_uid'] == res['choice']: return {'return': 0, 'dataset': d} return {'return': 1, 'error': 'No installed ImageNet dataset found'} def do(i, arg): # Process arguments. if (arg.accuracy): experiment_type = 'accuracy' num_repetitions = 1 kernel_tuners = [default_kernel_tuner] else: experiment_type = 'performance' num_repetitions = arg.repetitions random_name = arg.random_name share_platform = arg.share_platform # Detect basic platform info. ii={'action':'detect', 'module_uoa':'platform', 'out':'con'} if share_platform: ii['exchange']='yes' r=ck.access(ii) if r['return']>0: return r # Keep to prepare ReQuEST meta. platform_dict=copy.deepcopy(r) # Host and target OS params. hos=r['host_os_uoa'] hosd=r['host_os_dict'] tos=r['os_uoa'] tosd=r['os_dict'] tdid=r['device_id'] # Determine platform tags: if one of the known platforms, use its id; otherwise, 'unknown-platform'. # FIXME: only works when the target platform is the same as the host platform. platform_tags=platform_config.get(r['features']['platform']['model'], {'id':'unknown-platform'})['id'] # The only supported program. program='mobilenets-armcl-opencl' # Select ImageNet dataset. r = select_ImageNet() if r['return'] > 0: return r imagenet_val = r['dataset'] img_dir_val = get_ImageNet_path(imagenet_val) ck.out('ImageNet path: ' + img_dir_val) if arg.accuracy: # Use as many batches (of size 1), as there are JPEG images in the directory. batch_count = len([f for f in os.listdir(img_dir_val) if f.endswith('.JPEG') and os.path.isfile(os.path.join(img_dir_val, f))]) else: # FIXME: Use 2 batches, using the first for warm up (to be excluded from the average)? batch_count = 1 # Restrict accuracy testing to the ReQuEST fork of ArmCL. TODO: restrict to direct convolution for large datasets. if arg.accuracy and batch_count > 500: use_lib_tags = [ 'request-d8f69c13', '18.05-0acd60ed-request' ] else: use_lib_tags = [ 'request-d8f69c13', '18.11-b9abeae08', '18.08-52ba29e9', '18.05-0acd60ed-request', '18.05-b3a371bc', '18.03-e40997bb', '18.01-f45d5a9b', '17.12-48bc34ea' ] # On Firefly-RK3399, the version hash has only 7 characters, not 8. if platform_tags=='firefly-rk3399': use_lib_tags = [ tag[:-1] for tag in use_lib_tags ] ii={'action':'show', 'module_uoa':'env', 'tags':'dataset,imagenet,aux'} rx=ck.access(ii) if len(rx['lst']) == 0: return rx img_dir_aux = rx['lst'][0]['meta']['env']['CK_ENV_DATASET_IMAGENET_AUX'] ii={'action':'load', 'module_uoa':'program', 'data_uoa':program} rx=ck.access(ii) if rx['return']>0: return rx mm=rx['dict'] # Get compile-time and run-time deps. cdeps=mm.get('compile_deps',{}) rdeps=mm.get('run_deps',{}) # Merge rdeps with cdeps for setting up the pipeline (which uses # common deps), but tag them as "for_run_time". for k in rdeps: cdeps[k]=rdeps[k] cdeps[k]['for_run_time']='yes' depl=copy.deepcopy(cdeps['library']) if (arg.tos is not None) and (arg.did is not None): tos=arg.tos tdid=arg.did ii={'action':'resolve', 'module_uoa':'env', 'host_os':hos, 'target_os':tos, 'device_id':tdid, 'out':'con', 'deps':{'library':copy.deepcopy(depl)}, 'quiet':'yes' } r=ck.access(ii) if r['return']>0: return r udepl=r['deps']['library'].get('choices',[]) # All UOAs of env for Arm Compute Libraries. if len(udepl)==0: return {'return':1, 'error':'no installed Arm Compute Libraries'} cdeps['library']['uoa']=udepl[0] depm=copy.deepcopy(cdeps['weights']) ii={'action':'resolve', 'module_uoa':'env', 'host_os':hos, 'target_os':tos, 'device_id':tdid, 'out':'con', 'deps':{'weights':copy.deepcopy(depm)}, 'quiet':'yes' } r=ck.access(ii) if r['return']>0: return r udepm=r['deps']['weights'].get('choices',[]) if len(udepm)==0: return {'return':1, 'error':'no installed Weights'} cdeps['library']['uoa']=udepl[0] cdeps['weights']['uoa']=udepm[0] ii={'action':'pipeline', 'prepare':'yes', 'dependencies':cdeps, 'module_uoa':'program', 'data_uoa':program, 'target_os':tos, 'device_id':tdid, 'no_state_check':'yes', 'no_compiler_description':'yes', 'skip_calibration':'yes', 'env':{ 'CK_ENV_DATASET_IMAGENET_VAL':img_dir_val, 'CK_BATCH_COUNT':batch_count, 'CK_SKIP_IMAGES':0 }, 'cpu_freq':'max', 'gpu_freq':'max', 'flags':'-O3', 'speed':'no', 'energy':'no', 'skip_print_timers':'yes', 'out':'con' } r=ck.access(ii) if r['return']>0: return r fail=r.get('fail','') if fail=='yes': return {'return':10, 'error':'pipeline failed ('+r.get('fail_reason','')+')'} ready=r.get('ready','') if ready!='yes': return {'return':11, 'error':'pipeline not ready'} state=r['state'] tmp_dir=state['tmp_dir'] # Remember resolved deps for this benchmarking session. xcdeps=r.get('dependencies',{}) # Clean pipeline. if 'ready' in r: del(r['ready']) if 'fail' in r: del(r['fail']) if 'return' in r: del(r['return']) experiment_count = 0 pipeline=copy.deepcopy(r) for lib_uoa in udepl: # Load ArmCL lib. ii={'action':'load', 'module_uoa':'env', 'data_uoa':lib_uoa} r=ck.access(ii) if r['return']>0: return r lib_name=r['data_name'] lib_tags=r['dict']['customize']['version'] # Skip other libraries if one is explicitly specified. if arg.library_uoa and lib_uoa != arg.library_uoa: continue # Skip some libs with "in [..]" or "not in [..]". if arg.accuracy and lib_tags not in use_lib_tags: continue skip_compile='no' # For each MobileNets model.************************************************* for model_uoa in udepm: # Load model. ii={'action':'load', 'module_uoa':'env', 'data_uoa':model_uoa} r=ck.access(ii) if r['return']>0: return r model_name=r['data_name'] # Skip other models if one is explicitly specified. if arg.model_uoa and model_uoa != arg.model_uoa: continue # Skip aggregate MobileNets packages. if 'mobilenet-all' in r['dict']['tags']: continue batch_size=1 version=1 multiplier=float(r['dict']['env']['CK_ENV_MOBILENET_MULTIPLIER']) resolution=int(r['dict']['env']['CK_ENV_MOBILENET_RESOLUTION']) record_repo='local' record_uoa='{}-{}-{}-mobilenet-v{}-{:.2f}-{}'.format(experiment_type, platform_tags, lib_tags, version, multiplier, resolution) # Skip the experiment if it already exists. if arg.resume: r = ck.access({'action':'search', 'module_uoa':'experiment', 'repo_uoa':record_repo, 'data_uoa':record_uoa}) if r['return']>0: return r if len(r['lst']) > 0: ck.out('Experiment "%s" already exists, skipping...' % record_uoa) continue # Prepare pipeline. ck.out('---------------------------------------------------------------------------------------') ck.out('%s - %s' % (lib_name, lib_uoa)) ck.out('%s - %s' % (model_name, model_uoa)) ck.out('Experiment - %s:%s' % (record_repo, record_uoa)) experiment_count += 1 # Prepare autotuning input. cpipeline=copy.deepcopy(pipeline) # Reset deps and change UOA. new_deps={'library':copy.deepcopy(depl), 'weights':copy.deepcopy(depm)} new_deps['library']['uoa']=lib_uoa new_deps['weights']['uoa']=model_uoa jj={'action':'resolve', 'module_uoa':'env', 'host_os':hos, 'target_os':tos, 'device_id':tdid, 'deps':new_deps} r=ck.access(jj) if r['return']>0: return r cpipeline['dependencies'].update(new_deps) cpipeline['no_clean']=skip_compile cpipeline['no_compile']=skip_compile # Prepare common meta for ReQuEST tournament features=copy.deepcopy(cpipeline['features']) platform_dict['features'].update(features) r=ck.access({'action':'prepare_common_meta', 'module_uoa':'request.asplos18', 'platform_dict':platform_dict, 'deps':cpipeline['dependencies'], 'request_dict':request_dict}) if r['return']>0: return r record_dict=r['record_dict'] meta=r['meta'] if random_name: rx=ck.gen_uid({}) if rx['return']>0: return rx record_uoa=rx['data_uid'] tags=r['tags'] tags.append(experiment_type) tags.append('explore-mobilenets-'+experiment_type) tags.append(lib_tags) tags.append(platform_tags) tags.append(str(resolution)) tags.append(str(multiplier)) tags.append('mobilenet-v{}'.format(version)) tags.append('mobilenet-v{}-{:.2f}-{}'.format(version, multiplier, resolution)) ii={'action':'autotune', 'module_uoa':'pipeline', 'data_uoa':'program', 'choices_order':[ [ '##choices#env#CK_BATCH_SIZE' ], [ '##choices#env#CK_ENV_MOBILENET_VERSION' ], [ '##choices#env#CK_ENV_MOBILENET_MULTIPLIER' ], [ '##choices#env#CK_ENV_MOBILENET_RESOLUTION' ], [ '##choices#env#CK_CONVOLUTION_METHOD' ], [ '##choices#env#CK_LWS_TUNER_TYPE' ], [ '##choices#env#CK_DATA_LAYOUT' ], ], 'choices_selection':[ {'type':'loop', 'choice':[batch_size], 'default':1}, # Only batch_size=1 is supported. {'type':'loop', 'choice':[version], 'default':1}, # Only version=1 is supported. {'type':'loop', 'choice':[multiplier], 'default':1.0}, {'type':'loop', 'choice':[resolution], 'default':224}, {'type':'loop', 'choice':convolution_methods, 'default':default_convolution_method}, {'type':'loop', 'choice':kernel_tuners, 'default':default_kernel_tuner}, {'type':'loop', 'choice':data_layouts, 'default':default_data_layout}, ], 'features_keys_to_process':['##choices#*'], 'iterations':-1, 'repetitions': num_repetitions, 'record':'yes', 'record_failed':'yes', 'record_params':{ 'search_point_by_features':'yes' }, 'tags':tags, 'meta':meta, 'record_dict':record_dict, 'record_repo':record_repo, 'record_uoa':record_uoa, 'pipeline':cpipeline, 'out':'con' } if not arg.dry_run: r=ck.access(ii) if r['return']>0: return r fail=r.get('fail','') if fail=='yes': return {'return':10, 'error':'pipeline failed ('+r.get('fail_reason','')+')'} if arg.dry_run: ck.out('---------------------------------------------------------------------------------------') ck.out('Experiment count: %d' % experiment_count) ### end pipeline return {'return':0} ############################################################################################## parser = argparse.ArgumentParser(description='Pipeline') parser.add_argument("--target_os", action="store", dest="tos") parser.add_argument("--device_id", action="store", dest="did") parser.add_argument("--accuracy", action="store_true", default=False, dest="accuracy") parser.add_argument("--repetitions", action="store", default=10, dest="repetitions") parser.add_argument("--random_name", action="store_true", default=False, dest="random_name") parser.add_argument("--share_platform", action="store_true", default=False, dest="share_platform") parser.add_argument("--dry_run", action="store_true", default=False, dest="dry_run") parser.add_argument("--library_uoa", action="store", default='', dest="library_uoa") parser.add_argument("--model_uoa", action="store", default='', dest="model_uoa") parser.add_argument("--resume", action="store_true", default=False, dest="resume") myarg=parser.parse_args() r=do({}, myarg) if r['return']>0: ck.err(r)