# Note: ResNet-34 with ImageNet -- 34-layer ResNet (basic, non-bottleneck residual blocks) for image classification
# Reference: "Deep Residual Learning for Image Recognition" https://arxiv.org/abs/1512.03385

command = TrainNetwork

precision = "float"; traceLevel = 1 ; deviceId = "auto"

rootDir = "."
configDir = "$RootDir$"
dataDir = "$RootDir$"
outputDir = "$RootDir$/Output"
modelDir = "$OutputDir$/Models"
meanDir = "$dataDir$"

modelPath = "$modelDir$/ResNet_34"
stderr = "$outputDir$/ResNet_34_BS_out"

parallelTrain = true

TrainNetwork = {
    action = "train"

    BrainScriptNetworkBuilder = {
        include "$configDir$/Macros.bs"

        imageShape  = 224:224:3       # image dimensions
        labelDim    = 1000            # number of distinct labels
        cMap        = 64:128:256:512  # feature maps per stage
        numLayers   = 3:3:5:2         # stack sizes; together with the three ResNetBasicInc blocks this
                                      # gives 3:4:6:3 basic blocks (2 convs each): 1 + 2*16 + 1 = 34 layers
        bnTimeConst = 4096

        model = Sequential(
            # conv1 and max pooling
            ConvBNReLULayer {cMap[0], (7:7), (2:2), bnTimeConst} :
            MaxPoolingLayer {(3:3), stride = 2, pad = true} :
            ResNetBasicStack {numLayers[0], cMap[0], bnTimeConst} :

            ResNetBasicInc {cMap[1], (2:2), bnTimeConst} :
            ResNetBasicStack {numLayers[1], cMap[1], bnTimeConst} :

            ResNetBasicInc {cMap[2], (2:2), bnTimeConst} :
            ResNetBasicStack {numLayers[2], cMap[2], bnTimeConst} :

            ResNetBasicInc {cMap[3], (2:2), bnTimeConst} :
            ResNetBasicStack {numLayers[3], cMap[3], bnTimeConst} :

            # avg pooling
            AveragePoolingLayer {(7:7), stride = 1} :

            # FC
            LinearLayer {labelDim, init = 'uniform'}
        )

        # inputs
        features = Input {imageShape}
        labels   = Input {labelDim}

        # apply model to features
        z = model (features)

        # loss and error computation
        ce       = CrossEntropyWithSoftmax (labels, z)
        errs     = ClassificationError (labels, z)
        top5Errs = ClassificationError (labels, z, topN = 5)

        # declare special nodes
        featureNodes    = (features)
        labelNodes      = (labels)
        criterionNodes  = (ce)
        evaluationNodes = (errs)   # top5Errs only used in Eval
        outputNodes     = (z)
    }

    SGD = {
        epochSize = 0
        minibatchSize = 256
        maxEpochs = 125
        learningRatesPerMB = 1*30: 0.1*30: 0.01*30: 0.001
        momentumPerMB = 0.9
        gradUpdateType = "None"
        L2RegWeight = 0.0001
        dropoutRate = 0
        numMBsToShowResult = 500

        disableRegInBatchNormalization = true

        ParallelTrain = {
            parallelizationMethod = "DataParallelSGD"
            distributedMBReading = true
            parallelizationStartEpoch = 1
            DataParallelSGD = {
                gradientBits = 32
            }
        }
    }

    reader = {
        readerType = "ImageReader"
        file = "$DataDir$/train_map.txt"
        randomize = "Auto"
        features = {
            width = 224
            height = 224
            channels = 3
            cropType = "RandomSide"
            jitterType = "UniRatio"
            sideRatio = 0.46666:0.875
            aspectRatioRadius = 0.8:1.0
            brightnessRadius = 0.2
            contrastRadius = 0.2
            saturationRadius = 0.4
            hflip = true
            meanFile = "$meanDir$/ImageNet1K_mean.xml"
        }
        labels = {
            labelDim = 1000
        }
    }

    cvReader = {
        readerType = "ImageReader"
        file = "$DataDir$/val_map.txt"
        randomize = "None"
        features = {
            width = 224
            height = 224
            channels = 3
            cropType = "Center"
            sideRatio = 0.875
            meanFile = "$meanDir$/ImageNet1K_mean.xml"
        }
        labels = {
            labelDim = 1000
        }
    }
}

# Re-estimate the mean and variance of the batch-normalization layers after training, with all other parameters frozen.
# More details: https://github.com/Microsoft/CNTK/wiki/Post-Batch-Normalization-Statistics#post-batch-normalization-statistics
BNStatistics = {
    action = "bnstat"
    modelPath = "$modelPath$"
    minibatchSize = 256
    itersPerNode = 30
    enableDistributedMBReading = true

    reader = {
        readerType = "ImageReader"
        file = "$DataDir$/train_map.txt"
        randomize = "Auto"
        features = {
            width = 224
            height = 224
            channels = 3
            cropType = "RandomSide"
            hflip = true
            sideRatio = 0.46666:0.875
            jitterType = "UniRatio"
            meanFile = "$meanDir$/ImageNet1K_mean.xml"
        }
        labels = {
            labelDim = 1000
        }
    }
}
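
# Note: the "bnstat" action above is expected to save the model with re-estimated BN statistics as
# "$modelPath$.PBN" (see the wiki page linked above); that is the model file the Eval section below loads.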
Eval = {
    action = "test"
    modelPath = "$modelPath$.PBN"
    minibatchSize = 200
    evalNodeNames = errs:top5Errs

    reader = {
        readerType = "ImageReader"
        file = "$DataDir$/val_map.txt"
        randomize = "None"
        features = {
            width = 224
            height = 224
            channels = 3
            cropType = "Center"
            sideRatio = 0.875
            meanFile = "$meanDir$/ImageNet1K_mean.xml"
        }
        labels = {
            labelDim = 1000
        }
    }
}
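
# Usage sketch (the config file name below is an assumption -- adjust it to the actual file name):
# "command" above runs only TrainNetwork; the BNStatistics and Eval actions can be run afterwards by
# overriding "command" on the CNTK command line, e.g.:
#   cntk configFile=ResNet34_ImageNet1K.cntk command=TrainNetwork:BNStatistics:Eval
# With parallelTrain = true, training is typically launched under MPI, e.g. via
#   mpiexec -n <numGPUs> cntk configFile=ResNet34_ImageNet1K.cntk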