CMUDict boolean argument expected

My version is 1.5 (Linux 64bit)

config:

# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for full license information.

# TODO: The new CMUDict data do not match Kaisheng Yao's paper setup (e.g. IH/AH).
#       Track down & reconcile before committing this to master.

############################################################################
# G2P.cntk                                                                 #
#                                                                          #
# Example for sequence-to-sequence modeling for grapheme-to-phoneme        #
# (aka letter-to-sound) conversion on the CMUDict                          #
############################################################################

# directory defaults (if not overridden)

RunRootDir = "../.."      # default if not overridden
DataDir    = "$RunRootDir$/Data"
OutDir     = "$RunRootDir$/Out"

# command to execute

command = train
#command = write
#command = dump

makeMode = false          # set this to true to enable restarting fr0m checkpoint
traceLevel = 1

# experiment id

deviceId = 0              # set the GPU device here, or "auto" to auto-select; or override from the command line.
ExpId = g2p-01-$deviceId$ # choose a meaningful id here. This is used for unique directory and filenames.
#ExpId = g2p-01-0         # change to different id when decoding a different model

# model

modelPath  = "$OutDir$/$ExpId$/G2P.dnn"
stderr     = "$OutDir$/$ExpId$/G2P"

# decoding config  --used by the "write" command ("write" decodes and writes the result)

beamDepth = 3                                      # 0=predict; 1=greedy; >1=beam
decodeModel = 9
decodeModelPath = "$modelPath$.$decodeModel$"      # note: epoch to decode is appended to the model path
decodeOutputPath = "$decodeModelPath$.$beamDepth$" # results are written next to the model, with beamDepth appended

# dump config  --used by the "dump" command, for inspecting the model parameters

dumpModelPath = "$modelPath$.2" # put the epoch id here

# top-level model configuration

hiddenDim = 512 
precision  = "float"
maxLayer = 2
isBidirectional = false

# comment/uncomment this or the next block to switch between readers
# --- begin uncomment for LMSequenceReader ---
readerType = "LMSequenceReader"
useCNTKTextFormatReader = false
inputVocabSize = 69
labelVocabSize = 69
mbSizes = 144:144:288*5:576
shareEmbeddings = true
fileExt = "txt"
# --- end uncomment ---

# --- begin uncomment for CNTKTextFormatReader ---
# Note: Currently this configuration cannot reach the same result with CNTKTextFormatReader.
# This is being investigated. For now, please use the LMSequenceReader.
#readerType = "CNTKTextFormatReader"
#useCNTKTextFormatReader = true
#inputVocabSize = 29     # 26 letters plus start, end, apostrophe
#labelVocabSize = 41     # 39 phonemes (~AX missing), plus start and end symbol (in index 0)
#mbSizes = 72:72:144:288 # new reader is based on max(stream lengths) instead of sum(stream lengths)
#shareEmbeddings = false
#fileExt = "bsf.ctf"
# --- end uncomment ---

# corpus

maxLength = 20           # 0 disables attention
startSymbol = "<s>"      # (need to override the default which is </s>)
trainFile   = "cmudict-0.7b.train-dev-20-21.$fileExt$"
validFile   = "cmudict-0.7b.train-dev-1-21.$fileExt$"
testFile    = "cmudict-0.7b.test.$fileExt$"
mappingFile = "cmudict-0.7b.mapping"

# some reader variables that occur multiple times

cntkReaderInputDef             = [ rawInput = [ alias = "s" ; dim = $inputVocabSize$ ; format = "sparse" ] ; rawLabels = [ alias = "t" ;  dim = $labelVocabSize$ ;  format = "sparse" ] ]
lmSequenceReaderInputDef       = [ dim = 0 ]
lmSequenceReaderInputLabelsDef = [ dim = 1 ; labelType = "category" ;  labelDim = "$inputVocabSize$" ; labelMappingFile = "$DataDir$/$mappingFile$" ; beginSequence = "</s>" ;  endSequence   = "</s>" ]

#######################################
#  network definition                 #
#######################################

BrainScriptNetworkBuilder = (new ComputationNetwork [

    # import general config options from outside config values
    useCNTKTextFormatReader = $useCNTKTextFormatReader$

    inputVocabDim = $inputVocabSize$
    labelVocabDim = $labelVocabSize$

    isAutoencoder = false               # input is only one sequence, meant to reproduce itself (not used for this task)
    attentionSpan = $maxLength$         # attention window, must be large enough for largest input sequence. 0 to disable. Exactly 20 is needed for the g2p CMUDict task
    useBidirectionalEncoder = $isBidirectional$ # bi-directional LSTM for encoder

    shareEmbeddings = $shareEmbeddings$
    hiddenDim       = $hiddenDim$
    attentionDim    = 128               # dim of attention  projection
    maxLayer        = $maxLayer$        # e.g. 2 for 3 hidden layers

    useStabilizer = true
    useEncoder    = true                # if false, this becomes a regular RNN
    useNYUStyle   = false               # if true use thought vector for all inputs, NYU-style

    # dimensions
    embeddingDim = 300
    inputEmbeddingDim = if inputVocabDim < 300 then inputVocabDim else embeddingDim
    labelEmbeddingDim = if labelVocabDim < 300 then labelVocabDim else embeddingDim

    encoderDims[i:0..maxLayer] = hiddenDim # this defines the number of hidden layers in each
    decoderDims[i:0..maxLayer] = hiddenDim # both are one LSTM layer only for now

    #############################################################
    # inputs
    #############################################################

    # inputs and axes must be defined on top-scope level in order to get a clean node name from BrainScript.
    inputAxis = DynamicAxis()
    rawInput  = if useCNTKTextFormatReader && !isAutoencoder
                then Input (inputVocabDim, dynamicAxis=inputAxis, tag='feature')
                else Input (inputVocabDim,                        tag='feature')
    rawLabels = if useCNTKTextFormatReader && !isAutoencoder
                then Input (labelVocabDim, tag='label')
                else rawInput

    # get out input and label data
    # Specifically, if the input and label is on a single line, we must split it in two.
    streams = [
        out = if isAutoencoder || useCNTKTextFormatReader then [
            input  = TraceSparse (rawInput, 'inp')
            labels = TraceSparse (rawLabels, 'lbl')
        ]
        else [
            separatorRow = 2                                                                             # row index of separator symbol 
            isSeparator = RowSlice (separatorRow, 1, rawInput)                                           # cut out the separator as a flag
            inInput  = BS.Boolean.Or (FutureValue (1, inInput , defaultHiddenActivation=0), isSeparator) # flag sequence: word is input...
            inLabels = BS.Boolean.Or (PastValue   (1, inLabels, defaultHiddenActivation=0), isSeparator) # ...or labels
            input  = BS.Sequences.Gather (inInput,  rawInput)                                            # use flags to split raw input into input and labels
            labels = BS.Sequences.Gather (inLabels, rawInput)                                            # (both have different lengths)
        ]
    ].out

    # inputs and labels are expected to be surrounded by sentence delimiters, e.g. <s> A B C </s>  ==>  <s> D E F </s>
    # The encoder uses all tokens of 'input', while for the target labels we must exclude the initial sentence start, which is only used as the LM history.

    inputSequence = Pass (streams.input)                             # e.g. <s> A   B   C    </s>
    labelSequence = Pass (Slice (1,  0, streams.labels,  axis=-1))   # e.g. D   E   F   </s>
    labelSentenceStart = Pass (BS.Sequences.First (streams.labels))  # e.g. <s>
    inputSequenceDim = inputVocabDim
    labelSequenceDim = labelVocabDim

    isFirstLabel = BS.Loop.IsFirst (labelSequence)

    #############################################################
    # embeddings
    #############################################################

    # Note: when reading input and labels from a single text file, we share the token mapping and embedding.
    # Note: Embeddings are linear. Should we use BatchNormalization?

    # note: this is assumed to be applied transposed, hence the swapped dimensions. Actually--why? Still needed?
    Einput  =                                     BS.Parameters.WeightParam (inputSequenceDim, inputEmbeddingDim)
    Elabels = if shareEmbeddings then Einput else BS.Parameters.WeightParam (labelSequenceDim, labelEmbeddingDim)
    EmbedInput (x)  = if inputSequenceDim == inputEmbeddingDim then x else TransposeTimes (Einput, x)
    EmbedLabels (x) = if labelSequenceDim == labelEmbeddingDim then x else TransposeTimes (Elabels, x)

    inputEmbedded  = EmbedInput  (inputSequence)
    labelsEmbedded = EmbedLabels (labelSequence)
    labelSentenceStartEmbedded = Pass (EmbedLabels (labelSentenceStart))  # TODO: remove Pass() if not actually needed in decoder
    labelSentenceStartEmbeddedScattered = BS.Sequences.Scatter (isFirstLabel, labelSentenceStartEmbedded) # unfortunately needed presently

    S(x) = BS.Parameters.Stabilize (x, enabled=useStabilizer)

    #############################################################
    # encoder (processes inputEmbedded)
    #############################################################

    # Note: We reverse our input by running the recurrence from right to left.

    encoderFunction = if useBidirectionalEncoder then BS.RNNs.RecurrentBirectionalLSTMPStack else BS.RNNs.RecurrentLSTMPStack
    encoder = encoderFunction (encoderDims, cellDims=encoderDims, S(inputEmbedded), inputDim=inputEmbeddingDim,
        previousHook=if useBidirectionalEncoder then BS.RNNs.PreviousHC else BS.RNNs.NextHC,
        enableSelfStabilization=useStabilizer)
    encoderOutput = encoder[Length (encoderDims)-1]

    # get the final encoder state for use as the initial state (not used with attention model)
    # Since we run right-to-left, the final state is the first, not the last.
    # For beam decoding, we will also inject a second dimension.
    thoughtVector = [
        h = ReshapeDimension (BS.Sequences.First (encoderOutput.h), 1, (dim:1))
        c = ReshapeDimension (BS.Sequences.First (encoderOutput.c), 1, (dim:1))
        dim = encoderOutput.dim
    ]

    thoughtVectorBroadcast = [ # broadcast to all time steps of the target sequence
        h = BS.Sequences.BroadcastSequenceAs (labelsEmbedded, thoughtVector.h)
        c = BS.Sequences.BroadcastSequenceAs (labelsEmbedded, thoughtVector.c)
        dim = thoughtVector.dim
    ]

    #############################################################
    # decoder reordering hook: propagation of beam hypotheses
    #############################################################

    # we bake into the LSTMs to multiply h and c with the 'beamSearchReorderHook' matrix, which is
    # a dummy in training but will be patched through model editing for beam decoding.
    # Specifically, the decoder will replace this by a per-sample matrix that reorders hypotheses according to
    # how they propagate. E.g. the 2nd best in a frame may be the history of the 3rd best in the subsequent frame

    beamSearchReorderHook = Pass (BS.Constants.OnesTensor (1:1))

    # helper functions to delay h and c that apply beam-search reordering, if so configured

    PreviousHCWithReorderingHook (lstmState, layerIndex=0) = [
       h = BS.Loop.Previous (lstmState.h * beamSearchReorderHook)             // hidden state(t-1)
       c = BS.Loop.Previous (lstmState.c * beamSearchReorderHook)             // cell(t-1)
       dim = lstmState.dim
    ]

    PreviousHCFromThoughtVectorWithReorderingHook (lstmState, layerIndex=0) =
        if layerIndex > 0 then PreviousHCWithReorderingHook (lstmState, layerIndex=1)
        else [ # with both thought vector and beam-search hook
            isFirst = BS.Loop.IsFirst (labelsEmbedded)
            h = BS.Boolean.If (isFirst, thoughtVectorBroadcast.h, BS.Loop.Previous (lstmState.h * beamSearchReorderHook))
            c = BS.Boolean.If (isFirst, thoughtVectorBroadcast.c, BS.Loop.Previous (lstmState.c * beamSearchReorderHook))
            dim = lstmState.dim
        ]

    #############################################################
    # decoder history hook: LM history, from ground truth vs. output
    #############################################################

    # these are the two choices for the input to the decoder network
    decoderHistoryFromGroundTruth = labelsEmbedded              # for training, decoder input is ground truth...
    decoderHistoryFromOutput = Pass (EmbedLabels (Hardmax (z))) # ...but for (greedy) decoding, the decoder's output is its previous input

    # during training, we use ground truth. For decoding, we will rewire decoderHistoryHook = decoderHistoryFromOutput
    decoderHistoryHook = Pass (decoderHistoryFromGroundTruth) # this gets redirected in decoding to feed back decoding output instead

    #############################################################
    # decoder
    #############################################################

    # There are three ways of passing encoder state:
    #  1. as initial state for decoder (Google style)
    #  2. as side information for every decoder step (NYU style)
    #  3. attention

    decoderInput    = Pass (BS.Boolean.If (isFirstLabel, labelSentenceStartEmbeddedScattered, BS.Loop.Previous (decoderHistoryHook)))
    decoderInputDim = labelEmbeddingDim

    decoderDynamicAxis = labelsEmbedded
    FixedWindowAttentionHook = BS.Seq2Seq.CreateAugmentWithFixedWindowAttentionHook (attentionDim, attentionSpan, decoderDynamicAxis, encoderOutput, enableSelfStabilization=useStabilizer)

    # some parameters to the decoder stack depend on the mode
    decoderParams =
        # with attention
        if useEncoder && attentionSpan > 0 then [
            previousHook = PreviousHCWithReorderingHook # add reordering for beam search
            augmentInputHook = FixedWindowAttentionHook # input gets augmented by the attention window
            augmentInputDim = encoderOutput.dim
        ]
        # with thought vector appended to every frame
        else if useEncoder && useNYUStyle then [
            previousHook = PreviousHCWithReorderingHook
            augmentInputHook (input, lstmState) = S(thoughtVectorBroadcast.h) # each input frame gets augmented by the thought vector
            augmentInputDim = thoughtVector.dim
        ]
        # thought vector as initial state for decoder
        else [
            previousHook = PreviousHCFromThoughtVectorWithReorderingHook # Previous() function with thought vector as initial state
            augmentInputHook = BS.RNNs.NoAuxInputHook
            augmentInputDim = 0
        ]

    # this is the decoder LSTM stack
    decoder = BS.RNNs.RecurrentLSTMPStack (decoderDims, cellDims=decoderDims,
                                           S(decoderInput), inputDim=decoderInputDim,
                                           augmentInputHook=decoderParams.augmentInputHook, augmentInputDim=decoderParams.augmentInputDim,
                                           previousHook=decoderParams.previousHook,
                                           enableSelfStabilization=useStabilizer)

    decoderOutputLayer = Length (decoder)-1
    decoderOutput = decoder[decoderOutputLayer].h
    decoderDim = decoderDims[decoderOutputLayer]

    #############################################################
    # softmax output layer
    #############################################################

    W = BS.Parameters.WeightParam (labelSequenceDim, decoderDim)
    B = BS.Parameters.BiasParam (labelSequenceDim)

    z = W * S(decoderOutput) + B;  // top-level input to Softmax

    #############################################################
    # training criteria
    #############################################################

    ce   = Pass (ReduceLogSum (z) - TransposeTimes (labelSequence,          z),  tag='criterion')
    errs = Pass (BS.Constants.One - TransposeTimes (labelSequence, Hardmax (z)), tag='evaluation')

    # score output for decoding
    scoreSequence = Pass (z)

    #############################################################
    # some helper functions
    #############################################################

    # these trace functions log their parameter's value
    TraceState (h, what) = Transpose (Trace (Transpose (h), say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=9, onlyUpToT=3, format=[ type = "real" ; transpose = false ; precisionFormat = ".4" ]))
    TraceDense (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=21, onlyUpToT=25, format=[ type = "real" ; transpose = false ; precisionFormat = ".4" ])
    TraceDenseTransposed (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=9, onlyUpToT=25, format=[ type = "real" ; transpose = true ; precisionFormat = ".4" ])
    TraceOneHot (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, format=[ type = "category" ; transpose = false ])
    TraceSparse (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, format=[ type = "sparse" ; transpose = false ])
])

#######################################
#  TRAINING CONFIG                    #
#######################################

train = [
    action = "train"
    traceLevel = 1
    epochSize = 0               # (for quick tests, this can be overridden with something small)

    # BrainScriptNetworkBuilder is defined in outer scope

    SGD = [
        minibatchSize = $mbSizes$
        learningRatesPerSample = 0.007*2:0.0035 # works well for LMSequenceReader config
        momentumAsTimeConstant = 1100
        gradientClippingWithTruncation = true   # (as opposed to clipping the Frobenius norm of the matrix)
        clippingThresholdPerSample = 2.3   # visibly impacts objectives, but not final result, so keep it for safety
        maxEpochs = 50
        numMBsToShowResult = 100
        firstMBsToShowResult = 10
        gradUpdateType = "none" # TODO: Try FSAdaGrad?
        loadBestModel = false   # true # broken for some models (rereading overwrites something that got set by validation)

        dropoutRate = 0.0

        # settings for Auto Adjust Learning Rate
        AutoAdjust = [
            autoAdjustLR = "adjustAfterEpoch"
            reduceLearnRateIfImproveLessThan = 0.001
            continueReduce = false
            increaseLearnRateIfImproveMoreThan = 1000000000
            learnRateDecreaseFactor = 0.5
            learnRateIncreaseFactor = 1.382
            numMiniBatch4LRSearch = 100
            numPrevLearnRates = 5
            numBestSearchEpoch = 1
        ]
    ]

    # reader definitions
    reader = [
        readerType = "$readerType$"
        file = "$DataDir$/$trainFile$"
        randomize = "auto"

        # specific to CNTKTextFormatReader
        skipSequenceIds = "false"
        maxErrors = 100
        traceLevel = 2
        chunkSizeInBytes = 30000000         # large enough for entire data set
        input = $cntkReaderInputDef$

        # specific to LMSequenceReader
        mode = "softmax"                    # TODO: find out what this means
        nbruttsineachrecurrentiter = 0      # 0 means auto-fill given minibatch size
        cacheBlockSize = 100000000          # read block size. This value is large enough to load entire corpus at once
        rawInput = $lmSequenceReaderInputDef$
        inputLabelsDef = $lmSequenceReaderInputLabelsDef$
        outputDummy = [ labelType = "none" ]
    ]

    cvReader = [
        readerType = "$readerType$"
        file = "$DataDir$/$validFile$"
        randomize = "none"

        # specific to CNTKTextFormatReader
        skipSequenceIds = "false"
        maxErrors = 100
        traceLevel = 2
        input = $cntkReaderInputDef$

        # specific to LMSequenceReader
        mode = "softmax"                    # TODO: find out what this means
        nbruttsineachrecurrentiter = 0      # 0 means auto-fill given minibatch size
        cacheBlockSize = 100000000          # read block size. This value is large enough to load entire corpus at once
        rawInput = $lmSequenceReaderInputDef$
        inputLabelsDef = $lmSequenceReaderInputLabelsDef$
        outputDummy = [ labelType = "none" ]
    ]
]

#######################################
#  DUMP CONFIG                        #
#######################################

# dumps the model, specifically the learnable parameters

dump = [
    action = "dumpnode"
    modelPath = "$dumpModelPath$"
    outputFile = "$dumpModelPath$.txt"
]

#######################################
#  WRITE CONFIG                       #
#######################################

# This will decode the test set. The beamDepth parameter specifies the decoding mode:
#  beamDepth = 0: word prediction given ground truth history (only useful for perplexity measurement)
#  beamDepth = 1: greedy decoding: At each time step, choose a word greedily
#  beamDepth > 1: beam decoder. Keep 'beamDepth' best hypotheses, and output their globally best at the end.

write = [
    action = "write"

    # select the decoder
    BrainScriptNetworkBuilder = (
        # beamDepth = 0 will decode with the unmodified model.
        # beamDepth = 1 will modify the model to use the decoding output as the decoder's input.
        # beamDepth > 1 will modify the model to track multiple hypotheses and select the globally best at the end.
        if      $beamDepth$ == 0 then BS.Network.Load ("$decodeModelPath$")
        else if $beamDepth$ == 1 then BS.Seq2Seq.GreedySequenceDecoderFrom (BS.Network.Load ("$decodeModelPath$"))
        else                          BS.Seq2Seq.BeamSearchSequenceDecoderFrom (BS.Network.Load ("$decodeModelPath$"), $beamDepth$)
    )

    outputPath = $decodeOutputPath$
    #outputPath = "-"                    # "-" will write to stdout; useful for debugging

    # declare the nodes we want to write out
    # not all decoder configs have the same node names, so we just list them all
    #outputNodeNames = inputsOut:labelsOut:decodeOut:network.beamDecodingModel.inputsOut:network.beamDecodingModel.labelsOut:network.beamDecodingModel.decodeOut

    # output format
    # We configure the output to emit a flat sequence of token strings.
    format = [
        type = "category"
        transpose = false
        labelMappingFile = "$DataDir$/$mappingFile$"
    ]

    minibatchSize = 8192                # choose this to be big enough for the longest sentence
    traceLevel = 1
    epochSize = 0

    reader = [
        readerType = "$readerType$"
        file = "$DataDir$/$testFile$"
        randomize = "none"

        # specific to CNTKTextFormatReader
        skipSequenceIds = "false"
        maxErrors = 100
        traceLevel = 2
        input = $cntkReaderInputDef$

        # specific to LMSequenceReader
        mode = "softmax"                    # TODO: find out what this means
        nbruttsineachrecurrentiter = 1      # 1 means one sequence at a time
        # BUGBUG: ^^ =0 currently produces bad output. I suspect Times (data, data)
        cacheBlockSize = 100000000          # read block size. This value is large enough to load entire corpus at once
        rawInput = $lmSequenceReaderInputDef$
        inputLabelsDef = $lmSequenceReaderInputLabelsDef$
        outputDummy = [ labelType = "none" ]
    ]
]

Here is the log file

-------------------------------------------------------------------
Build info: 

        Built time: Jun  6 2016 20:07:13
        Last modified date: Mon Jun  6 16:04:52 2016
        Build type: release
        Build target: GPU
        With 1bit-SGD: no
        Math lib: acml
        CUDA_PATH: /usr/local/cuda-7.5
        CUB_PATH: /usr/local/cub-1.4.1
        CUDNN_PATH: /usr/local/cudnn-4.0
        Build Branch: HEAD
        Build SHA1: b7ed8dc9e5cd8ab35f4badae86dd42e93e9f2564
        Built by philly on 19202f7ff7b2
        Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
-------------------------------------------------------------------

Running on localhost at 2016/08/18 20:17:55
Command line: 
cntk  configFile=../Config/G2P.cntk

>>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
RunRootDir = "../.."      
DataDir    = "$RunRootDir$/Data"
OutDir     = "$RunRootDir$/Out"
command = train
makeMode = false          
traceLevel = 1
deviceId = 0              
ExpId = g2p-01-$deviceId$ 
modelPath  = "$OutDir$/$ExpId$/G2P.dnn"
stderr     = "$OutDir$/$ExpId$/G2P"
beamDepth = 3                                      
decodeModel = 9
decodeModelPath = "$modelPath$.$decodeModel$"      
decodeOutputPath = "$decodeModelPath$.$beamDepth$" 
dumpModelPath = "$modelPath$.2" 
hiddenDim = 512 
precision  = "float"
maxLayer = 2
isBidirectional = false
readerType = "CNTKTextFormatReader"
useCNTKTextFormatReader = true
inputVocabSize = 29     
labelVocabSize = 41     
mbSizes = 72:72:144:288 
shareEmbeddings = false
fileExt = "bsf.ctf"
maxLength = 20           
startSymbol = "<s>"      
trainFile   = "cmudict-0.7b.train-dev-20-21.$fileExt$"
validFile   = "cmudict-0.7b.train-dev-1-21.$fileExt$"
testFile    = "cmudict-0.7b.test.$fileExt$"
mappingFile = "cmudict-0.7b.mapping"
cntkReaderInputDef             = [ rawInput = [ alias = "s" ; dim = $inputVocabSize$ ; format = "sparse" ] ; rawLabels = [ alias = "t" ;  dim = $labelVocabSize$ ;  format = "sparse" ] ]
lmSequenceReaderInputDef       = [ dim = 0 ]
lmSequenceReaderInputLabelsDef = [ dim = 1 ; labelType = "category" ;  labelDim = "$inputVocabSize$" ; labelMappingFile = "$DataDir$/$mappingFile$" ; beginSequence = "</s>" ;  endSequence   = "</s>" ]
BrainScriptNetworkBuilder = (new ComputationNetwork [
    useCNTKTextFormatReader = $useCNTKTextFormatReader$
    inputVocabDim = $inputVocabSize$
    labelVocabDim = $labelVocabSize$
isAutoencoder = false               
attentionSpan = $maxLength$         
useBidirectionalEncoder = $isBidirectional$ 
    shareEmbeddings = $shareEmbeddings$
    hiddenDim       = $hiddenDim$
attentionDim    = 128               
maxLayer        = $maxLayer$        
    useStabilizer = true
useEncoder    = true                
useNYUStyle   = false               
    embeddingDim = 300
    inputEmbeddingDim = if inputVocabDim < 300 then inputVocabDim else embeddingDim
    labelEmbeddingDim = if labelVocabDim < 300 then labelVocabDim else embeddingDim
encoderDims[i:0..maxLayer] = hiddenDim 
decoderDims[i:0..maxLayer] = hiddenDim 
    inputAxis = DynamicAxis()
    rawInput  = if useCNTKTextFormatReader && !isAutoencoder
                then Input (inputVocabDim, dynamicAxis=inputAxis, tag='feature')
                else Input (inputVocabDim,                        tag='feature')
    rawLabels = if useCNTKTextFormatReader && !isAutoencoder
                then Input (labelVocabDim, tag='label')
                else rawInput
    streams = [
        out = if isAutoencoder || useCNTKTextFormatReader then [
            input  = TraceSparse (rawInput, 'inp')
            labels = TraceSparse (rawLabels, 'lbl')
        ]
        else [
separatorRow = 2                                                                             
isSeparator = RowSlice (separatorRow, 1, rawInput)                                           
inInput  = BS.Boolean.Or (FutureValue (1, inInput , defaultHiddenActivation=0), isSeparator) 
inLabels = BS.Boolean.Or (PastValue   (1, inLabels, defaultHiddenActivation=0), isSeparator) 
input  = BS.Sequences.Gather (inInput,  rawInput)                                            
labels = BS.Sequences.Gather (inLabels, rawInput)                                            
        ]
    ].out
inputSequence = Pass (streams.input)                             
labelSequence = Pass (Slice (1,  0, streams.labels,  axis=-1))   
labelSentenceStart = Pass (BS.Sequences.First (streams.labels))  
    inputSequenceDim = inputVocabDim
    labelSequenceDim = labelVocabDim
    isFirstLabel = BS.Loop.IsFirst (labelSequence)
    Einput  =                                     BS.Parameters.WeightParam (inputSequenceDim, inputEmbeddingDim)
    Elabels = if shareEmbeddings then Einput else BS.Parameters.WeightParam (labelSequenceDim, labelEmbeddingDim)
    EmbedInput (x)  = if inputSequenceDim == inputEmbeddingDim then x else TransposeTimes (Einput, x)
    EmbedLabels (x) = if labelSequenceDim == labelEmbeddingDim then x else TransposeTimes (Elabels, x)
    inputEmbedded  = EmbedInput  (inputSequence)
    labelsEmbedded = EmbedLabels (labelSequence)
labelSentenceStartEmbedded = Pass (EmbedLabels (labelSentenceStart))  
labelSentenceStartEmbeddedScattered = BS.Sequences.Scatter (isFirstLabel, labelSentenceStartEmbedded) 
    S(x) = BS.Parameters.Stabilize (x, enabled=useStabilizer)
    encoderFunction = if useBidirectionalEncoder then BS.RNNs.RecurrentBirectionalLSTMPStack else BS.RNNs.RecurrentLSTMPStack
    encoder = encoderFunction (encoderDims, cellDims=encoderDims, S(inputEmbedded), inputDim=inputEmbeddingDim,
        previousHook=if useBidirectionalEncoder then BS.RNNs.PreviousHC else BS.RNNs.NextHC,
        enableSelfStabilization=useStabilizer)
    encoderOutput = encoder[Length (encoderDims)-1]
    thoughtVector = [
        h = ReshapeDimension (BS.Sequences.First (encoderOutput.h), 1, (dim:1))
        c = ReshapeDimension (BS.Sequences.First (encoderOutput.c), 1, (dim:1))
        dim = encoderOutput.dim
    ]
thoughtVectorBroadcast = [ 
        h = BS.Sequences.BroadcastSequenceAs (labelsEmbedded, thoughtVector.h)
        c = BS.Sequences.BroadcastSequenceAs (labelsEmbedded, thoughtVector.c)
        dim = thoughtVector.dim
    ]
    beamSearchReorderHook = Pass (BS.Constants.OnesTensor (1:1))
    PreviousHCWithReorderingHook (lstmState, layerIndex=0) = [
       h = BS.Loop.Previous (lstmState.h * beamSearchReorderHook)             // hidden state(t-1)
       c = BS.Loop.Previous (lstmState.c * beamSearchReorderHook)             // cell(t-1)
       dim = lstmState.dim
    ]
    PreviousHCFromThoughtVectorWithReorderingHook (lstmState, layerIndex=0) =
        if layerIndex > 0 then PreviousHCWithReorderingHook (lstmState, layerIndex=1)
else [ 
            isFirst = BS.Loop.IsFirst (labelsEmbedded)
            h = BS.Boolean.If (isFirst, thoughtVectorBroadcast.h, BS.Loop.Previous (lstmState.h * beamSearchReorderHook))
            c = BS.Boolean.If (isFirst, thoughtVectorBroadcast.c, BS.Loop.Previous (lstmState.c * beamSearchReorderHook))
            dim = lstmState.dim
        ]
decoderHistoryFromGroundTruth = labelsEmbedded              
decoderHistoryFromOutput = Pass (EmbedLabels (Hardmax (z))) 
decoderHistoryHook = Pass (decoderHistoryFromGroundTruth) 
    decoderInput    = Pass (BS.Boolean.If (isFirstLabel, labelSentenceStartEmbeddedScattered, BS.Loop.Previous (decoderHistoryHook)))
    decoderInputDim = labelEmbeddingDim
    decoderDynamicAxis = labelsEmbedded
    FixedWindowAttentionHook = BS.Seq2Seq.CreateAugmentWithFixedWindowAttentionHook (attentionDim, attentionSpan, decoderDynamicAxis, encoderOutput, enableSelfStabilization=useStabilizer)
    decoderParams =
        if useEncoder && attentionSpan > 0 then [
previousHook = PreviousHCWithReorderingHook 
augmentInputHook = FixedWindowAttentionHook 
            augmentInputDim = encoderOutput.dim
        ]
        else if useEncoder && useNYUStyle then [
            previousHook = PreviousHCWithReorderingHook
augmentInputHook (input, lstmState) = S(thoughtVectorBroadcast.h) 
            augmentInputDim = thoughtVector.dim
        ]
        else [
previousHook = PreviousHCFromThoughtVectorWithReorderingHook 
            augmentInputHook = BS.RNNs.NoAuxInputHook
            augmentInputDim = 0
        ]
    decoder = BS.RNNs.RecurrentLSTMPStack (decoderDims, cellDims=decoderDims,
                                           S(decoderInput), inputDim=decoderInputDim,
                                           augmentInputHook=decoderParams.augmentInputHook, augmentInputDim=decoderParams.augmentInputDim,
                                           previousHook=decoderParams.previousHook,
                                           enableSelfStabilization=useStabilizer)
    decoderOutputLayer = Length (decoder)-1
    decoderOutput = decoder[decoderOutputLayer].h
    decoderDim = decoderDims[decoderOutputLayer]
    W = BS.Parameters.WeightParam (labelSequenceDim, decoderDim)
    B = BS.Parameters.BiasParam (labelSequenceDim)
    z = W * S(decoderOutput) + B;  // top-level input to Softmax
    ce   = Pass (ReduceLogSum (z) - TransposeTimes (labelSequence,          z),  tag='criterion')
    errs = Pass (BS.Constants.One - TransposeTimes (labelSequence, Hardmax (z)), tag='evaluation')
    scoreSequence = Pass (z)
    TraceState (h, what) = Transpose (Trace (Transpose (h), say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=9, onlyUpToT=3, format=[ type = "real" ; transpose = false ; precisionFormat = ".4" ]))
    TraceDense (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=21, onlyUpToT=25, format=[ type = "real" ; transpose = false ; precisionFormat = ".4" ])
    TraceDenseTransposed (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=9, onlyUpToT=25, format=[ type = "real" ; transpose = true ; precisionFormat = ".4" ])
    TraceOneHot (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, format=[ type = "category" ; transpose = false ])
    TraceSparse (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, format=[ type = "sparse" ; transpose = false ])
])
train = [
    action = "train"
    traceLevel = 1
epochSize = 0               
    SGD = [
        minibatchSize = $mbSizes$
learningRatesPerSample = 0.007*2:0.0035 
        momentumAsTimeConstant = 1100
gradientClippingWithTruncation = true   
clippingThresholdPerSample = 2.3   
        maxEpochs = 50
        numMBsToShowResult = 100
        firstMBsToShowResult = 10
gradUpdateType = "none" 
loadBestModel = false   
        dropoutRate = 0.0
        AutoAdjust = [
            autoAdjustLR = "adjustAfterEpoch"
            reduceLearnRateIfImproveLessThan = 0.001
            continueReduce = false
            increaseLearnRateIfImproveMoreThan = 1000000000
            learnRateDecreaseFactor = 0.5
            learnRateIncreaseFactor = 1.382
            numMiniBatch4LRSearch = 100
            numPrevLearnRates = 5
            numBestSearchEpoch = 1
        ]
    ]
    reader = [
        readerType = "$readerType$"
        file = "$DataDir$/$trainFile$"
        randomize = "auto"
        skipSequenceIds = "false"
        maxErrors = 100
        traceLevel = 2
chunkSizeInBytes = 30000000         
        input = $cntkReaderInputDef$
mode = "softmax"                    
nbruttsineachrecurrentiter = 0      
cacheBlockSize = 100000000          
        rawInput = $lmSequenceReaderInputDef$
        inputLabelsDef = $lmSequenceReaderInputLabelsDef$
        outputDummy = [ labelType = "none" ]
    ]
    cvReader = [
        readerType = "$readerType$"
        file = "$DataDir$/$validFile$"
        randomize = "none"
        skipSequenceIds = "false"
        maxErrors = 100
        traceLevel = 2
        input = $cntkReaderInputDef$
mode = "softmax"                    
nbruttsineachrecurrentiter = 0      
cacheBlockSize = 100000000          
        rawInput = $lmSequenceReaderInputDef$
        inputLabelsDef = $lmSequenceReaderInputLabelsDef$
        outputDummy = [ labelType = "none" ]
    ]
]
dump = [
    action = "dumpnode"
    modelPath = "$dumpModelPath$"
    outputFile = "$dumpModelPath$.txt"
]
write = [
    action = "write"
    BrainScriptNetworkBuilder = (
        if      $beamDepth$ == 0 then BS.Network.Load ("$decodeModelPath$")
        else if $beamDepth$ == 1 then BS.Seq2Seq.GreedySequenceDecoderFrom (BS.Network.Load ("$decodeModelPath$"))
        else                          BS.Seq2Seq.BeamSearchSequenceDecoderFrom (BS.Network.Load ("$decodeModelPath$"), $beamDepth$)
    )
    outputPath = $decodeOutputPath$
    format = [
        type = "category"
        transpose = false
        labelMappingFile = "$DataDir$/$mappingFile$"
    ]
minibatchSize = 8192                
    traceLevel = 1
    epochSize = 0
    reader = [
        readerType = "$readerType$"
        file = "$DataDir$/$testFile$"
        randomize = "none"
        skipSequenceIds = "false"
        maxErrors = 100
        traceLevel = 2
        input = $cntkReaderInputDef$
mode = "softmax"                    
nbruttsineachrecurrentiter = 1      
cacheBlockSize = 100000000          
        rawInput = $lmSequenceReaderInputDef$
        inputLabelsDef = $lmSequenceReaderInputLabelsDef$
        outputDummy = [ labelType = "none" ]
    ]
]

<<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED)  <<<<<<<<<<<<<<<<<<<<

>>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
RunRootDir = "../.."      
DataDir    = "../../Data"
OutDir     = "../../Out"
command = train
makeMode = false          
traceLevel = 1
deviceId = 0              
ExpId = g2p-01-0 
modelPath  = "../../Out/g2p-01-0/G2P.dnn"
stderr     = "../../Out/g2p-01-0/G2P"
beamDepth = 3                                      
decodeModel = 9
decodeModelPath = "../../Out/g2p-01-0/G2P.dnn.9"      
decodeOutputPath = "../../Out/g2p-01-0/G2P.dnn.9.3" 
dumpModelPath = "../../Out/g2p-01-0/G2P.dnn.2" 
hiddenDim = 512 
precision  = "float"
maxLayer = 2
isBidirectional = false
readerType = "CNTKTextFormatReader"
useCNTKTextFormatReader = true
inputVocabSize = 29     
labelVocabSize = 41     
mbSizes = 72:72:144:288 
shareEmbeddings = false
fileExt = "bsf.ctf"
maxLength = 20           
startSymbol = "<s>"      
trainFile   = "cmudict-0.7b.train-dev-20-21.bsf.ctf"
validFile   = "cmudict-0.7b.train-dev-1-21.bsf.ctf"
testFile    = "cmudict-0.7b.test.bsf.ctf"
mappingFile = "cmudict-0.7b.mapping"
cntkReaderInputDef             = [ rawInput = [ alias = "s" ; dim = 29 ; format = "sparse" ] ; rawLabels = [ alias = "t" ;  dim = 41 ;  format = "sparse" ] ]
lmSequenceReaderInputDef       = [ dim = 0 ]
lmSequenceReaderInputLabelsDef = [ dim = 1 ; labelType = "category" ;  labelDim = "29" ; labelMappingFile = "../../Data/cmudict-0.7b.mapping" ; beginSequence = "</s>" ;  endSequence   = "</s>" ]
BrainScriptNetworkBuilder = (new ComputationNetwork [
    useCNTKTextFormatReader = true
    inputVocabDim = 29
    labelVocabDim = 41
isAutoencoder = false               
attentionSpan = 20         
useBidirectionalEncoder = false 
    shareEmbeddings = false
    hiddenDim       = 512
attentionDim    = 128               
maxLayer        = 2        
    useStabilizer = true
useEncoder    = true                
useNYUStyle   = false               
    embeddingDim = 300
    inputEmbeddingDim = if inputVocabDim < 300 then inputVocabDim else embeddingDim
    labelEmbeddingDim = if labelVocabDim < 300 then labelVocabDim else embeddingDim
encoderDims[i:0..maxLayer] = hiddenDim 
decoderDims[i:0..maxLayer] = hiddenDim 
    inputAxis = DynamicAxis()
    rawInput  = if useCNTKTextFormatReader && !isAutoencoder
                then Input (inputVocabDim, dynamicAxis=inputAxis, tag='feature')
                else Input (inputVocabDim,                        tag='feature')
    rawLabels = if useCNTKTextFormatReader && !isAutoencoder
                then Input (labelVocabDim, tag='label')
                else rawInput
    streams = [
        out = if isAutoencoder || useCNTKTextFormatReader then [
            input  = TraceSparse (rawInput, 'inp')
            labels = TraceSparse (rawLabels, 'lbl')
        ]
        else [
separatorRow = 2                                                                             
isSeparator = RowSlice (separatorRow, 1, rawInput)                                           
inInput  = BS.Boolean.Or (FutureValue (1, inInput , defaultHiddenActivation=0), isSeparator) 
inLabels = BS.Boolean.Or (PastValue   (1, inLabels, defaultHiddenActivation=0), isSeparator) 
input  = BS.Sequences.Gather (inInput,  rawInput)                                            
labels = BS.Sequences.Gather (inLabels, rawInput)                                            
        ]
    ].out
inputSequence = Pass (streams.input)                             
labelSequence = Pass (Slice (1,  0, streams.labels,  axis=-1))   
labelSentenceStart = Pass (BS.Sequences.First (streams.labels))  
    inputSequenceDim = inputVocabDim
    labelSequenceDim = labelVocabDim
    isFirstLabel = BS.Loop.IsFirst (labelSequence)
    Einput  =                                     BS.Parameters.WeightParam (inputSequenceDim, inputEmbeddingDim)
    Elabels = if shareEmbeddings then Einput else BS.Parameters.WeightParam (labelSequenceDim, labelEmbeddingDim)
    EmbedInput (x)  = if inputSequenceDim == inputEmbeddingDim then x else TransposeTimes (Einput, x)
    EmbedLabels (x) = if labelSequenceDim == labelEmbeddingDim then x else TransposeTimes (Elabels, x)
    inputEmbedded  = EmbedInput  (inputSequence)
    labelsEmbedded = EmbedLabels (labelSequence)
labelSentenceStartEmbedded = Pass (EmbedLabels (labelSentenceStart))  
labelSentenceStartEmbeddedScattered = BS.Sequences.Scatter (isFirstLabel, labelSentenceStartEmbedded) 
    S(x) = BS.Parameters.Stabilize (x, enabled=useStabilizer)
    encoderFunction = if useBidirectionalEncoder then BS.RNNs.RecurrentBirectionalLSTMPStack else BS.RNNs.RecurrentLSTMPStack
    encoder = encoderFunction (encoderDims, cellDims=encoderDims, S(inputEmbedded), inputDim=inputEmbeddingDim,
        previousHook=if useBidirectionalEncoder then BS.RNNs.PreviousHC else BS.RNNs.NextHC,
        enableSelfStabilization=useStabilizer)
    encoderOutput = encoder[Length (encoderDims)-1]
    thoughtVector = [
        h = ReshapeDimension (BS.Sequences.First (encoderOutput.h), 1, (dim:1))
        c = ReshapeDimension (BS.Sequences.First (encoderOutput.c), 1, (dim:1))
        dim = encoderOutput.dim
    ]
thoughtVectorBroadcast = [ 
        h = BS.Sequences.BroadcastSequenceAs (labelsEmbedded, thoughtVector.h)
        c = BS.Sequences.BroadcastSequenceAs (labelsEmbedded, thoughtVector.c)
        dim = thoughtVector.dim
    ]
    beamSearchReorderHook = Pass (BS.Constants.OnesTensor (1:1))
    PreviousHCWithReorderingHook (lstmState, layerIndex=0) = [
       h = BS.Loop.Previous (lstmState.h * beamSearchReorderHook)             // hidden state(t-1)
       c = BS.Loop.Previous (lstmState.c * beamSearchReorderHook)             // cell(t-1)
       dim = lstmState.dim
    ]
    PreviousHCFromThoughtVectorWithReorderingHook (lstmState, layerIndex=0) =
        if layerIndex > 0 then PreviousHCWithReorderingHook (lstmState, layerIndex=1)
else [ 
            isFirst = BS.Loop.IsFirst (labelsEmbedded)
            h = BS.Boolean.If (isFirst, thoughtVectorBroadcast.h, BS.Loop.Previous (lstmState.h * beamSearchReorderHook))
            c = BS.Boolean.If (isFirst, thoughtVectorBroadcast.c, BS.Loop.Previous (lstmState.c * beamSearchReorderHook))
            dim = lstmState.dim
        ]
decoderHistoryFromGroundTruth = labelsEmbedded              
decoderHistoryFromOutput = Pass (EmbedLabels (Hardmax (z))) 
decoderHistoryHook = Pass (decoderHistoryFromGroundTruth) 
    decoderInput    = Pass (BS.Boolean.If (isFirstLabel, labelSentenceStartEmbeddedScattered, BS.Loop.Previous (decoderHistoryHook)))
    decoderInputDim = labelEmbeddingDim
    decoderDynamicAxis = labelsEmbedded
    FixedWindowAttentionHook = BS.Seq2Seq.CreateAugmentWithFixedWindowAttentionHook (attentionDim, attentionSpan, decoderDynamicAxis, encoderOutput, enableSelfStabilization=useStabilizer)
    decoderParams =
        if useEncoder && attentionSpan > 0 then [
previousHook = PreviousHCWithReorderingHook 
augmentInputHook = FixedWindowAttentionHook 
            augmentInputDim = encoderOutput.dim
        ]
        else if useEncoder && useNYUStyle then [
            previousHook = PreviousHCWithReorderingHook
augmentInputHook (input, lstmState) = S(thoughtVectorBroadcast.h) 
            augmentInputDim = thoughtVector.dim
        ]
        else [
previousHook = PreviousHCFromThoughtVectorWithReorderingHook 
            augmentInputHook = BS.RNNs.NoAuxInputHook
            augmentInputDim = 0
        ]
    decoder = BS.RNNs.RecurrentLSTMPStack (decoderDims, cellDims=decoderDims,
                                           S(decoderInput), inputDim=decoderInputDim,
                                           augmentInputHook=decoderParams.augmentInputHook, augmentInputDim=decoderParams.augmentInputDim,
                                           previousHook=decoderParams.previousHook,
                                           enableSelfStabilization=useStabilizer)
    decoderOutputLayer = Length (decoder)-1
    decoderOutput = decoder[decoderOutputLayer].h
    decoderDim = decoderDims[decoderOutputLayer]
    W = BS.Parameters.WeightParam (labelSequenceDim, decoderDim)
    B = BS.Parameters.BiasParam (labelSequenceDim)
    z = W * S(decoderOutput) + B;  // top-level input to Softmax
    ce   = Pass (ReduceLogSum (z) - TransposeTimes (labelSequence,          z),  tag='criterion')
    errs = Pass (BS.Constants.One - TransposeTimes (labelSequence, Hardmax (z)), tag='evaluation')
    scoreSequence = Pass (z)
    TraceState (h, what) = Transpose (Trace (Transpose (h), say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=9, onlyUpToT=3, format=[ type = "real" ; transpose = false ; precisionFormat = ".4" ]))
    TraceDense (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=21, onlyUpToT=25, format=[ type = "real" ; transpose = false ; precisionFormat = ".4" ])
    TraceDenseTransposed (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=9, onlyUpToT=25, format=[ type = "real" ; transpose = true ; precisionFormat = ".4" ])
    TraceOneHot (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, format=[ type = "category" ; transpose = false ])
    TraceSparse (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, format=[ type = "sparse" ; transpose = false ])
])
train = [
    action = "train"
    traceLevel = 1
epochSize = 0               
    SGD = [
        minibatchSize = 72:72:144:288
learningRatesPerSample = 0.007*2:0.0035 
        momentumAsTimeConstant = 1100
gradientClippingWithTruncation = true   
clippingThresholdPerSample = 2.3   
        maxEpochs = 50
        numMBsToShowResult = 100
        firstMBsToShowResult = 10
gradUpdateType = "none" 
loadBestModel = false   
        dropoutRate = 0.0
        AutoAdjust = [
            autoAdjustLR = "adjustAfterEpoch"
            reduceLearnRateIfImproveLessThan = 0.001
            continueReduce = false
            increaseLearnRateIfImproveMoreThan = 1000000000
            learnRateDecreaseFactor = 0.5
            learnRateIncreaseFactor = 1.382
            numMiniBatch4LRSearch = 100
            numPrevLearnRates = 5
            numBestSearchEpoch = 1
        ]
    ]
    reader = [
        readerType = "CNTKTextFormatReader"
        file = "../../Data/cmudict-0.7b.train-dev-20-21.bsf.ctf"
        randomize = "auto"
        skipSequenceIds = "false"
        maxErrors = 100
        traceLevel = 2
chunkSizeInBytes = 30000000         
        input = [ rawInput = [ alias = "s" ; dim = 29 ; format = "sparse" ] ; rawLabels = [ alias = "t" ;  dim = 41 ;  format = "sparse" ] ]
mode = "softmax"                    
nbruttsineachrecurrentiter = 0      
cacheBlockSize = 100000000          
        rawInput = [ dim = 0 ]
        inputLabelsDef = [ dim = 1 ; labelType = "category" ;  labelDim = "29" ; labelMappingFile = "../../Data/cmudict-0.7b.mapping" ; beginSequence = "</s>" ;  endSequence   = "</s>" ]
        outputDummy = [ labelType = "none" ]
    ]
    cvReader = [
        readerType = "CNTKTextFormatReader"
        file = "../../Data/cmudict-0.7b.train-dev-1-21.bsf.ctf"
        randomize = "none"
        skipSequenceIds = "false"
        maxErrors = 100
        traceLevel = 2
        input = [ rawInput = [ alias = "s" ; dim = 29 ; format = "sparse" ] ; rawLabels = [ alias = "t" ;  dim = 41 ;  format = "sparse" ] ]
mode = "softmax"                    
nbruttsineachrecurrentiter = 0      
cacheBlockSize = 100000000          
        rawInput = [ dim = 0 ]
        inputLabelsDef = [ dim = 1 ; labelType = "category" ;  labelDim = "29" ; labelMappingFile = "../../Data/cmudict-0.7b.mapping" ; beginSequence = "</s>" ;  endSequence   = "</s>" ]
        outputDummy = [ labelType = "none" ]
    ]
]
dump = [
    action = "dumpnode"
    modelPath = "../../Out/g2p-01-0/G2P.dnn.2"
    outputFile = "../../Out/g2p-01-0/G2P.dnn.2.txt"
]
write = [
    action = "write"
    BrainScriptNetworkBuilder = (
        if      3 == 0 then BS.Network.Load ("../../Out/g2p-01-0/G2P.dnn.9")
        else if 3 == 1 then BS.Seq2Seq.GreedySequenceDecoderFrom (BS.Network.Load ("../../Out/g2p-01-0/G2P.dnn.9"))
        else                          BS.Seq2Seq.BeamSearchSequenceDecoderFrom (BS.Network.Load ("../../Out/g2p-01-0/G2P.dnn.9"), 3)
    )
    outputPath = ../../Out/g2p-01-0/G2P.dnn.9.3
    format = [
        type = "category"
        transpose = false
        labelMappingFile = "../../Data/cmudict-0.7b.mapping"
    ]
minibatchSize = 8192                
    traceLevel = 1
    epochSize = 0
    reader = [
        readerType = "CNTKTextFormatReader"
        file = "../../Data/cmudict-0.7b.test.bsf.ctf"
        randomize = "none"
        skipSequenceIds = "false"
        maxErrors = 100
        traceLevel = 2
        input = [ rawInput = [ alias = "s" ; dim = 29 ; format = "sparse" ] ; rawLabels = [ alias = "t" ;  dim = 41 ;  format = "sparse" ] ]
mode = "softmax"                    
nbruttsineachrecurrentiter = 1      
cacheBlockSize = 100000000          
        rawInput = [ dim = 0 ]
        inputLabelsDef = [ dim = 1 ; labelType = "category" ;  labelDim = "29" ; labelMappingFile = "../../Data/cmudict-0.7b.mapping" ; beginSequence = "</s>" ;  endSequence   = "</s>" ]
        outputDummy = [ labelType = "none" ]
    ]
]

<<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<

>>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
configparameters: G2P.cntk:beamDepth=3
configparameters: G2P.cntk:BrainScriptNetworkBuilder=(new ComputationNetwork [
    useCNTKTextFormatReader = true
    inputVocabDim = 29
    labelVocabDim = 41
isAutoencoder = false               
attentionSpan = 20         
useBidirectionalEncoder = false 
    shareEmbeddings = false
    hiddenDim       = 512
attentionDim    = 128               
maxLayer        = 2        
    useStabilizer = true
useEncoder    = true                
useNYUStyle   = false               
    embeddingDim = 300
    inputEmbeddingDim = if inputVocabDim < 300 then inputVocabDim else embeddingDim
    labelEmbeddingDim = if labelVocabDim < 300 then labelVocabDim else embeddingDim
encoderDims[i:0..maxLayer] = hiddenDim 
decoderDims[i:0..maxLayer] = hiddenDim 
    inputAxis = DynamicAxis()
    rawInput  = if useCNTKTextFormatReader && !isAutoencoder
                then Input (inputVocabDim, dynamicAxis=inputAxis, tag='feature')
                else Input (inputVocabDim,                        tag='feature')
    rawLabels = if useCNTKTextFormatReader && !isAutoencoder
                then Input (labelVocabDim, tag='label')
                else rawInput
    streams = [
        out = if isAutoencoder || useCNTKTextFormatReader then [
            input  = TraceSparse (rawInput, 'inp')
            labels = TraceSparse (rawLabels, 'lbl')
        ]
        else [
separatorRow = 2                                                                             
isSeparator = RowSlice (separatorRow, 1, rawInput)                                           
inInput  = BS.Boolean.Or (FutureValue (1, inInput , defaultHiddenActivation=0), isSeparator) 
inLabels = BS.Boolean.Or (PastValue   (1, inLabels, defaultHiddenActivation=0), isSeparator) 
input  = BS.Sequences.Gather (inInput,  rawInput)                                            
labels = BS.Sequences.Gather (inLabels, rawInput)                                            
        ]
    ].out
inputSequence = Pass (streams.input)                             
labelSequence = Pass (Slice (1,  0, streams.labels,  axis=-1))   
labelSentenceStart = Pass (BS.Sequences.First (streams.labels))  
    inputSequenceDim = inputVocabDim
    labelSequenceDim = labelVocabDim
    isFirstLabel = BS.Loop.IsFirst (labelSequence)
    Einput  =                                     BS.Parameters.WeightParam (inputSequenceDim, inputEmbeddingDim)
    Elabels = if shareEmbeddings then Einput else BS.Parameters.WeightParam (labelSequenceDim, labelEmbeddingDim)
    EmbedInput (x)  = if inputSequenceDim == inputEmbeddingDim then x else TransposeTimes (Einput, x)
    EmbedLabels (x) = if labelSequenceDim == labelEmbeddingDim then x else TransposeTimes (Elabels, x)
    inputEmbedded  = EmbedInput  (inputSequence)
    labelsEmbedded = EmbedLabels (labelSequence)
labelSentenceStartEmbedded = Pass (EmbedLabels (labelSentenceStart))  
labelSentenceStartEmbeddedScattered = BS.Sequences.Scatter (isFirstLabel, labelSentenceStartEmbedded) 
    S(x) = BS.Parameters.Stabilize (x, enabled=useStabilizer)
    encoderFunction = if useBidirectionalEncoder then BS.RNNs.RecurrentBirectionalLSTMPStack else BS.RNNs.RecurrentLSTMPStack
    encoder = encoderFunction (encoderDims, cellDims=encoderDims, S(inputEmbedded), inputDim=inputEmbeddingDim,
        previousHook=if useBidirectionalEncoder then BS.RNNs.PreviousHC else BS.RNNs.NextHC,
        enableSelfStabilization=useStabilizer)
    encoderOutput = encoder[Length (encoderDims)-1]
    thoughtVector = [
        h = ReshapeDimension (BS.Sequences.First (encoderOutput.h), 1, (dim:1))
        c = ReshapeDimension (BS.Sequences.First (encoderOutput.c), 1, (dim:1))
        dim = encoderOutput.dim
    ]
thoughtVectorBroadcast = [ 
        h = BS.Sequences.BroadcastSequenceAs (labelsEmbedded, thoughtVector.h)
        c = BS.Sequences.BroadcastSequenceAs (labelsEmbedded, thoughtVector.c)
        dim = thoughtVector.dim
    ]
    beamSearchReorderHook = Pass (BS.Constants.OnesTensor (1:1))
    PreviousHCWithReorderingHook (lstmState, layerIndex=0) = [
       h = BS.Loop.Previous (lstmState.h * beamSearchReorderHook)             // hidden state(t-1)
       c = BS.Loop.Previous (lstmState.c * beamSearchReorderHook)             // cell(t-1)
       dim = lstmState.dim
    ]
    PreviousHCFromThoughtVectorWithReorderingHook (lstmState, layerIndex=0) =
        if layerIndex > 0 then PreviousHCWithReorderingHook (lstmState, layerIndex=1)
else [ 
            isFirst = BS.Loop.IsFirst (labelsEmbedded)
            h = BS.Boolean.If (isFirst, thoughtVectorBroadcast.h, BS.Loop.Previous (lstmState.h * beamSearchReorderHook))
            c = BS.Boolean.If (isFirst, thoughtVectorBroadcast.c, BS.Loop.Previous (lstmState.c * beamSearchReorderHook))
            dim = lstmState.dim
        ]
decoderHistoryFromGroundTruth = labelsEmbedded              
decoderHistoryFromOutput = Pass (EmbedLabels (Hardmax (z))) 
decoderHistoryHook = Pass (decoderHistoryFromGroundTruth) 
    decoderInput    = Pass (BS.Boolean.If (isFirstLabel, labelSentenceStartEmbeddedScattered, BS.Loop.Previous (decoderHistoryHook)))
    decoderInputDim = labelEmbeddingDim
    decoderDynamicAxis = labelsEmbedded
    FixedWindowAttentionHook = BS.Seq2Seq.CreateAugmentWithFixedWindowAttentionHook (attentionDim, attentionSpan, decoderDynamicAxis, encoderOutput, enableSelfStabilization=useStabilizer)
    decoderParams =
        if useEncoder && attentionSpan > 0 then [
previousHook = PreviousHCWithReorderingHook 
augmentInputHook = FixedWindowAttentionHook 
            augmentInputDim = encoderOutput.dim
        ]
        else if useEncoder && useNYUStyle then [
            previousHook = PreviousHCWithReorderingHook
augmentInputHook (input, lstmState) = S(thoughtVectorBroadcast.h) 
            augmentInputDim = thoughtVector.dim
        ]
        else [
previousHook = PreviousHCFromThoughtVectorWithReorderingHook 
            augmentInputHook = BS.RNNs.NoAuxInputHook
            augmentInputDim = 0
        ]
    decoder = BS.RNNs.RecurrentLSTMPStack (decoderDims, cellDims=decoderDims,
                                           S(decoderInput), inputDim=decoderInputDim,
                                           augmentInputHook=decoderParams.augmentInputHook, augmentInputDim=decoderParams.augmentInputDim,
                                           previousHook=decoderParams.previousHook,
                                           enableSelfStabilization=useStabilizer)
    decoderOutputLayer = Length (decoder)-1
    decoderOutput = decoder[decoderOutputLayer].h
    decoderDim = decoderDims[decoderOutputLayer]
    W = BS.Parameters.WeightParam (labelSequenceDim, decoderDim)
    B = BS.Parameters.BiasParam (labelSequenceDim)
    z = W * S(decoderOutput) + B;  // top-level input to Softmax
    ce   = Pass (ReduceLogSum (z) - TransposeTimes (labelSequence,          z),  tag='criterion')
    errs = Pass (BS.Constants.One - TransposeTimes (labelSequence, Hardmax (z)), tag='evaluation')
    scoreSequence = Pass (z)
    TraceState (h, what) = Transpose (Trace (Transpose (h), say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=9, onlyUpToT=3, format=[ type = "real" ; transpose = false ; precisionFormat = ".4" ]))
    TraceDense (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=21, onlyUpToT=25, format=[ type = "real" ; transpose = false ; precisionFormat = ".4" ])
    TraceDenseTransposed (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=9, onlyUpToT=25, format=[ type = "real" ; transpose = true ; precisionFormat = ".4" ])
    TraceOneHot (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, format=[ type = "category" ; transpose = false ])
    TraceSparse (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, format=[ type = "sparse" ; transpose = false ])
])

configparameters: G2P.cntk:cntkReaderInputDef=[ rawInput = [ alias = "s" ; dim = 29 ; format = "sparse" ] ; rawLabels = [ alias = "t" ;  dim = 41 ;  format = "sparse" ] ]
configparameters: G2P.cntk:command=train
configparameters: G2P.cntk:DataDir=../../Data
configparameters: G2P.cntk:decodeModel=9
configparameters: G2P.cntk:decodeModelPath=../../Out/g2p-01-0/G2P.dnn.9
configparameters: G2P.cntk:decodeOutputPath=../../Out/g2p-01-0/G2P.dnn.9.3
configparameters: G2P.cntk:deviceId=0
configparameters: G2P.cntk:dump=[
    action = "dumpnode"
    modelPath = "../../Out/g2p-01-0/G2P.dnn.2"
    outputFile = "../../Out/g2p-01-0/G2P.dnn.2.txt"
]

configparameters: G2P.cntk:dumpModelPath=../../Out/g2p-01-0/G2P.dnn.2
configparameters: G2P.cntk:ExpId=g2p-01-0
configparameters: G2P.cntk:fileExt=bsf.ctf
configparameters: G2P.cntk:hiddenDim=512
configparameters: G2P.cntk:inputVocabSize=29
configparameters: G2P.cntk:isBidirectional=false
configparameters: G2P.cntk:labelVocabSize=41
configparameters: G2P.cntk:lmSequenceReaderInputDef=[ dim = 0 ]
configparameters: G2P.cntk:lmSequenceReaderInputLabelsDef=[ dim = 1 ; labelType = "category" ;  labelDim = "29" ; labelMappingFile = "../../Data/cmudict-0.7b.mapping" ; beginSequence = "</s>" ;  endSequence   = "</s>" ]
configparameters: G2P.cntk:makeMode=false
configparameters: G2P.cntk:mappingFile=cmudict-0.7b.mapping
configparameters: G2P.cntk:maxLayer=2
configparameters: G2P.cntk:maxLength=20
configparameters: G2P.cntk:mbSizes=72:72:144:288
configparameters: G2P.cntk:modelPath=../../Out/g2p-01-0/G2P.dnn
configparameters: G2P.cntk:OutDir=../../Out
configparameters: G2P.cntk:precision=float
configparameters: G2P.cntk:readerType=CNTKTextFormatReader
configparameters: G2P.cntk:RunRootDir=../..
configparameters: G2P.cntk:shareEmbeddings=false
configparameters: G2P.cntk:startSymbol=<s>
configparameters: G2P.cntk:stderr=../../Out/g2p-01-0/G2P
configparameters: G2P.cntk:testFile=cmudict-0.7b.test.bsf.ctf
configparameters: G2P.cntk:traceLevel=1
configparameters: G2P.cntk:train=[
    action = "train"
    traceLevel = 1
epochSize = 0               
    SGD = [
        minibatchSize = 72:72:144:288
learningRatesPerSample = 0.007*2:0.0035 
        momentumAsTimeConstant = 1100
gradientClippingWithTruncation = true   
clippingThresholdPerSample = 2.3   
        maxEpochs = 50
        numMBsToShowResult = 100
        firstMBsToShowResult = 10
gradUpdateType = "none" 
loadBestModel = false   
        dropoutRate = 0.0
        AutoAdjust = [
            autoAdjustLR = "adjustAfterEpoch"
            reduceLearnRateIfImproveLessThan = 0.001
            continueReduce = false
            increaseLearnRateIfImproveMoreThan = 1000000000
            learnRateDecreaseFactor = 0.5
            learnRateIncreaseFactor = 1.382
            numMiniBatch4LRSearch = 100
            numPrevLearnRates = 5
            numBestSearchEpoch = 1
        ]
    ]
    reader = [
        readerType = "CNTKTextFormatReader"
        file = "../../Data/cmudict-0.7b.train-dev-20-21.bsf.ctf"
        randomize = "auto"
        skipSequenceIds = "false"
        maxErrors = 100
        traceLevel = 2
chunkSizeInBytes = 30000000         
        input = [ rawInput = [ alias = "s" ; dim = 29 ; format = "sparse" ] ; rawLabels = [ alias = "t" ;  dim = 41 ;  format = "sparse" ] ]
mode = "softmax"                    
nbruttsineachrecurrentiter = 0      
cacheBlockSize = 100000000          
        rawInput = [ dim = 0 ]
        inputLabelsDef = [ dim = 1 ; labelType = "category" ;  labelDim = "29" ; labelMappingFile = "../../Data/cmudict-0.7b.mapping" ; beginSequence = "</s>" ;  endSequence   = "</s>" ]
        outputDummy = [ labelType = "none" ]
    ]
    cvReader = [
        readerType = "CNTKTextFormatReader"
        file = "../../Data/cmudict-0.7b.train-dev-1-21.bsf.ctf"
        randomize = "none"
        skipSequenceIds = "false"
        maxErrors = 100
        traceLevel = 2
        input = [ rawInput = [ alias = "s" ; dim = 29 ; format = "sparse" ] ; rawLabels = [ alias = "t" ;  dim = 41 ;  format = "sparse" ] ]
mode = "softmax"                    
nbruttsineachrecurrentiter = 0      
cacheBlockSize = 100000000          
        rawInput = [ dim = 0 ]
        inputLabelsDef = [ dim = 1 ; labelType = "category" ;  labelDim = "29" ; labelMappingFile = "../../Data/cmudict-0.7b.mapping" ; beginSequence = "</s>" ;  endSequence   = "</s>" ]
        outputDummy = [ labelType = "none" ]
    ]
]

configparameters: G2P.cntk:trainFile=cmudict-0.7b.train-dev-20-21.bsf.ctf
configparameters: G2P.cntk:useCNTKTextFormatReader=true
configparameters: G2P.cntk:validFile=cmudict-0.7b.train-dev-1-21.bsf.ctf
configparameters: G2P.cntk:write=[
    action = "write"
    BrainScriptNetworkBuilder = (
        if      3 == 0 then BS.Network.Load ("../../Out/g2p-01-0/G2P.dnn.9")
        else if 3 == 1 then BS.Seq2Seq.GreedySequenceDecoderFrom (BS.Network.Load ("../../Out/g2p-01-0/G2P.dnn.9"))
        else                          BS.Seq2Seq.BeamSearchSequenceDecoderFrom (BS.Network.Load ("../../Out/g2p-01-0/G2P.dnn.9"), 3)
    )
    outputPath = ../../Out/g2p-01-0/G2P.dnn.9.3
    format = [
        type = "category"
        transpose = false
        labelMappingFile = "../../Data/cmudict-0.7b.mapping"
    ]
minibatchSize = 8192                
    traceLevel = 1
    epochSize = 0
    reader = [
        readerType = "CNTKTextFormatReader"
        file = "../../Data/cmudict-0.7b.test.bsf.ctf"
        randomize = "none"
        skipSequenceIds = "false"
        maxErrors = 100
        traceLevel = 2
        input = [ rawInput = [ alias = "s" ; dim = 29 ; format = "sparse" ] ; rawLabels = [ alias = "t" ;  dim = 41 ;  format = "sparse" ] ]
mode = "softmax"                    
nbruttsineachrecurrentiter = 1      
cacheBlockSize = 100000000          
        rawInput = [ dim = 0 ]
        inputLabelsDef = [ dim = 1 ; labelType = "category" ;  labelDim = "29" ; labelMappingFile = "../../Data/cmudict-0.7b.mapping" ; beginSequence = "</s>" ;  endSequence   = "</s>" ]
        outputDummy = [ labelType = "none" ]
    ]
]

<<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
Commands: train
Precision = "float"
CNTKModelPath: ../../Out/g2p-01-0/G2P.dnn
CNTKCommandTrainInfo: train : 50
CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 50

##############################################################################
#                                                                            #
# Action "train"                                                             #
#                                                                            #
##############################################################################

CNTKCommandTrainBegin: train

[CALL STACK]
[0x5a0acc]                                                            
[0x5d73a1]          bool Microsoft::MSR::CNTK::ConfigParameters::  operator()  <bool>(wchar_t const*,  bool const&) const + 0xc1
[0x7fbfada34293]    Microsoft::MSR::CNTK::TextConfigHelper::  TextConfigHelper  (Microsoft::MSR::CNTK::ConfigParameters const&) + 0x1c63
[0x7fbfada2fc27]    Microsoft::MSR::CNTK::CNTKTextFormatReader::  CNTKTextFormatReader  (std::shared_ptr<Microsoft::MSR::CNTK::MemoryProvider>,  Microsoft::MSR::CNTK::ConfigParameters const&) + 0x97
[0x7fbfada1a63c]                                                       + 0x1763c
[0x7fbfbdbdb34a]    Microsoft::MSR::CNTK::ReaderShim<float>::  Init  (Microsoft::MSR::CNTK::ConfigParameters const&) + 0xfa
[0x7fbfbdbb4707]    Microsoft::MSR::CNTK::DataReader::  DataReader  <Microsoft::MSR::CNTK::ConfigParameters>(Microsoft::MSR::CNTK::ConfigParameters const&) + 0x997
[0x7a66f6]          std::shared_ptr<Microsoft::MSR::CNTK::DataReader>  CreateObject  <Microsoft::MSR::CNTK::DataReader>(Microsoft::MSR::CNTK::ConfigParameters const&,  wchar_t const*) + 0x106
[0x7a8a46]          void  DoTrain  <Microsoft::MSR::CNTK::ConfigParameters,float>(Microsoft::MSR::CNTK::ConfigParameters const&) + 0xb6
[0x5f5bf0]          void  DoCommands  <float>(Microsoft::MSR::CNTK::ConfigParameters const&,  std::shared_ptr<Microsoft::MSR::CNTK::MPIWrapper> const&) + 0x960
[0x5968e4]          wmainOldCNTKConfig  (int,  wchar_t**)              + 0xbf4
[0x596f77]          wmain1  (int,  wchar_t**)                          + 0x87
[0x58b4f8]          main                                               + 0xd8
[0x7fbfbc77ff45]    __libc_start_main                                  + 0xf5
[0x58f2e4]                                                            
EXCEPTION occurred: ConfigValue (bool): boolean argument expected

microsoft / CNTK

CMUDict boolean argument expected #768