/// <summary>
/// Fill the <i>rgShapes</i> array with the shapes of the recurrent input Blobs.
/// </summary>
/// <param name="rgShapes">Specifies the array of BlobShape to fill.</param>
protected override void RecurrentInputShapes(List<BlobShape> rgShapes)
{
    rgShapes.Clear();

    // Single recurrent input shaped as { 1, N, num_output }.
    BlobShape shape = new BlobShape();
    shape.dim.Add(1); // a single timestep
    shape.dim.Add(m_nN);
    shape.dim.Add((int)m_param.recurrent_param.num_output);

    rgShapes.Add(shape);
}
/// <summary>
/// Fill the <i>rgShapes</i> array with the shapes of the recurrent input Blobs.
/// </summary>
/// <param name="rgShapes">Specifies the array of BlobShape to fill.</param>
protected override void RecurrentInputShapes(List<BlobShape> rgShapes)
{
    // Two recurrent input blobs, each shaped as { 1, N, num_output }.
    const int nNumBlobs = 2;

    rgShapes.Clear();

    for (int nIdx = 0; nIdx < nNumBlobs; nIdx++)
    {
        BlobShape shape = new BlobShape();
        shape.dim.Add(1); // a single timestep
        shape.dim.Add(m_nN);
        shape.dim.Add((int)m_param.recurrent_param.num_output);
        rgShapes.Add(shape);
    }
}
/// <summary>
/// Setup the layer by building and wiring up the unrolled recurrent inner Net
/// (Caffe-style unrolling): creates the net's input layer from the bottom blobs,
/// lets the child class fill in the unrolled architecture, adds pseudo-losses to
/// force backpropagation, then caches pointers to the inner net's input/output blobs.
/// </summary>
/// <param name="colBottom">Specifies the bottom input blobs: bottom[0] = x, bottom[1] = cont
/// (sequence continuation indicators), and optionally bottom[2] = a static input.</param>
/// <param name="colTop">Specifies the top output blobs; must provide one top per output name
/// (plus the exposed hidden blobs when m_bExposeHidden is set).</param>
private void layerSetUpCaffe(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    // Get (recurrent) input/output names.
    List<string> rgOutputNames = new List<string>();
    OutputBlobNames(rgOutputNames);

    List<string> rgRecurInputNames = new List<string>();
    RecurrentInputBlobNames(rgRecurInputNames);

    List<string> rgRecurOutputNames = new List<string>();
    RecurrentOutputBlobNames(rgRecurOutputNames);

    int nNumRecurBlobs = rgRecurInputNames.Count;
    m_log.CHECK_EQ(nNumRecurBlobs, rgRecurOutputNames.Count, "The number of recurrent input names must equal the number of recurrent output names.");

    // If provided, bottom[2] is a static input to the recurrent net.
    int nNumHiddenExposed = (m_bExposeHidden) ? nNumRecurBlobs : 0;
    // Direct boolean assignment (was a redundant '? true : false' ternary).
    m_bStaticInput = colBottom.Count > 2 + nNumHiddenExposed;

    if (m_bStaticInput)
    {
        m_log.CHECK_GE(colBottom[2].num_axes, 1, "When static input is present, the bottom[2].num_axes must be >= 1");
        m_log.CHECK_EQ(m_nN, colBottom[2].shape(0), "When static input is present, the bottom[2].shape(0) must = N which is " + m_nN.ToString());
    }

    // Create a NetParameter; setup the inputs that aren't unique to particular
    // recurrent architectures.
    NetParameter net_param = new NetParameter();
    LayerParameter input_layer = new LayerParameter(LayerParameter.LayerType.INPUT);

    // 'x' mirrors the full shape of bottom[0].
    input_layer.top.Add("x");
    BlobShape input_shape1 = new BlobShape();
    for (int i = 0; i < colBottom[0].num_axes; i++)
    {
        input_shape1.dim.Add(colBottom[0].shape(i));
    }
    input_layer.input_param.shape.Add(input_shape1);

    // 'cont' mirrors the full shape of bottom[1].
    input_layer.top.Add("cont");
    BlobShape input_shape2 = new BlobShape();
    for (int i = 0; i < colBottom[1].num_axes; i++)
    {
        input_shape2.dim.Add(colBottom[1].shape(i));
    }
    input_layer.input_param.shape.Add(input_shape2);

    // 'x_static' mirrors the full shape of bottom[2] (only when present).
    if (m_bStaticInput)
    {
        input_layer.top.Add("x_static");
        BlobShape input_shape3 = new BlobShape();
        for (int i = 0; i < colBottom[2].num_axes; i++)
        {
            input_shape3.dim.Add(colBottom[2].shape(i));
        }
        input_layer.input_param.shape.Add(input_shape3);
    }

    net_param.layer.Add(input_layer);

    // Call the child's FillUnrolledNet implementation to specify the unrolled
    // recurrent architecture.
    FillUnrolledNet(net_param);

    // Prepend this layer's name to the names of each layer in the unrolled net
    // so inner layer names cannot collide with layers outside this net.
    string strLayerName = m_param.name;
    // Use IsNullOrEmpty: a null layer name would previously throw on '.Length'.
    if (!string.IsNullOrEmpty(strLayerName))
    {
        for (int i = 0; i < net_param.layer.Count; i++)
        {
            LayerParameter layer = net_param.layer[i];
            layer.name = strLayerName + "_" + layer.name;
        }
    }

    // Add 'pseudo-losses' to all outputs to force backpropagation.
    // (Setting force_backward is too aggressive as we may not need to backprop to
    // all inputs, e.g., the sequence continuation indicators.)
    List<string> rgPseudoLosses = new List<string>();
    for (int i = 0; i < rgOutputNames.Count; i++)
    {
        rgPseudoLosses.Add(rgOutputNames[i] + "_pseudoloss");
        LayerParameter layer = new LayerParameter(LayerParameter.LayerType.REDUCTION, rgPseudoLosses[i]);
        layer.bottom.Add(rgOutputNames[i]);
        layer.top.Add(rgPseudoLosses[i]);
        layer.loss_weight.Add(1.0);
        net_param.layer.Add(layer);
    }

    // Create the unrolled net, sharing weights with another instance of this
    // layer when one is supplied via LayerParameterEx (e.g. train/test net pairs).
    Net<T> sharedNet = null;
    if (m_param is LayerParameterEx<T>)
    {
        RecurrentLayer<T> sharedLayer = ((LayerParameterEx<T>)m_param).SharedLayer as RecurrentLayer<T>;
        if (sharedLayer != null)
            sharedNet = sharedLayer.m_unrolledNet;
    }

    m_unrolledNet = new Net<T>(m_cuda, m_log, net_param, m_evtCancel, null, m_phase, null, sharedNet);
    m_unrolledNet.set_debug_info(m_param.recurrent_param.debug_info);

    // Setup pointers to the inputs.
    m_blobXInputBlob = m_unrolledNet.blob_by_name("x");
    m_blobContInputBlob = m_unrolledNet.blob_by_name("cont");

    if (m_bStaticInput)
        m_blobXStaticInputBlob = m_unrolledNet.blob_by_name("x_static");

    // Setup pointers to paired recurrent inputs/outputs.
    m_colRecurInputBlobs = new common.BlobCollection<T>();
    m_colRecurOutputBlobs = new common.BlobCollection<T>();

    for (int i = 0; i < nNumRecurBlobs; i++)
    {
        m_colRecurInputBlobs.Add(m_unrolledNet.blob_by_name(rgRecurInputNames[i]));
        m_colRecurOutputBlobs.Add(m_unrolledNet.blob_by_name(rgRecurOutputNames[i]));
    }

    // Setup pointers to outputs.
    // Use the Count property for consistency (was the Count() extension call).
    m_log.CHECK_EQ(colTop.Count - nNumHiddenExposed, rgOutputNames.Count, "OutputBlobNames must provide output blob name for each top.");
    m_colOutputBlobs = new common.BlobCollection<T>();

    for (int i = 0; i < rgOutputNames.Count; i++)
    {
        m_colOutputBlobs.Add(m_unrolledNet.blob_by_name(rgOutputNames[i]));
    }

    // We should have 2 inputs (x and cont), plus a number of recurrent inputs,
    // plus maybe a static input.
    int nStaticInput = (m_bStaticInput) ? 1 : 0;
    m_log.CHECK_EQ(2 + nNumRecurBlobs + nStaticInput, m_unrolledNet.input_blobs.Count, "The unrolled net input count should equal 2 + number of recurrent blobs (" + nNumRecurBlobs.ToString() + ") + static inputs (" + nStaticInput.ToString() + ")");

    // This layer's parameters are any parameters in the layers of the unrolled
    // net. We only want one copy of each parameter, so check that the parameter
    // is 'owned' by the layer, rather than shared with another.
    blobs.Clear();
    for (int i = 0; i < m_unrolledNet.parameters.Count; i++)
    {
        // param_owners[i] == -1 marks a parameter owned by this net (not shared).
        if (m_unrolledNet.param_owners[i] == -1)
        {
            m_log.WriteLine("Adding parameter " + i.ToString() + ": " + m_unrolledNet.param_display_names[i]);
            blobs.Add(m_unrolledNet.parameters[i]);
        }
    }

    // Check that param_propagate_down is set for all of the parameters in the
    // unrolled net; set param_propagate_down to true in this layer.
    for (int i = 0; i < m_unrolledNet.layers.Count; i++)
    {
        for (int j = 0; j < m_unrolledNet.layers[i].blobs.Count; j++)
        {
            m_log.CHECK(m_unrolledNet.layers[i].param_propagate_down(j), "param_propagate_down not set for layer " + i.ToString() + ", param " + j.ToString());
        }
    }

    m_rgbParamPropagateDown = new DictionaryMap<bool>(blobs.Count, true);

    // Set the diffs of recurrent outputs to 0 -- we can't backpropagate across
    // batches.
    for (int i = 0; i < m_colRecurOutputBlobs.Count; i++)
    {
        m_colRecurOutputBlobs[i].SetDiff(0);
    }

    // Check that the last output_names.count layers are the pseudo-losses;
    // set last_layer_index so that we don't actually run these layers.
    List<string> rgLayerNames = m_unrolledNet.layer_names;
    m_nLastLayerIndex = rgLayerNames.Count - 1 - rgPseudoLosses.Count;

    for (int i = m_nLastLayerIndex + 1, j = 0; i < rgLayerNames.Count; i++, j++)
    {
        m_log.CHECK(rgLayerNames[i] == rgPseudoLosses[j], "The last layer at idx " + i.ToString() + " should be the pseudo layer named " + rgPseudoLosses[j]);
    }
}