private static void InitializeReLUs(NetworkGraph graph)
{
    // ReLUs like a bit of positive bias to get gradients early.
    // Otherwise it's technically possible that a ReLU unit will never turn on (by chance),
    // will never get any gradient, and will never contribute any computation. Dead ReLU.
    foreach (Layer layer in graph.Vertices.Where(x => x is ReLULayer).ToList())
    {
        Layer target = layer;

        // The trainable layer may be separated from the ReLU by a max-pooling layer; step back over it.
        // (For an in-edge of target, edge.Target is target itself, so the pooling check must look at edge.Source.)
        if (graph.InDegree(target) == 1)
        {
            Edge<Layer> edge = graph.InEdges(target)[0];
            if (edge.Source is MaxPoolingLayer)
            {
                target = edge.Source;
            }
        }

        // Nudge the bias of the trainable layer that feeds this ReLU.
        if (graph.InDegree(target) == 1)
        {
            Edge<Layer> edge = graph.InEdges(target)[0];
            if (edge.Source is StochasticLayer stochasticLayer)
            {
                stochasticLayer.B.Set(0.1f);
            }
        }
    }
}
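// The dead-ReLU issue described above is easy to reproduce numerically. This is a
// hedged sketch (plain C#, independent of the NetworkGraph/Layer types; the method
// name is hypothetical): a unit whose pre-activation is negative receives zero
// gradient, while a small positive bias such as the 0.1f set above keeps it active.
private static void DeadReluSketch()
{
    // dRelu/dx is 1 in the active regime and 0 once the unit is off,
    // so no gradient ever flows back to revive a dead unit.
    Func<float, float> reluGrad = x => x > 0f ? 1f : 0f;

    float preActivation = -0.05f;                       // unit starts slightly negative
    Console.WriteLine(reluGrad(preActivation));         // 0 -> dead, never updates
    Console.WriteLine(reluGrad(preActivation + 0.1f)); // 1 -> the bias keeps gradient flowing
}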
private static void AddActivationLayers(NetworkGraph graph)
{
    foreach (Layer layer in graph.Vertices.Where(x => ((x as TrainableLayer)?.NeedsActivation).GetValueOrDefault()).ToList())
    {
        Layer source = layer;

        // Optimization - add the activation layer after a max-pooling layer that follows the trainable layer.
        if (graph.OutDegree(source) == 1)
        {
            Edge<Layer> edge = graph.OutEdges(source)[0];
            if (edge.Target is MaxPoolingLayer)
            {
                source = edge.Target;
            }
        }

        if (graph.OutDegree(source) == 1)
        {
            Edge<Layer> edge = graph.OutEdges(source)[0];
            if (!(edge.Target is ActivationLayer) && !(edge.Target is LossLayer))
            {
                Layer activationLayer = new TanhLayer(edge.Source.OutputShape);
                graph.AddVertex(activationLayer);

                // Rewire source -> target as source -> activation in the source's out-edges.
                Edge<Layer> newEdge = new Edge<Layer>(edge.Source, activationLayer);
                graph.OutEdges(source)[0] = newEdge;
                graph.InEdges(activationLayer).Add(newEdge);

                // Rewire source -> target as activation -> target in the target's in-edges.
                if (edge.Target != null)
                {
                    IList<Edge<Layer>> inedges = graph.InEdges(edge.Target);
                    int index = inedges.IndexOf(edge);

                    newEdge = new Edge<Layer>(activationLayer, edge.Target);
                    inedges[index] = newEdge;
                    graph.OutEdges(activationLayer).Add(newEdge);
                }
            }
        }
    }
}
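// AddActivationLayers splices the new vertex into an existing edge by overwriting
// the edge slot in both endpoints' adjacency lists, so sibling edges keep their
// positions. Below is a hedged stand-alone model of that rewiring (hypothetical
// Node type and Splice method, not the NetworkGraph API).
private sealed class Node
{
    public string Name;
    public List<Node> Out = new List<Node>();
    public List<Node> In = new List<Node>();
    public Node(string name) => Name = name;
}

// Replace source -> target with source -> middle -> target, mirroring the two
// in-place replacements performed above on OutEdges(source) and InEdges(edge.Target).
private static void Splice(Node source, Node target, Node middle)
{
    source.Out[source.Out.IndexOf(target)] = middle;
    middle.In.Add(source);

    target.In[target.In.IndexOf(source)] = middle;
    middle.Out.Add(target);
}

// Usage: Splice(conv, loss, tanh) turns conv -> loss into conv -> tanh -> loss.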