/// <summary>
/// Calculates the conditional impurity of y given x.
/// R(Y|X) is the average of H(Y|X = x) over every distinct value X takes,
/// each term weighted by the fraction of items in x holding that value.
/// As a side effect, <c>Segments</c> is set to one single-value range per
/// distinct value of x and <c>Discrete</c> is set to true.
/// </summary>
/// <param name="y">Target impurity</param>
/// <param name="x">Conditioned impurity</param>
/// <returns>Conditional impurity measure</returns>
/// <exception cref="InvalidOperationException">Thrown when x or y is null.</exception>
public double Conditional(Vector y, Vector x)
{
    // Both vectors are dereferenced below, so either one being null is fatal.
    // (The previous "&&" only rejected the case where both were null.)
    if (x == null || y == null)
    {
        throw new InvalidOperationException("x and y do not exist!");
    }

    // total number of items in x
    double total = x.Count();

    // distinct values to split on, in ascending order; materialized once because
    // the sequence is consumed twice (Segments and the loop below)
    var values = x.Distinct().OrderBy(z => z).ToArray();

    Segments = values.Select(z => Range.Make(z, z)).ToArray();
    Discrete = true;

    // for each distinct value, compute the impurity of the matching
    // slice of y and aggregate it weighted by the slice probability
    double result = 0;
    foreach (var value in values)
    {
        // indices of x equal to the current value
        var slice = x.Indices(d => d == value);

        // slice probability
        double p = (double)slice.Count() / total;

        // impurity of (y | x_i)
        double h = Calculate(y.Slice(slice));

        result += p * h;
    }

    return result;
}
/// <summary>
/// Returns true when the distinct values of the vector form a binary label set:
/// exactly two distinct values that include 1 together with 0 or -1, or a single
/// distinct value that is 1, 0 or -1.
/// </summary>
/// <param name="vector">The input vector.</param>
/// <returns>True if the vector is binary in the sense above; otherwise false.</returns>
public static bool IsBinary(this Vector vector)
{
    // Materialize once: the distinct set is probed several times below, and the
    // deferred query would otherwise be re-run for every Count/Contains call.
    var distinct = vector.Distinct().ToArray();

    if (distinct.Length == 2)
    {
        // two classes: 1 paired with either 0 or -1
        return distinct.Contains(1d) && (distinct.Contains(0d) || distinct.Contains(-1d));
    }

    if (distinct.Length == 1)
    {
        // only one class present, but it is one of the recognised binary values
        return distinct.Contains(1d) || distinct.Contains(0d) || distinct.Contains(-1d);
    }

    return false;
}
/// <summary>
/// Builds a network with an input layer, one hidden layer and an output layer,
/// sized from the supplied training data.
/// </summary>
/// <param name="d">The Descriptor supplying input column labels (also passed to GetLabel for output labels).</param>
/// <param name="x">The training Matrix; its column count sets the number of non-bias input nodes.</param>
/// <param name="y">The label Vector; its distinct-value count sets the number of output nodes.</param>
/// <param name="activation">The activation assigned to hidden and output nodes.</param>
/// <returns>A Network.</returns>
public static Network Default(Descriptor d, Matrix x, Vector y, IFunction activation)
{
    var net = new Network();

    // one output node per class, or a single node when there are at most two classes
    var classCount = y.Distinct().Count();
    var outputCount = classCount > 2 ? classCount : 1;

    // bias and input nodes pass their value straight through
    IFunction identity = new Ident();

    // hidden units: (inputs + outputs) * 2/3, rounded up, as a basic first guess
    var hiddenCount = (int)Math.Ceiling((decimal)(x.Cols + output(outputCount)) * 2m / 3m);

    // input layer: slot 0 is the bias node, slots 1..Cols map to descriptor columns
    net.In = new Node[x.Cols + 1];
    net.In[0] = new Node { Label = "B0", Activation = identity };
    for (var col = 0; col < x.Cols; col++)
    {
        net.In[col + 1] = new Node { Label = d.ColumnAt(col), Activation = identity };
    }

    // hidden layer: slot 0 is the bias node
    var hiddenNodes = new Node[hiddenCount + 1];
    hiddenNodes[0] = new Node { Label = "B1", Activation = identity };
    for (var unit = 1; unit <= hiddenCount; unit++)
    {
        hiddenNodes[unit] = new Node { Label = string.Format("H{0}", unit), Activation = activation };
    }

    // output layer
    net.Out = new Node[outputCount];
    for (var o = 0; o < outputCount; o++)
    {
        net.Out[o] = new Node { Label = GetLabel(o, d), Activation = activation };
    }

    // every input node (bias included) feeds every hidden node except
    // the hidden bias node, which takes no inputs
    for (var unit = 1; unit < hiddenNodes.Length; unit++)
    {
        foreach (var source in net.In)
        {
            Edge.Create(source, hiddenNodes[unit]);
        }
    }

    // every hidden node (bias included) feeds every output node
    foreach (var target in net.Out)
    {
        foreach (var source in hiddenNodes)
        {
            Edge.Create(source, target);
        }
    }

    return net;

    // local pass-through keeps the sizing expression readable
    int output(int n) { return n; }
}
/// <summary>
/// Builds a frequency table for the vector: for each distinct value, in ascending
/// order, a three-element row of { value, occurrence count, count / x.Length }.
/// </summary>
/// <param name="x">The vector to summarize.</param>
/// <returns>The Matrix produced by ToMatrix() from those rows.</returns>
public static Matrix Stats(this Vector x)
{
    var rows = x.Distinct()
                .OrderBy(value => value)
                .Select(value =>
                {
                    // number of entries equal to this distinct value
                    int occurrences = x.Where(item => item == value).Count();
                    return new[] { value, occurrences, occurrences / (double)x.Length };
                });

    return rows.ToMatrix();
}
/// <summary>
/// Calculates the Classification Error of x: one minus the relative frequency
/// of the most common value.
/// </summary>
/// <exception cref="InvalidOperationException">Thrown when x is null.</exception>
/// <param name="x">The list in question.</param>
/// <returns>Impurity measure.</returns>
public override double Calculate(Vector x)
{
    if (x == null)
    {
        throw new InvalidOperationException("x does not exist!");
    }

    double total = x.Count();

    // relative frequency of each distinct value
    var frequencies = x.Distinct()
                       .Select(value => x.Where(item => item == value).Count() / total);

    return 1 - frequencies.Max();
}
/// <summary>
/// Calculates the Shannon Entropy of x: the sum of -p * log2(p) over the
/// relative frequency p of each distinct value.
/// </summary>
/// <exception cref="InvalidOperationException">Thrown when x is null.</exception>
/// <param name="x">The list in question.</param>
/// <returns>Impurity measure.</returns>
public override double Calculate(Vector x)
{
    if (x == null)
    {
        throw new InvalidOperationException("x does not exist!");
    }

    double total = x.Count();

    return x.Distinct()
            // relative frequency of each distinct value
            .Select(value => x.Where(item => item == value).Count() / total)
            // entropy contribution of that value
            .Select(p => -1 * p * Math.Log(p, 2))
            .Sum();
}
/// <summary>
/// Calculates the Gini Index of x: one minus the sum of squared relative
/// frequencies of the distinct values.
/// </summary>
/// <exception cref="InvalidOperationException">Thrown when x is null.</exception>
/// <param name="x">The list in question.</param>
/// <returns>Impurity measure.</returns>
public override double Calculate(Vector x)
{
    if (x == null)
    {
        throw new InvalidOperationException("x does not exist!");
    }

    double total = x.Count();

    // relative frequency of each distinct value
    var frequencies = x.Distinct()
                       .Select(value => x.Where(item => item == value).Count() / total);

    double sumOfSquares = frequencies.Select(p => p * p).Sum();
    return 1 - sumOfSquares;
}
/// <summary>
/// Creates a network sized from the training data by delegating to the
/// size-based Create overload with a single computed hidden-layer size.
/// </summary>
/// <param name="network">The Network the underlying Create overload is invoked on.</param>
/// <param name="d">The Descriptor used to label input and output neurons.</param>
/// <param name="x">The training Matrix; its column count is passed as the input size.</param>
/// <param name="y">The label Vector; its distinct-value count determines the output size.</param>
/// <param name="activationFunction">The activation assigned to every neuron created by the initializer.</param>
/// <param name="outputFunction">The output function forwarded to the underlying Create overload (optional).</param>
/// <param name="epsilon">Not used by this overload at present (see review note in the body).</param>
/// <returns>The Network returned by the underlying Create overload.</returns>
public static Network Create(this Network network, Descriptor d, Matrix x, Vector y, IFunction activationFunction, IFunction outputFunction = null, double epsilon = double.NaN)
{
    // one output per class, or a single output when there are at most two classes
    int distinct = y.Distinct().Count();
    int output = distinct > 2 ? distinct : 1;

    // hidden units: (inputs + outputs) * 2/3, rounded up, as a basic first guess
    int hidden = (int)System.Math.Ceiling((double)(x.Cols + output) * 2.0 / 3.0);

    // NOTE(review): epsilon is accepted but never forwarded to the underlying
    // overload — confirm whether that overload takes an epsilon argument.
    return network.Create(x.Cols, output, activationFunction, outputFunction,
        fnNodeInitializer: new Func<int, int, Neuron>((l, i) =>
        {
            switch (l)
            {
                case 0:
                    // layer 0: label taken from the descriptor column.
                    // NOTE(review): i == 0 would request ColumnAt(-1); presumably the
                    // initializer is only called with i >= 1 on this layer — confirm.
                    return new Neuron(false) { Label = d.ColumnAt(i - 1), ActivationFunction = activationFunction, NodeId = i, LayerId = l };

                case 2:
                    // layer 2: label resolved through Network.GetLabel
                    return new Neuron(false) { Label = Network.GetLabel(i, d), ActivationFunction = activationFunction, NodeId = i, LayerId = l };

                default:
                    // any other layer: unlabeled neuron
                    return new Neuron(false) { ActivationFunction = activationFunction, NodeId = i, LayerId = l };
            }
        }),
        hiddenLayers: hidden);
}
/// <summary>
/// Prepares a deep neural network from the supplied inputs and layers.
/// Only the input and output neurons are created; the hidden layers are
/// iterated but not yet built or connected.
/// </summary>
/// <param name="network">The Network whose In and Out arrays are populated.</param>
/// <param name="d">Descriptor object.</param>
/// <param name="X">Training examples</param>
/// <param name="y">Training labels</param>
/// <param name="activationFunction">Activation Function for each output neuron.</param>
/// <param name="outputFunction">Output Function for each output neuron.</param>
/// <param name="hiddenLayers">The intermediary (hidden) layers / ensembles in the network.</param>
/// <returns>The same Network instance that was passed in.</returns>
public static Network Create(this Network network, Descriptor d, Matrix X, Vector y, IFunction activationFunction, IFunction outputFunction = null, params NetworkLayer[] hiddenLayers)
{
    // one output per class, or a single output when there are at most two classes
    int classCount = y.Distinct().Count();
    int outputCount = classCount > 2 ? classCount : 1;

    // bias and input neurons pass their value straight through
    IFunction identity = new Ident();

    // input layer: slot 0 is the bias neuron, slots 1..Cols map to descriptor columns
    network.In = new Neuron[X.Cols + 1];
    network.In[0] = new Neuron { Label = "B0", ActivationFunction = identity };
    for (int col = 0; col < X.Cols; col++)
    {
        network.In[col + 1] = new Neuron { Label = d.ColumnAt(col), ActivationFunction = identity };
    }

    // output layer
    network.Out = new Neuron[outputCount];
    for (int o = 0; o < outputCount; o++)
    {
        network.Out[o] = new Neuron
        {
            Label = Network.GetLabel(o, d),
            ActivationFunction = activationFunction,
            OutputFunction = outputFunction
        };
    }

    // Placeholder pass over the hidden layers; nothing is constructed yet.
    int layerCount = hiddenLayers.Count();
    for (int layer = 0; layer < layerCount; layer++)
    {
        if (layer == 0 && hiddenLayers[layer].IsAutoencoder)
        {
            // TODO: init and train it.
        }

        // TODO: connect input with previous layer or input layer
        // TODO: connect last layer with output layer
    }

    // TODO: wiring is not implemented. The intended scheme is to link every input
    // neuron to every hidden neuron except the hidden bias neuron (which takes no
    // inputs), then fully link the hidden neurons to the output neurons.
    return network;
}