/// <summary>Produces a deep copy of an array of measures.</summary>
/// <param name="measures">The source measures; each element is cloned individually.</param>
/// <returns>A new array of the same length containing clones of the inputs.</returns>
private Measure[] CloneMeasure(Measure[] measures)
{
    return measures.Select(measure => measure.Clone()).ToArray();
}
/// <summary>Generates an object from its XML representation.</summary>
/// <param name="reader">The <see cref="T:System.Xml.XmlReader" /> stream from which the object is
/// deserialized.</param>
public override void ReadXml(XmlReader reader)
{
    // Position the reader on actual content (skipping whitespace/comments),
    // then consume the wrapper start element.
    reader.MoveToContent();
    reader.ReadStartElement();
    // Child elements are consumed in a fixed order: descriptor first, then the
    // measure tree. NOTE(review): assumes the matching WriteXml emits them in
    // this order — confirm against the serializer.
    Descriptor = Xml.Read<Descriptor>(reader);
    Root = Xml.Read<Measure>(reader);
}
/// <summary>Generate model based on a set of examples.</summary>
/// <exception cref="InvalidOperationException">Thrown when the requested operation is invalid.</exception>
/// <param name="X">The Matrix to process.</param>
/// <param name="y">The Vector to process.</param>
/// <returns>Model.</returns>
public override IModel Generate(Matrix X, Vector y)
{
    if (Descriptor == null)
        throw new InvalidOperationException("Cannot build naive bayes model without type knowledge!");

    // naive bayes requires a discrete label; continuous targets need regression
    if (!Descriptor.Label.Discrete)
        throw new InvalidOperationException("Need to use regression for non-discrete labels!");

    this.Preprocess(X);

    // label (Y) probability statistics
    Statistic[] statistics = GetLabelStats(y);

    Measure root = new Measure
    {
        Discrete = true,
        Label = Descriptor.Label.Name,
        Probabilities = statistics
    };

    // per-feature measure templates spanning each feature's value range
    Measure[] features = GetBaseConditionals(X);

    // accumulate conditional counts per label value
    for (int row = 0; row < y.Length; row++)
    {
        Statistic labelStat = statistics.First(s => s.X.Min == y[row]);
        if (labelStat.Conditionals == null)
            labelStat.Conditionals = CloneMeasure(features);

        for (int col = 0; col < X.Cols; col++)
            labelStat.Conditionals[col].Increment(X[row, col]);
    }

    // convert accumulated counts into probabilities
    foreach (Statistic statistic in statistics)
        foreach (Measure conditional in statistic.Conditionals)
            conditional.Normalize();

    // assign vertex ids across the measure graph
    LabelIds(root);

    return new NaiveBayesModel
    {
        Descriptor = Descriptor,
        NormalizeFeatures = base.NormalizeFeatures,
        FeatureNormalizer = base.FeatureNormalizer,
        FeatureProperties = base.FeatureProperties,
        Root = root
    };
}
/// <summary>Generate model based on a set of examples.</summary>
/// <exception cref="InvalidOperationException">Thrown when the descriptor is missing or the label is not discrete.</exception>
/// <param name="x">The Matrix to process.</param>
/// <param name="y">The Vector to process.</param>
/// <returns>Model.</returns>
public override IModel Generate(Matrix x, Vector y)
{
    if (this.Descriptor == null)
    {
        throw new InvalidOperationException("Cannot build naive bayes model without type knowledge!");
    }

    // create answer probabilities
    if (!this.Descriptor.Label.Discrete)
    {
        throw new InvalidOperationException("Need to use regression for non-discrete labels!");
    }

    // FIX: apply feature preprocessing before computing statistics, matching the
    // sibling Generate(Matrix, Vector) implementation; previously normalization
    // settings were silently ignored during training.
    this.Preprocess(x);

    // compute Y probabilities
    var statistics = this.GetLabelStats(y);

    var root = new Measure
    {
        Discrete = true,
        Label = this.Descriptor.Label.Name,
        Probabilities = statistics
    };

    // collect feature ranges
    var features = this.GetBaseConditionals(x);

    // compute conditional counts
    for (var i = 0; i < y.Length; i++)
    {
        var stat = statistics.Where(s => s.X.Min == y[i]).First();
        if (stat.Conditionals == null)
        {
            stat.Conditionals = this.CloneMeasure(features);
        }

        for (var j = 0; j < x.Cols; j++)
        {
            var s = stat.Conditionals[j];
            s.Increment(x[i, j]);
        }
    }

    // normalize into probabilities
    for (var i = 0; i < statistics.Length; i++)
    {
        var cond = statistics[i];
        for (var j = 0; j < cond.Conditionals.Length; j++)
        {
            cond.Conditionals[j].Normalize();
        }
    }

    // FIX: assign vertex ids to the measure graph, as the sibling implementation does.
    this.LabelIds(root);

    // FIX: carry the feature-normalization configuration into the model so
    // prediction applies the same preprocessing as training.
    return new NaiveBayesModel
    {
        Descriptor = this.Descriptor,
        NormalizeFeatures = this.NormalizeFeatures,
        FeatureNormalizer = this.FeatureNormalizer,
        FeatureProperties = this.FeatureProperties,
        Root = root
    };
}
/// <summary>Makes a deep copy of this object.</summary>
/// <returns>A copy of this object.</returns>
public Measure Clone()
{
    var copy = new Measure
    {
        Label = this.Label,
        Discrete = this.Discrete
    };

    var source = this.Probabilities;
    if (source != null && source.Length > 0)
    {
        // clone each statistic individually so the copy shares no state
        var cloned = new Statistic[source.Length];
        for (var index = 0; index < cloned.Length; index++)
        {
            cloned[index] = source[index].Clone();
        }

        copy.Probabilities = cloned;
    }

    return copy;
}
/// <summary>Makes a deep copy of this object.</summary>
/// <returns>A copy of this object.</returns>
public Measure Clone()
{
    var copy = new Measure { Label = Label, Discrete = Discrete };

    // deep-copy the statistics so the clone shares no mutable state
    if (Probabilities != null && Probabilities.Length > 0)
        copy.Probabilities = Array.ConvertAll(Probabilities, statistic => statistic.Clone());

    return copy;
}
/// <summary>Gets base conditionals.</summary>
/// <param name="x">The Matrix to process.</param>
/// <returns>An array of measure templates, one per feature column.</returns>
private Measure[] GetBaseConditionals(Matrix x)
{
    var features = new Measure[x.Cols];
    for (var col = 0; col < features.Length; col++)
    {
        var property = Descriptor.At(col);
        var measure = new Measure
        {
            Discrete = property.Discrete,
            Label = Descriptor.ColumnAt(col),
        };

        var column = x[col, VectorType.Col];
        if (measure.Discrete)
        {
            // one bucket per distinct value, labeled with its converted representation
            measure.Probabilities = column.Distinct()
                                          .OrderBy(d => d)
                                          .Select(d => Statistic.Make(property.Convert(d).ToString(), d, 1))
                                          .ToArray();
        }
        else
        {
            // continuous feature: partition the column's range into Width segments
            measure.Probabilities = column.Segment(Width)
                                          .Select(range => Statistic.Make(measure.Label, range, 1))
                                          .ToArray();
        }

        features[col] = measure;
    }

    return features;
}
/// <summary>Assigns sequential vertex ids to a measure and all of its descendants.</summary>
/// <param name="m">The root of the measure graph to label.</param>
private void LabelIds(Measure m)
{
    m.Id = ++_vertexId;
    if (m.Probabilities == null)
        return;

    // number all statistics of this measure first, then recurse into conditionals
    foreach (var statistic in m.Probabilities)
        statistic.Id = ++_vertexId;

    foreach (var statistic in m.Probabilities)
    {
        if (statistic.Conditionals == null)
            continue;

        foreach (var conditional in statistic.Conditionals)
            LabelIds(conditional);
    }
}