public void SparseDirichletProduct()
{
    // Multiplies two sparse Dirichlet distributions and checks that the product
    // stays sparse. The inputs deviate from the common pseudo-count at indices
    // {20, 55} and {25, 55} respectively, and the test expects three sparse
    // entries in the product's pseudo-count vector.
    double sharedPseudoCount = 1.0;
    double firstPseudoCount = 10.0;
    double secondPseudoCount = 30.0;

    var firstElements = new List<SparseElement>
    {
        new SparseElement(20, firstPseudoCount),
        new SparseElement(55, firstPseudoCount)
    };
    Dirichlet first = new Dirichlet(SparseVector.FromSparseValues(100, sharedPseudoCount, firstElements));

    var secondElements = new List<SparseElement>
    {
        new SparseElement(25, secondPseudoCount),
        new SparseElement(55, secondPseudoCount)
    };
    Dirichlet second = new Dirichlet(SparseVector.FromSparseValues(100, sharedPseudoCount, secondElements));

    Dirichlet product = first * second;

    Assert.Equal(Sparsity.Sparse, product.Sparsity);
    SparseVector productCounts = (SparseVector)(product.PseudoCount);
    Assert.Equal(3, productCounts.SparseValues.Count);
}
/// <summary>
/// Parses a string describing a list of features.
/// </summary>
/// <remarks>
/// The string is split on '|' into feature descriptions, each of the form <c>index:value</c>.
/// Descriptions that are empty or whitespace-only are skipped. Numbers are parsed with the
/// invariant culture so that the result does not depend on the locale of the current thread.
/// Malformed descriptions and repeated feature indices are reported through
/// <see cref="FileParsingContext.RaiseError"/>.
/// </remarks>
/// <param name="featureString">The string containing the list of features.</param>
/// <param name="parsingContext">The file parsing context.</param>
/// <param name="featureCount">The number of features in the dataset, which would be updated from the parsed feature indices.</param>
/// <returns>A sparse array of features extracted from <paramref name="featureString"/>.</returns>
private static Vector ParseFeatures(string featureString, FileParsingContext parsingContext, ref int featureCount)
{
    Debug.Assert(featureString != null, "A valid feature string should be specified.");

    // Sorted by feature index, so the sparse entries below come out in index order.
    var featureIndexToValue = new SortedDictionary<int, double>();

    // The data format is machine-readable, so parsing must not follow the current culture
    // (e.g. a culture that uses ',' as the decimal separator). The number styles are the
    // defaults used by the culture-sensitive TryParse overloads.
    var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;
    const System.Globalization.NumberStyles IndexStyle = System.Globalization.NumberStyles.Integer;
    const System.Globalization.NumberStyles ValueStyle =
        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;

    string[] featureDescriptions = featureString.Split('|');
    foreach (string featureDescription in featureDescriptions)
    {
        if (featureDescription.Trim().Length == 0)
        {
            continue;
        }

        string[] featureDescriptionParts = featureDescription.Split(':');
        int featureIndex = 0;
        double featureValue = 0;
        if (featureDescriptionParts.Length != 2 ||
            !int.TryParse(featureDescriptionParts[0], IndexStyle, invariantCulture, out featureIndex) ||
            !double.TryParse(featureDescriptionParts[1], ValueStyle, invariantCulture, out featureValue))
        {
            // NOTE(review): the code below presumably relies on RaiseError not returning - confirm.
            parsingContext.RaiseError("Invalid feature description string.");
        }

        if (featureIndexToValue.ContainsKey(featureIndex))
        {
            parsingContext.RaiseError("Feature {0} is referenced several times.", featureIndex);
        }

        featureIndexToValue.Add(featureIndex, featureValue);

        // Grow the dataset-wide feature count to cover the largest index seen so far.
        featureCount = Math.Max(featureCount, featureIndex + 1);
    }

    return SparseVector.FromSparseValues(
        featureCount,
        0,
        featureIndexToValue.Select(kv => new ValueAtIndex<double>(kv.Key, kv.Value)).ToList());
}
public void SparseDiscreteRatio()
{
    // Divides one sparse Discrete distribution by another and checks that the
    // ratio stays sparse. The inputs deviate from the common value at indices
    // {20, 55} and {25, 55} respectively, and the test expects three sparse
    // entries in the ratio's probability vector.
    double sharedValue = 1.0;
    double numeratorValue = 10.0;
    double denominatorValue = 30.0;

    var numeratorElements = new List<SparseElement>
    {
        new SparseElement(20, numeratorValue),
        new SparseElement(55, numeratorValue)
    };
    Discrete numerator = new Discrete(SparseVector.FromSparseValues(100, sharedValue, numeratorElements));

    var denominatorElements = new List<SparseElement>
    {
        new SparseElement(25, denominatorValue),
        new SparseElement(55, denominatorValue)
    };
    Discrete denominator = new Discrete(SparseVector.FromSparseValues(100, sharedValue, denominatorElements));

    Discrete ratio = numerator / denominator;

    Assert.Equal(Sparsity.Sparse, ratio.Sparsity);
    SparseVector ratioProbs = (SparseVector)(ratio.GetProbs());
    Assert.Equal(3, ratioProbs.SparseValues.Count);
}
/// <summary>
/// Converts a list of ints into a double vector.
/// </summary>
/// <remarks>
/// A <see cref="SparseList{T}"/> of ints is converted through
/// <c>SparseVector.FromSparseValues</c>, reusing its count, common value and sparse entries.
/// Any other list is converted element by element and passed to <c>Vector.FromArray</c>.
/// </remarks>
/// <param name="source">The list of ints to convert.</param>
/// <returns>A vector holding the elements of <paramref name="source"/> as doubles.</returns>
public static Vector ToVector(this IList<int> source)
{
    // Anything that is not a sparse list takes the element-by-element path.
    if (!(source is SparseList<int>))
    {
        double[] denseValues = source.Select(item => (double)item).ToArray();
        return Vector.FromArray(denseValues);
    }

    var sparseSource = (SparseList<int>)source;
    return SparseVector.FromSparseValues(
        sparseSource.Count,
        sparseSource.CommonValue,
        sparseSource.SparseValues
            .Select(entry => new ValueAtIndex<double> { Index = entry.Index, Value = entry.Value })
            .ToList());
}
/// <summary>
/// Converts a list of doubles into a vector.
/// </summary>
/// <remarks>
/// A <see cref="Vector"/> is returned as the same instance. A <see cref="SparseList{T}"/>
/// of doubles is converted through <c>SparseVector.FromSparseValues</c>, reusing its count,
/// common value and sparse entries. Any other list is copied to an array and passed to
/// <c>Vector.FromArray</c>.
/// </remarks>
/// <param name="source">The list of doubles to convert.</param>
/// <returns>A vector holding the elements of <paramref name="source"/>.</returns>
public static Vector ToVector(this IList<double> source)
{
    // Already a vector: hand back the same instance.
    if (source is Vector)
    {
        return (Vector)source;
    }

    // Anything that is not a sparse list is copied into a new array.
    if (!(source is SparseList<double>))
    {
        double[] denseValues = source.ToArray();
        return Vector.FromArray(denseValues);
    }

    var sparseSource = (SparseList<double>)source;
    return SparseVector.FromSparseValues(
        sparseSource.Count,
        sparseSource.CommonValue,
        sparseSource.SparseValues);
}
public void SparseDiscreteNormalise()
{
    // Builds a Discrete distribution from an unnormalised sparse vector and checks
    // that the probabilities it reports are sparse and normalised: the common value
    // must equal commonValue divided by the sum of all 100 unnormalised entries.
    double commonValue = 1.0;
    double nonCommonValue = 10.0;

    // Use the commonValue local (not a repeated literal) so the input vector and
    // the expected value computed below cannot drift apart.
    Discrete d = new Discrete(
        SparseVector.FromSparseValues(100, commonValue, new List<SparseElement>
        {
            new SparseElement(20, nonCommonValue),
            new SparseElement(55, nonCommonValue)
        }));

    Vector v = d.GetProbs();
    Assert.Equal(Sparsity.Sparse, d.Sparsity);

    // Two entries hold nonCommonValue; all remaining entries hold commonValue.
    double sum = (v.Count - 2) * commonValue + 2 * nonCommonValue;
    SparseVector sv = (SparseVector)v;

    // Expected value first, actual second, so failure messages read correctly.
    Assert.Equal(commonValue / sum, sv.CommonValue);
    Assert.Equal(2, sv.SparseValues.Count);
}
/// <summary>
/// Gets the feature values of the specified feature selection as a sparse vector.
/// </summary>
/// <remarks>
/// Without a selection, the vector has one slot per feature in <c>FeatureSet</c> and is indexed
/// by the stored feature indices. With a selection, the vector has one slot per selected feature
/// and is indexed by each feature's position in <paramref name="featureSelection"/>; selected
/// features that are unknown to <c>FeatureSet</c> or have no stored value are left at 0.0.
/// </remarks>
/// <param name="featureSelection">An optional selection of features. Defaults to all features being selected.</param>
/// <returns>The feature values as a sparse vector.</returns>
/// <exception cref="ArgumentException">
/// Thrown when a name enumerated from <paramref name="featureSelection"/> cannot be mapped back to an index in it.
/// </exception>
public Vector GetSparseFeatureVector(IndexedSet<string> featureSelection = null)
{
    List<ValueAtIndex<double>> sortedFeatures;

    if (featureSelection == null)
    {
        // No selection: return all stored feature values as a sparse vector over the full feature set.
        sortedFeatures = this.featureValues
            .OrderBy(i => i.Key)
            .Select(indexValuePair => new ValueAtIndex<double>(indexValuePair.Key, indexValuePair.Value))
            .ToList();
        return SparseVector.FromSparseValues(this.FeatureSet.Count, 0.0, sortedFeatures);
    }

    // Construct sparse feature vector from selected features
    int featureCount = featureSelection.Count;
    var selectedFeatures = new Dictionary<int, double>();
    foreach (string featureName in featureSelection.Elements)
    {
        int featureIndex;
        if (!this.FeatureSet.TryGetIndex(featureName, out featureIndex))
        {
            // The selected feature is not part of the feature set.
            continue;
        }

        double featureValue;
        if (!this.featureValues.TryGetValue(featureIndex, out featureValue))
        {
            // No value is stored for this feature.
            continue;
        }

        // Map the feature to its index within the selection.
        int selectedFeatureIndex;
        if (!featureSelection.TryGetIndex(featureName, out selectedFeatureIndex))
        {
            throw new ArgumentException("Invalid feature selection.");
        }

        selectedFeatures.Add(selectedFeatureIndex, featureValue);
    }

    // Emit the sparse entries ordered by their index in the selection.
    sortedFeatures = selectedFeatures
        .OrderBy(i => i.Key)
        .Select(indexValuePair => new ValueAtIndex<double>(indexValuePair.Key, indexValuePair.Value))
        .ToList();
    return SparseVector.FromSparseValues(featureCount, 0.0, sortedFeatures);
}