/// <summary>Computes a score for every label in the label index for the given example.</summary>
/// <remarks>
/// A real-valued datum contributes the values stored in its feature counter; any other
/// datum contributes a value of 1.0 for each of its features.
/// </remarks>
/// <param name="example">The datum to score.</param>
/// <returns>
/// A counter mapping each label of <c>labelIndex</c> to the corresponding entry returned by
/// <c>LogisticUtils.CalculateSigmoids</c>.
/// </returns>
public virtual ICounter<L> ProbabilityOf(IDatum<L, F> example)
{
    // calculate the feature indices and feature values
    int[] featureIndices = LogisticUtils.IndicesOf(example.AsFeatures(), featureIndex);
    double[] featureValues;

    // Generic classes are invariant: an RVFDatum<L, F> is not an RVFDatum<object, object>,
    // so the test must be made against the closed type or the real values are never used.
    RVFDatum<L, F> rvfExample = example as RVFDatum<L, F>;
    if (rvfExample != null)
    {
        ICollection<double> featureValuesCollection = rvfExample.AsFeaturesCounter().Values();
        featureValues = LogisticUtils.ConvertToArray(featureValuesCollection);
    }
    else
    {
        featureValues = new double[example.AsFeatures().Count];
        Arrays.Fill(featureValues, 1.0);
    }

    // calculate probability of each class
    ICounter<L> result = new ClassicCounter<L>();
    int numClasses = labelIndex.Size();
    double[] sigmoids = LogisticUtils.CalculateSigmoids(weights, featureIndices, featureValues);
    for (int c = 0; c < numClasses; c++)
    {
        L label = labelIndex.Get(c);
        result.IncrementCount(label, sigmoids[c]);
    }
    return result;
}
/// <summary>Converts the features of a datum from raw counts into L1-normalized TF-IDF weights.</summary>
/// <remarks>
/// Only features present in <paramref name="featureDocCounts"/> are kept. Each occurrence of a
/// feature adds 1.0 to its term frequency; the inverse document frequency is
/// <c>log((Size() + 1) / (docCount + 0.5))</c>.
/// </remarks>
/// <param name="datum">The datum whose collection of features is converted.</param>
/// <param name="featureDocCounts">A counter holding the document count of each feature.</param>
/// <returns>A new RVFDatum carrying the normalized TF-IDF weights and the label of <paramref name="datum"/>.</returns>
public virtual RVFDatum<L, F> GetL1NormalizedTFIDFDatum(IDatum<L, F> datum, ICounter<F> featureDocCounts)
{
    ICounter<F> weights = new ClassicCounter<F>();

    // Term frequencies: one increment per occurrence of a known feature.
    foreach (F candidate in datum.AsFeatures())
    {
        if (!featureDocCounts.ContainsKey(candidate))
        {
            continue;
        }
        weights.IncrementCount(candidate, 1.0);
    }

    // Replace each term frequency by tf * idf and accumulate the L1 norm.
    double normTotal = 0;
    foreach (F key in weights.KeySet())
    {
        double numerator = (double)(this.Size() + 1);
        double denominator = featureDocCounts.GetCount(key) + 0.5;
        double inverseDocFreq = Math.Log(numerator / denominator);
        double termFreq = weights.GetCount(key);
        double weighted = termFreq * inverseDocFreq;
        weights.SetCount(key, weighted);
        normTotal += weighted;
    }

    // Scale every weight by the accumulated norm.
    foreach (F key in weights.KeySet())
    {
        double unscaled = weights.GetCount(key);
        weights.SetCount(key, unscaled / normTotal);
    }

    return new RVFDatum<L, F>(weights, datum.Label());
}
/// <summary>Returns the probability the classifier assigns to the example's own label.</summary>
/// <param name="example">The datum to score; its label selects which probability is returned.</param>
/// <returns>
/// The result of <c>ProbabilityOfRVFDatum</c> for a real-valued datum, otherwise the result of
/// <c>ProbabilityOf(features, label)</c>.
/// </returns>
public virtual double ProbabilityOf(IDatum<L, F> example)
{
    // Generic classes are invariant, so testing against RVFDatum<object, object> never
    // matches an RVFDatum<L, F>; test the closed type so the real-valued path is reachable.
    RVFDatum<L, F> rvfExample = example as RVFDatum<L, F>;
    if (rvfExample != null)
    {
        return ProbabilityOfRVFDatum(rvfExample);
    }
    return ProbabilityOf(example.AsFeatures(), example.Label());
}
/// <summary>Returns the label the classifier predicts for the given datum.</summary>
/// <param name="datum">The datum to classify.</param>
/// <returns>
/// The result of <c>ClassOfRVFDatum</c> for a real-valued datum, otherwise the result of
/// <c>ClassOf</c> applied to the datum's feature collection.
/// </returns>
public virtual L ClassOf(IDatum<L, F> datum)
{
    // Generic classes are invariant, so testing against RVFDatum<object, object> never
    // matches an RVFDatum<L, F>; test the closed type so the real-valued path is reachable.
    RVFDatum<L, F> rvfDatum = datum as RVFDatum<L, F>;
    if (rvfDatum != null)
    {
        return ClassOfRVFDatum(rvfDatum);
    }
    return ClassOf(datum.AsFeatures());
}
/// <summary>Builds an RVFDatum from the features and label of another datum.</summary>
/// <remarks>
/// A fresh counter is allocated for this instance, and every occurrence of a feature in
/// <paramref name="m"/> adds 1.0 to that feature's count. <i>Implementation note:</i> the
/// copy is only guaranteed correct if the label and feature names are immutable.
/// </remarks>
/// <param name="m">The datum to copy.</param>
public RVFDatum(IDatum<L, F> m)
{
    features = new ClassicCounter<F>();

    // One unit of weight per occurrence of each feature in the source datum.
    foreach (F featureName in m.AsFeatures())
    {
        features.IncrementCount(featureName, 1.0);
    }

    L sourceLabel = m.Label();
    SetLabel(sourceLabel);
}
/// <summary>Returns whether the given Datum contains the same features as this Datum.</summary>
/// <remarks>
/// Returns whether the given Datum contains the same features as this Datum.
/// Doesn't check the labels, should we change this?
/// (CDM Feb 2012: Also doesn't correctly respect the contract for equals,
/// since it gives one way equality with other Datum's.)
/// </remarks>
/// <param name="o">The object to test equality with</param>
/// <returns>Whether it is equal to this Datum in terms of features</returns>
public override bool Equals(object o)
{
    // An object is always equal to itself, whatever its type arguments are.
    if (ReferenceEquals(this, o))
    {
        return true;
    }

    // 'as' yields null for null and for objects of an incompatible type, so Equals returns
    // false instead of throwing InvalidCastException from a hard cast.
    // NOTE(review): IDatum<object, object> only matches datums whose type arguments convert
    // to object; the enclosing class's own type parameters may be the better target - confirm.
    IDatum<object, object> d = o as IDatum<object, object>;
    if (d == null)
    {
        return false;
    }
    return features.Equals(d.AsFeatures());
}
/// <summary>Returns whether the given Datum contains the same features as this Datum.</summary>
/// <remarks>
/// Returns whether the given Datum contains the same features as this Datum.
/// Doesn't check the labels, should we change this?
/// </remarks>
/// <param name="o">The object to test equality with.</param>
/// <returns>
/// <c>true</c> if <paramref name="o"/> is this instance, or is a datum with the same type
/// arguments whose feature collection equals this datum's features; otherwise <c>false</c>.
/// </returns>
public override bool Equals(object o)
{
    // An object is always equal to itself.
    if (ReferenceEquals(this, o))
    {
        return true;
    }

    // 'as' yields null for null and for objects of an incompatible type, so Equals returns
    // false instead of throwing InvalidCastException from a hard cast.
    IDatum<LabelType, FeatureType> d = o as IDatum<LabelType, FeatureType>;
    if (d == null)
    {
        return false;
    }
    return features.Equals(d.AsFeatures());
}
/// <summary>
/// Builds the per-GE-feature empirical label distributions from the labeled dataset and the
/// inverted index from each GE feature to the unlabeled datums that contain it.
/// </summary>
/// <remarks>
/// Fills <c>geFeature2EmpiricalDist</c> (one row per GE feature, one column per label of the
/// labeled dataset) and <c>geFeature2DatumList</c> (one list of unlabeled-datum positions per
/// GE feature), then prints the number of unlabeled datums that contain at least one GE feature.
/// </remarks>
/// <param name="geFeatures">The GE features; the position of a feature in this list is its GE feature number.</param>
private void ComputeEmpiricalStatistics(IList<F> geFeatures)
{
    int numGeFeatures = geFeatures.Count;
    int numLabels = labeledDataset.labelIndex.Size();

    // Allocate the containers. Each row must be allocated explicitly: an empty jagged array
    // would make every geFeature2EmpiricalDist[n] access below throw.
    geFeature2EmpiricalDist = new double[numGeFeatures][];
    geFeature2DatumList = new List<IList<int>>(numGeFeatures);
    IDictionary<F, int> geFeatureMap = Generics.NewHashMap();
    ICollection<int> activeUnlabeledExamples = Generics.NewHashSet();
    for (int n = 0; n < numGeFeatures; n++)
    {
        geFeature2DatumList.Add(new List<int>());
        // Freshly allocated double arrays are already zero-filled.
        geFeature2EmpiricalDist[n] = new double[numLabels];
        geFeatureMap[geFeatures[n]] = n;
    }

    // Compute the empirical label distribution for each GE feature.
    for (int i = 0; i < labeledDataset.Size(); i++)
    {
        IDatum<L, F> labeledDatum = labeledDataset.GetDatum(i);
        int labelID = labeledDataset.labelIndex.IndexOf(labeledDatum.Label());
        foreach (F feature in labeledDatum.AsFeatures())
        {
            int geFnum;
            if (geFeatureMap.TryGetValue(feature, out geFnum))
            {
                geFeature2EmpiricalDist[geFnum][labelID]++;
            }
        }
    }

    // Now normalize and smooth the label distribution for each feature.
    for (int n = 0; n < numGeFeatures; n++)
    {
        ArrayMath.Normalize(geFeature2EmpiricalDist[n]);
        SmoothDistribution(geFeature2EmpiricalDist[n]);
    }

    // Now build the inverted index from each GE feature to unlabeled datums that contain it.
    for (int i = 0; i < unlabeledDataList.Count; i++)
    {
        IDatum<L, F> unlabeledDatum = unlabeledDataList[i];
        foreach (F feature in unlabeledDatum.AsFeatures())
        {
            int geFnum;
            if (geFeatureMap.TryGetValue(feature, out geFnum))
            {
                geFeature2DatumList[geFnum].Add(i);
                activeUnlabeledExamples.Add(i);
            }
        }
    }
    System.Console.Out.WriteLine("Number of active unlabeled examples:" + activeUnlabeledExamples.Count);
}
/// <summary>Returns the scores for both classes.</summary>
/// <param name="datum">The datum to score.</param>
/// <returns>
/// The result of <c>ScoresOfRVFDatum</c> for a real-valued datum; otherwise a counter that
/// assigns <c>-ScoreOf(features)</c> to <c>classes[0]</c> and <c>ScoreOf(features)</c> to
/// <c>classes[1]</c>.
/// </returns>
public virtual ICounter<L> ScoresOf(IDatum<L, F> datum)
{
    // Generic classes are invariant, so testing against RVFDatum<object, object> never
    // matches an RVFDatum<L, F>; test the closed type so the real-valued path is reachable.
    RVFDatum<L, F> rvfDatum = datum as RVFDatum<L, F>;
    if (rvfDatum != null)
    {
        return ScoresOfRVFDatum(rvfDatum);
    }

    ICollection<F> features = datum.AsFeatures();
    double sum = ScoreOf(features);
    ICounter<L> c = new ClassicCounter<L>();
    c.SetCount(classes[0], -sum);
    c.SetCount(classes[1], sum);
    return c;
}
// TODO: Check that this does what we want for Datum other than RVFDatum
/// <summary>Appends a datum to this dataset.</summary>
/// <remarks>
/// A real-valued datum contributes its own feature counter; any other datum contributes the
/// counter produced by <c>Counters.AsCounter</c> from its feature collection.
/// </remarks>
/// <param name="d">The datum whose label and features are added.</param>
public override void Add(IDatum<L, F> d)
{
    // If you edit me, also take care of WeightedRVFDataset
    AddLabel(d.Label());

    // Generic classes are invariant, so testing against RVFDatum<object, object> never
    // matches an RVFDatum<L, F> and the real feature values would be discarded.
    RVFDatum<L, F> rvfDatum = d as RVFDatum<L, F>;
    if (rvfDatum != null)
    {
        AddFeatures(rvfDatum.AsFeaturesCounter());
    }
    else
    {
        AddFeatures(Counters.AsCounter(d.AsFeatures()));
    }
    size++;
}
// If you edit me, also take care of WeightedRVFDataset
/// <summary>Appends a datum to this dataset together with its source and id.</summary>
/// <remarks>
/// A real-valued datum contributes its own feature counter; any other datum contributes the
/// counter produced by <c>Counters.AsCounter</c> from its feature collection.
/// </remarks>
/// <param name="d">The datum whose label and features are added.</param>
/// <param name="src">The source passed on to <c>AddSourceAndId</c>.</param>
/// <param name="id">The id passed on to <c>AddSourceAndId</c>.</param>
public virtual void Add(IDatum<L, F> d, string src, string id)
{
    AddLabel(d.Label());

    // Generic classes are invariant, so testing against RVFDatum<object, object> never
    // matches an RVFDatum<L, F> and the real feature values would be discarded.
    RVFDatum<L, F> rvfDatum = d as RVFDatum<L, F>;
    if (rvfDatum != null)
    {
        AddFeatures(rvfDatum.AsFeaturesCounter());
    }
    else
    {
        AddFeatures(Counters.AsCounter(d.AsFeatures()));
    }
    AddSourceAndId(src, id);
    size++;
}
/// <summary>Creates a copy of a datum whose label is translated through a label mapping.</summary>
/// <typeparam name="L">The label type of the source datum.</typeparam>
/// <typeparam name="L2">The label type of the returned datum.</typeparam>
/// <typeparam name="F">The feature type, shared by both datums.</typeparam>
/// <param name="d">The datum to translate.</param>
/// <param name="labelMapping">Maps source labels to new labels.</param>
/// <param name="defaultLabel">
/// The label used when the source label is null, is not a key of
/// <paramref name="labelMapping"/>, or maps to null.
/// </param>
/// <returns>
/// An RVFDatum built from the source feature counter when <paramref name="d"/> is a
/// real-valued datum, otherwise a BasicDatum built from the source feature collection.
/// </returns>
public static IDatum<L2, F> MapDatum<L, L2, F>(IDatum<L, F> d, IDictionary<L, L2> labelMapping, L2 defaultLabel)
{
    // TODO: How to copy datum?
    L2 newLabel = defaultLabel;
    L oldLabel = d.Label();
    L2 mappedLabel;

    // The dictionary indexer throws KeyNotFoundException for an unmapped label, which would
    // make the default label unreachable; TryGetValue reports a missing key without throwing.
    // The null guard keeps a null label from being passed as a key.
    if (oldLabel != null && labelMapping.TryGetValue(oldLabel, out mappedLabel) && mappedLabel != null)
    {
        newLabel = mappedLabel;
    }

    // Test against the closed generic type so the feature counter of a real-valued datum is kept.
    RVFDatum<L, F> rvfDatum = d as RVFDatum<L, F>;
    if (rvfDatum != null)
    {
        return new RVFDatum<L2, F>(rvfDatum.AsFeaturesCounter(), newLabel);
    }
    return new BasicDatum<L2, F>(d.AsFeatures(), newLabel);
}
/// <summary>
/// Accumulates, for every indexed feature of a datum, one contribution per ordered pair of
/// classes into the supplied derivative counter.
/// </summary>
/// <remarks>
/// For feature id <c>f</c> and classes <c>(c, cPrime)</c> the amount added under the key
/// <c>(f, c, cPrime)</c> is <c>-probs[c] * (1 - probs[c]) * value</c> when the two classes
/// are the same and <c>probs[c] * probs[cPrime] * value</c> otherwise, where <c>value</c> is
/// <c>ValueOfFeature(feature, datum)</c>. Features missing from the labeled dataset's feature
/// index are skipped.
/// </remarks>
/// <param name="datum">The datum whose features are visited.</param>
/// <param name="probs">Per-class values indexed by class number.</param>
/// <param name="feature2classPairDerivatives">The counter that receives the contributions.</param>
private void UpdateDerivative(IDatum<L, F> datum, double[] probs, ICounter<Triple<int, int, int>> feature2classPairDerivatives)
{
    foreach (F feature in datum.AsFeatures())
    {
        int fID = labeledDataset.featureIndex.IndexOf(feature);
        if (fID < 0)
        {
            // Feature is not in the labeled dataset's feature index.
            continue;
        }

        // The feature value does not depend on the class pair, so look it up once per
        // feature rather than once per (c, cPrime) combination.
        double featureValue = ValueOfFeature(feature, datum);
        for (int c = 0; c < numClasses; c++)
        {
            for (int cPrime = 0; cPrime < numClasses; cPrime++)
            {
                double contribution;
                if (cPrime == c)
                {
                    contribution = -probs[c] * (1 - probs[c]) * featureValue;
                }
                else
                {
                    contribution = probs[c] * probs[cPrime] * featureValue;
                }
                feature2classPairDerivatives.IncrementCount(new Triple<int, int, int>(fID, c, cPrime), contribution);
            }
        }
    }
}
/// <summary>Adds a weighted datum by forwarding its features, label and weight.</summary>
/// <param name="d">The datum whose feature collection and label are added.</param>
/// <param name="weight">The weight passed along with the datum.</param>
public virtual void Add(IDatum<L, F> d, float weight)
{
    ICollection<F> datumFeatures = d.AsFeatures();
    L datumLabel = d.Label();
    Add(datumFeatures, datumLabel, weight);
}
/// <summary>Adds a datum by forwarding its features and label.</summary>
/// <param name="d">The datum whose feature collection and label are added.</param>
public override void Add(IDatum<L, F> d)
{
    ICollection<F> datumFeatures = d.AsFeatures();
    L datumLabel = d.Label();
    Add(datumFeatures, datumLabel);
}