/// <summary>
/// Indexes one in-memory document together with three canonical terms, registers each
/// term as its own only variant, builds a <c>FeatureCorpusTermFrequency</c> from the
/// resulting index, and checks the corpus total and two per-term frequencies.
/// </summary>
public void TestBuild()
{
    // Index a single sentence along with the canonical terms extracted from it.
    GlobalIndex index = new GlobalIndex();
    Document sentence = new StringDocument("I love sewing; my sewing machine is a Pfaff and I love it.");
    ISet<string> canonicalTerms = new HashSet<string> { "machine", "sewing machine", "sewing" };
    index.IndexDocWithCanonicalTerms(sentence, canonicalTerms);

    // Every canonical term maps to a variant set containing only itself.
    IDictionary<string, ISet<string>> variantsByTerm = new Dictionary<string, ISet<string>>
    {
        { "machine", new HashSet<string> { "machine" } },
        { "sewing machine", new HashSet<string> { "sewing machine" } },
        { "sewing", new HashSet<string> { "sewing" } }
    };
    index.IndexTermWithVariant(variantsByTerm);

    // Build the feature under test from the populated index.
    FeatureCorpusTermFrequency corpusFrequency = new FeatureCorpusTermFrequencyBuilder().Build(index);

    // NOTE(review): 13 presumably corresponds to the word count of the sentence above - confirm.
    Assert.AreEqual(13, corpusFrequency.GetTotalCorpusTermFrequency());
    Assert.AreEqual(1, corpusFrequency.GetTermFrequency("machine"));
    Assert.AreEqual(2, corpusFrequency.GetTermFrequency("sewing"));
}
/// <summary>
/// Checks that a freshly constructed <c>FeatureCorpusTermFrequency</c> reports 0 for an
/// indexed term, and that successive <c>AddToTermFrequency</c> calls accumulate
/// (0, then 2, then 5).
/// </summary>
public void TestGetTermFrequency()
{
    // Index a file-backed document carrying one canonical term.
    GlobalIndex index = new GlobalIndex();
    FileDocument fileDoc = new FileDocument("myFile");
    ISet<string> canonicalTerms = new HashSet<string> { "machine" };
    index.IndexDocWithCanonicalTerms(fileDoc, canonicalTerms);

    FeatureCorpusTermFrequency corpusFrequency = new FeatureCorpusTermFrequency(index);

    // Nothing has been added yet, so the term starts at zero.
    Assert.AreEqual(0, corpusFrequency.GetTermFrequency("machine"));

    // The first addition sets the count to the added amount.
    corpusFrequency.AddToTermFrequency("machine", 2);
    Assert.AreEqual(2, corpusFrequency.GetTermFrequency("machine"));

    // A second addition is summed with the first (2 + 3).
    corpusFrequency.AddToTermFrequency("machine", 3);
    Assert.AreEqual(5, corpusFrequency.GetTermFrequency("machine"));
}