/// <summary>
/// Indexes one document with two canonical terms and verifies what the
/// <c>GlobalIndex</c> exposes afterwards: the term-to-id map, term lookups in
/// both directions, the variant lookup for an unindexed term, the document map,
/// and the document-to-term mapping.
/// </summary>
public void TestIndexDocWithTermsCanonical()
{
    // Arrange: a fresh index holding a single document with two canonical terms.
    GlobalIndex index = new GlobalIndex();
    Document doc = new FileDocument("myFile");
    ISet<string> canonicalTerms = new HashSet<string> { "sewing machine", "presser foot" };

    index.IndexDocWithCanonicalTerms(doc, canonicalTerms);

    // Term -> id mapping: the test expects ids 0 and 1 for the two terms.
    IDictionary<string, int> wantedTermIds = new Dictionary<string, int>
    {
        { "sewing machine", 0 },
        { "presser foot", 1 },
    };
    Assert.IsTrue(Comparators.DictionariesAreEqual(wantedTermIds, index.GetTermIdMap()));
    Assert.AreEqual(0, index.RetrieveCanonicalTerm("sewing machine"));
    Assert.AreEqual(1, index.RetrieveCanonicalTerm("presser foot"));

    // The set of canonical terms known to the index, copied into a local set.
    ISet<string> foundCanonicalTerms = new HashSet<string>();
    foundCanonicalTerms.UnionWith(index.GetCanonicalTerms());
    ISet<string> wantedCanonicalTerms = new HashSet<string> { "sewing machine", "presser foot" };
    Assert.IsTrue(Comparators.SetsAreEqual(wantedCanonicalTerms, foundCanonicalTerms));

    // "string" is not one of the indexed terms; the test expects an empty variant set for it.
    ISet<string> noVariants = new HashSet<string>();
    Assert.IsTrue(Comparators.SetsAreEqual(noVariants, index.RetrieveVariantsOfCanonicalTerm("string")));

    // Id -> term lookup.
    Assert.AreEqual("sewing machine", index.RetrieveCanonicalTerm(0));
    Assert.AreEqual("presser foot", index.RetrieveCanonicalTerm(1));

    // Document bookkeeping: the only document is expected to have id 0.
    IDictionary<Document, int> wantedDocIds = new Dictionary<Document, int>
    {
        { doc, 0 },
    };
    Assert.IsTrue(Comparators.DictionariesAreEqual(wantedDocIds, index.GetDocMap()));

    ISet<Document> wantedDocs = new HashSet<Document> { doc };
    Assert.IsTrue(Comparators.SetsAreEqual(wantedDocs, index.GetDocuments()));

    // Document id -> term ids, plus the per-document term retrieval overloads.
    ISet<int> wantedTermIdsInDoc = new HashSet<int> { 0, 1 };
    IDictionary<int, ISet<int>> wantedDocToTerms = new Dictionary<int, ISet<int>>
    {
        { 0, wantedTermIdsInDoc },
    };
    Assert.IsTrue(Comparators.DictionariesOfSetsAreEqual(wantedDocToTerms, index.GetDocToTerms()));
    Assert.IsTrue(Comparators.SetsAreEqual(wantedTermIdsInDoc, index.RetrieveCanonicalTermIdsInDoc(0)));
    Assert.IsTrue(Comparators.SetsAreEqual(wantedCanonicalTerms, index.RetrieveCanonicalTermsInDoc(0)));
    Assert.IsTrue(Comparators.SetsAreEqual(wantedCanonicalTerms, index.RetrieveCanonicalTermsInDoc(doc)));
}
/// <summary>
/// Builds a <c>FeatureCorpusTermFrequency</c> from an index containing one
/// string document and three terms (each registered as its own variant), then
/// checks the total corpus frequency and the frequencies of "machine" and "sewing".
/// </summary>
public void TestBuild()
{
    // Arrange: index one in-memory document with three canonical terms.
    GlobalIndex index = new GlobalIndex();
    Document doc = new StringDocument("I love sewing; my sewing machine is a Pfaff and I love it.");
    ISet<string> canonicalTerms = new HashSet<string> { "machine", "sewing machine", "sewing" };
    index.IndexDocWithCanonicalTerms(doc, canonicalTerms);

    // Every term is mapped to a variant set containing only the term itself.
    IDictionary<string, ISet<string>> variantsByTerm = new Dictionary<string, ISet<string>>
    {
        { "machine", new HashSet<string> { "machine" } },
        { "sewing machine", new HashSet<string> { "sewing machine" } },
        { "sewing", new HashSet<string> { "sewing" } },
    };
    index.IndexTermWithVariant(variantsByTerm);

    // Act: build the frequency feature from the populated index.
    FeatureCorpusTermFrequencyBuilder builder = new FeatureCorpusTermFrequencyBuilder();
    FeatureCorpusTermFrequency frequencies = builder.Build(index);

    // Assert: total corpus frequency and two per-term frequencies.
    Assert.AreEqual(13, frequencies.GetTotalCorpusTermFrequency());
    Assert.AreEqual(1, frequencies.GetTermFrequency("machine"));
    Assert.AreEqual(2, frequencies.GetTermFrequency("sewing"));
}
/// <summary>
/// Records that "machine" is nested in "sewing machine" and checks that
/// <c>GetNestIdsOf("machine")</c> returns exactly the id set {1}.
/// </summary>
public void TestGetNestIdsOf()
{
    // Arrange: index a document with the nested term and its container term.
    GlobalIndex index = new GlobalIndex();
    FileDocument doc = new FileDocument("myFile");
    ISet<string> canonicalTerms = new HashSet<string> { "machine", "sewing machine" };
    index.IndexDocWithCanonicalTerms(doc, canonicalTerms);

    // Act: register the nesting relation on a feature backed by that index.
    FeatureTermNest nestFeature = new FeatureTermNest(index);
    nestFeature.TermNestIn("machine", "sewing machine");

    // Assert: the only nest id reported for "machine" is 1.
    ISet<int> wantedNestIds = new HashSet<int> { 1 };
    ISet<int> foundNestIds = nestFeature.GetNestIdsOf("machine");
    Assert.IsTrue(Comparators.SetsAreEqual(wantedNestIds, foundNestIds));
}
/// <summary>
/// Checks that the frequency of "machine" starts at 0 and that successive
/// <c>AddToTermFrequency</c> calls accumulate (0, then 2, then 5).
/// </summary>
public void TestGetTermFrequency()
{
    // Arrange: an index with a single document and a single canonical term.
    GlobalIndex index = new GlobalIndex();
    FileDocument doc = new FileDocument("myFile");
    ISet<string> canonicalTerms = new HashSet<string> { "machine" };
    index.IndexDocWithCanonicalTerms(doc, canonicalTerms);

    FeatureCorpusTermFrequency frequencies = new FeatureCorpusTermFrequency(index);

    // Nothing has been added yet.
    Assert.AreEqual(0, frequencies.GetTermFrequency("machine"));

    // The first increment is reported as-is.
    frequencies.AddToTermFrequency("machine", 2);
    Assert.AreEqual(2, frequencies.GetTermFrequency("machine"));

    // The second increment is added on top of the first.
    frequencies.AddToTermFrequency("machine", 3);
    Assert.AreEqual(5, frequencies.GetTermFrequency("machine"));
}