Esempio n. 1
0
        /// <summary>
        /// Indexes a single file document with two canonical terms and then checks
        /// each term, document and document-to-term lookup exposed by the index.
        /// </summary>
        public void TestIndexDocWithTermsCanonical()
        {
            // One document carrying two canonical terms.
            GlobalIndex index = new GlobalIndex();
            Document doc = new FileDocument("myFile");
            ISet<string> canonicalTerms = new HashSet<string> { "sewing machine", "presser foot" };

            index.IndexDocWithCanonicalTerms(doc, canonicalTerms);

            // Term -> id map: the test expects ids 0 and 1, matching the order the terms were added above.
            IDictionary<string, int> wantTermIds = new Dictionary<string, int>
            {
                { "sewing machine", 0 },
                { "presser foot", 1 }
            };
            Assert.IsTrue(Comparators.DictionariesAreEqual(wantTermIds, index.GetTermIdMap()));

            // Lookup by term text yields the id.
            Assert.AreEqual(0, index.RetrieveCanonicalTerm("sewing machine"));
            Assert.AreEqual(1, index.RetrieveCanonicalTerm("presser foot"));

            // The full set of canonical terms, copied into a fresh set before comparing.
            ISet<string> gotCanonical = new HashSet<string>();
            gotCanonical.UnionWith(index.GetCanonicalTerms());
            ISet<string> wantCanonical = new HashSet<string> { "sewing machine", "presser foot" };
            Assert.IsTrue(Comparators.SetsAreEqual(wantCanonical, gotCanonical));

            // "string" was never indexed; the test expects an empty variant set for it.
            ISet<string> noVariants = new HashSet<string>();
            Assert.IsTrue(Comparators.SetsAreEqual(noVariants, index.RetrieveVariantsOfCanonicalTerm("string")));

            // Lookup by id yields the term text.
            Assert.AreEqual("sewing machine", index.RetrieveCanonicalTerm(0));
            Assert.AreEqual("presser foot", index.RetrieveCanonicalTerm(1));

            // Document -> id map and the set of known documents.
            IDictionary<Document, int> wantDocIds = new Dictionary<Document, int> { { doc, 0 } };
            Assert.IsTrue(Comparators.DictionariesAreEqual(wantDocIds, index.GetDocMap()));

            ISet<Document> wantDocs = new HashSet<Document> { doc };
            Assert.IsTrue(Comparators.SetsAreEqual(wantDocs, index.GetDocuments()));

            // Document id -> term ids association.
            ISet<int> wantTermIdsInDoc = new HashSet<int> { 0, 1 };
            IDictionary<int, ISet<int>> wantDocsToTerms = new Dictionary<int, ISet<int>>
            {
                { 0, wantTermIdsInDoc }
            };
            Assert.IsTrue(Comparators.DictionariesOfSetsAreEqual(wantDocsToTerms, index.GetDocToTerms()));

            // Per-document retrieval, by document id and by document instance.
            Assert.IsTrue(Comparators.SetsAreEqual(wantTermIdsInDoc, index.RetrieveCanonicalTermIdsInDoc(0)));
            Assert.IsTrue(Comparators.SetsAreEqual(wantCanonical, index.RetrieveCanonicalTermsInDoc(0)));
            Assert.IsTrue(Comparators.SetsAreEqual(wantCanonical, index.RetrieveCanonicalTermsInDoc(doc)));
        }
        /// <summary>
        /// Builds a <c>FeatureCorpusTermFrequency</c> from an index holding one string
        /// document and three canonical terms (each registered as its own variant),
        /// then checks the total corpus frequency and two per-term frequencies.
        /// </summary>
        public void TestBuild()
        {
            GlobalIndex index = new GlobalIndex();
            Document doc = new StringDocument("I love sewing; my sewing machine is a Pfaff and I love it.");
            ISet<string> canonicalTerms = new HashSet<string> { "machine", "sewing machine", "sewing" };

            index.IndexDocWithCanonicalTerms(doc, canonicalTerms);

            // Every canonical term maps to a variant set that contains only itself.
            IDictionary<string, ISet<string>> variantsByTerm = new Dictionary<string, ISet<string>>
            {
                { "machine", new HashSet<string> { "machine" } },
                { "sewing machine", new HashSet<string> { "sewing machine" } },
                { "sewing", new HashSet<string> { "sewing" } }
            };
            index.IndexTermWithVariant(variantsByTerm);

            FeatureCorpusTermFrequency frequencies = new FeatureCorpusTermFrequencyBuilder().Build(index);

            // The document text above has 13 words.
            Assert.AreEqual(13, frequencies.GetTotalCorpusTermFrequency());
            Assert.AreEqual(1, frequencies.GetTermFrequency("machine"));

            Assert.AreEqual(2, frequencies.GetTermFrequency("sewing"));
        }
Esempio n. 3
0
        /// <summary>
        /// Records that "machine" is nested in "sewing machine" and checks that
        /// <c>GetNestIdsOf("machine")</c> returns exactly the id set { 1 }.
        /// </summary>
        public void TestGetNestIdsOf()
        {
            GlobalIndex index = new GlobalIndex();
            FileDocument doc = new FileDocument("myFile");
            ISet<string> canonicalTerms = new HashSet<string> { "machine", "sewing machine" };

            index.IndexDocWithCanonicalTerms(doc, canonicalTerms);

            FeatureTermNest nestFeature = new FeatureTermNest(index);
            nestFeature.TermNestIn("machine", "sewing machine");

            // 1 is presumably the id given to "sewing machine", the second term added — confirm against GlobalIndex.
            ISet<int> wantNestIds = new HashSet<int> { 1 };
            Assert.IsTrue(Comparators.SetsAreEqual(wantNestIds, nestFeature.GetNestIdsOf("machine")));
        }
Esempio n. 4
0
        /// <summary>
        /// Checks that the frequency of an indexed term starts at 0 and that
        /// successive <c>AddToTermFrequency</c> calls accumulate (0, then 2, then 5).
        /// </summary>
        public void TestGetTermFrequency()
        {
            GlobalIndex index = new GlobalIndex();
            FileDocument doc = new FileDocument("myFile");
            ISet<string> canonicalTerms = new HashSet<string> { "machine" };

            index.IndexDocWithCanonicalTerms(doc, canonicalTerms);

            FeatureCorpusTermFrequency frequencies = new FeatureCorpusTermFrequency(index);

            // Nothing has been added yet.
            Assert.AreEqual(0, frequencies.GetTermFrequency("machine"));

            // First increment: 0 + 2.
            frequencies.AddToTermFrequency("machine", 2);
            Assert.AreEqual(2, frequencies.GetTermFrequency("machine"));

            // Second increment: 2 + 3.
            frequencies.AddToTermFrequency("machine", 3);
            Assert.AreEqual(5, frequencies.GetTermFrequency("machine"));
        }