public void TestBuild()
        {
            // Index a single one-sentence document against three canonical terms.
            GlobalIndex index = new GlobalIndex();
            Document corpusDocument = new StringDocument("I love sewing; my sewing machine is a Pfaff and I love it.");
            ISet<string> canonicalTerms = new HashSet<string> { "machine", "sewing machine", "sewing" };

            index.IndexDocWithCanonicalTerms(corpusDocument, canonicalTerms);

            // Register every canonical term with exactly one variant: the term itself.
            IDictionary<string, ISet<string>> variantsByTerm = new Dictionary<string, ISet<string>>
            {
                { "machine", new HashSet<string> { "machine" } },
                { "sewing machine", new HashSet<string> { "sewing machine" } },
                { "sewing", new HashSet<string> { "sewing" } },
            };

            index.IndexTermWithVariant(variantsByTerm);

            // Build the corpus term-frequency feature from the populated index.
            FeatureCorpusTermFrequency corpusFrequencies = new FeatureCorpusTermFrequencyBuilder().Build(index);

            // 13 is presumably the number of word tokens in the sentence above -- confirm against the builder.
            Assert.AreEqual(13, corpusFrequencies.GetTotalCorpusTermFrequency());

            // Per-term expectations; no frequency is asserted for "sewing machine".
            Assert.AreEqual(1, corpusFrequencies.GetTermFrequency("machine"));
            Assert.AreEqual(2, corpusFrequencies.GetTermFrequency("sewing"));
        }
        // Example 2
        public void TestIndexTermWithVariant()
        {
            // Register one canonical term, "filename", with two variants.
            // Insertion order matters here: the expected variant ids asserted below
            // pin "file name" to 0 and "filename" to 1.
            GlobalIndex index = new GlobalIndex();
            ISet<string> filenameVariants = new HashSet<string> { "file name", "filename" };
            IDictionary<string, ISet<string>> variantsByTerm = new Dictionary<string, ISet<string>>
            {
                { "filename", filenameVariants },
            };

            index.IndexTermWithVariant(variantsByTerm);

            // Canonical term -> id.
            IDictionary<string, int> expectedTermIds = new Dictionary<string, int> { { "filename", 0 } };
            Assert.IsTrue(Comparators.DictionariesAreEqual(expectedTermIds, index.GetTermIdMap()));
            Assert.AreEqual(0, index.RetrieveCanonicalTerm("filename"));

            // The set of canonical terms, copied into a fresh set before comparison.
            ISet<string> canonicalTermsFound = new HashSet<string>();
            canonicalTermsFound.UnionWith(index.GetCanonicalTerms());
            ISet<string> canonicalTermsWanted = new HashSet<string> { "filename" };
            Assert.IsTrue(Comparators.SetsAreEqual(canonicalTermsWanted, canonicalTermsFound));

            // Variants are retrievable by canonical term, and the term by its id.
            Assert.IsTrue(Comparators.SetsAreEqual(filenameVariants, index.RetrieveVariantsOfCanonicalTerm("filename")));
            Assert.AreEqual("filename", index.RetrieveCanonicalTerm(0));

            // Term id -> variant ids.
            IDictionary<int, ISet<int>> expectedVariantIdsByTerm = new Dictionary<int, ISet<int>>
            {
                { 0, new HashSet<int> { 0, 1 } },
            };
            Assert.IsTrue(Comparators.DictionariesOfSetsAreEqual(expectedVariantIdsByTerm, index.GetTermToVariant()));

            // Variant id -> term id.
            IDictionary<int, int> expectedTermByVariantId = new Dictionary<int, int>
            {
                { 0, 0 },
                { 1, 0 },
            };
            Assert.IsTrue(Comparators.DictionariesAreEqual(expectedTermByVariantId, index.GetVariantToTerm()));

            // Variant text -> variant id.
            IDictionary<string, int> expectedVariantIds = new Dictionary<string, int>
            {
                { "file name", 0 },
                { "filename", 1 },
            };
            Assert.IsTrue(Comparators.DictionariesAreEqual(expectedVariantIds, index.GetVariantMap()));
        }