예제 #1
0
        /// <summary>
        /// Scores every term exposed by <paramref name="context"/> and returns the scored terms as a sorted list.
        /// </summary>
        /// <remarks>
        /// With <c>pieces</c> being the number of space-separated pieces of the term, the score is
        /// <c>log2(pieces + 0.1) * frequency</c> when the term has no nests, and
        /// <c>log2(pieces + 0.1) * (frequency - average nest frequency)</c> otherwise.
        /// NOTE(review): this looks like the C-value termhood measure, with nests presumably being
        /// ids of longer terms that contain the term - confirm against AlgorithmContext.
        /// </remarks>
        /// <param name="context">Supplies the terms, their frequencies and their nests.</param>
        /// <returns>
        /// A new list holding one <see cref="Term"/> per distinct scored term, ordered by the default
        /// comparison of <see cref="Term"/> (its direction is not visible from this method).
        /// </returns>
        public List<Term> Execute(AlgorithmContext context)
        {
            // Natural log of 2, used to convert natural logarithms to base 2.
            double ln2 = Math.Log(2.0);
            ISet<Term> scored = new HashSet<Term>();

            foreach (string candidate in context.GetTerms())
            {
                // The 0.1 offset keeps the weight of a one-piece term above zero (log2(1) would be 0).
                double lengthWeight = Math.Log((double)candidate.Split(' ').Length + 0.1) / ln2;
                double frequency = (double)context.GetTermFrequency(candidate);
                ICollection<int> nestIds = context.GetNestsOf(candidate);
                int nestCount = nestIds.Count;

                double nestFrequencyTotal = 0.0;
                foreach (int nestId in nestIds)
                {
                    nestFrequencyTotal += (double)context.GetTermFrequency(nestId);
                }

                double score;
                if (nestCount == 0)
                {
                    // No nests: nothing to subtract from the raw frequency.
                    score = lengthWeight * frequency;
                }
                else
                {
                    // Discount the frequency by the average frequency of the nests.
                    score = lengthWeight * (frequency - (nestFrequencyTotal / (double)nestCount));
                }

                scored.Add(new Term(candidate, score));
            }

            // Copy the set into a list so it can be sorted with Term's default comparison.
            List<Term> ordered = new List<Term>(scored);
            ordered.Sort();
            return ordered;
        }
예제 #2
0
        /// <summary>
        /// Runs the whole term-extraction pipeline over the given files and writes the elapsed
        /// time of every stage, plus the overall time, to the console.
        /// </summary>
        /// <remarks>
        /// Stages, in order: object setup and document wrapping, global index build, term nest
        /// build, corpus term frequency build, and execution of <c>CValueAlgorithm</c>.
        /// </remarks>
        /// <param name="filePaths">Paths that are wrapped, in list order, in <c>FileDocument</c> instances.</param>
        /// <returns>The list returned by <c>CValueAlgorithm.Execute</c> for the built context.</returns>
        public List<Term> Run(List<string> filePaths)
        {
            var totalTimer = System.Diagnostics.Stopwatch.StartNew();
            var stageTimer = System.Diagnostics.Stopwatch.StartNew();

            // Stage 1: create the pipeline components and wrap each path in a document.
            var stopList = new StopList();
            var normalizer = new Normalizer();
            var extractor = new NounPhraseExtractor(stopList, normalizer);
            var indexBuilder = new GlobalIndexBuilder();
            List<Document> corpus = filePaths.ConvertAll<Document>(path => new FileDocument(path));

            stageTimer.Stop();
            Console.WriteLine("Setup: " + stageTimer.ElapsedMilliseconds + " ms");
            stageTimer.Restart();

            // Stage 2: build the global index from the documents.
            GlobalIndex index = indexBuilder.Build(corpus, extractor);

            stageTimer.Stop();
            Console.WriteLine("GlobalIndexBuilder.Build(): " + stageTimer.ElapsedMilliseconds + " ms");
            stageTimer.Restart();

            // Stage 3: derive the term nest feature from the index.
            FeatureTermNest nests = new FeatureTermNestBuilder().Build(index);

            stageTimer.Stop();
            Console.WriteLine("FeatureTermNestBuilder.Build: " + stageTimer.ElapsedMilliseconds + " ms");
            stageTimer.Restart();

            // Stage 4: derive the corpus term frequency feature from the index.
            FeatureCorpusTermFrequency corpusFrequencies = new FeatureCorpusTermFrequencyBuilder().Build(index);

            stageTimer.Stop();
            Console.WriteLine("FeatureCorpusTermFrequencyBuilder.Build: " + stageTimer.ElapsedMilliseconds + " ms");
            stageTimer.Restart();

            // Stage 5: score the terms.
            // NOTE(review): the writer is never used afterwards; it is still constructed here
            // because the effects of its constructor are not visible from this method.
            var unusedWriter = new FileResultWriter(index);
            var cValue = new CValueAlgorithm();
            var context = new AlgorithmContext(corpusFrequencies, nests);
            List<Term> rankedTerms = cValue.Execute(context);

            // Stop both timers before printing so console output is not counted in either figure.
            stageTimer.Stop();
            totalTimer.Stop();
            Console.WriteLine("CValueAlgorithm.Execute: " + stageTimer.ElapsedMilliseconds + " ms");
            Console.WriteLine("Everything: " + totalTimer.ElapsedMilliseconds + " ms");

            return rankedTerms;
        }