/// <summary>
/// Scores every term exposed by <paramref name="context"/> and returns the
/// scored terms as a sorted list.
/// </summary>
/// <remarks>
/// For a term made of <c>w</c> space-separated words with frequency <c>f</c>
/// and a nest collection <c>N</c> (as returned by <c>GetNestsOf</c>):
/// <list type="bullet">
/// <item><description>if <c>N</c> is empty: <c>score = log2(w + 0.1) * f</c></description></item>
/// <item><description>otherwise: <c>score = log2(w + 0.1) * (f - sum(freq of each id in N) / |N|)</c></description></item>
/// </list>
/// NOTE(review): this looks like the C-value termhood formula; the 0.1 offset
/// presumably keeps single-word terms from scoring exactly zero — confirm.
/// </remarks>
/// <param name="context">Supplies the term strings, term frequencies and nests.</param>
/// <returns>
/// A list of scored terms ordered by <c>List&lt;Term&gt;.Sort()</c>, i.e. by
/// whatever default comparison <c>Term</c> defines.
/// </returns>
public List<Term> Execute(AlgorithmContext context)
{
    // Loop-invariant denominator used to convert a natural log into log base 2.
    double naturalLogOfTwo = Math.Log(2.0);

    // Scored terms are collected in a set first; any de-duplication that
    // happens here depends on how Term defines equality.
    ISet<Term> scored = new HashSet<Term>();

    foreach (string candidate in context.GetTerms())
    {
        int wordCount = candidate.Split(' ').Length;
        double lengthWeight = Math.Log(wordCount + 0.1) / naturalLogOfTwo;
        double frequency = (double)context.GetTermFrequency(candidate);

        ICollection<int> nestIds = context.GetNestsOf(candidate);
        int nestCount = nestIds.Count;

        double nestedFrequencyTotal = 0.0;
        foreach (int nestId in nestIds)
        {
            nestedFrequencyTotal += (double)context.GetTermFrequency(nestId);
        }

        double termScore;
        if (nestCount == 0)
        {
            // No nests: the raw frequency is used unchanged.
            termScore = lengthWeight * frequency;
        }
        else
        {
            // Subtract the mean frequency of the nest entries.
            termScore = lengthWeight * (frequency - (nestedFrequencyTotal / (double)nestCount));
        }

        scored.Add(new Term(candidate, termScore));
    }

    List<Term> ordered = new List<Term>(scored);
    ordered.Sort();
    return ordered;
}
/// <summary>
/// Runs the full term-extraction pipeline over the given files and returns
/// the terms produced by <c>CValueAlgorithm.Execute</c>.
/// </summary>
/// <remarks>
/// The stages run in this order, and the elapsed milliseconds of each stage
/// (plus the overall total) are written to the console:
/// setup, global index build, term-nest feature build, corpus term-frequency
/// feature build, algorithm execution.
/// </remarks>
/// <param name="filePaths">Paths wrapped one-to-one in <c>FileDocument</c> instances.</param>
/// <returns>The list returned by <c>CValueAlgorithm.Execute</c>.</returns>
public List<Term> Run(List<string> filePaths)
{
    System.Diagnostics.Stopwatch totalTimer = System.Diagnostics.Stopwatch.StartNew();
    System.Diagnostics.Stopwatch stageTimer = System.Diagnostics.Stopwatch.StartNew();

    // Stage 1: construct the collaborators and wrap each path in a document.
    StopList stopList = new StopList();
    Normalizer normalizer = new Normalizer();
    NounPhraseExtractor phraseExtractor = new NounPhraseExtractor(stopList, normalizer);
    GlobalIndexBuilder indexBuilder = new GlobalIndexBuilder();
    List<Document> corpus = new List<Document>();
    foreach (string path in filePaths)
    {
        corpus.Add(new FileDocument(path));
    }
    stageTimer.Stop();
    Console.WriteLine("Setup: " + stageTimer.ElapsedMilliseconds + " ms");

    // Stage 2: build the global index from the documents.
    stageTimer.Restart();
    GlobalIndex globalIndex = indexBuilder.Build(corpus, phraseExtractor);
    stageTimer.Stop();
    Console.WriteLine("GlobalIndexBuilder.Build(): " + stageTimer.ElapsedMilliseconds + " ms");

    // Stage 3: derive the term-nest feature from the index.
    stageTimer.Restart();
    FeatureTermNest nestFeature = new FeatureTermNestBuilder().Build(globalIndex);
    stageTimer.Stop();
    Console.WriteLine("FeatureTermNestBuilder.Build: " + stageTimer.ElapsedMilliseconds + " ms");

    // Stage 4: derive the corpus term-frequency feature from the index.
    stageTimer.Restart();
    FeatureCorpusTermFrequency frequencyFeature = new FeatureCorpusTermFrequencyBuilder().Build(globalIndex);
    stageTimer.Stop();
    Console.WriteLine("FeatureCorpusTermFrequencyBuilder.Build: " + stageTimer.ElapsedMilliseconds + " ms");

    // Stage 5: run the algorithm over the two features.
    stageTimer.Restart();
    // NOTE(review): the writer is constructed but never used in this method;
    // it is kept in case its constructor has side effects — confirm and remove.
    FileResultWriter resultWriter = new FileResultWriter(globalIndex);
    CValueAlgorithm cValue = new CValueAlgorithm();
    AlgorithmContext algorithmContext = new AlgorithmContext(frequencyFeature, nestFeature);
    List<Term> extractedTerms = cValue.Execute(algorithmContext);
    stageTimer.Stop();
    totalTimer.Stop();
    Console.WriteLine("CValueAlgorithm.Execute: " + stageTimer.ElapsedMilliseconds + " ms");
    Console.WriteLine("Everything: " + totalTimer.ElapsedMilliseconds + " ms");

    return extractedTerms;
}