/// <summary>
/// Streams a Wikipedia XML dump, runs the contents of each "text" element in the
/// requested article range through word breaking, stop-word removal and stemming,
/// feeds the resulting words into a <c>CorrelationMatrix</c>, and finally
/// serializes the matrix and the <c>MonitoredScope</c> statistics to two new
/// files named with a fresh GUID.
/// </summary>
/// <param name="programArgs">
/// Supplies <c>WikipediaStartArticle</c> (number of leading "text" elements to
/// skip) and <c>WikipediaEndArticle</c> (exclusive upper bound of the processed
/// range).
/// </param>
private static void CalculateCorrelationFromWikipediaDB(ProgramArguments programArgs)
{
    WordBreaker wordBreaker = new WordBreaker();
    StopWordRemover stopwordRemover = new StopWordRemover();
    SStemmer stemmer = new SStemmer();
    CorrelationMatrix correlationMatrix = new CorrelationMatrix();

    // NOTE(review): machine-specific absolute path; consider moving it into
    // ProgramArguments or configuration.
    string wikipediaPath = @"C:\Users\haabu\Downloads\enwiki-latest-pages-articles.xml\enwiki-latest-pages-articles.xml";

    // The stream gets its own using block: XmlReader.Create(Stream) does not close
    // the underlying stream when the reader is disposed (CloseInput defaults to
    // false). Read-only access is requested because the dump is never written to.
    using (FileStream wikipediaStream = new FileStream(wikipediaPath, FileMode.Open, FileAccess.Read))
    using (XmlReader sr = XmlReader.Create(wikipediaStream))
    {
        // Skip the "text" elements that precede the requested start article.
        for (int i = 0; i < programArgs.WikipediaStartArticle; i++)
        {
            if (!sr.ReadToFollowing("text"))
            {
                break;
            }
        }

        // Per-stage MonitoredScope timing was previously wrapped around each step
        // below and is currently disabled in this method.
        for (int i = programArgs.WikipediaStartArticle; i < programArgs.WikipediaEndArticle; i++)
        {
            if (!sr.ReadToFollowing("text"))
            {
                // No further "text" elements: the reader is exhausted, so every
                // remaining iteration would be a no-op.
                break;
            }

            // Step inside the element and read its text content.
            sr.ReadStartElement();
            string pageContents = sr.ReadContentAsString();

            string[] words = wordBreaker.BreakParagraph(pageContents);
            words = stopwordRemover.RemoveStopWords(words);
            words = stemmer.StemWords(words);
            correlationMatrix.Add(words);

            Logger.Log("Finished document number: " + (i + 1).ToString());
        }
    }

    // FileMode.CreateNew throws if the file already exists; the GUID suffix makes
    // a collision practically impossible.
    string filename = "autorss_" + Guid.NewGuid().ToString();
    using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
    {
        // NOTE(review): BinaryFormatter is obsolete and unsafe for deserializing
        // untrusted data. It is kept here because replacing it would change the
        // on-disk format that existing readers of these files expect.
        BinaryFormatter formatter = new BinaryFormatter();
        formatter.Serialize(fs, correlationMatrix);
    }
    Logger.Log("Saved to file: " + filename);

    filename = "autorss_Scopes_" + Guid.NewGuid().ToString();
    using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
    {
        MonitoredScope.SerializeStatistics(fs);
    }
    Logger.Log("Saved to file: " + filename);
}