/// <summary>
/// Reads a single file, converts its text into a <c>Document</c> and
/// registers that document with the supplied statistics object.
/// </summary>
/// <param name="docStats">Statistics accumulator that receives the document.</param>
/// <param name="filename">Path of the file whose text is processed.</param>
private static void processFile(DocsStatistics docStats, string filename)
{
    DocumentProcessor processor = new DocumentProcessor();

    // The file text is decoded with Encoding.Default before processing.
    // The resulting document is disposed as soon as it has been added.
    using (Document parsed = processor.process(File.ReadAllText(filename, Encoding.Default)))
    {
        docStats.addDocument(parsed);
    }
}
/// <summary>
/// Builds document statistics for a set of files.
/// </summary>
/// <remarks>
/// Each file is read with <c>Encoding.Default</c> and its text is passed to a
/// single shared <c>DocumentProcessor</c>; every resulting document is added
/// to one <c>DocsStatistics</c> instance.
/// </remarks>
/// <param name="files">Paths of the files to read and process.</param>
/// <returns>The statistics accumulated over all processed files.</returns>
private DocsStatistics processFiles(string[] files)
{
    DocumentProcessor docProcessor = new DocumentProcessor();
    DocsStatistics docStats = new DocsStatistics();

    foreach (string filename in files)
    {
        // process() is given document text by the other loaders in this file,
        // so load the file content instead of handing over the path itself.
        string fileContent = File.ReadAllText(filename, Encoding.Default);
        Document doc = docProcessor.process(fileContent);
        docStats.addDocument(doc);
    }

    return docStats;
}
/// <summary>
/// Builds an <c>IDF</c> table from the given files.
/// </summary>
/// <remarks>
/// Phase 1: every file is read with <c>Encoding.Default</c>, processed into a
/// <c>Document</c> and added to a <c>DocsStatistics</c> instance.
/// Phase 2: for every key of <c>wordsCount</c>, the value
/// <c>Math.Log(docCount / refCount)</c> is stored in the table, where
/// <c>refCount</c> is the word's <c>wordRefsCount</c> entry, or 0 when that
/// entry is null.
/// </remarks>
/// <param name="files">Paths of the files to read and process.</param>
/// <returns>The table holding one computed value per word.</returns>
public static IDF fromFiles(string[] files)
{
    DocsStatistics stats = new DocsStatistics();
    DocumentProcessor processor = new DocumentProcessor();

    // Phase 1: gather statistics over all files.
    foreach (string path in files)
    {
        string text = File.ReadAllText(path, Encoding.Default);
        stats.addDocument(processor.process(text));
    }

    // Phase 2: derive one value per known word.
    IDF result = new IDF();
    foreach (string term in stats.wordsCount.Keys)
    {
        // NOTE(review): a null entry leaves the divisor at 0 - confirm that the
        // value this produces is what callers expect.
        double refCount = 0;
        if (stats.wordRefsCount[term] != null)
        {
            refCount = (int)stats.wordRefsCount[term];
        }

        result.idf[term] = Math.Log(stats.docCount / refCount);
    }

    return result;
}