/// <summary>
/// Runs every entry of <paramref name="files"/> through one shared
/// <c>DocumentProcessor</c> and adds each resulting <c>Document</c> to a
/// newly created <c>DocsStatistics</c>.
/// </summary>
/// <param name="files">Entries passed, in array order, to <c>DocumentProcessor.process</c>.</param>
/// <returns>The statistics object after every document has been added.</returns>
private DocsStatistics processFiles(string[] files)
        {
            DocumentProcessor processor = new DocumentProcessor();
            DocsStatistics stats = new DocsStatistics();

            // Walk the array by index; entries are handled in their original order.
            for (int idx = 0; idx < files.Length; idx++)
            {
                stats.addDocument(processor.process(files[idx]));
            }

            return stats;
        }
Пример #2
0
            /// <summary>
            /// Reads one file as text, turns that text into a <c>Document</c> and adds
            /// the document to <paramref name="docStats"/>.
            /// </summary>
            /// <param name="docStats">Statistics accumulator that receives the document.</param>
            /// <param name="filename">Path of the file whose text is read.</param>
            private static void processFile(DocsStatistics docStats, string filename)
            {
                DocumentProcessor processor = new DocumentProcessor();

                // The whole file is loaded with Encoding.Default and handed straight to the processor.
                using (Document parsed = processor.process(File.ReadAllText(filename, Encoding.Default)))
                {
                    // The document is disposed as soon as it has been added.
                    docStats.addDocument(parsed);
                }
            }
Пример #3
0
            /// <summary>
            /// Builds an <c>IDF</c> table from a set of text files.
            /// </summary>
            /// <remarks>
            /// Each file is read with <c>Encoding.Default</c>, processed into a
            /// <c>Document</c> and added to a <c>DocsStatistics</c> accumulator. Afterwards,
            /// for every key of <c>wordsCount</c>, the value
            /// <c>Math.Log(docCount / wordRefCount)</c> is stored in the result.
            /// </remarks>
            /// <param name="files">Paths of the files to read.</param>
            /// <returns>An <c>IDF</c> whose <c>idf</c> map holds an entry for each key of <c>wordsCount</c>.</returns>
            public static IDF fromFiles(string[] files)
            {
                DocsStatistics docStats = new DocsStatistics();
                DocumentProcessor docProcessor = new DocumentProcessor();

                foreach (string file in files)
                {
                    string fileContent = File.ReadAllText(file, Encoding.Default);

                    // Dispose each document as soon as it has been added to the
                    // statistics, the same way processFile does, instead of
                    // leaving it undisposed.
                    using (Document doc = docProcessor.process(fileContent))
                    {
                        docStats.addDocument(doc);
                    }
                }

                IDF idf = new IDF();

                foreach (string word in docStats.wordsCount.Keys)
                {
                    // A missing reference count is treated as 0. Because the division is
                    // done in floating point, that case produces a non-finite value
                    // rather than throwing.
                    // NOTE(review): confirm that a zero count cannot occur for a counted word.
                    double wordRefCount = docStats.wordRefsCount[word] == null ? 0 : ((int)docStats.wordRefsCount[word]);
                    double wordIdf = Math.Log(docStats.docCount / wordRefCount);

                    idf.idf[word] = wordIdf;
                }

                return idf;
            }