Пример #1
0
        /// <summary>
        /// Builds bigram statistics from a set of document files and hands the
        /// result to <c>BigramStatisticsModel.toFile</c> for the given path.
        /// </summary>
        /// <param name="documentFiles">Paths of the documents to read; each file is read in full.</param>
        /// <param name="bigramFilePath">Path passed to <c>toFile</c> once all documents have been added.</param>
        /// <remarks>
        /// Replaces the current <c>StopWordsHandler</c> instance with a
        /// <c>NullStopWordsHandler</c> before processing; the previous handler is
        /// not restored here.
        /// </remarks>
        public void preprocessLanguageModel(string[] documentFiles, string bigramFilePath)
        {
            // No need for Stop Words Removal.
            StopWordsHandler.setInstance(new NullStopWordsHandler());

            DocumentProcessor     docProcessor = new DocumentProcessor();
            BigramStatisticsModel bigramStats  = new BigramStatisticsModel();

            foreach (string filename in documentFiles)
            {
                // Encoding.Default is platform/runtime dependent; kept as-is so
                // existing corpora are decoded exactly as before.
                string   fileContent = File.ReadAllText(filename, Encoding.Default);
                Document doc         = docProcessor.process(fileContent);

                bigramStats.addDocument(doc);
            }

            bigramStats.toFile(bigramFilePath);
        }
        /// <summary>
        /// Reads every file in <paramref name="documentFiles"/>, runs it through a
        /// <c>DocumentProcessor</c>, accumulates the results in a
        /// <c>BigramStatisticsModel</c>, and finally calls <c>toFile</c> on that
        /// model with <paramref name="bigramFilePath"/>.
        /// </summary>
        /// <param name="documentFiles">Paths of the input documents.</param>
        /// <param name="bigramFilePath">Destination path handed to <c>toFile</c>.</param>
        /// <remarks>
        /// Installs a <c>NullStopWordsHandler</c> via <c>StopWordsHandler.setInstance</c>
        /// as its first step and does not reinstate the earlier handler.
        /// </remarks>
        public void preprocessLanguageModel(string[] documentFiles, string bigramFilePath)
        {
            // Stop words are kept: the language model needs them.
            StopWordsHandler.setInstance(new NullStopWordsHandler());

            DocumentProcessor processor = new DocumentProcessor();
            BigramStatisticsModel model = new BigramStatisticsModel();

            foreach (string path in documentFiles)
            {
                // Files are decoded with Encoding.Default, matching the original behaviour.
                Document parsed = processor.process(File.ReadAllText(path, Encoding.Default));
                model.addDocument(parsed);
            }

            model.toFile(bigramFilePath);
        }