Beispiel #1
0
        /// <summary>
        /// Builds a <see cref="Corpus"/> from the documents that
        /// <paramref name="reader"/> extracts from each of the given files.
        /// </summary>
        /// <remarks>
        /// Loading is best-effort: when opening a file, reading its documents,
        /// or adding a document throws, the rest of that file is skipped and
        /// processing continues with the next file. Each skipped file is
        /// reported as a trace warning instead of being dropped silently.
        /// Documents added before the failure stay in the corpus.
        /// </remarks>
        /// <param name="fileNames">Paths of the files to read.</param>
        /// <param name="reader">Reader that turns an open stream into documents.</param>
        /// <param name="outputEncoding">
        /// Encoding handed to <c>reader.ReadDocuments</c> for every file.
        /// </param>
        /// <returns>
        /// A corpus holding every document that was read and added successfully.
        /// </returns>
        public Corpus GetCorpusFromText(IEnumerable<string> fileNames,
            IDocumentTextReader reader, Encoding outputEncoding)
        {
            // NOTE(review): the corpus is always created with UTF-8, independent
            // of outputEncoding - confirm this is intentional.
            Corpus cor = new Corpus(Encoding.UTF8);

            foreach (var file in fileNames)
            {
                try
                {
                    using (Stream stream = new FileStream(file, FileMode.Open,
                                FileAccess.Read))
                    {
                        WEMDocument[] docs =
                            reader.ReadDocuments(stream, outputEncoding);

                        foreach (var doc in docs)
                        {
                            cor.AddDocument(doc);
                        }
                    }
                }
                catch (System.Exception ex)
                {
                    // Keep the best-effort contract (one bad file must not abort
                    // the whole batch), but make the failure observable.
                    System.Diagnostics.Trace.TraceWarning(
                        "GetCorpusFromText: skipping file '{0}': {1}: {2}",
                        file, ex.GetType().Name, ex.Message);
                    continue;
                }
            }

            return cor;
        }
Beispiel #2
0
 /// <summary>
 /// Reads the given files through <c>GetCorpusFromText</c> and returns the
 /// resulting corpus' documents as an array.
 /// </summary>
 /// <param name="fileNames">Paths of the files to read.</param>
 /// <param name="reader">Reader that turns an open stream into documents.</param>
 /// <param name="outputEncoding">Encoding forwarded to <c>GetCorpusFromText</c>.</param>
 /// <returns>The documents of the corpus that was built, as an array.</returns>
 public WEMDocument[] GetDocumentsFromText(
     IEnumerable<string> fileNames, IDocumentTextReader reader,
     Encoding outputEncoding)
 {
     // Build the corpus first, then snapshot its documents.
     var corpus = GetCorpusFromText(fileNames, reader, outputEncoding);
     var documents = corpus.Documents.ToArray();

     return documents;
 }