Exemple #1
0
        /// <summary>
        /// Asynchronously reads the given training files, extracts the text of every line that starts with
        /// <c># text =</c>, tokenizes each extracted sentence and marks its last token as a sentence end.
        /// </summary>
        /// <param name="trainDocuments">Paths of the files to read; they are read one after another, in list order.</param>
        /// <param name="ConvertCase">
        /// <c>EnumCase.ForceUpper</c> / <c>EnumCase.ForceLower</c> convert each sentence with invariant-culture
        /// casing before tokenization; any other value leaves the text unchanged.
        /// </param>
        /// <param name="sentenceDetector">Detector whose <c>SentenceDetectorTokenizer</c> is used to split each sentence into tokens.</param>
        /// <returns>
        /// One token list per sentence, in file order. Sentences for which the tokenizer produces no tokens
        /// (for example an empty <c># text =</c> line) are skipped instead of throwing.
        /// </returns>
        private static async Task<List<List<SentenceDetector.SentenceDetectorToken>>> ReadCorpusAsync(List<string> trainDocuments, EnumCase ConvertCase, SentenceDetector sentenceDetector)
        {
            // Marker that prefixes the raw sentence text (presumably CoNLL-U style comment lines - confirm against the corpus format).
            const string textMarker = "# text =";

            var allLines = new List<string>();

            foreach (var file in trainDocuments)
            {
                allLines.AddRange(await File.ReadAllLinesAsync(file));
            }

            var corpus = new List<List<SentenceDetector.SentenceDetectorToken>>();

            foreach (var line in allLines)
            {
                // Ordinal comparison: the marker is a fixed file-format token, not linguistic text.
                if (!line.StartsWith(textMarker, System.StringComparison.Ordinal))
                {
                    continue;
                }

                // Everything after the marker (i.e. after the first '=') is the sentence text.
                var sentence = line.Substring(textMarker.Length).Trim();

                if (ConvertCase == EnumCase.ForceUpper)
                {
                    sentence = sentence.ToUpperInvariant();
                }
                else if (ConvertCase == EnumCase.ForceLower)
                {
                    sentence = sentence.ToLowerInvariant();
                }

                var tokens = sentenceDetector.SentenceDetectorTokenizer(sentence)
                                             .Select(t => new SentenceDetector.SentenceDetectorToken(t.Value, t.Begin, t.End))
                                             .ToList();

                // A sentence without tokens has no final token to flag; skip it rather than
                // letting Last() throw InvalidOperationException and abort the whole load.
                if (tokens.Count == 0)
                {
                    continue;
                }

                tokens[tokens.Count - 1].IsSentenceEnd = true;
                corpus.Add(tokens);
            }

            return corpus;
        }
Exemple #2
0
        /// <summary>
        /// Reads the given training files, extracts the text of every line that starts with
        /// <c># text =</c>, tokenizes each extracted sentence and marks its last token as a sentence end.
        /// </summary>
        /// <param name="trainDocuments">Paths of the files to read; they are read one after another, in list order.</param>
        /// <param name="ConvertCase">
        /// <c>EnumCase.ForceUpper</c> / <c>EnumCase.ForceLower</c> convert each sentence with invariant-culture
        /// casing before tokenization; any other value leaves the text unchanged.
        /// </param>
        /// <param name="sentenceDetector">Detector whose <c>SentenceDetectorTokenizer</c> is used to split each sentence into tokens.</param>
        /// <returns>
        /// One token list per sentence, in file order. Sentences for which the tokenizer produces no tokens
        /// (for example an empty <c># text =</c> line) are skipped instead of throwing.
        /// </returns>
        private static List<List<SentenceDetector.SentenceDetectorToken>> ReadCorpus(List<string> trainDocuments, EnumCase ConvertCase, SentenceDetector sentenceDetector)
        {
            // Marker that prefixes the raw sentence text (presumably CoNLL-U style comment lines - confirm against the corpus format).
            const string textMarker = "# text =";

            var corpus = new List<List<SentenceDetector.SentenceDetectorToken>>();

            foreach (var file in trainDocuments)
            {
                foreach (var line in File.ReadAllLines(file))
                {
                    // Ordinal comparison: the marker is a fixed file-format token, not linguistic text.
                    if (!line.StartsWith(textMarker, System.StringComparison.Ordinal))
                    {
                        continue;
                    }

                    // Everything after the marker (i.e. after the first '=') is the sentence text.
                    var sentence = line.Substring(textMarker.Length).Trim();

                    if (ConvertCase == EnumCase.ForceUpper)
                    {
                        sentence = sentence.ToUpperInvariant();
                    }
                    else if (ConvertCase == EnumCase.ForceLower)
                    {
                        sentence = sentence.ToLowerInvariant();
                    }

                    var tokens = sentenceDetector.SentenceDetectorTokenizer(sentence)
                                                 .Select(t => new SentenceDetector.SentenceDetectorToken(t.Value, t.Begin, t.End))
                                                 .ToList();

                    // A sentence without tokens has no final token to flag; skip it rather than
                    // letting Last() throw InvalidOperationException and abort the whole load.
                    if (tokens.Count == 0)
                    {
                        continue;
                    }

                    tokens[tokens.Count - 1].IsSentenceEnd = true;
                    corpus.Add(tokens);
                }
            }

            return corpus;
        }