/// <summary>
/// Exploratory test: for each Japanese sentence of the Tanaka corpus, prints the
/// tagger's parse of the whole sentence and then one line per morpheme that has a
/// non-null feature (surface followed by feature).
/// </summary>
/// <remarks>
/// The test calls <c>Assert.Fail()</c> once 20 morphemes have been visited
/// (morphemes without a feature are counted too). NOTE(review): presumably this is
/// done to cap the run and surface the console output in the test runner - confirm.
/// </remarks>
public void Tanaka()
{
    var sentences = new Tanaka(TestDataPaths.Tanaka, Encoding.UTF8).AllSentences();
    var visitedMorphemes = 0;

    foreach (var rawSentence in sentences.Select(s => s.JapaneseSentence))
    {
        // Full parse of the sentence first, then the per-morpheme breakdown.
        Console.WriteLine(tagger.Parse(rawSentence));

        foreach (var morpheme in tagger.ParseToNodes(rawSentence))
        {
            var feature = morpheme.Feature;
            if (feature != null)
            {
                Console.WriteLine($"{morpheme.Surface} {feature}");
            }

            // Every morpheme counts towards the limit, with or without a feature.
            visitedMorphemes++;
            if (visitedMorphemes == 20)
            {
                Assert.Fail();
            }
        }
    }
}
/// <summary>
/// Exploratory routine: walks every Japanese sentence of the Tanaka corpus, analyzes
/// it with MeCab/UniDic and collects (a) every distinct part-of-speech section seen on
/// regular entries and (b) every sentence that contributed at least one section not
/// seen before.
/// </summary>
/// <remarks>
/// The two joined strings built at the end are not returned or asserted on.
/// NOTE(review): presumably they exist to be inspected in a debugger - confirm.
/// </remarks>
public void Tanaka()
{
    // Path components are passed separately so the platform's directory separator is
    // used instead of a hard-coded backslash.
    var tanaka = new Tanaka(Path.Combine(baseDir, "corpora", "examples.utf.gz"), Encoding.UTF8);
    var meCab = new MeCabUnidic(new MeCabParam
    {
        DicDir = Path.Combine(baseDir, "mecab", "unidic"),
    });

    var sentences = tanaka.AllSentences();
    var features = new HashSet<string>();
    var sentencesFiltered = new HashSet<string>();

    foreach (var rawSentence in sentences.Select(s => s.JapaneseSentence))
    {
        var regularEntries = meCab.ParseToEntries(rawSentence).Where(e => e.IsRegular);
        foreach (var word in regularEntries)
        {
            foreach (var section in word.PartOfSpeechSections)
            {
                // HashSet.Add returns true only for a section not seen before; such a
                // sentence is kept as an example that introduced it.
                if (features.Add(section))
                {
                    sentencesFiltered.Add(rawSentence);
                }
            }
        }
    }

    var ss = string.Join("\n", sentencesFiltered);
    var xx = string.Join("\n", features);
}
/// <summary>
/// Creates the corpora bundle by storing the three supplied corpora in
/// <c>Bec</c>, <c>Tanaka</c> and <c>Jesc</c>.
/// </summary>
/// <param name="bec">Value assigned to <c>Bec</c>.</param>
/// <param name="tanaka">Value assigned to <c>Tanaka</c>.</param>
/// <param name="jesc">Value assigned to <c>Jesc</c>.</param>
public ReplCorpora(
    BasicExpressionsCorpus bec,
    Tanaka tanaka,
    JESC jesc)
{
    this.Bec = bec;
    this.Tanaka = tanaka;
    this.Jesc = jesc;
}
/// <summary>
/// Runs the update: fetches the corpus file to <c>newPath</c> when it is not already
/// there, builds a <c>Corpus</c> over it (passing <c>newCachePath</c>), then replaces
/// <c>oldPath</c> (and, when <c>oldCachePath</c> is set, the old cache) with the new files.
/// </summary>
/// <remarks>
/// <c>CurrentStatus</c> is moved through Downloading, Processing and finally Success or
/// Failure. Any exception is rethrown to the caller after cleanup: a failed download
/// deletes <c>newPath</c>; a failed processing step sets the failure status and deletes
/// <c>newCachePath</c> when <c>oldCachePath</c> is not null.
/// </remarks>
protected override async Task Start()
{
    var alreadyDownloaded = File.Exists(newPath);
    if (!alreadyDownloaded)
    {
        try
        {
            this.CurrentStatus = new UpdateStatus.DownloadingStatus(null);
            await this.httpClient.GetToFileAsync(url, newPath);
        }
        catch
        {
            // Remove whatever was written to the target path before propagating.
            File.Delete(newPath);
            throw;
        }
    }

    this.CurrentStatus = new UpdateStatus.ProcessingStatus();
    try
    {
        await Task.Run(() =>
        {
            var downloadedCorpus = new Tanaka(newPath, Encoding.UTF8);

            // The instance is constructed and immediately disposed; nothing else is
            // done with it here.
            using (var builtCorpus = new Corpus(downloadedCorpus.AllSentences, analyzer, newCachePath))
            {
            }
        });

        // Swap the freshly downloaded file into the place of the old one.
        File.Delete(oldPath);
        File.Move(newPath, oldPath);

        if (oldCachePath != null)
        {
            File.Delete(oldCachePath);
            File.Move(newCachePath, oldCachePath);
        }

        this.CurrentStatus = new UpdateStatus.SuccessStatus();
    }
    catch
    {
        this.CurrentStatus = new UpdateStatus.FailureStatus("Cache creation failed");

        if (oldCachePath != null)
        {
            File.Delete(newCachePath);
        }

        throw;
    }
}
/// <summary>
/// Creates a data source that keeps the given corpus in its <c>tanaka</c> field.
/// </summary>
/// <param name="tanaka">The corpus instance stored by this data source.</param>
public TanakaCorpusDataSource(Tanaka tanaka) => this.tanaka = tanaka;