Exemplo n.º 1
0
        /// <summary>
        /// Exploratory dump: for each Japanese sentence of the Tanaka corpus, prints the
        /// tagger's parse of the whole sentence and then each node's surface and feature.
        /// </summary>
        /// <remarks>
        /// The method calls <c>Assert.Fail()</c> once 20 nodes have been visited in total
        /// (counted across sentences, including nodes whose feature is null).
        /// NOTE(review): this looks like a deliberate way to cut the run short and surface
        /// the console output - confirm before relying on this test's result.
        /// </remarks>
        public void Tanaka()
        {
            var sentences = new Tanaka(TestDataPaths.Tanaka, Encoding.UTF8).AllSentences();
            var n = 0;

            foreach (var rawSentence in sentences.Select(s => s.JapaneseSentence))
            {
                // Whole-sentence parse first, then the node-by-node breakdown.
                Console.WriteLine(tagger.Parse(rawSentence));
                foreach (var morpheme in tagger.ParseToNodes(rawSentence))
                {
                    var feature = morpheme.Feature;
                    if (feature != null)
                    {
                        Console.WriteLine($"{morpheme.Surface} {feature}");
                    }

                    // Every node counts towards the limit, printed or not.
                    n++;
                    if (n == 20)
                    {
                        Assert.Fail();
                    }
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Exploratory pass over the Tanaka corpus: gathers every distinct part-of-speech
        /// section found on regular entries, together with each sentence that introduced
        /// at least one section not seen before.
        /// </summary>
        /// <remarks>
        /// Nothing is returned or asserted; the results are only joined into local strings.
        /// NOTE(review): presumably those strings are inspected in a debugger - confirm.
        /// </remarks>
        public void Tanaka()
        {
            var corpusPath = Path.Combine(baseDir, @"corpora\examples.utf.gz");
            var corpus     = new Tanaka(corpusPath, Encoding.UTF8);

            var analyzerOptions = new MeCabParam
            {
                DicDir = Path.Combine(baseDir, @"mecab\unidic"),
            };
            var analyzer = new MeCabUnidic(analyzerOptions);

            var japaneseSentences  = corpus.AllSentences().Select(s => s.JapaneseSentence);
            var seenSections       = new HashSet <string>();
            var novelSentences     = new HashSet <string>();

            foreach (var text in japaneseSentences)
            {
                // Materialize the regular entries before walking their sections.
                var regularEntries = analyzer.ParseToEntries(text)
                                     .Where(entry => entry.IsRegular)
                                     .ToList();

                foreach (var entry in regularEntries)
                {
                    foreach (var section in entry.PartOfSpeechSections)
                    {
                        // HashSet.Add returns true only for a section not seen so far.
                        if (seenSections.Add(section))
                        {
                            novelSentences.Add(text);
                        }
                    }
                }
            }

            var sentenceDump = string.Join("\n", novelSentences);
            var sectionDump  = string.Join("\n", seenSections);

            // Intentional no-op statement kept from the original (presumably a breakpoint anchor).
            ;
        }
 /// <summary>
 /// Creates the corpora bundle by storing each supplied corpus in its member.
 /// </summary>
 /// <param name="bec">Value assigned to <c>Bec</c>.</param>
 /// <param name="tanaka">Value assigned to <c>Tanaka</c>.</param>
 /// <param name="jesc">Value assigned to <c>Jesc</c>.</param>
 public ReplCorpora(
     BasicExpressionsCorpus bec,
     Tanaka tanaka,
     JESC jesc)
 {
     this.Bec    = bec;
     this.Tanaka = tanaka;
     this.Jesc   = jesc;
 }
Exemplo n.º 4
0
    /// <summary>
    /// Runs the update: downloads the file to <c>newPath</c> unless it already exists,
    /// constructs a <c>Corpus</c> from it on a background task, then replaces the old
    /// file (and the old cache, when <c>oldCachePath</c> is set) with the new ones.
    /// </summary>
    /// <remarks>
    /// <c>CurrentStatus</c> moves through Downloading (only when a download happens),
    /// Processing, and finally Success or Failure. Exceptions are always rethrown.
    /// A failed download deletes <c>newPath</c> and leaves the status unchanged.
    /// </remarks>
    protected override async Task Start()
    {
        var alreadyDownloaded = File.Exists(newPath);
        if (!alreadyDownloaded)
        {
            try
            {
                this.CurrentStatus = new UpdateStatus.DownloadingStatus(null);
                await this.httpClient.GetToFileAsync(url, newPath);
            }
            catch
            {
                // Remove whatever was written so the next run downloads again.
                File.Delete(newPath);
                throw;
            }
        }

        this.CurrentStatus = new UpdateStatus.ProcessingStatus();

        try
        {
            await Task.Run(() =>
            {
                var source = new Tanaka(newPath, Encoding.UTF8);

                // The instance is created and disposed right away; only the side effects
                // of construction are wanted here.
                // NOTE(review): presumably construction writes the cache at newCachePath - confirm.
                using (new Corpus(source.AllSentences, analyzer, newCachePath))
                {
                }
            });

            // Swap the new data file into the place of the old one.
            File.Delete(oldPath);
            File.Move(newPath, oldPath);

            // Same swap for the cache, but only when an old cache path is configured.
            if (oldCachePath != null)
            {
                File.Delete(oldCachePath);
                File.Move(newCachePath, oldCachePath);
            }

            this.CurrentStatus = new UpdateStatus.SuccessStatus();
        }
        catch
        {
            // Report the failure first, then clean up the new cache file and rethrow.
            this.CurrentStatus = new UpdateStatus.FailureStatus("Cache creation failed");
            if (oldCachePath != null)
            {
                File.Delete(newCachePath);
            }
            throw;
        }
    }
Exemplo n.º 5
0
 /// <summary>
 /// Creates a data source that keeps the given <c>Tanaka</c> instance in its
 /// <c>tanaka</c> field.
 /// </summary>
 /// <param name="tanaka">The instance to store.</param>
 public TanakaCorpusDataSource(Tanaka tanaka) => this.tanaka = tanaka;