Ejemplo n.º 1
0
        /// <summary>
        /// Creates the converter and opens the IPA dictionary stored in the "IpaDic"
        /// folder that sits next to the executing assembly.
        /// </summary>
        public AnonyConverter()
        {
            // Assembly.Location is an empty string when the assembly was loaded from a
            // byte array or bundled into a single-file application. In that case
            // Path.GetDirectoryName cannot produce a usable directory, so fall back to
            // the application's base directory instead of failing in Path.Combine.
            var dllPath = System.Reflection.Assembly.GetExecutingAssembly().Location;
            var baseDir = string.IsNullOrEmpty(dllPath) ? null : Path.GetDirectoryName(dllPath);
            if (string.IsNullOrEmpty(baseDir))
            {
                baseDir = System.AppContext.BaseDirectory;
            }

            var dicPath = Path.Combine(baseDir, "IpaDic");

            this.tagger = MeCabIpaDicTagger.Create(dicPath);
        }
Ejemplo n.º 2
0
    /// <summary>
    /// Sample: runs a soft-wakachi parse on a fixed phrase, prints every node whose
    /// marginal probability is above 0.1, then prints up to five distinct surface
    /// forms ordered by descending probability.
    /// </summary>
    static void UseSoftWakachi()
    {
        Console.WriteLine("----------------------------------------------------------------------");
        Console.WriteLine("Example of using Soft-Wakachi :");
        Console.WriteLine();

        using (var ipaTagger = MeCabIpaDicTagger.Create())
        {
            // Temperature parameter handed to the soft-wakachi parse.
            var temperature = 1f / 800f / 2f;

            // Obtain the soft-wakachi solution.
            var candidates = ipaTagger.ParseSoftWakachi("本部長", temperature);

            // Only morpheme nodes with marginal probability > 0.1 are printed.
            var likely = candidates.Where(c => c.Prob > 0.1f);
            foreach (var candidate in likely)
            {
                Console.WriteLine($"表層形 :{candidate.Surface}");
                Console.WriteLine($"読み  :{candidate.Reading}");
                Console.WriteLine($"品詞  :{candidate.PartsOfSpeech}");
                Console.WriteLine($"周辺確率:{candidate.Prob}");
                Console.WriteLine();
            }

            // Additionally take up to five distinct surface forms, highest probability first.
            var ranked = candidates.OrderByDescending(c => c.Prob);
            var topSurfaces = ranked.Select(c => c.Surface).Distinct().Take(5);
            var joined = string.Join(",", topSurfaces);
            Console.WriteLine($"上位ワード:{joined}");
        }
    }
Ejemplo n.º 3
0
        /// <summary>
        /// Parses a fixed sentence with the IPA dictionary and checks the feature
        /// fields of the first and third returned nodes.
        /// </summary>
        public void ParseTest()
        {
            using (var ipaTagger = MeCabIpaDicTagger.Create("../../../../../dic/ipadic"))
            {
                var morphemes = ipaTagger.Parse("東京へ行け");

                // First node: expected to be the proper noun.
                var noun = morphemes[0];
                Assert.Equal("名詞", noun.PartsOfSpeech);
                Assert.Equal("固有名詞", noun.PartsOfSpeechSection1);
                Assert.Equal("地域", noun.PartsOfSpeechSection2);
                Assert.Equal("一般", noun.PartsOfSpeechSection3);
                Assert.Equal("*", noun.ConjugatedForm);
                Assert.Equal("*", noun.Inflection);
                Assert.Equal("東京", noun.OriginalForm);
                Assert.Equal("トウキョウ", noun.Reading);
                Assert.Equal("トーキョー", noun.Pronounciation);

                // Third node: expected to be the verb.
                var verb = morphemes[2];
                Assert.Equal("動詞", verb.PartsOfSpeech);
                Assert.Equal("自立", verb.PartsOfSpeechSection1);
                Assert.Equal("*", verb.PartsOfSpeechSection2);
                Assert.Equal("*", verb.PartsOfSpeechSection3);
                Assert.Equal("五段・カ行促音便", verb.ConjugatedForm);
                Assert.Equal("命令e", verb.Inflection);
                Assert.Equal("行く", verb.OriginalForm);
                Assert.Equal("イケ", verb.Reading);
                Assert.Equal("イケ", verb.Pronounciation);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Locates the "UniBagOfWords" folder anywhere under "Assets" and creates the
        /// shared tagger from the IPA dictionary inside it.
        /// </summary>
        /// <exception cref="DirectoryNotFoundException">
        /// Thrown when no directory named "UniBagOfWords" exists under "Assets".
        /// </exception>
        public void OneTimeSetUp()
        {
            // NOTE: the folder may have been moved, so locate it by the "UniBagOfWords" name
            // instead of relying on a fixed path.
            var topDirectoryPath = Directory.GetDirectories("Assets", "*", SearchOption.AllDirectories)
                                   .FirstOrDefault(path => Path.GetFileName(path) == "UniBagOfWords");

            // Without this guard a missing folder would silently yield the root-relative
            // path "/Scripts/dic/ipadic" and fail later with a misleading error.
            if (topDirectoryPath == null)
            {
                throw new DirectoryNotFoundException(
                    "Could not find a directory named \"UniBagOfWords\" under \"Assets\".");
            }

            var dicDir = $"{topDirectoryPath}/Scripts/dic/ipadic";

            _tagger = MeCabIpaDicTagger.Create(dicDir);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Creates the analyzer and opens a tagger on the dictionary directory given
        /// by <paramref name="setting"/>.
        /// </summary>
        /// <param name="setting">Settings object whose <c>DicDir</c> names the dictionary directory.</param>
        /// <exception cref="ArgumentNullException"><paramref name="setting"/> is null.</exception>
        /// <exception cref="ArgumentException"><c>setting.DicDir</c> is null or empty.</exception>
        public NMeCabMorphAnalyzer(NMeCabSetting setting)
        {
            // Fail with a descriptive argument error instead of a NullReferenceException.
            // ArgumentNullException derives from ArgumentException, so existing handlers
            // that catch ArgumentException keep working.
            if (setting == null)
            {
                throw new ArgumentNullException(nameof(setting));
            }

            if (string.IsNullOrEmpty(setting.DicDir))
            {
                throw new ArgumentException("dicDirが正しく設定されていません");
            }

            DicDir  = setting.DicDir;
            _tagger = MeCabIpaDicTagger.Create(DicDir);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Parses a single word with the IPA dictionary and verifies every feature
        /// field of the first returned node.
        /// </summary>
        public void IpaDic()
        {
            using (var ipaTagger = MeCabIpaDicTagger.Create("../../../../../dic/ipadic"))
            {
                var parsed = ipaTagger.Parse("すもも");
                var first = parsed[0];

                Assert.Equal("名詞", first.PartsOfSpeech);
                Assert.Equal("一般", first.PartsOfSpeechSection1);
                Assert.Equal("*", first.PartsOfSpeechSection2);
                Assert.Equal("*", first.PartsOfSpeechSection3);
                Assert.Equal("*", first.ConjugatedForm);
                Assert.Equal("*", first.Inflection);
                Assert.Equal("すもも", first.OriginalForm);
                Assert.Equal("スモモ", first.Reading);
                Assert.Equal("スモモ", first.Pronounciation);
            }
        }
Ejemplo n.º 7
0
    /// <summary>
    /// Sample: creates a tagger without naming a dictionary, parses a fixed phrase
    /// and prints surface, reading and part of speech for every node.
    /// </summary>
    static void UseNotBeAwareOfDictionaly()
    {
        Console.WriteLine("----------------------------------------------------------------------");
        Console.WriteLine("Example of using not be aware of dictionary :");
        Console.WriteLine();

        // Create the tagger instance (no dictionary path is supplied).
        using (var ipaTagger = MeCabIpaDicTagger.Create())
        {
            // Run the morphological analysis.
            var morphemes = ipaTagger.Parse("皇帝の新しい心");

            // Walk the returned morpheme nodes in order.
            foreach (var morpheme in morphemes)
            {
                Console.WriteLine($"表層形:{morpheme.Surface}");
                Console.WriteLine($"読み :{morpheme.Reading}");
                Console.WriteLine($"品詞 :{morpheme.PartsOfSpeech}");
                Console.WriteLine();
            }
        }
    }
Ejemplo n.º 8
0
    /// <summary>
    /// Sample: creates an IPAdic tagger from an explicit dictionary directory,
    /// parses a fixed phrase and prints surface, reading and part of speech for
    /// every node.
    /// </summary>
    static void UseWithUserPreparedDictionaly2()
    {
        Console.WriteLine("----------------------------------------------------------------------");
        Console.WriteLine("Example of using user prepared dictionaly 2 (for IPAdic) :");
        Console.WriteLine();

        // Create a tagger for the IPAdic format from the dictionary path.
        using (var ipaTagger = MeCabIpaDicTagger.Create("../../../../../dic/ipadic"))
        {
            // Run the morphological analysis.
            var morphemes = ipaTagger.Parse("皇帝の新しい心");

            // Walk the returned morpheme nodes in order.
            foreach (var morpheme in morphemes)
            {
                Console.WriteLine($"表層形:{morpheme.Surface}");

                // Reading and part of speech are exposed as individual feature properties.
                Console.WriteLine($"読み :{morpheme.Reading}");
                Console.WriteLine($"品詞 :{morpheme.PartsOfSpeech}");
                Console.WriteLine();
            }
        }
    }
Ejemplo n.º 9
0
    /// <summary>
    /// Sample: requests N-best parses of a fixed phrase and prints the nodes of
    /// the first five results, separating results with a dashed line.
    /// </summary>
    static void UseNBest()
    {
        Console.WriteLine("----------------------------------------------------------------------");
        Console.WriteLine("Example of using N-Best :");
        Console.WriteLine();

        using (var ipaTagger = MeCabIpaDicTagger.Create())
        {
            // Obtain the N-best solutions and keep at most the first five.
            var topFive = ipaTagger.ParseNBest("東京大学").Take(5);

            foreach (var solution in topFive)
            {
                // Walk the morpheme nodes of this solution in order.
                foreach (var morpheme in solution)
                {
                    Console.WriteLine($"表層形:{morpheme.Surface}");
                    Console.WriteLine($"読み :{morpheme.Reading}");
                    Console.WriteLine($"品詞 :{morpheme.PartsOfSpeech}");
                    Console.WriteLine();
                }

                Console.WriteLine("----------------");
            }
        }
    }
Ejemplo n.º 10
0
 /// <summary>
 /// Creates the wrapper by opening a tagger with the given dictionary arguments.
 /// </summary>
 /// <param name="dicDir">Dictionary directory passed to <c>MeCabIpaDicTagger.Create</c>.</param>
 /// <param name="userDirDics">Optional second argument forwarded unchanged; null by default.</param>
 public MeCabIpaDicWrapper(string dicDir, string[]?userDirDics = null)
     => IpaDicTagger = MeCabIpaDicTagger.Create(dicDir, userDirDics);
Ejemplo n.º 11
0
        /// <summary>
        /// Console benchmark: times tagger creation, plain file reading, parsing,
        /// parsing with feature access, and N-best parsing over the lines of the
        /// target file, printing elapsed time and managed memory after each phase,
        /// followed by size statistics for the target file.
        /// </summary>
        static void Main()
        {
            var dicDir     = "../../../../../dic/ipadic";
            var targetFile = "kokoro.txt";
            var encoding   = Encoding.UTF8;
            var sw         = new Stopwatch();

            // Wait for the user's signal to start.
            Console.WriteLine("Press Enter key to start.");
            Console.ReadLine();

            Console.WriteLine("\t\t\tProcessTime\tTotalMemory");

            // Preparation: measure how long opening the tagger takes.
            GC.Collect();
            sw.Start();
            var tagger = MeCabIpaDicTagger.Create(dicDir);

            // try/finally guarantees the tagger is disposed even when a later phase
            // throws (for example when the target file is missing).
            try
            {
                sw.Stop();
                Console.WriteLine("OpenTagger:\t\t{0:0.000}sec\t{1:#,000}byte",
                                  sw.Elapsed.TotalSeconds, GC.GetTotalMemory(false));

                // Baseline: reading the file only, no parsing.
                using (var reader = new StreamReader(targetFile, encoding))
                {
                    sw.Reset();
                    GC.Collect();
                    sw.Start();
                    for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                    {
                    }
                    sw.Stop();
                }
                Console.WriteLine("ReadLine:\t\t{0:0.000}sec\t{1:#,000}byte",
                                  sw.Elapsed.TotalSeconds, GC.GetTotalMemory(false));

                // Parsing (nodes only; the result is intentionally unused).
                using (var reader = new StreamReader(targetFile, encoding))
                {
                    sw.Reset();
                    GC.Collect();
                    sw.Start();
                    for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                    {
                        var node = tagger.Parse(line);
                    }
                    sw.Stop();
                }
                Console.WriteLine("ParseToNode:\t\t{0:0.000}sec\t{1:#,000}byte",
                                  sw.Elapsed.TotalSeconds, GC.GetTotalMemory(false));

                // Parsing plus reading individual feature properties of every node.
                using (var reader = new StreamReader(targetFile, encoding))
                {
                    sw.Reset();
                    GC.Collect();
                    sw.Start();
                    for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                    {
                        var node = tagger.Parse(line);
                        foreach (var item in node)
                        {
                            // The values are read only so the property getters are
                            // exercised inside the timed region.
                            var a = item.Surface;
                            var b = item.PartsOfSpeech;
                            var c = item.Reading;
                        }
                    }
                    sw.Stop();
                }
                Console.WriteLine("ParseToText:\t\t{0:0.000}sec\t{1:#,000}byte",
                                  sw.Elapsed.TotalSeconds, GC.GetTotalMemory(false));

                // N-best parsing, enumerating at most five solutions per line.
                using (var reader = new StreamReader(targetFile, encoding))
                {
                    sw.Reset();
                    GC.Collect();
                    sw.Start();
                    for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                    {
                        int i = 0;
                        foreach (var node in tagger.ParseNBest(line))
                        {
                            if (++i == 5)
                            {
                                break;
                            }
                        }
                    }
                    sw.Stop();
                }
                Console.WriteLine("ParseNBestToNode:\t{0:0.000}sec\t{1:#,000}byte",
                                  sw.Elapsed.TotalSeconds, GC.GetTotalMemory(false));

                // Statistics about the target file.
                using (var reader = new StreamReader(targetFile, encoding))
                {
                    long charCount = 0;
                    long lineCount = 0;
                    long wordCount = 0;
                    for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                    {
                        charCount += line.Length;
                        lineCount++;
                        var node = tagger.Parse(line);
                        wordCount += node.Length;
                    }
                    Console.WriteLine();
                    Console.WriteLine("Target: {0} {1:#,000}byte {2:#,000}char {3:#,000}line ({4:#,000}word)",
                                      targetFile, reader.BaseStream.Position, charCount, lineCount, wordCount);
                }
            }
            finally
            {
                tagger.Dispose();
            }

            // Tell the user the run has finished.
            Console.WriteLine();
            Console.WriteLine("Finish!");
            Console.WriteLine("Press Enter key to close.");
            Console.ReadLine();
        }
Ejemplo n.º 12
0
 /// <summary>
 /// Creates the converter with a tagger obtained from the parameterless
 /// <c>MeCabIpaDicTagger.Create()</c>.
 /// </summary>
 public KawazuConverter()
     => _tagger = MeCabIpaDicTagger.Create();
Ejemplo n.º 13
0
 /// <summary>
 /// Creates the converter with a tagger opened on the given dictionary path.
 /// </summary>
 /// <param name="dicPath">
 /// Value forwarded unchanged to <c>MeCabIpaDicTagger.Create</c>; null when omitted.
 /// </param>
 public KawazuConverter(string dicPath = null)
     => _tagger = MeCabIpaDicTagger.Create(dicPath);
Ejemplo n.º 14
0
 /// <summary>
 /// Creates the converter with a tagger opened on the given dictionary directory.
 /// </summary>
 /// <param name="dicdir">Value forwarded unchanged to <c>MeCabIpaDicTagger.Create</c>.</param>
 public KawazuConverter(string dicdir)
     => _tagger = MeCabIpaDicTagger.Create(dicdir);
Ejemplo n.º 15
0
    /// <summary>
    /// Sample: parses a fixed phrase into a lattice and prints, in turn, the best
    /// path, the second and third N-best results, the nodes grouped by start
    /// position whose probability is not at or below 0.001, and the cost stored on
    /// the lattice's EOS node.
    /// </summary>
    static void UseLattice()
    {
        Console.WriteLine("----------------------------------------------------------------------");
        Console.WriteLine("Example of using Lattice :");
        Console.WriteLine();

        using (var ipaTagger = MeCabIpaDicTagger.Create())
        {
            var parameters = new MeCabParam()
            {
                LatticeLevel = MeCabLatticeLevel.Two,
                Theta        = 1f / 800f / 2f
            };

            // Obtain the lattice for the phrase.
            var parsedLattice = ipaTagger.ParseToLattice("東京大学", parameters);

            // Print the best solution taken from the lattice.
            var bestPath = parsedLattice.GetBestNodes();
            foreach (var morpheme in bestPath)
            {
                Console.Write(morpheme.Surface);
                Console.CursorLeft = 10;
                Console.Write(morpheme.Feature);
                Console.WriteLine();
            }

            Console.WriteLine("--------");

            // Print the 2nd and 3rd best solutions taken from the lattice.
            var runnersUp = parsedLattice.GetNBestResults().Skip(1).Take(2);
            foreach (var solution in runnersUp)
            {
                foreach (var morpheme in solution)
                {
                    Console.Write(morpheme.Surface);
                    Console.CursorLeft = 10;
                    Console.Write(morpheme.Feature);
                    Console.WriteLine();
                }

                Console.WriteLine("----");
            }

            Console.WriteLine("--------");

            // Print morphemes grouped by start position; the last list entry is not visited.
            for (int start = 0; start < parsedLattice.BeginNodeList.Length - 1; start++)
            {
                var current = parsedLattice.BeginNodeList[start];
                while (current != null)
                {
                    // Nodes with probability at or below 0.001 are skipped.
                    if (!(current.Prob <= 0.001f))
                    {
                        Console.CursorLeft = start * 2;
                        Console.Write(current.Surface);
                        Console.CursorLeft = 10;
                        Console.Write(current.Prob.ToString("F3"));
                        Console.CursorLeft = 16;
                        Console.Write(current.Feature);
                        Console.WriteLine();
                    }

                    current = current.BNext;
                }
            }

            Console.WriteLine("--------");

            // Print only the cost held by the EOS node (the final cumulative cost).
            Console.WriteLine(parsedLattice.EosNode.Cost);
        }
    }