Esempio n. 1
0
        /// <summary>
        /// Builds the word and kanji lookup tables, creates the MeCab tagger, then initializes
        /// the designer components and subscribes to the furigana label selection events.
        /// </summary>
        public Form1()
        {
            // Index each dictionary entry under every one of its kanji spellings and readings.
            // When two entries share a text, the later entry replaces the earlier one.
            japDict = new Dictionary<string, IJapaneseEntry>();
            var wordEntries = new JapaneseDictionary().GetEntries().ToArray();

            foreach (var entry in wordEntries)
            {
                foreach (IKanji kanjiForm in entry.Kanjis)
                {
                    japDict[kanjiForm.Text] = entry;
                }

                foreach (Wacton.Desu.Japanese.IReading reading in entry.Readings)
                {
                    japDict[reading.Text] = entry;
                }
            }

            // Index each kanji entry by its literal character.
            kanjiDict = new Dictionary<string, IKanjiEntry>();
            var kanjiEntries = new KanjiDictionary().GetEntries().ToArray();

            foreach (var entry in kanjiEntries)
            {
                kanjiDict[entry.Literal] = entry;
            }

            tagger = MeCabTagger.Create();

            InitializeComponent();

            // Both labels share the same selection-changed handler.
            furiganaKanaLabel.SelectedTextChangedEvent += FuriganaKanaLabel_SelectedTextChangedEvent;
            furiganaRomajiLabel.SelectedTextChangedEvent += FuriganaKanaLabel_SelectedTextChangedEvent;
        }
Esempio n. 2
0
        /// <summary>
        /// Creates a tagger for <c>dicDir</c>, disposes it immediately, and returns the
        /// already-disposed instance.
        /// </summary>
        /// <returns>The tagger on which <c>Dispose</c> has already been called.</returns>
        public MeCabTagger CreateAndDisposeTagger()
        {
            MeCabTagger disposedTagger = MeCabTagger.Create(this.dicDir);
            disposedTagger.Dispose();

            return disposedTagger;
        }
Esempio n. 3
0
        /// <summary>
        /// Runs morphological analysis on the contents of <c>FileName</c> and saves the result
        /// to <c>MecabFileName</c>, one "surface,feature" line per written node.
        /// </summary>
        public void Execute()
        {
            var allText    = File.ReadAllText(FileName);
            var mecabParam = new MeCabParam
            {
                DicDir = Path.Combine(AppDomain.CurrentDomain.BaseDirectory,
                                      @"..\..\..\Chapter04.Core\dic\ipadic")
            };

            // The tagger is disposable; release it deterministically instead of leaking it.
            // It is created before the writer so a failed Create does not truncate the output file.
            using (MeCabTagger meCabTagger = MeCabTagger.Create(mecabParam))
            using (var writer = new StreamWriter(MecabFileName, false))
            {
                for (MeCabNode node = meCabTagger.ParseToNode(allText); node != null; node = node.Next)
                {
                    // Only nodes with a positive CharType are written.
                    if (node.CharType > 0)
                    {
                        writer.WriteLine(node.Surface + "," + node.Feature);
                    }
                }

                // No explicit Flush is needed: disposing the writer flushes it.
            }
        }
Esempio n. 4
0
    /// <summary>
    /// Tokenizes each sentence with MeCab, brackets its tokens with the <c>BOS</c> and
    /// <c>EOS</c> markers, and then adds one <c>Triplet</c> to <c>triplets</c> for every
    /// consecutive, non-overlapping group of three tokens.
    /// </summary>
    /// <param name="sentences">Sentences to tokenize.</param>
    public void Load(List<string> sentences)
    {
        var tokens = new List<string>();

        using (var tagger = MeCabTagger.Create())
        {
            foreach (var sentence in sentences)
            {
                var head = tagger.ParseToNode(sentence) as MeCabNode;
                tokens.Add(BOS);

                // The first node is skipped; the surface of every following node is recorded.
                for (var node = head.Next; node != null; node = node.Next)
                {
                    tokens.Add(node.Surface);
                }

                tokens.Add(EOS);
            }
        }

        // Leftover tokens that do not fill a complete group of three are ignored.
        int groupCount = tokens.Count / 3;
        for (int group = 0; group < groupCount; group++)
        {
            int start = 3 * group;
            triplets.Add(new Triplet(new string[] { tokens[start], tokens[start + 1], tokens[start + 2] }));
        }
    }
Esempio n. 5
0
 /// <summary>
 /// Lazily enumerates the node chain produced by parsing <paramref name="text"/>: starts at
 /// the node returned by <c>ParseToNode</c> and follows <c>Next</c> until it is null.
 /// </summary>
 /// <param name="tagger">Tagger used to parse the text.</param>
 /// <param name="text">Text to parse.</param>
 /// <returns>Every node of the chain, in order.</returns>
 public static IEnumerable<MeCabNode> ParseToNodes(this MeCabTagger tagger, string text)
 {
     var current = tagger.ParseToNode(text);

     while (current != null)
     {
         yield return current;
         current = current.Next;
     }
 }
Esempio n. 6
0
        /// <summary>
        /// Analyzes the selected UTF-8 text file with MeCab and shows, for every checked part of
        /// speech, each distinct base form with its occurrence count, most frequent first.
        /// When no file is selected the click is forwarded to <c>ClearBtn_Click</c>.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event arguments.</param>
        private void ExecAnalyzeBtn_Click(object sender, EventArgs e)
        {
            // Index of the feature field stored as the base form (原形).
            const int baseFormIndex = 6;

            if (string.IsNullOrEmpty(fileSelectTextBox.Text))
            {
                ClearBtn_Click(sender, e);
                return;
            }

            string text;
            using (var reader = new StreamReader(fileSelectTextBox.Text, Encoding.UTF8))
            {
                text = reader.ReadToEnd();
            }

            // OfType skips any child control that is not a CheckBox instead of throwing
            // InvalidCastException.
            var targetHinshi = new HashSet<string>();
            foreach (var item in HinshiGroupBox.Controls.OfType<CheckBox>())
            {
                if (item.Checked)
                {
                    targetHinshi.Add(item.Text);
                }
            }

            // The list keeps first-seen order; the dictionary gives O(1) lookup by base form
            // in place of a linear scan per token.
            var ordered    = new List<AnalyzeResult>();
            var byBaseForm = new Dictionary<string, AnalyzeResult>();

            // Dispose the tagger once parsing is done instead of leaking one per click.
            using (var mecab = MeCabTagger.Create())
            {
                for (var node = mecab.ParseToNode(text); node != null; node = node.Next)
                {
                    if (string.IsNullOrWhiteSpace(node.Surface))
                    {
                        continue;
                    }

                    // Split the feature string once per node.
                    var features = node.Feature.Split(',');
                    if (!targetHinshi.Contains(features[0]))
                    {
                        continue;
                    }

                    // Fall back to the surface when the feature string has no base-form field.
                    var wordOriginal = features.Length > baseFormIndex
                        ? features[baseFormIndex]
                        : node.Surface;

                    AnalyzeResult existing;
                    if (byBaseForm.TryGetValue(wordOriginal, out existing))
                    {
                        existing.出現回数 += 1;
                    }
                    else
                    {
                        var created = new AnalyzeResult
                        {
                            表層形  = node.Surface,
                            品詞   = features[0],
                            原形   = wordOriginal,
                            出現回数 = 1
                        };
                        byBaseForm.Add(wordOriginal, created);
                        ordered.Add(created);
                    }
                }
            }

            // Descending by occurrence count.
            ordered.Sort((a, b) => b.出現回数.CompareTo(a.出現回数));
            analyzeResultDGV.DataSource = ordered;
        }
Esempio n. 7
0
        /// <summary>
        /// Resolves the dictionary directory for <c>Dic</c> through <c>Helper.SeekDicDir</c>,
        /// writes it to the console, and creates the tagger from it.
        /// </summary>
        public void GlobalSetup()
        {
            var dictionaryDirectory = Helper.SeekDicDir(this.Dic);
            Console.WriteLine($"Open Dictionaly: {dictionaryDirectory}");

            this.tagger = MeCabTagger.Create(dictionaryDirectory);
        }
Esempio n. 8
0
        /// <summary>
        /// Parses <paramref name="str"/> with MeCab and concatenates one piece of text per node
        /// whose <c>CharType</c> is positive: feature field 7 when the feature string has at
        /// least 9 comma-separated fields, otherwise the node's surface.
        /// </summary>
        /// <param name="dicPathFromExe">Dictionary directory passed to MeCab.</param>
        /// <param name="str">Text to convert.</param>
        /// <returns>The concatenated text.</returns>
        /// <remarks>
        /// NOTE(review): field 7 is presumably the reading column of the dictionary; confirm
        /// against the dictionary in use.
        /// </remarks>
        public static String GetHiragana(String dicPathFromExe, String str)
        {
            MeCabParam param = new MeCabParam();
            param.DicDir = dicPathFromExe;

            // A builder avoids re-copying the accumulated string on every node.
            var hiragana = new System.Text.StringBuilder();

            // Dispose the tagger when done instead of leaking one per call.
            using (MeCabTagger tagger = MeCabTagger.Create(param))
            {
                for (MeCabNode node = tagger.ParseToNode(str); node != null; node = node.Next)
                {
                    if (node.CharType <= 0)
                    {
                        continue;
                    }

                    String[] fields = node.Feature.Split(',');
                    hiragana.Append(fields.Length < 9 ? node.Surface : fields[7]);
                }
            }

            return hiragana.ToString();
        }
Esempio n. 9
0
 /// <summary>
 /// Restores the length brain, word brain and configuration from their JSON files when the
 /// files exist, substitutes empty defaults for anything still null, and creates the
 /// tagger with its output format set to "wakati".
 /// </summary>
 /// <param name="citroid">Not used by this method.</param>
 /// <returns>A task; the body contains no await, so all work runs synchronously.</returns>
 public async Task InitializeAsync(ICitroid citroid)
 {
     const string lengthBrainFile = "lengthBrain.json";
     const string wordBrainFile   = "wordBrain.json";
     const string configFile      = "NazoBrainConfig.json";

     // Load whatever has been persisted.
     if (File.Exists(lengthBrainFile))
     {
         var json = File.ReadAllText(lengthBrainFile);
         lengthBrain = JsonConvert.DeserializeObject<List<int>>(json);
     }
     if (File.Exists(wordBrainFile))
     {
         var json = File.ReadAllText(wordBrainFile);
         wordBrain = JsonConvert.DeserializeObject<Dictionary<string, Word>>(json);
     }
     if (File.Exists(configFile))
     {
         var json = File.ReadAllText(configFile);
         config = JsonConvert.DeserializeObject<NazoBrainConfig>(json);
     }

     // Anything that is still null gets an empty default.
     lengthBrain = lengthBrain ?? new List<int>();
     wordBrain   = wordBrain ?? new Dictionary<string, Word>();
     config      = config ?? new NazoBrainConfig();

     _tagger = MeCabTagger.Create();
     _tagger.OutPutFormatType = "wakati";
 }
Esempio n. 10
0
        /// <summary>
        /// Splits a document into words (wakachi-gaki) using MeCab.
        /// </summary>
        /// <param name="rawDocument">Text to analyze.</param>
        /// <param name="containIsNotPhrageStart">
        /// When true, tokens whose field directly after the surface equals "記号" are dropped too.
        /// </param>
        /// <returns>
        /// The surface of every remaining token, in order. Evaluation is deferred until the
        /// sequence is enumerated.
        /// </returns>
        private IEnumerable<string> SplitDocumentsWithMeCab(string rawDocument, bool containIsNotPhrageStart)
        {
            MeCabParam param = new MeCabParam();
            param.DicDir = @"lib\MeCab\dic\ipadic";

            // Run the analysis and release the tagger before yielding anything, so the tagger
            // is not leaked and is never held open across yields.
            // The tab between surface and features becomes a comma so one Split covers both.
            string result;
            using (MeCabTagger t = MeCabTagger.Create(param))
            {
                result = t.Parse(rawDocument).Replace("\t", ",");
            }

            var results = result.Split(new string[] { "\r\n" }, StringSplitOptions.None);

            foreach (var feature in results)
            {
                // Break the MeCab output line into its elements.
                var featureElements = feature.Split(',');
                var surface         = featureElements[0];

                // Drop the EOS terminator and blank lines.
                if ("EOS" == surface || String.IsNullOrWhiteSpace(surface))
                {
                    continue;
                }

                // Filters that were disabled in the original code:
                // || containIsNotPhrageStart && "助詞" == featureElements[1]
                // || containIsNotPhrageStart && "助動詞" == featureElements[1]
                // The length guard avoids an index error on a line that has no feature fields.
                if (containIsNotPhrageStart &&
                    featureElements.Length > 1 &&
                    "記号" == featureElements[1])
                {
                    continue;
                }

                // Emit the token.
                yield return surface;
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Creates the shared tagger from the "unidic" dictionary and loads the custom
        /// dictionary from "customizeDict.txt".
        /// </summary>
        /// <remarks>
        /// Each usable line has the form "key value", separated by a single space. Blank lines
        /// and lines with fewer than two fields are ignored. When a key occurs more than once
        /// the last occurrence wins, rather than aborting initialization with an
        /// <see cref="ArgumentException"/>.
        /// </remarks>
        public static void Init()
        {
            var parameter = new MeCabParam
            {
                DicDir       = "unidic", // dictionary path
                LatticeLevel = MeCabLatticeLevel.Zero,
            };

            _tagger = MeCabTagger.Create(parameter);

            // File.ReadLines splits on \r, \n and \r\n alike, so the file loads correctly
            // whichever platform wrote it. Splitting on Environment.NewLine does not.
            var customizeDict = new Dictionary<string, string>();
            foreach (var item in File.ReadLines("customizeDict.txt"))
            {
                if (string.IsNullOrWhiteSpace(item))
                {
                    continue;
                }

                var array = item.Split(' ');
                if (array.Length < 2)
                {
                    continue;
                }

                customizeDict[array[0]] = array[1];
            }

            // Publish only the fully built dictionary.
            _customizeDict = customizeDict;
        }
Esempio n. 12
0
        /// <summary>
        /// Main entry point of the program. Reads the first *.txt file of the talk directory,
        /// tokenizes every message with MeCab, and prints each base form that occurs more
        /// than once together with its count, most frequent first, as CSV.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var service       = new AnalyzeService();
            var talkDirectory = new DirectoryInfo(@"..\..\..\LineAnalyze.Domain\talk");
            var file          = talkDirectory.EnumerateFiles("*.txt").FirstOrDefault();

            // FirstOrDefault returns null when the directory holds no matching file; report
            // that instead of failing with a NullReferenceException.
            if (file == null)
            {
                System.Console.Error.WriteLine("No *.txt file found in " + talkDirectory.FullName);
                return;
            }

            var text       = File.ReadAllText(file.FullName);
            var talks      = service.ParseTalk(text);
            var totalWords = new List<Word>();

            // Dispose the tagger once every message has been parsed.
            using (var meCabTagger = MeCabTagger.Create())
            {
                foreach (var talk in talks)
                {
                    var words = service.ParseText(meCabTagger, talk.Message);
                    totalWords.AddRange(words);
                }
            }

            var enumerable = totalWords.GroupBy(w => w.Base)
                             .Select(x => new
                             {
                                 RealName = x.Key,
                                 Count    = x.Count()
                             })
                             .Where(x => x.Count > 1)
                             .OrderByDescending(x => x.Count);

            System.Console.WriteLine("単語,出現回数");
            foreach (var data in enumerable)
            {
                System.Console.WriteLine(data.RealName + "," + data.Count);
            }
        }
Esempio n. 13
0
        /// <summary>
        /// Parses a fixed sample sentence and prints "surface&lt;TAB&gt;feature" for every node
        /// whose <c>CharType</c> is positive. Any exception message is written to the console,
        /// and the method always waits for a key press before returning.
        /// </summary>
        public void Run()
        {
            try
            {
                string sentence = "ユーザが本明細書において提供れるような方法";

                MeCabParam param = new MeCabParam();
                param.DicDir = @"..\..\dic\ipadic";

                // Dispose the tagger when parsing is finished instead of leaking it.
                using (MeCabTagger t = MeCabTagger.Create(param))
                {
                    for (MeCabNode node = t.ParseToNode(sentence); node != null; node = node.Next)
                    {
                        if (node.CharType > 0)
                        {
                            Console.WriteLine(node.Surface + "\t" + node.Feature);
                        }
                    }
                }

                Console.WriteLine();
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
            finally
            {
                // Keep the console window open until the user presses a key.
                Console.Read();
            }
        }
Esempio n. 14
0
        /// <summary>
        /// Entry point: creates the MeCab tagger, initializes the database layer, opens two
        /// connections with one command each, and calls <c>RunControlBrowser</c> between
        /// <c>StartHttpServer</c> and <c>StopHttpServer</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Initialize the MeCab morphological analyzer.
            var tokenizerSettings = new MeCabParam { DicDir = @"c:\dic\mecab-ipadic-neologd" };
            MeCab = MeCabTagger.Create(tokenizerSettings);

            // Initialize the database layer.
            Db.Initialize();

            // Connect to the database and start controlling the browser.
            using (var con = Db.CreateConnection())
            using (var conForSearch = Db.CreateConnection())
            {
                con.Open();
                conForSearch.Open();

                Cmd          = con.CreateCommand();
                CmdForSearch = conForSearch.CreateCommand();

                var cancellation = new CancellationTokenSource();
                var serverTask   = StartHttpServer(cancellation.Token);

                RunControlBrowser();
                StopHttpServer(serverTask, cancellation);
            }
        }
Esempio n. 15
0
        /// <summary>
        /// Checks how spaces around known and unknown words show up in the parsed nodes: the
        /// surface and Length exclude the space, while EPos and RLength take the values
        /// asserted below when a space precedes the word.
        /// </summary>
        public void SpaceProcessing()
        {
            using var tagger = MeCabTagger.Create("../../../../../dic/ipadic");

            // Space only
            {
                var nodes = tagger.Parse(" ");
                Assert.Empty(nodes);
            }

            // Known word, trailing space
            {
                var nodes = tagger.Parse("ようこそ ");
                Assert.Single(nodes);
                var word = nodes[0];
                Assert.Equal("ようこそ", word.Surface);
                Assert.Equal(0, word.BPos);
                Assert.Equal(4, word.EPos);
                Assert.Equal(4, word.Length);
                Assert.Equal(4, word.RLength);
            }

            // Unknown word, trailing space
            {
                var nodes = tagger.Parse("XXXYYYZZZ ");
                Assert.Single(nodes);
                var word = nodes[0];
                Assert.Equal("XXXYYYZZZ", word.Surface);
                Assert.Equal(0, word.BPos);
                Assert.Equal(9, word.EPos);
                Assert.Equal(9, word.Length);
                Assert.Equal(9, word.RLength);
            }

            // Known word, leading space
            {
                var nodes = tagger.Parse(" ようこそ");
                Assert.Single(nodes);
                var word = nodes[0];
                Assert.Equal("ようこそ", word.Surface);
                Assert.Equal(1 - 1, word.BPos);
                Assert.Equal(5, word.EPos);
                Assert.Equal(4, word.Length);
                Assert.Equal(4 + 1, word.RLength);
            }

            // Unknown word, leading space
            {
                var nodes = tagger.Parse(" XXXYYYZZZ");
                Assert.Single(nodes);
                var word = nodes[0];
                Assert.Equal("XXXYYYZZZ", word.Surface);
                Assert.Equal(1 - 1, word.BPos);
                Assert.Equal(10, word.EPos);
                Assert.Equal(9, word.Length);
                Assert.Equal(10, word.RLength);
            }

            // Combined: spaces before, between and after two words
            {
                var nodes = tagger.Parse(" ようこそ XXXYYYZZZ ");
                Assert.Equal(2, nodes.Length);

                var known = nodes[0];
                Assert.Equal("ようこそ", known.Surface);
                Assert.Equal(1 - 1, known.BPos);
                Assert.Equal(5, known.EPos);
                Assert.Equal(4, known.Length);
                Assert.Equal(4 + 1, known.RLength);

                var unknown = nodes[1];
                Assert.Equal("XXXYYYZZZ", unknown.Surface);
                Assert.Equal(6 - 1, unknown.BPos);
                Assert.Equal(15, unknown.EPos);
                Assert.Equal(9, unknown.Length);
                Assert.Equal(9 + 1, unknown.RLength);
            }
        }
Esempio n. 16
0
 /// <summary>
 /// Overwrites four settings on the supplied parameters (lattice level Zero, "wakati"
 /// output, AllMorphs off, Partial on) and creates the tagger from them.
 /// </summary>
 /// <param name="mecabParam">
 /// Parameter object. It is modified in place, so the caller sees the overwritten settings.
 /// </param>
 public MeCab(MeCabParam mecabParam)
 {
     mecabParam.OutputFormatType = "wakati";
     mecabParam.LatticeLevel     = MeCabLatticeLevel.Zero;
     mecabParam.Partial          = true;
     mecabParam.AllMorphs        = false;

     tagger = MeCabTagger.Create(mecabParam);
 }
Esempio n. 17
0
        /// <summary>
        /// Regression check: parsing <c>OccurredNullSample</c> into Ve words must enumerate to
        /// the end without throwing.
        /// </summary>
        public void VeParseWordsBugFixOne()
        {
            const string? testString = OccurredNullSample;

            // Dispose the tagger when the test ends instead of leaking it.
            using var tagger = MeCabTagger.Create();

            // The results are discarded; the loop only forces full enumeration.
            foreach (var _ in tagger.ParseToNodes(testString).ParseVeWords())
            {
            }
        }
Esempio n. 18
0
        /// <summary>
        /// Parses <c>Example4</c> into Ve words and writes each word's text to the trace output.
        /// </summary>
        public void VeWordParserDebug()
        {
            string sentence = Example4;

            // Dispose the tagger once enumeration has finished instead of leaking it.
            using (var tagger = MeCabTagger.Create())
            {
                foreach (var veWord in tagger.ParseToNodes(sentence).ParseVeWords())
                {
                    Trace.WriteLine(veWord.Word);
                }
            }
        }
Esempio n. 19
0
 /// <summary>
 /// Runs 99 parallel iterations (indices 0 through 98); each one creates two taggers for
 /// <c>dicDir</c> and then disposes them in creation order.
 /// </summary>
 public void CreateMultiTagger()
 {
     Parallel.For(0, 99, _ =>
     {
         MeCabTagger first  = MeCabTagger.Create(dicDir);
         MeCabTagger second = MeCabTagger.Create(dicDir);

         first.Dispose();
         second.Dispose();
     });
 }
Esempio n. 20
0
        /// <summary>
        /// Creates a new tagger that uses the "ipadic" dictionary directory.
        /// </summary>
        /// <returns>A newly created tagger; every call creates another instance.</returns>
        public static MeCabTagger GetParser()
        {
            var settings = new MeCabParam { DicDir = @"ipadic" };

            return MeCabTagger.Create(settings);
        }
Esempio n. 21
0
        /// <summary>
        /// Checks the parse of "すもももももももものうち": the seven surfaces and feature
        /// strings, the IsBest flags, the Stat values including the BOS/EOS neighbours, and
        /// that following <c>Next</c> from the first node visits the array elements in order.
        /// </summary>
        public void OneBest()
        {
            const int last = 6;

            string[] surfaces =
            {
                "すもも", "も", "もも", "も", "もも", "の", "うち"
            };
            string[] features =
            {
                "名詞,一般,*,*,*,*,すもも,スモモ,スモモ",
                "助詞,係助詞,*,*,*,*,も,モ,モ",
                "名詞,一般,*,*,*,*,もも,モモ,モモ",
                "助詞,係助詞,*,*,*,*,も,モ,モ",
                "名詞,一般,*,*,*,*,もも,モモ,モモ",
                "助詞,連体化,*,*,*,*,の,ノ,ノ",
                "名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ"
            };

            using var tagger = MeCabTagger.Create("../../../../../dic/ipadic");
            var nodes = tagger.Parse("すもももももももものうち");

            // Surface and feature of every node.
            Assert.Equal(7, nodes.Length);
            for (var i = 0; i <= last; i++)
            {
                Assert.Equal(surfaces[i], nodes[i].Surface);
                Assert.Equal(features[i], nodes[i].Feature);
            }

            // IsBest: the node before the first one, nodes 1 through 6, and the node after the
            // last one. nodes[0] itself is not checked here.
            Assert.True(nodes[0].Prev.IsBest);
            for (var i = 1; i <= last; i++)
            {
                Assert.True(nodes[i].IsBest);
            }
            Assert.True(nodes[last].Next.IsBest);

            // Stat: BOS before the first node, normal for all seven, EOS after the last.
            Assert.Equal(MeCabNodeStat.Bos, nodes[0].Prev.Stat);
            for (var i = 0; i <= last; i++)
            {
                Assert.Equal(MeCabNodeStat.Nor, nodes[i].Stat);
            }
            Assert.Equal(MeCabNodeStat.Eos, nodes[last].Next.Stat);

            // Following Next from nodes[0] must yield the very same instances as the array.
            var cursor = nodes[0];
            for (var i = 1; i <= last; i++)
            {
                cursor = cursor.Next;
                Assert.Same(nodes[i], cursor);
            }
        }
Esempio n. 22
0
        /// <summary>
        /// Creates <c>_meCabTagger</c> with the dictionary in the <c>dic\ipadic</c>
        /// sub-directory of the folder that contains the executing assembly.
        /// </summary>
        private void InitializeMeCabTagger()
        {
            var assemblyDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
            var settings = new MeCabParam
            {
                DicDir = Path.Combine(assemblyDirectory, @"dic\ipadic")
            };

            _meCabTagger = MeCabTagger.Create(settings);
        }
Esempio n. 23
0
        /// <summary>
        /// Runs morphological analysis on the text of <c>richTextBox1</c> and shows the
        /// formatted result in <c>richTextBox2</c>.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event arguments.</param>
        private void button1_Click(object sender, EventArgs e)
        {
            // Source text to analyze, e.g. "私はプログラマーです".
            string text = richTextBox1.Text;

            // A tagger is created on every click; dispose it so repeated clicks do not
            // accumulate undisposed instances.
            using (var tagger = MeCabTagger.Create())
            {
                // Run the analysis and display the result.
                richTextBox2.Text = tagger.Parse(text);
            }
        }
Esempio n. 24
0
        /// <summary>
        /// Creates <c>tagger</c> from the <c>mecab\ipadic</c> dictionary under <c>baseDir</c>,
        /// then assigns four further settings on the parameter object.
        /// </summary>
        public void SetUp()
        {
            MeCabParam mecabParam = new MeCabParam
            {
                DicDir = Path.Combine(baseDir, @"mecab\ipadic"),
            };

            tagger = MeCabTagger.Create(mecabParam);
            // NOTE(review): the settings below are assigned AFTER the tagger has been created.
            // Whether the tagger observes them depends on whether Create copies the parameter
            // values or keeps a reference to the object. Confirm; if they are meant to take
            // effect they probably belong before Create, and if not they are dead code.
            mecabParam.LatticeLevel     = MeCabLatticeLevel.Zero;
            mecabParam.OutputFormatType = "lattice";
            mecabParam.AllMorphs        = false;
            mecabParam.Partial          = true;
        }
Esempio n. 25
0
 /// <summary>
 /// Loads question/answer pairs from a text file. Each non-blank line holds a question and
 /// an answer separated by a single space; the question is tokenized with
 /// <c>Dialog.SplitWord</c> and the answer is stored as-is.
 /// </summary>
 /// <param name="filename">Path of the database file.</param>
 /// <param name="tagger">Tagger used to tokenize the questions.</param>
 /// <exception cref="InvalidDataException">
 /// A non-blank line does not contain both a question and an answer.
 /// </exception>
 public QADatabase(string filename, MeCabTagger tagger)
 {
     question = new List<string[]>();
     answer   = new List<string>();
     using (var infile = new StreamReader(filename))
     {
         string line;
         int    lineNumber = 0;

         // ReadLine returns null at end of file, so no separate Peek is needed.
         while ((line = infile.ReadLine()) != null)
         {
             lineNumber++;

             // Blank lines, such as a trailing newline at the end of the file, are skipped
             // instead of failing with IndexOutOfRangeException.
             if (string.IsNullOrWhiteSpace(line))
             {
                 continue;
             }

             var x = line.Split(' ');
             if (x.Length < 2)
             {
                 // Fail before touching either list so both stay the same length, and say
                 // where the problem is.
                 throw new InvalidDataException(
                     filename + "(" + lineNumber + "): expected \"question answer\" separated by a space.");
             }

             question.Add(Dialog.SplitWord(x[0], tagger));
             answer.Add(x[1]);
         }
     }
 }
Esempio n. 26
0
        /// <summary>
        /// Sets up the dialog: a Japanese ("ja-JP") speech recognizer for the "japaneast"
        /// region, a MeCab tagger, a speech synthesizer on the default audio device with a
        /// female adult ja-JP voice hint, and the question/answer database loaded from
        /// <paramref name="dbfile"/>.
        /// </summary>
        /// <param name="dbfile">Path of the question/answer database file.</param>
        public Dialog(string dbfile)
        {
            // NOTE(review): placeholder value; the real key presumably has to be supplied
            // before this works. Prefer configuration over source code for it.
            const string subscriptionKey = "YOUR-MICROSOFT-COGNITIVESERVICE-SPEECH-KEY";

            var speechConfig = SpeechConfig.FromSubscription(subscriptionKey, "japaneast");
            speechConfig.SpeechRecognitionLanguage = "ja-JP";

            Tagger     = NMeCab.MeCabTagger.Create();
            recognizer = new SpeechRecognizer(speechConfig);

            synth = new SpeechSynthesizer();
            synth.SetOutputToDefaultAudioDevice();
            synth.SelectVoiceByHints(VoiceGender.Female, VoiceAge.Adult, 0, new CultureInfo("ja-JP"));

            // The database tokenizes its questions with the same tagger.
            db = new QADatabase(dbfile, Tagger);
        }
Esempio n. 27
0
        /// <summary>
        /// Creates <c>tagger</c> from the dictionary at <c>TestDataPaths.Unidic</c> with
        /// <c>UseMemoryMappedFile</c> set to true, then assigns four further settings on the
        /// parameter object.
        /// </summary>
        public void SetUp()
        {
            MeCabParam mecabParam = new MeCabParam
            {
                DicDir = TestDataPaths.Unidic,
                UseMemoryMappedFile = true
            };

            tagger = MeCabTagger.Create(mecabParam);
            // NOTE(review): the settings below are assigned AFTER the tagger has been created.
            // Whether the tagger observes them depends on whether Create copies the parameter
            // values or keeps a reference to the object. Confirm; if they are meant to take
            // effect they probably belong before Create, and if not they are dead code.
            mecabParam.LatticeLevel     = MeCabLatticeLevel.Zero;
            mecabParam.OutputFormatType = "yomi";
            mecabParam.AllMorphs        = false;
            mecabParam.Partial          = true;
        }
Esempio n. 28
0
        /// <summary>
        /// Prepares an analysis of <paramref name="sentence"/>: wraps it in a
        /// <see cref="System.Globalization.StringInfo"/>, resets the tail counters and the
        /// four sorted lists, creates a tagger and parses the sentence into <c>node</c>.
        /// </summary>
        /// <param name="sentence">Sentence to analyze.</param>
        /// <param name="file_name">Stored in <c>file_name</c>; not used by the constructor itself.</param>
        /// <param name="header_names">Stored in <c>header_names</c>; not used by the constructor itself.</param>
        public MeCab_kaiseki(string sentence, string file_name, string[] header_names)
        {
            this.surrogate_sentence = new System.Globalization.StringInfo(sentence);
            this.file_name          = file_name;
            this.header_names       = header_names;

            // Word-level bookkeeping.
            w_tail   = 0;
            w_array1 = new SortedList<int, int>();
            w_array2 = new SortedList<int, string>();

            // The same bookkeeping for the "goiso" variants.
            goiso_w_tail   = 0;
            goiso_w_array1 = new SortedList<int, int>();
            goiso_w_array2 = new SortedList<int, string>();

            tag  = MeCabTagger.Create();
            node = tag.ParseToNode(sentence);
        }
Esempio n. 29
0
        /// <summary>
        /// Tokenizes <paramref name="result"/> and returns the surface of every node except
        /// those whose <c>Stat</c> is <c>Bos</c> or <c>Eos</c>.
        /// </summary>
        /// <param name="result">Text to tokenize.</param>
        /// <param name="tagger">Tagger used for parsing.</param>
        /// <returns>The token surfaces in order.</returns>
        public static string[] SplitWord(string result, MeCabTagger tagger)
        {
            var surfaces = new List<string>();

            for (var node = tagger.ParseToNode(result); node != null; node = node.Next)
            {
                // Skip the sentence-boundary nodes.
                if (node.Stat == MeCabNodeStat.Bos || node.Stat == MeCabNodeStat.Eos)
                {
                    continue;
                }

                surfaces.Add(node.Surface);
            }

            return surfaces.ToArray();
        }
Esempio n. 30
0
        /// <summary>
        /// Parses <c>Example3</c> into Ve words and traces one line per word: the word padded
        /// to five characters, its pronunciation, its part of speech and its lemma.
        /// </summary>
        public void VeWordBasicOutputTestMethod()
        {
            var sentence = Example3;

            // Dispose the tagger after the words have been enumerated instead of leaking it.
            // The loop stays inside the using block because the node sequence may be
            // evaluated lazily.
            using (var tagger = MeCabTagger.Create())
            {
                var enumerableSet = tagger.ParseToNodes(sentence);
                var wordList      = VeParser.Words(enumerableSet);

                foreach (var word in wordList)
                {
                    Trace.WriteLine(
                        $"{word.Word.PadRight(5, ' ')} {word.Pronunciation} " +
                        $"{word.PartOfSpeech.ToString()} {word.Lemma}");
                }
            }
        }