/// <summary>
/// Program entry point. Creates the MeCab tagger, initializes the database layer,
/// opens two database connections, and calls <c>RunControlBrowser</c> between
/// starting and stopping the HTTP server.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // Initialize MeCab (morphological analysis) with the dictionary at a fixed path.
    MeCab = MeCabTagger.Create(new MeCabParam { DicDir = @"c:\dic\mecab-ipadic-neologd" });

    // Initialize database-related state.
    Db.Initialize();

    // Connect to the database and start controlling the browser.
    using (var mainConnection = Db.CreateConnection())
    using (var searchConnection = Db.CreateConnection())
    {
        mainConnection.Open();
        searchConnection.Open();

        // One command per connection; both are published through static members.
        Cmd = mainConnection.CreateCommand();
        CmdForSearch = searchConnection.CreateCommand();

        var cancellation = new CancellationTokenSource();
        var serverTask = StartHttpServer(cancellation.Token);

        RunControlBrowser();

        StopHttpServer(serverTask, cancellation);
    }
}
/// <summary>
/// Builds the word and kanji lookup tables, creates the MeCab tagger, initializes
/// the form's components, and subscribes to the furigana labels' selection events.
/// </summary>
public Form1()
{
    // Index every dictionary entry under each of its kanji spellings and readings.
    // When two entries share a key, the later entry replaces the earlier one.
    japDict = new Dictionary<string, IJapaneseEntry>();
    var wordEntries = new JapaneseDictionary().GetEntries().ToArray();
    foreach (var entry in wordEntries)
    {
        foreach (IKanji kanjiForm in entry.Kanjis)
        {
            japDict[kanjiForm.Text] = entry;
        }

        foreach (Wacton.Desu.Japanese.IReading reading in entry.Readings)
        {
            japDict[reading.Text] = entry;
        }
    }

    // Index kanji entries by their literal.
    kanjiDict = new Dictionary<string, IKanjiEntry>();
    var kanjiEntries = new KanjiDictionary().GetEntries().ToArray();
    foreach (var kanjiEntry in kanjiEntries)
    {
        kanjiDict[kanjiEntry.Literal] = kanjiEntry;
    }

    tagger = MeCabTagger.Create();

    InitializeComponent();

    // Both labels share the same selection-changed handler.
    furiganaKanaLabel.SelectedTextChangedEvent += FuriganaKanaLabel_SelectedTextChangedEvent;
    furiganaRomajiLabel.SelectedTextChangedEvent += FuriganaKanaLabel_SelectedTextChangedEvent;
}
/// <summary>
/// Parses a fixed sample sentence and prints each node's surface and feature,
/// tab-separated, to the console. Any exception's message is printed instead,
/// and the method always waits for console input before returning.
/// </summary>
public void Run()
{
    try
    {
        string sentence = "ユーザが本明細書において提供れるような方法";

        var param = new MeCabParam { DicDir = @"..\..\dic\ipadic" };

        // The tagger is disposable; release it once the nodes have been consumed.
        using (MeCabTagger tagger = MeCabTagger.Create(param))
        {
            for (MeCabNode node = tagger.ParseToNode(sentence); node != null; node = node.Next)
            {
                // Only nodes with CharType > 0 are printed
                // (presumably this skips the BOS/EOS sentinels — confirm).
                if (node.CharType > 0)
                {
                    Console.WriteLine(node.Surface + "\t" + node.Feature);
                }
            }
        }

        Console.WriteLine();
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
    finally
    {
        // Keep the console window open until a key is read.
        Console.Read();
    }
}
/// <summary>
/// Creates a tagger for <c>dicDir</c>, disposes it immediately, and returns
/// the disposed instance.
/// </summary>
/// <returns>The tagger that was created and already disposed.</returns>
public MeCabTagger CreateAndDisposeTagger()
{
    MeCabTagger created = MeCabTagger.Create(this.dicDir);

    // The instance is handed back after disposal; callers receive a disposed tagger.
    created.Dispose();

    return created;
}
/// <summary>
/// Main entry point of the program. Reads the first <c>*.txt</c> talk file,
/// parses its messages into words, and prints each base form that occurs more
/// than once together with its count, in descending order of count.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
/// <exception cref="FileNotFoundException">No <c>*.txt</c> file exists in the talk directory.</exception>
static void Main(string[] args)
{
    var service = new AnalyzeService();

    var talkDirectory = new DirectoryInfo(@"..\..\..\LineAnalyze.Domain\talk");
    var file = talkDirectory.EnumerateFiles("*.txt").FirstOrDefault();
    if (file == null)
    {
        // Previously this surfaced as a NullReferenceException on ".FullName".
        throw new FileNotFoundException("No *.txt talk file was found in " + talkDirectory.FullName);
    }

    var text = File.ReadAllText(file.FullName);
    var talks = service.ParseTalk(text);

    var totalWords = new List<Word>();

    // The tagger is disposable; release it once every message has been parsed.
    using (var meCabTagger = MeCabTagger.Create())
    {
        foreach (var talk in talks)
        {
            totalWords.AddRange(service.ParseText(meCabTagger, talk.Message));
        }
    }

    var wordCounts = totalWords
        .GroupBy(w => w.Base)
        .Select(g => new { RealName = g.Key, Count = g.Count() })
        .Where(x => x.Count > 1)
        .OrderByDescending(x => x.Count);

    // CSV header: "word,occurrences".
    System.Console.WriteLine("単語,出現回数");
    foreach (var data in wordCounts)
    {
        System.Console.WriteLine(data.RealName + "," + data.Count);
    }
}
/// <summary>
/// Resolves the dictionary directory for <c>Dic</c>, logs it to the console,
/// and creates the tagger stored in <c>tagger</c>.
/// </summary>
public void GlobalSetup()
{
    var resolvedDicDir = Helper.SeekDicDir(this.Dic);

    Console.WriteLine($"Open Dictionaly: {resolvedDicDir}");

    this.tagger = MeCabTagger.Create(resolvedDicDir);
}
/// <summary>
/// Tokenizes every sentence with MeCab, wraps each sentence's tokens in the
/// <c>BOS</c>/<c>EOS</c> markers, and appends consecutive groups of three tokens
/// to <c>triplets</c>.
/// </summary>
/// <param name="sentences">Sentences to tokenize.</param>
public void Load(List<string> sentences)
{
    var tokens = new List<string>();

    using (MeCabTagger tagger = MeCabTagger.Create())
    {
        foreach (string sentence in sentences)
        {
            MeCabNode head = tagger.ParseToNode(sentence) as MeCabNode;

            tokens.Add(BOS);

            // Skip the first node, then record the surface of every remaining node.
            // NOTE(review): the last node in the chain is recorded too, right before
            // the EOS marker — confirm whether the parser's own end node should be skipped.
            for (MeCabNode current = head.Next; current != null; current = current.Next)
            {
                tokens.Add(current.Surface);
            }

            tokens.Add(EOS);
        }
    }

    // Tokens are grouped into non-overlapping runs of three; up to two trailing
    // tokens that do not fill a complete group are left out.
    int tripletCount = tokens.Count() / 3;
    for (int group = 0; group < tripletCount; group++)
    {
        int start = 3 * group;
        triplets.Add(new Triplet(new string[] { tokens[start], tokens[start + 1], tokens[start + 2] }));
    }
}
/// <summary>
/// Word segmentation: parses <paramref name="rawDocument"/> with MeCab and
/// lazily yields the first field (the surface) of each output line.
/// </summary>
/// <param name="rawDocument">Text to segment.</param>
/// <param name="containIsNotPhrageStart">
/// When <c>true</c>, lines whose second field is <c>記号</c> are skipped as well.
/// </param>
/// <returns>The surface of each retained output line, in output order.</returns>
private IEnumerable<string> SplitDocumentsWithMeCab(string rawDocument, bool containIsNotPhrageStart)
{
    MeCabParam param = new MeCabParam { DicDir = @"lib\MeCab\dic\ipadic" };

    // Run the morphological analysis and keep only the textual result, so the
    // tagger can be released before any element is yielded.
    string result;
    using (MeCabTagger tagger = MeCabTagger.Create(param))
    {
        // Turn the surface/feature tab into a comma so one Split(',') handles a line.
        result = tagger.Parse(rawDocument).Replace("\t", ",");
    }

    var lines = result.Split(new string[] { "\r\n" }, StringSplitOptions.None);
    foreach (var line in lines)
    {
        // Split the MeCab output line into its fields.
        var featureElements = line.Split(',');

        // Drop the EOS terminator and blank surfaces.
        if ("EOS" == featureElements[0] || String.IsNullOrWhiteSpace(featureElements[0]))
        {
            continue;
        }

        // Optionally drop lines tagged 記号. The length check avoids an
        // IndexOutOfRangeException on a line that has no second field.
        // (Equivalent filters for 助詞 and 助動詞 were commented out in the
        // original and remain disabled.)
        if (containIsNotPhrageStart && featureElements.Length > 1 && "記号" == featureElements[1])
        {
            continue;
        }

        // Emit the surface.
        yield return featureElements[0];
    }
}
/// <summary>
/// Runs morphological analysis on the contents of <c>FileName</c> and saves the
/// result to <c>MecabFileName</c>, one <c>surface,feature</c> line per node.
/// </summary>
public void Execute()
{
    var allText = File.ReadAllText(FileName);

    var mecabParam = new MeCabParam
    {
        DicDir = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"..\..\..\Chapter04.Core\dic\ipadic")
    };

    // The tagger is disposable; release it together with the writer.
    using (MeCabTagger meCabTagger = MeCabTagger.Create(mecabParam))
    using (var writer = new StreamWriter(MecabFileName, false))
    {
        for (MeCabNode node = meCabTagger.ParseToNode(allText); node != null; node = node.Next)
        {
            // Only nodes with CharType > 0 are written
            // (presumably this skips the BOS/EOS sentinels — confirm).
            if (node.CharType > 0)
            {
                writer.WriteLine(node.Surface + "," + node.Feature);
            }
        }

        writer.Flush();
    }
}
/// <summary>
/// Parses <paramref name="str"/> with MeCab and concatenates one piece of text
/// per node: field index 7 of the node's comma-separated feature string, or the
/// node's surface when the feature string has fewer than 9 fields.
/// </summary>
/// <param name="dicPathFromExe">Dictionary directory passed to MeCab as <c>DicDir</c>.</param>
/// <param name="str">Text to convert.</param>
/// <returns>The concatenated text for all nodes with <c>CharType &gt; 0</c>.</returns>
/// <remarks>
/// NOTE(review): field 7 is presumably the reading column of the dictionary in
/// use; its script (hiragana vs. katakana) depends on the dictionary — confirm.
/// </remarks>
public static String GetHiragana(String dicPathFromExe, String str)
{
    MeCabParam param = new MeCabParam { DicDir = dicPathFromExe };

    // A builder avoids re-copying the accumulated text on every node.
    var hiragana = new System.Text.StringBuilder();

    // The tagger is disposable; release it once the nodes have been consumed.
    using (MeCabTagger tagger = MeCabTagger.Create(param))
    {
        for (MeCabNode node = tagger.ParseToNode(str); node != null; node = node.Next)
        {
            if (node.CharType <= 0)
            {
                continue;
            }

            String[] features = node.Feature.Split(',');

            // Short feature strings carry no field 7 to use; fall back to the surface.
            hiragana.Append(features.Length < 9 ? node.Surface : features[7]);
        }
    }

    return hiragana.ToString();
}
/// <summary>
/// Handles the "analyze" button: reads the selected file as UTF-8, parses it with
/// MeCab, counts occurrences per base form for the checked parts of speech, and
/// binds the results (sorted by descending count) to the grid.
/// When no file is selected, delegates to <c>ClearBtn_Click</c> instead.
/// </summary>
/// <param name="sender">Event source; forwarded to <c>ClearBtn_Click</c>.</param>
/// <param name="e">Event data; forwarded to <c>ClearBtn_Click</c>.</param>
private void ExecAnalyzeBtn_Click(object sender, EventArgs e)
{
    if (string.IsNullOrEmpty(fileSelectTextBox.Text))
    {
        ClearBtn_Click(sender, e);
        return;
    }

    string text;
    using (var reader = new StreamReader(fileSelectTextBox.Text, Encoding.UTF8))
    {
        text = reader.ReadToEnd();
    }

    // Collect the text of every checked box. OfType skips any child control that
    // is not a CheckBox instead of failing with an InvalidCastException.
    var targetHinshi = new HashSet<string>();
    foreach (CheckBox item in HinshiGroupBox.Controls.OfType<CheckBox>())
    {
        if (item.Checked)
        {
            targetHinshi.Add(item.Text);
        }
    }

    // Keyed by base form so each node is an O(1) lookup rather than a linear scan.
    var resultsByBaseForm = new Dictionary<string, AnalyzeResult>();

    // The tagger is disposable; release it once the nodes have been consumed.
    using (var mecab = MeCabTagger.Create())
    {
        for (var node = mecab.ParseToNode(text); node != null; node = node.Next)
        {
            // Split the feature string once per node and reuse the fields.
            var features = node.Feature.Split(',');

            if (!targetHinshi.Contains(features[0]) || string.IsNullOrWhiteSpace(node.Surface))
            {
                continue;
            }

            var wordOriginal = features[6];

            AnalyzeResult existing;
            if (resultsByBaseForm.TryGetValue(wordOriginal, out existing))
            {
                existing.出現回数 += 1;
            }
            else
            {
                resultsByBaseForm.Add(wordOriginal, new AnalyzeResult
                {
                    表層形 = node.Surface,
                    品詞 = features[0],
                    原形 = wordOriginal,
                    出現回数 = 1
                });
            }
        }
    }

    var ds = resultsByBaseForm.Values.ToList();
    ds.Sort((a, b) => b.出現回数 - a.出現回数);
    analyzeResultDGV.DataSource = ds;
}
/// <summary>
/// Creates the shared tagger and loads <c>customizeDict.txt</c> into
/// <c>_customizeDict</c>. Each non-blank line is split on a single space; the
/// first field becomes the key and the second the value. Lines with fewer than
/// two fields are ignored.
/// </summary>
/// <exception cref="ArgumentException">The file contains the same key twice.</exception>
public static void Init()
{
    var parameter = new MeCabParam
    {
        DicDir = "unidic", // dictionary path
        LatticeLevel = MeCabLatticeLevel.Zero,
    };
    _tagger = MeCabTagger.Create(parameter);

    // ReadAllLines accepts "\r\n", "\n" and "\r" alike. Splitting on
    // Environment.NewLine left a stray '\r' in values (or one giant line)
    // whenever the file's line endings differed from the platform's.
    var lines = File.ReadAllLines("customizeDict.txt");

    _customizeDict = new Dictionary<string, string>();
    foreach (var line in lines)
    {
        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        var fields = line.Split(" ");
        if (fields.Length < 2)
        {
            continue;
        }

        _customizeDict.Add(fields[0], fields[1]);
    }
}
/// <summary>
/// Loads the persisted state from JSON files in the current directory when they
/// exist, substitutes empty defaults for anything still null afterwards, and
/// creates the tagger with the <c>wakati</c> output format.
/// </summary>
/// <param name="citroid">Host instance; not read by this method.</param>
/// <returns>A task that completes once initialization has run.</returns>
public async Task InitializeAsync(ICitroid citroid)
{
    const string lengthBrainFile = "lengthBrain.json";
    const string wordBrainFile = "wordBrain.json";
    const string configFile = "NazoBrainConfig.json";

    // Load whatever has been persisted.
    if (File.Exists(lengthBrainFile))
    {
        var json = File.ReadAllText(lengthBrainFile);
        lengthBrain = JsonConvert.DeserializeObject<List<int>>(json);
    }

    if (File.Exists(wordBrainFile))
    {
        var json = File.ReadAllText(wordBrainFile);
        wordBrain = JsonConvert.DeserializeObject<Dictionary<string, Word>>(json);
    }

    if (File.Exists(configFile))
    {
        var json = File.ReadAllText(configFile);
        config = JsonConvert.DeserializeObject<NazoBrainConfig>(json);
    }

    // Fall back to empty defaults for anything that is still null.
    if (lengthBrain == null)
    {
        lengthBrain = new List<int>();
    }

    if (wordBrain == null)
    {
        wordBrain = new Dictionary<string, Word>();
    }

    if (config == null)
    {
        config = new NazoBrainConfig();
    }

    _tagger = MeCabTagger.Create();
    _tagger.OutPutFormatType = "wakati";
}
/// <summary>
/// Verifies the surface, position and length values reported for nodes when the
/// input contains only, leading, or trailing spaces.
/// </summary>
public void SpaceProcessing()
{
    using var tagger = MeCabTagger.Create("../../../../../dic/ipadic");

    // Space only
    var spaceOnly = tagger.Parse(" ");
    Assert.Empty(spaceOnly);

    // Known word, trailing space
    var knownTrailing = tagger.Parse("ようこそ ");
    Assert.Single(knownTrailing);
    var node = knownTrailing[0];
    Assert.Equal("ようこそ", node.Surface);
    Assert.Equal(0, node.BPos);
    Assert.Equal(4, node.EPos);
    Assert.Equal(4, node.Length);
    Assert.Equal(4, node.RLength);

    // Unknown word, trailing space
    var unknownTrailing = tagger.Parse("XXXYYYZZZ ");
    Assert.Single(unknownTrailing);
    node = unknownTrailing[0];
    Assert.Equal("XXXYYYZZZ", node.Surface);
    Assert.Equal(0, node.BPos);
    Assert.Equal(9, node.EPos);
    Assert.Equal(9, node.Length);
    Assert.Equal(9, node.RLength);

    // Known word, leading space
    var knownLeading = tagger.Parse(" ようこそ");
    Assert.Single(knownLeading);
    node = knownLeading[0];
    Assert.Equal("ようこそ", node.Surface);
    Assert.Equal(1 - 1, node.BPos);
    Assert.Equal(5, node.EPos);
    Assert.Equal(4, node.Length);
    Assert.Equal(4 + 1, node.RLength);

    // Unknown word, leading space
    var unknownLeading = tagger.Parse(" XXXYYYZZZ");
    Assert.Single(unknownLeading);
    node = unknownLeading[0];
    Assert.Equal("XXXYYYZZZ", node.Surface);
    Assert.Equal(1 - 1, node.BPos);
    Assert.Equal(10, node.EPos);
    Assert.Equal(9, node.Length);
    Assert.Equal(10, node.RLength);

    // Combined: spaces before, between and after two words
    var combined = tagger.Parse(" ようこそ XXXYYYZZZ ");
    Assert.Equal(2, combined.Length);

    var first = combined[0];
    Assert.Equal("ようこそ", first.Surface);
    Assert.Equal(1 - 1, first.BPos);
    Assert.Equal(5, first.EPos);
    Assert.Equal(4, first.Length);
    Assert.Equal(4 + 1, first.RLength);

    var second = combined[1];
    Assert.Equal("XXXYYYZZZ", second.Surface);
    Assert.Equal(6 - 1, second.BPos);
    Assert.Equal(15, second.EPos);
    Assert.Equal(9, second.Length);
    Assert.Equal(9 + 1, second.RLength);
}
/// <summary>
/// Creates the tagger from <paramref name="mecabParam"/> after overriding its
/// lattice level, output format, all-morphs and partial settings.
/// </summary>
/// <param name="mecabParam">
/// Parameters for the tagger. The instance passed in is modified in place.
/// </param>
public MeCab(MeCabParam mecabParam)
{
    // Fixed settings applied on top of whatever the caller configured.
    mecabParam.OutputFormatType = "wakati";
    mecabParam.LatticeLevel = MeCabLatticeLevel.Zero;
    mecabParam.Partial = true;
    mecabParam.AllMorphs = false;

    tagger = MeCabTagger.Create(mecabParam);
}
/// <summary>
/// Parses <c>Example4</c> into Ve words and writes each word's text to the trace output.
/// </summary>
public void VeWordParserDebug()
{
    string sentence = Example4;

    // The tagger is disposable; release it once the words have been enumerated.
    using (var tagger = MeCabTagger.Create())
    {
        foreach (var veWord in tagger.ParseToNodes(sentence).ParseVeWords())
        {
            Trace.WriteLine(veWord.Word);
        }
    }
}
/// <summary>
/// Runs 99 parallel iterations that each create two taggers for <c>dicDir</c>
/// and then dispose both.
/// </summary>
public void CreateMultiTagger()
{
    Parallel.For(0, 99, iteration =>
    {
        // Both taggers exist at the same time before either one is disposed.
        var first = MeCabTagger.Create(dicDir);
        var second = MeCabTagger.Create(dicDir);

        first.Dispose();
        second.Dispose();
    });
}
/// <summary>
/// Creates a tagger that uses the <c>ipadic</c> dictionary directory.
/// </summary>
/// <returns>A newly created tagger.</returns>
public static MeCabTagger GetParser()
{
    var settings = new MeCabParam { DicDir = @"ipadic" };
    return MeCabTagger.Create(settings);
}
/// <summary>
/// Enumerates the Ve words parsed from <c>OccurredNullSample</c> to completion;
/// the check is simply that enumeration finishes without throwing.
/// </summary>
public void VeParseWordsBugFixOne()
{
    const string? testString = OccurredNullSample;

    // The tagger is disposable; release it when the method exits.
    using var tagger = MeCabTagger.Create();

    foreach (var _ in tagger.ParseToNodes(testString).ParseVeWords())
    {
        // Intentionally empty: only the enumeration itself is exercised.
    }
}
/// <summary>
/// Parses a sample sentence and verifies the resulting seven nodes: their
/// surfaces and features, the best-path flags, the node status values
/// (including the neighbouring BOS/EOS nodes), and the <c>Next</c> links.
/// </summary>
public void OneBest()
{
    using var tagger = MeCabTagger.Create("../../../../../dic/ipadic");

    var nodes = tagger.Parse("すもももももももものうち");

    var expected = new (string Surface, string Feature)[]
    {
        ("すもも", "名詞,一般,*,*,*,*,すもも,スモモ,スモモ"),
        ("も", "助詞,係助詞,*,*,*,*,も,モ,モ"),
        ("もも", "名詞,一般,*,*,*,*,もも,モモ,モモ"),
        ("も", "助詞,係助詞,*,*,*,*,も,モ,モ"),
        ("もも", "名詞,一般,*,*,*,*,もも,モモ,モモ"),
        ("の", "助詞,連体化,*,*,*,*,の,ノ,ノ"),
        ("うち", "名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ"),
    };

    Assert.Equal(7, nodes.Length);

    // Surface and feature of every node.
    for (int i = 0; i < expected.Length; i++)
    {
        Assert.Equal(expected[i].Surface, nodes[i].Surface);
        Assert.Equal(expected[i].Feature, nodes[i].Feature);
    }

    // Best-path flag: the node before the first one, nodes 1..6, and the node after the last one.
    Assert.True(nodes[0].Prev.IsBest);
    for (int i = 1; i <= 6; i++)
    {
        Assert.True(nodes[i].IsBest);
    }
    Assert.True(nodes[6].Next.IsBest);

    // Status: BOS before the first node, normal for all seven, EOS after the last.
    Assert.Equal(MeCabNodeStat.Bos, nodes[0].Prev.Stat);
    for (int i = 0; i <= 6; i++)
    {
        Assert.Equal(MeCabNodeStat.Nor, nodes[i].Stat);
    }
    Assert.Equal(MeCabNodeStat.Eos, nodes[6].Next.Stat);

    // Following Next from the first node visits the array elements in order.
    var cursor = nodes[0];
    for (int i = 1; i <= 6; i++)
    {
        cursor = cursor.Next;
        Assert.Same(nodes[i], cursor);
    }
}
/// <summary>
/// Creates <c>_meCabTagger</c> using the <c>dic\ipadic</c> directory located
/// next to the executing assembly.
/// </summary>
private void InitializeMeCabTagger()
{
    var settings = new MeCabParam();

    var assemblyDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
    settings.DicDir = Path.Combine(assemblyDirectory, @"dic\ipadic");

    _meCabTagger = MeCabTagger.Create(settings);
}
/// <summary>
/// Handles the button click: runs morphological analysis on the text in
/// <c>richTextBox1</c> and shows the result in <c>richTextBox2</c>.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void button1_Click(object sender, EventArgs e)
{
    // Source text to analyze (sample input: "私はプログラマーです").
    string text = richTextBox1.Text;

    // A tagger is created on every click, so release it as soon as parsing is done.
    using (var tagger = MeCabTagger.Create())
    {
        // Run the analysis and display the result.
        richTextBox2.Text = tagger.Parse(text);
    }
}
/// <summary>
/// Creates the tagger from the <c>mecab\ipadic</c> dictionary under <c>baseDir</c>,
/// then assigns further settings on the parameter object.
/// </summary>
public void SetUp()
{
    var settings = new MeCabParam
    {
        DicDir = Path.Combine(baseDir, @"mecab\ipadic"),
    };

    tagger = MeCabTagger.Create(settings);

    // NOTE(review): these values are assigned after the tagger has been created.
    // Whether the tagger still observes them depends on how Create consumes the
    // parameter object — confirm, and move them above Create if they are meant to apply.
    settings.LatticeLevel = MeCabLatticeLevel.Zero;
    settings.OutputFormatType = "lattice";
    settings.AllMorphs = false;
    settings.Partial = true;
}
/// <summary>
/// Creates the tagger from the UniDic test dictionary with memory-mapped file
/// access enabled, then assigns further settings on the parameter object.
/// </summary>
public void SetUp()
{
    var settings = new MeCabParam
    {
        DicDir = TestDataPaths.Unidic,
        UseMemoryMappedFile = true
    };

    tagger = MeCabTagger.Create(settings);

    // NOTE(review): these values are assigned after the tagger has been created.
    // Whether the tagger still observes them depends on how Create consumes the
    // parameter object — confirm, and move them above Create if they are meant to apply.
    settings.LatticeLevel = MeCabLatticeLevel.Zero;
    settings.OutputFormatType = "yomi";
    settings.AllMorphs = false;
    settings.Partial = true;
}
/// <summary>
/// Stores the inputs, resets the position counters and lookup tables, creates a
/// tagger, and parses <paramref name="sentence"/> so that <c>node</c> points at
/// the first node of the result.
/// </summary>
/// <param name="sentence">Text to analyze.</param>
/// <param name="file_name">Stored in <c>file_name</c>.</param>
/// <param name="header_names">Stored in <c>header_names</c>.</param>
public MeCab_kaiseki(string sentence, string file_name, string[] header_names)
{
    // StringInfo view of the sentence (the field name suggests it is used for
    // surrogate-aware indexing — confirm against the callers of this field).
    this.surrogate_sentence = new System.Globalization.StringInfo(sentence);

    // Counters start at zero.
    w_tail = 0;
    goiso_w_tail = 0;

    // Empty lookup tables: two keyed int -> int, two keyed int -> string.
    w_array1 = new SortedList<int, int>();
    goiso_w_array1 = new SortedList<int, int>();
    w_array2 = new SortedList<int, string>();
    goiso_w_array2 = new SortedList<int, string>();

    // Parse once up front; the tagger is kept in a field.
    tag = MeCabTagger.Create();
    node = tag.ParseToNode(sentence);

    this.file_name = file_name;
    this.header_names = header_names;
}
/// <summary>
/// Parses <c>Example3</c> into Ve words and writes each word's text (padded to
/// five characters), pronunciation, part of speech and lemma to the trace output.
/// </summary>
public void VeWordBasicOutputTestMethod()
{
    var sentence = Example3;

    // The tagger is disposable; release it once the words have been enumerated.
    using (var tagger = MeCabTagger.Create())
    {
        var enumerableSet = tagger.ParseToNodes(sentence);
        var wordList = VeParser.Words(enumerableSet);

        foreach (var word in wordList)
        {
            Trace.WriteLine(
                $"{word.Word.PadRight(5, ' ')} {word.Pronunciation} " +
                $"{word.PartOfSpeech.ToString()} {word.Lemma}");
        }
    }
}
/// <summary>
/// Fetches every talk from the scraped URLs, parses each message with MeCab, and
/// appends the retained nouns to <c>AllTalkDictionary</c> under the talk's user.
/// A word is retained when its <c>Pos</c> is <c>名詞</c>, its <c>Base</c> is not
/// <c>*</c>, and its <c>Pos1</c> is not <c>非自立</c>.
/// </summary>
/// <returns>A task that completes when all talks have been processed.</returns>
public async Task CacheAllTalk()
{
    var list = await _webScraypingService.ToListRubyistHotlinksUrl();

    var allTalks = new List<Talk>();
    foreach (var item in list.ToList())
    {
        allTalks.AddRange(await _webScraypingService.ToListTalks(item));
    }

    var mecabParam = new MeCabParam
    {
        DicDir = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"..\..\..\RubyistHotlinksReports.Core\dic\ipadic")
    };

    // The tagger is disposable; release it once every message has been parsed.
    using (var meCabTagger = MeCabTagger.Create(mecabParam))
    {
        foreach (var talk in allTalks)
        {
            var words = ParseText(meCabTagger, talk.Message).ToList();
            foreach (var word in words)
            {
                // Keep nouns only, excluding those with Base "*" or Pos1 "非自立".
                if (word.Pos != "名詞" || word.Base == "*" || word.Pos1 == "非自立")
                {
                    continue;
                }

                if (!AllTalkDictionary.ContainsKey(talk.User))
                {
                    AllTalkDictionary.Add(talk.User, new List<Word>());
                }

                AllTalkDictionary[talk.User].Add(word);
            }
        }
    }
}
/// <summary>
/// Parses <paramref name="input"/> with a freshly created tagger configured for
/// <c>lattice</c> output and returns the formatted result.
/// </summary>
/// <param name="input">Text to analyze.</param>
/// <returns>
/// The parse output; if any exception is thrown, the exception's
/// <c>ToString()</c> text is returned instead.
/// </returns>
public static string MeCabParse(string input)
{
    try
    {
        // A tagger is created on every call, so release it before returning.
        using (MeCabTagger tagger = MeCabTagger.Create())
        {
            tagger.LatticeLevel = MeCabLatticeLevel.Zero;
            tagger.OutPutFormatType = "lattice";
            tagger.AllMorphs = false;
            tagger.Partial = false;

            return tagger.Parse(input);
        }
    }
    catch (Exception ex)
    {
        // Failures are reported through the return value rather than thrown.
        return ex.ToString();
    }
}
/// <summary>
/// Handles key presses: on Enter, runs morphological analysis on the last line of
/// <c>richTextBox1</c> and shows the result in <c>richTextBox2</c>.
/// Other keys are ignored.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Key event data; only <c>KeyCode</c> is read.</param>
private void tb_KeyDown(object sender, KeyEventArgs e)
{
    if (e.KeyCode != Keys.Enter)
    {
        return;
    }

    // Source text to analyze (sample input: "私はプログラマーです").
    string text = richTextBox1.Text;
    string[] lines = text.Split("\n");

    // A tagger is created on every Enter press, so release it once parsing is done.
    using (var tagger = MeCabTagger.Create())
    {
        // Analyze only the last line and display the result.
        richTextBox2.Text = tagger.Parse(lines[lines.Length - 1]);
    }
}
/// <summary>
/// Parses <paramref name="input"/> with MeCab and returns one
/// <c>{ surface, feature }</c> pair per node whose <c>CharType</c> is greater than zero.
/// </summary>
/// <param name="input">Text to analyze.</param>
/// <returns>Two-element arrays of surface and feature, in node order.</returns>
public static List<string[]> Analyze(string input)
{
    // The tagger is disposable; release it when the method exits. The strings are
    // copied into the result inside the loop, before disposal.
    using var tagger = MeCabTagger.Create();

    List<string[]> result = new();
    foreach (var node in tagger.ParseToNodes(input))
    {
        if (node.CharType > 0)
        {
            result.Add(new string[] { node.Surface, node.Feature });
        }
    }

    return result;
}