/// <summary>
/// Analyses <paramref name="str"/> with MeCab and concatenates one string per analysed node.
/// </summary>
/// <param name="dicPathFromExe">Dictionary directory; assigned to <c>MeCabParam.DicDir</c>.</param>
/// <param name="str">Text to analyse.</param>
/// <returns>
/// For every node whose <c>CharType</c> is greater than zero: the feature field at index 7 when the
/// feature string has at least nine comma-separated fields, otherwise the node's surface form.
/// The pieces are joined in node order.
/// </returns>
public static String GetHiragana(String dicPathFromExe, String str)
{
    MeCabParam param = new MeCabParam();
    param.DicDir = dicPathFromExe;

    // Dispose the tagger once all nodes have been read instead of leaking it on every call
    // (assumes MeCabTagger implements IDisposable, as it does in NMeCab).
    using (MeCabTagger tagger = MeCabTagger.Create(param))
    {
        // StringBuilder avoids re-allocating the whole result for every node.
        var hiragana = new System.Text.StringBuilder();

        for (MeCabNode node = tagger.ParseToNode(str); node != null; node = node.Next)
        {
            // Nodes with a non-positive CharType are skipped
            // (presumably the sentence boundary nodes; verify against the dictionary in use).
            if (node.CharType <= 0)
            {
                continue;
            }

            String[] features = node.Feature.Split(',');

            // Entries with fewer than nine fields have no field to take, so fall back to the surface text.
            hiragana.Append(features.Length < 9 ? node.Surface : features[7]);
        }

        return hiragana.ToString();
    }
}
/// <summary>
/// Performs morphological analysis and returns every candidate morpheme together with its marginal probability.
/// </summary>
/// <param name="sentence">Pointer to the string to analyse</param>
/// <param name="length">Length of the string to analyse</param>
/// <param name="theta">Temperature parameter for soft word segmentation</param>
/// <returns>Array of all morpheme nodes</returns>
public unsafe TNode[] ParseSoftWakachi(char* sentence, int length, float theta = MeCabParam.DefaltTheta)
{
    // Lattice level two with the requested temperature is what this entry point analyses with.
    var softParam = new MeCabParam();
    softParam.LatticeLevel = MeCabLatticeLevel.Two;
    softParam.Theta = theta;

    var lattice = this.ParseToLattice(sentence, length, softParam);
    return lattice.GetAllNodes();
}
/// <summary>
/// Constructor. Allocates the begin/end node tables for a sentence of <paramref name="length"/>
/// characters and installs the BOS and EOS nodes.
/// </summary>
/// <param name="nodeAllocator">Factory stored for creating nodes</param>
/// <param name="param">Analysis parameters stored on the lattice</param>
/// <param name="length">Length of the sentence; both tables get <c>length + 1</c> slots</param>
internal MeCabLattice(Func<TNode> nodeAllocator, MeCabParam param, int length)
{
    this.nodeAllocator = nodeAllocator;
    this.Param = param;

    // One slot per character position plus one for the position after the last character.
    int slotCount = length + 1;
    this.BeginNodeList = new TNode[slotCount];
    this.EndNodeList = new TNode[slotCount];

    // BOS is registered as ending at position 0.
    var bos = this.CreateNewNode();
    bos.IsBest = true;
    bos.Stat = MeCabNodeStat.Bos;
    this.EndNodeList[0] = bos;
    this.BosNode = bos;

    // EOS is registered as beginning at position `length`.
    var eos = this.CreateNewNode();
    eos.IsBest = true;
    eos.Stat = MeCabNodeStat.Eos;
    this.BeginNodeList[length] = eos;
    this.EosNode = eos;
}
/// <summary>
/// Opens the MeCabTagger by opening its viterbi component and then its writer with the same parameters.
/// </summary>
/// <param name="param">Initialization parameters</param>
private void Open(MeCabParam param)
{
    viterbi.Open(param);
    writer.Open(param);
}
/// <summary>
/// Performs morphological analysis and returns the result as a lattice.
/// </summary>
/// <param name="sentence">Pointer to the string to analyse</param>
/// <param name="length">Length of the string to analyse</param>
/// <param name="param">Analysis parameters</param>
/// <returns>The lattice</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is negative.</exception>
public unsafe MeCabLattice<TNode> ParseToLattice(char* sentence, int length, MeCabParam param)
{
    // The disposed check runs before argument validation.
    this.ThrowIfDisposed();

    if (0 > length)
    {
        throw new ArgumentOutOfRangeException(nameof(length));
    }

    MeCabLattice<TNode> result = new MeCabLattice<TNode>(this.CreateNewNode, param, length);
    this.viterbi.Analyze(sentence, length, result);

    return result;
}
/// <summary>
/// Creates the tagger stored in <c>_meCabTagger</c>, using the <c>dic\ipadic</c> directory
/// located next to the executing assembly as the dictionary directory.
/// </summary>
private void InitializeMeCabTagger()
{
    MeCabParam taggerParam = new MeCabParam();

    string assemblyDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
    taggerParam.DicDir = Path.Combine(assemblyDirectory, @"dic\ipadic");

    _meCabTagger = MeCabTagger.Create(taggerParam);
}
/// <summary>
/// Converts dictionary entries read from standard input into an HTML document written to
/// standard output, adding search keys (headword spelling, MeCab-derived reading, genre,
/// translation spelling and cross-reference words) to every entry.
/// </summary>
/// <remarks>
/// Input is consumed three lines at a time: a title line, a content line and an empty
/// separator line. The title is a comma-separated list of headwords that may carry
/// <c>(...)</c> annotations and <c>[...]</c> genre tags; the content is a
/// semicolon-separated list of translations.
/// </remarks>
/// <param name="args">
/// Optional. When present, <c>args[0]</c> is the path of a Shift_JIS file that receives the
/// genre search definition, listing every genre that occurred more than twice.
/// </param>
/// <exception cref="System.IO.InvalidDataException">
/// An entry is not followed by its content line and an empty separator line.
/// </exception>
static void Main(string[] args)
{
    Console.WriteLine("<html><head><title>小倉 - 自然科学系和英大辞典</title></head><body>");

    var nmparam = new NMeCab.MeCabParam();
    // The dictionary path below is relative, so make the executable's directory the working directory.
    System.IO.Directory.SetCurrentDirectory(System.IO.Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location));
    nmparam.DicDir = "dic/ipadic";
    var meCabTagger = NMeCab.MeCabTagger.Create(nmparam);

    // Genre name -> number of entries tagged with it.
    var genreList = new Dictionary<string, int>();

    while (true)
    {
        var title = Console.In.ReadLine();
        if (title == null)
        {
            break;
        }

        var content = Console.In.ReadLine();
        var separator = Console.In.ReadLine();
        if (content == null || separator != "")
        {
            throw new System.IO.InvalidDataException(
                "Malformed input: entry '" + title + "' must be followed by a content line and an empty line.");
        }

        content = content.TrimStart();

        // Strip "(...)" annotations and then "[...]" genre tags from the headword list.
        // The second replacement must work on the result of the first, not on the raw title.
        var titleBase = System.Text.RegularExpressions.Regex.Replace(title, @"\(.+?\)", "");
        titleBase = System.Text.RegularExpressions.Regex.Replace(titleBase, @"\[.+?\]", "");
        var titleList = titleBase.Split(',');

        // Every "[...]" group in the raw title names a genre.
        var genres = new List<String>();
        foreach (System.Text.RegularExpressions.Match match in System.Text.RegularExpressions.Regex.Matches(title, @"\[.+?\]"))
        {
            genres.Add(match.Value.TrimStart('[').TrimEnd(']'));
        }

        var contentBase = System.Text.RegularExpressions.Regex.Replace(content, @"\[.+?\]", "");
        var contentList = contentBase.Split(';');
        for (int i = 0; i < contentList.Length; i++)
        {
            // Drop a leading English article so the translation is keyed by its first real word.
            // The "a/an" pattern previously contained a line break inside the literal and could
            // never match text obtained from ReadLine.
            contentList[i] = contentList[i].TrimStart();
            contentList[i] = System.Text.RegularExpressions.Regex.Replace(contentList[i], @"^an? ", "");
            contentList[i] = System.Text.RegularExpressions.Regex.Replace(contentList[i], @"^the ", "");
        }

        Console.WriteLine("<dl><dt>" + title + "</dt>");

        foreach (var text in titleList)
        {
            Console.WriteLine("<key type=\"表記\">" + text + "</key>");

            // The reading key is best-effort: a headword whose reading cannot be derived
            // simply gets no reading key.
            try
            {
                var furigana = new System.Text.StringBuilder();
                bool hasReading = true;

                // Start after the first node returned by the tagger and stop at the EOS node.
                for (var node = meCabTagger.ParseToNode(text).Next;
                     node != null && node.Stat != MeCabNodeStat.Eos;
                     node = node.Next)
                {
                    var features = node.Feature.Split(',');
                    if (features.Length <= 7)
                    {
                        // No field at index 7 for this node, so no complete reading is available.
                        hasReading = false;
                        break;
                    }

                    furigana.Append(features[7]);
                }

                if (hasReading)
                {
                    Console.WriteLine("<key type=\"かな\">" + furigana + "</key>");
                }
            }
            catch (Exception ex)
            {
                // Keep converting, but report the failure on stderr so it is not lost
                // and does not pollute the HTML on stdout.
                Console.Error.WriteLine("Reading lookup failed for '" + text + "': " + ex.Message);
            }
        }

        foreach (var text in genres)
        {
            Console.WriteLine("<key type=\"複合\" name=\"ジャンル\">" + text + "</key>");

            // TryGetValue leaves count at 0 for a genre seen for the first time.
            int count;
            genreList.TryGetValue(text, out count);
            genreList[text] = count + 1;
        }

        foreach (var text in contentList)
        {
            var ttext = text.Trim();
            Console.WriteLine("<key type=\"表記\">" + ttext + "</key>");

            foreach (var word in ttext.Split(' ', '[', ']', '(', ')'))
            {
                var w = word.Trim();
                if (w.Length == 0)
                {
                    // Adjacent separators yield empty tokens, which are useless as search keys.
                    continue;
                }

                Console.WriteLine("<key type=\"クロス\">" + w + "</key>");
            }
        }

        Console.WriteLine("<dd>" + content + "</dd></dl>");
    }

    Console.WriteLine("</body></html>");

    if (args.Length > 0)
    {
        using (var sw = new System.IO.StreamWriter(args[0], false, System.Text.Encoding.GetEncoding("shift_jis")))
        {
            sw.Write("<?xml version=\"1.0\" encoding=\"Shift_JIS\"?><complex><group name=\"ジャンル検索\"><category name=\"ジャンル\">");
            foreach (var kvp in genreList)
            {
                // Only genres used by more than two entries are offered as search categories.
                if (kvp.Value > 2)
                {
                    // NOTE(review): this element is written without a closing tag or "/>";
                    // confirm whether the consuming tool expects that before changing it.
                    sw.WriteLine("<subcategory name=\"" + kvp.Key + "\">");
                }
            }
            sw.WriteLine("</category><keyword name=\"キーワード1\" /><keyword name=\"キーワード2\" /></group></complex>");
        }
    }
}
/// <summary>
/// Creates a MeCabTagger instance and opens it with the given parameters.
/// </summary>
/// <param name="param">Initialization parameters</param>
/// <returns>The MeCabTagger instance</returns>
public static MeCabTagger Create(MeCabParam param)
{
    var instance = new MeCabTagger();
    instance.Open(param);

    return instance;
}
/// <summary>
/// Creates a MeCabTagger instance from a fresh <c>MeCabParam</c> on which <c>LoadDicRC</c> has been called.
/// </summary>
/// <returns>The MeCabTagger instance</returns>
public static MeCabTagger Create()
{
    var defaultParam = new MeCabParam();
    defaultParam.LoadDicRC();

    MeCabTagger tagger = MeCabTagger.Create(defaultParam);
    return tagger;
}