Exemple #1
0
 /// <summary>
 /// Opens the tokenizer and the connector with <paramref name="param"/> and
 /// then copies the analysis settings from it onto this instance.
 /// </summary>
 /// <param name="param">Source of the cost factor, theta, lattice level, partial flag and all-morphs flag.</param>
 public void Open(MeCabParam param)
 {
     // The collaborators are opened before any setting is copied.
     tokenizer.Open(param);
     connector.Open(param);

     costFactor = param.CostFactor;
     Theta = param.Theta;
     LatticeLevel = param.LatticeLevel;
     Partial = param.Partial;
     AllMorphs = param.AllMorphs;
 }
        /// <summary>
        /// Creates the MeCab tagger, pointing it at the <c>dic\ipadic</c> folder
        /// that sits next to the executing assembly.
        /// </summary>
        private void InitializeMeCabTagger()
        {
            var param = new MeCabParam
            {
                DicDir = Path.Combine(
                    Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location),
                    @"dic\ipadic")
            };

            _meCabTagger = MeCabTagger.Create(param);
        }
        /// <summary>
        /// Creates the tagger from the <c>mecab\ipadic</c> dictionary under
        /// <c>baseDir</c> and then assigns the lattice level, output format,
        /// all-morphs flag and partial flag on the parameter object.
        /// </summary>
        public void SetUp()
        {
            var settings = new MeCabParam();
            settings.DicDir = Path.Combine(baseDir, @"mecab\ipadic");

            tagger = MeCabTagger.Create(settings);

            // NOTE(review): these values are assigned after the tagger has been
            // created; whether the tagger still observes them depends on
            // MeCabTagger.Create - confirm this ordering is intended.
            settings.LatticeLevel = MeCabLatticeLevel.Zero;
            settings.OutputFormatType = "lattice";
            settings.AllMorphs = false;
            settings.Partial = true;
        }
Exemple #4
0
		/// <summary>
		/// Opens the character property data, the unknown-word dictionary
		/// (<c>unk.dic</c>), the system dictionary (<c>sys.dic</c>) and every user
		/// dictionary named in <paramref name="param"/>, then looks up the
		/// unknown-word tokens for each character category.
		/// </summary>
		/// <param name="param">Supplies the dictionary directory, the user dictionary names, the BOS/unknown features and the maximum grouping size.</param>
		/// <exception cref="MeCabInvalidFileException">
		/// A dictionary has an unexpected type, a user dictionary is not compatible
		/// with the system dictionary, or an unknown-word category cannot be found.
		/// </exception>
		public void Open(MeCabParam param)
		{
			// Slot 0 holds the system dictionary; user dictionaries follow.
			this.dic = new MeCabDictionary[param.UserDic.Length + 1];

			string prefix = param.DicDir;
			this.property.Open(prefix);

			this.unkDic.Open(Path.Combine(prefix, "unk.dic"));
			if (this.unkDic.Type != DictionaryType.Unk)
			{
				throw new MeCabInvalidFileException("not a unk dictionary", this.unkDic.FileName);
			}

			MeCabDictionary sysDic = new MeCabDictionary();
			sysDic.Open(Path.Combine(prefix, "sys.dic"));
			// presumably 0 is the system-dictionary member of DictionaryType - TODO confirm
			if (sysDic.Type != 0)
			{
				throw new MeCabInvalidFileException("not a system dictionary", sysDic.FileName);
			}
			this.dic[0] = sysDic;

			for (int userIndex = 0; userIndex < param.UserDic.Length; userIndex++)
			{
				MeCabDictionary userDic = new MeCabDictionary();
				userDic.Open(Path.Combine(prefix, param.UserDic[userIndex]));
				if (userDic.Type != DictionaryType.Usr)
				{
					throw new MeCabInvalidFileException("not a user dictionary", userDic.FileName);
				}
				if (!sysDic.IsCompatible(userDic))
				{
					throw new MeCabInvalidFileException("incompatible dictionary", userDic.FileName);
				}
				this.dic[userIndex + 1] = userDic;
			}

			// One token array per character category known to the property data.
			this.unkTokens = new Token[this.property.Size][];
			for (int category = 0; category < this.unkTokens.Length; category++)
			{
				string categoryName = this.property.Name(category);
				DoubleArray.ResultPair match = this.unkDic.ExactMatchSearch(categoryName);
				if (match.Value == -1)
				{
					throw new MeCabInvalidFileException("cannot find UNK category: " + categoryName, this.unkDic.FileName);
				}
				this.unkTokens[category] = this.unkDic.GetToken(match);
			}

			this.space = this.property.GetCharInfo(' ');
			this.bosFeature = param.BosFeature;
			this.unkFeature = param.UnkFeature;

			// A non-positive grouping size falls back to 24.
			int requestedGrouping = param.MaxGroupingSize;
			this.maxGroupingSize = requestedGrouping > 0 ? requestedGrouping : 24;
		}
Exemple #5
0
        /// <summary>
        /// Creates the tagger from the dictionary at <c>TestDataPaths.Unidic</c>
        /// with memory-mapped file access enabled, then assigns the lattice level,
        /// output format, all-morphs flag and partial flag on the parameter object.
        /// </summary>
        public void SetUp()
        {
            var settings = new MeCabParam();
            settings.DicDir = TestDataPaths.Unidic;
            settings.UseMemoryMappedFile = true;

            tagger = MeCabTagger.Create(settings);

            // NOTE(review): the assignments below happen after the tagger has been
            // created; confirm that the tagger is expected to pick them up.
            settings.LatticeLevel = MeCabLatticeLevel.Zero;
            settings.OutputFormatType = "yomi";
            settings.AllMorphs = false;
            settings.Partial = true;
        }
Exemple #6
0
        /// <summary>
        /// Creates and opens a new tokenizer and connector, then copies the
        /// analysis settings from <paramref name="param"/> onto this instance.
        /// </summary>
        /// <param name="param">
        /// Settings to open with. <c>UseMemoryMappedFile</c> selects
        /// <c>ConnectorMMF</c> over <c>Connector</c>.
        /// </param>
        public void Open(MeCabParam param)
        {
            this.tokenizer = new Tokenizer();
            this.tokenizer.Open(param);

            if (param.UseMemoryMappedFile)
            {
                this.connector = new ConnectorMMF();
            }
            else
            {
                this.connector = new Connector();
            }
            this.connector.Open(param);

            this.costFactor = param.CostFactor;
            this.Theta = param.Theta;
            this.LatticeLevel = param.LatticeLevel;
            this.Partial = param.Partial;
            this.AllMorphs = param.AllMorphs;
        }
Exemple #7
0
        /// <summary>
        /// Fetches every talk reachable from the URLs returned by the scraping
        /// service, parses each talk's message with MeCab and appends the kept
        /// words to <c>AllTalkDictionary</c> under the talk's user.
        /// </summary>
        /// <remarks>
        /// A word is kept only when its <c>Pos</c> is "名詞", its <c>Base</c> is
        /// not "*" and its <c>Pos1</c> is not "非自立".
        /// </remarks>
        /// <returns>A task that completes once every talk has been processed.</returns>
        public async Task CacheAllTalk()
        {
            var list = await _webScraypingService.ToListRubyistHotlinksUrl();

            var allTalks = new List<Talk>();

            foreach (var item in list.ToList())
            {
                allTalks.AddRange(await _webScraypingService.ToListTalks(item));
            }

            var mecabParam = new MeCabParam
            {
                DicDir = Path.Combine(AppDomain.CurrentDomain.BaseDirectory,
                                      @"..\..\..\RubyistHotlinksReports.Core\dic\ipadic")
            };

            // The tagger is disposable; release it as soon as parsing is done
            // instead of leaving it to the finalizer.
            using (var meCabTagger = MeCabTagger.Create(mecabParam))
            {
                foreach (var talk in allTalks)
                {
                    // ToList() materialises the parse result while the tagger is alive.
                    var words = ParseText(meCabTagger, talk.Message).ToList();
                    foreach (var word in words)
                    {
                        if (word.Pos != "名詞" || word.Base == "*" || word.Pos1 == "非自立")
                        {
                            continue;
                        }

                        if (!AllTalkDictionary.ContainsKey(talk.User))
                        {
                            AllTalkDictionary.Add(talk.User, new List<Word>());
                        }

                        AllTalkDictionary[talk.User].Add(word);
                    }
                }
            }
        }
Exemple #8
0
        /// <summary>
        /// Parses a fixed sample sentence with the ipadic dictionary plus a user
        /// dictionary and prints every node as "surface TAB feature".
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            var sentence = "SOS団には涼宮ハルヒ、キョン、長門有希、朝比奈みくる、古泉一樹の5人がいる。";

            var param = new MeCabParam();

            param.DicDir  = @"./dic/ipadic";
            param.UserDic = new[] { @"../userdic/haruhi.dic" };

            // Dispose the tagger deterministically once the output is written.
            using (var mecab = MeCabTagger.Create(param))
            {
                var node = mecab.ParseToNode(sentence);

                while (node != null)
                {
                    Console.WriteLine(node.Surface + "\t" + node.Feature);

                    node = node.Next;
                }
            }
        }
Exemple #9
0
    /// <summary>
    /// Parses <paramref name="sentence"/> with the ipadic dictionary under
    /// <c>Assets/dic/ipadic</c> and returns one "surface TAB feature" line per
    /// node whose <c>CharType</c> is greater than zero.
    /// </summary>
    /// <param name="sentence">Text to analyse.</param>
    /// <returns>The formatted nodes, each terminated by a line feed; empty when no node qualifies.</returns>
    private string parse(string sentence)
    {
        MeCabParam mecabParam = new MeCabParam();

        mecabParam.DicDir = @"Assets/dic/ipadic";

        // StringBuilder avoids re-copying the whole result for every node.
        var result = new System.Text.StringBuilder();

        // The tagger is disposable, so release it before returning.
        using (MeCabTagger t = MeCabTagger.Create(mecabParam))
        {
            for (MeCabNode node = t.ParseToNode(sentence); node != null; node = node.Next)
            {
                if (node.CharType > 0)
                {
                    result.Append(node.Surface).Append('\t').Append(node.Feature).Append('\n');
                }
            }
        }

        return result.ToString();
    }
        /// <summary>
        /// Parses <paramref name="i_string"/> with MeCab and returns the surface
        /// forms of all nodes whose <c>CharType</c> is greater than zero, each
        /// followed by a single space.
        /// </summary>
        /// <param name="i_string">Text to analyse.</param>
        /// <returns>The space-separated surface forms; empty when no node qualifies.</returns>
        public string Modified_PunctuationResult(string i_string)
        {
            MeCabParam param = new MeCabParam();

            param.DicDir = @"C:\Program Files (x86)\MeCab\dic\ipadic";

            // StringBuilder avoids re-copying the whole result for every node.
            var o_string = new System.Text.StringBuilder();

            using (var tagger = MeCabTagger.Create(param))
            {
                for (MeCabNode node = tagger.ParseToNode(i_string); node != null; node = node.Next)
                {
                    if (node.CharType > 0)
                    {
                        o_string.Append(node.Surface).Append(' ');
                    }
                }
            }

            return o_string.ToString();
        }
Exemple #11
0
        /// <summary>
        /// Analyses <paramref name="strInput"/> with MeCab, concatenates the
        /// reading of every result item and converts the outcome to hiragana.
        /// </summary>
        /// <remarks>
        /// When an item's reading is "*", <c>ModuleReuseClass.OutputYomigana</c>
        /// is called with the item's surface form instead. Any exception is
        /// shown in a message box, and whatever was accumulated up to that
        /// point is returned without the hiragana conversion.
        /// </remarks>
        /// <param name="strInput">Text to convert.</param>
        /// <returns>The hiragana reading, or the partial unconverted reading after an error.</returns>
        public static string GetNMeCabToFurigana(string strInput)
        {
            var yomi = new System.Text.StringBuilder();

            try
            {
                // The tagger is disposable; release it once the items are read.
                using (MeCabTagger mct = MeCabTagger.Create())
                {
                    MeCabNode   mcn = mct.ParseToNode(strInput);
                    MecabResult mcr = new MecabResult(mcn);

                    foreach (MecabResult.MecabResultItem mcri in mcr.nodes)
                    {
                        // Ordinal comparison: "*" is a marker, not linguistic text.
                        if (string.Equals(mcri.読み, "*", StringComparison.Ordinal))
                        {
                            // Even morphological analysis gave no reading, so fall back to OutputYomigana.
                            yomi.Append(ModuleReuseClass.OutputYomigana(mcri.表層形));
                        }
                        else
                        {
                            yomi.Append(mcri.読み);
                        }
                    }
                }

                // Convert katakana to hiragana (0x411 is the locale id passed to StrConv).
                return Strings.StrConv(yomi.ToString(), VbStrConv.Hiragana, 0x411);
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message, "エラー", MessageBoxButtons.OK, MessageBoxIcon.Error);
            }

            return yomi.ToString();
        }
Exemple #12
0
        /// <summary>
        /// Reads lines from standard input until end of input, parses each one
        /// with MeCab and prints "surface TAB feature" for every node whose
        /// <c>CharType</c> is greater than zero.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            var mPara = new MeCabParam();

            // Folder that contains the dictionary files (installed automatically when added through NuGet).
            mPara.DicDir = @"c:\dic\mecab-ipadic-neologd";

            // Dispose the tagger deterministically when input is exhausted.
            using (var mTagger = MeCabTagger.Create(mPara))
            {
                string line;

                while ((line = Console.ReadLine()) != null)
                {
                    for (var node = mTagger.ParseToNode(line); node != null; node = node.Next)
                    {
                        if (node.CharType > 0)
                        {
                            Console.WriteLine("{0}\t{1}", node.Surface, node.Feature);
                        }
                    }
                }
            }
        }
 /// <summary>
 /// Creates a default <c>MeCabParam</c>, stores it in <c>Parameter</c> and
 /// creates <c>Tagger</c> from it.
 /// </summary>
 public MecabHelper()
 {
     this.Parameter = new MeCabParam();
     this.Tagger = MeCabTagger.Create(this.Parameter);
 }
Exemple #14
0
        /// <summary>
        /// Builds a TF-IDF weighted inverted index over the <c>*.txt</c> files in
        /// <c>dataDir</c> and writes it to <c>index.txt</c>, unless that file
        /// already exists.
        /// </summary>
        /// <remarks>
        /// Phases: (1) term frequency per file, (2) per-word file lists ordered by
        /// weight, (3) multiplication by the inverse document frequency,
        /// (4) output of "word TAB (file, weight), ..." lines.
        /// </remarks>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            if (File.Exists("index.txt"))
            {
                Console.WriteLine("Detect index.txt");
            }
            else
            {
                Console.WriteLine("Make inverted index.");
                Console.WriteLine("Calculating Term Frequency ...");
                var weightList    = new Dictionary<string, Dictionary<string, double>>(); // word -> (file name -> weight)
                var invertedIndex = new Dictionary<string, List<string>>();               // word -> file names ordered by weight
                var targetFiles   = Directory.GetFiles(dataDir, @"*.txt");

                MeCabParam param = new MeCabParam();
                param.DicDir = dicDir;

                Stopwatch sw = new Stopwatch();

                // The tagger is only needed for the term-frequency phase; dispose it afterwards.
                // NOTE(review): one tagger instance is shared by all parallel workers -
                // confirm that MeCabTagger.ParseToNode is safe for concurrent use.
                using (MeCabTagger t = MeCabTagger.Create(param))
                {
                    sw.Start();
                    Parallel.ForEach(targetFiles, fileName =>
                    {
                        Console.WriteLine("Processing " + fileName);

                        var wordList   = new Dictionary<string, int>(); // occurrences of each word in this file
                        int wordCount  = 0;
                        var lockObject = new Object();

                        Parallel.ForEach(File.ReadLines(fileName), line =>
                        {
                            var node = t.ParseToNode(line);
                            while (node != null)
                            {
                                if (node.CharType > 0)
                                {
                                    lock (lockObject)
                                    {
                                        ++wordCount;
                                    }

                                    // Guard the index: a feature string with fewer than seven
                                    // fields must not abort the whole run.
                                    var features   = node.Feature.Split(',');
                                    var normalized = features.Length > 6 ? features[6] : null;

                                    // Use the surface form when there is no base form; otherwise the base form.
                                    var originalForm = (string.IsNullOrEmpty(normalized) || normalized == "*") ? node.Surface : normalized;

                                    lock (wordList)
                                    {
                                        if (!wordList.ContainsKey(originalForm))
                                        {
                                            wordList[originalForm] = 0;
                                        }
                                        ++wordList[originalForm];
                                    }
                                }
                                node = node.Next;
                            }
                        });

                        Parallel.ForEach(wordList.Keys, word =>
                        {
                            lock (weightList)
                            {
                                if (!weightList.ContainsKey(word))
                                {
                                    weightList[word] = new Dictionary<string, double>();
                                }
                                weightList[word][fileName] = wordList[word] / (double)wordCount;
                            }
                        });
                    });
                    sw.Stop();
                }
                Console.WriteLine($"{sw.ElapsedMilliseconds} msec Elpsed.");

                Console.WriteLine("Constructing Inverted Index ...");
                sw.Restart();
                {
                    Parallel.ForEach(weightList.Keys, word =>
                    {
                        var ks = weightList[word].Keys.OrderByDescending(fileName => weightList[word][fileName]).ThenBy(fileName => fileName).ToList();

                        // Dictionary is not safe to read while another thread writes,
                        // so the sanity check stays inside the same lock as the write.
                        lock (invertedIndex)
                        {
                            invertedIndex[word] = ks;

                            if (!invertedIndex.ContainsKey(word))
                            {
                                Console.WriteLine($"{word}は転置インデックスに含まれていません");
                            }
                        }
                    });
                }
                sw.Stop();
                Console.WriteLine($"{sw.ElapsedMilliseconds} msec Elpsed.");

                Console.WriteLine("Calculating Inverse Document Frequency and Recording Weight to weightList ...");
                sw.Restart();
                {
                    weightList = weightList.AsParallel()
                                 .ToDictionary(
                        kv1 => kv1.Key,
                        kv1 =>
                    {
                        // Divide as double: integer division would truncate the ratio
                        // before the logarithm is taken.
                        var idf = Math.Log((double)targetFiles.Length / kv1.Value.Count, 2) + 1;
                        return(kv1.Value.ToDictionary(kv2 => kv2.Key, kv2 => kv2.Value * idf));
                    });
                }
                sw.Stop();
                Console.WriteLine($"{sw.ElapsedMilliseconds} msec Elpsed.");

                // using guarantees the file is flushed and closed even if a write throws.
                using (StreamWriter writer = new StreamWriter(@"index.txt", false, Encoding.GetEncoding("utf-8")))
                {
                    foreach (var word in invertedIndex.Keys)
                    {
                        writer.Write($"{word}\t");
                        foreach (var filename in invertedIndex[word])
                        {
                            writer.Write($"({filename}, {weightList[word][filename]}), ");
                        }
                        writer.WriteLine();
                    }
                }

                Console.WriteLine("Successfully finishing all procedures.");
            }

            Console.Read();
        }
Exemple #15
0
        /// <summary>
        /// Benchmark driver: prints the elapsed time and <c>GC.GetTotalMemory</c>
        /// for creating the tagger, reading the target file, <c>ParseToNode</c>,
        /// <c>Parse</c> with the "lattice" output format and
        /// <c>ParseNBestToNode</c>, then prints size statistics of the target file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            Properties.Settings settings = Properties.Settings.Default;
            string    targetFile         = settings.TargetFile;
            Encoding  encoding           = Encoding.GetEncoding(settings.TargetEncoding);
            Stopwatch sw = new Stopwatch();

            // Wait for the user's signal to start
            Console.WriteLine("Press Enter key to start.");
            Console.ReadLine();

            Console.WriteLine("\t\t\tProcessTime\tTotalMemory");

            MeCabParam param = new MeCabParam();

            param.DicDir = @"D:\DidacticalEnigma-Data\mecab\ipadic";
#if MMF
            param.UseMemoryMappedFile = true;
#endif

            // Analysis preparation (tagger creation)
            GC.Collect();
            sw.Start();
            MeCabTagger tagger = MeCabTagger.Create(param);
            sw.Stop();
            Console.WriteLine("OpenTagger:\t\t{0:0.000}sec\t{1:#,000}byte",
                              sw.Elapsed.TotalSeconds, GC.GetTotalMemory(false));

            // Baseline: reading the file only
            using (StreamReader reader = new StreamReader(targetFile, encoding))
            {
                sw.Reset();
                GC.Collect();
                sw.Start();
                for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                {
                }
                sw.Stop();
            }
            Console.WriteLine("ReadLine:\t\t{0:0.000}sec\t{1:#,000}byte",
                              sw.Elapsed.TotalSeconds, GC.GetTotalMemory(false));

            // Analysis (node output); the result is intentionally discarded
            using (StreamReader reader = new StreamReader(targetFile, encoding))
            {
                sw.Reset();
                GC.Collect();
                sw.Start();
                for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                {
                    MeCabNode node = tagger.ParseToNode(line);
                }
                sw.Stop();
            }
            Console.WriteLine("ParseToNode:\t\t{0:0.000}sec\t{1:#,000}byte",
                              sw.Elapsed.TotalSeconds, GC.GetTotalMemory(false));

            // Analysis (string output in lattice mode)
            tagger.OutPutFormatType = "lattice";
            using (StreamReader reader = new StreamReader(targetFile, encoding))
            {
                sw.Reset();
                GC.Collect();
                sw.Start();
                for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                {
                    string ret = tagger.Parse(line);
                }
                sw.Stop();
            }
            Console.WriteLine("Parse(lattice):\t\t{0:0.000}sec\t{1:#,000}byte",
                              sw.Elapsed.TotalSeconds, GC.GetTotalMemory(false));


            // Analysis (node output for the 5 best solutions)
            tagger.LatticeLevel = MeCabLatticeLevel.One;
            using (StreamReader reader = new StreamReader(targetFile, encoding))
            {
                sw.Reset();
                GC.Collect();
                sw.Start();
                for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                {
                    int i = 0;
                    foreach (MeCabNode node in tagger.ParseNBestToNode(line))
                    {
                        // Stop enumerating once the fifth result has been produced.
                        if (++i == 5)
                        {
                            break;
                        }
                    }
                }
                sw.Stop();
            }
            Console.WriteLine("ParseNBestToNode:\t{0:0.000}sec\t{1:#,000}byte",
                              sw.Elapsed.TotalSeconds, GC.GetTotalMemory(false));

            // Information about the target file
            using (StreamReader reader = new StreamReader(targetFile, encoding))
            {
                long charCount = 0;
                long lineCount = 0;
                long wordCount = 0;
                for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                {
                    charCount += line.Length;
                    lineCount++;
                    MeCabNode node = tagger.ParseToNode(line);
                    // Skips the first node and stops before the last one
                    // (presumably the BOS/EOS sentinels - TODO confirm).
                    for (node = node.Next; node.Next != null; node = node.Next)
                    {
                        wordCount++;
                    }
                }
                Console.WriteLine();
                Console.WriteLine("Target: {0} {1:#,000}byte {2:#,000}char {3:#,000}line ({4:#,000}word)",
                                  targetFile, reader.BaseStream.Position, charCount, lineCount, wordCount);
            }

            tagger.Dispose();

            // Notify that processing has finished
            Console.WriteLine();
            Console.WriteLine("Finish!");
            Console.WriteLine("Press Enter key to close.");
            Console.ReadLine();
        }
Exemple #16
0
        /// <summary>
        /// Builds the list of candidate nodes that start at the first position in
        /// [<paramref name="begin"/>, <paramref name="end"/>) found by
        /// <c>SeekToOtherType</c>: dictionary matches first, then unknown-word
        /// candidates where required. Candidates are chained through <c>BNext</c>.
        /// </summary>
        /// <param name="begin">Start of the character range to examine.</param>
        /// <param name="end">End of the character range; clamped to <c>begin + ushort.MaxValue</c>.</param>
        /// <param name="bytesBegin">Start of the encoded bytes that correspond to <paramref name="begin"/>.</param>
        /// <param name="bytesEnd">End of the encoded bytes.</param>
        /// <param name="param">Supplies <c>MaxGroupingSize</c> for the grouped unknown-word candidate.</param>
        /// <param name="nodeAllocator">Factory called once for every dictionary node created here.</param>
        /// <returns>The head of the candidate chain, or <c>null</c> when the seek reaches <paramref name="end"/>.</returns>
        public unsafe TNode Lookup(char *begin,
                                   char *end,
                                   byte *bytesBegin,
                                   byte *bytesEnd,
                                   MeCabParam param,
                                   Func <TNode> nodeAllocator)
        {
            CharInfo cInfo;

            // Never look at more than ushort.MaxValue characters.
            if (end - begin > ushort.MaxValue)
            {
                end = begin + ushort.MaxValue;
            }

            // presumably skips leading characters of the "space" type and reports
            // the char info at the stop position - confirm in SeekToOtherType
            int   leftSpaceLen;
            char *begin2 = property.SeekToOtherType(begin, end, this.space, &cInfo, &leftSpaceLen);

            if (begin2 >= end)
            {
                return(null);
            }
            // Advance the byte pointer by the encoded size of the first leftSpaceLen chars.
            byte *bytesBegin2 = bytesBegin + this.Encoding.GetByteCount(begin, leftSpaceLen);

            TNode resultNode = null;
            var   daResults  = stackalloc DoubleArray.ResultPair[DAResultSize];

            // Collect the common-prefix matches from every dictionary.
            foreach (MeCabDictionary it in this.dic)
            {
                int n = it.CommonPrefixSearch(bytesBegin2, (int)(bytesEnd - bytesBegin2), daResults, DAResultSize);
                for (int i = 0; i < n; i++)
                {
                    // Matched length in chars; rLength additionally counts the
                    // distance from begin to begin2.
                    int length    = this.Encoding.GetCharCount(bytesBegin2, daResults->Length);
                    int rLength   = (int)(begin2 - begin) + length;
                    var tokenSize = it.GetTokenSize(daResults->Value);
                    var tokens    = it.GetTokens(daResults->Value);
                    for (int j = 0; j < tokenSize; j++)
                    {
                        var newNode = nodeAllocator();
                        newNode.Surface  = new string(begin2, 0, length);
                        newNode.Length   = length;
                        newNode.RLength  = rLength;
                        newNode.LCAttr   = tokens->LcAttr;
                        newNode.RCAttr   = tokens->RcAttr;
                        newNode.PosId    = tokens->PosId;
                        newNode.WCost    = tokens->WCost;
                        newNode.PFeature = it.GetFeature(tokens->Feature);
                        tokens++;
                        newNode.Encoding = this.Encoding;
                        newNode.Stat     = MeCabNodeStat.Nor;
                        newNode.CharType = cInfo.DefaultType;
                        // Push the new node onto the front of the candidate chain.
                        newNode.BNext    = resultNode;
                        resultNode       = newNode;
                    }

                    // NOTE(review): the result pointer is advanced and never rewound
                    // before the next dictionary's search - confirm this is intended.
                    daResults++;
                }
            }

            // Dictionary hits are sufficient unless the category's Invoke flag is set.
            if (resultNode != null && !cInfo.Invoke)
            {
                return(resultNode);
            }

            char *begin3      = begin2 + 1;
            char *groupBegin3 = null;

            // Grouped candidate: one unknown node ending where SeekToOtherType stops,
            // added only when the reported length is within MaxGroupingSize.
            if (cInfo.Group)
            {
                char *   tmp = begin3;
                CharInfo fail;
                int      cLen;
                begin3 = this.property.SeekToOtherType(begin3, end, cInfo, &fail, &cLen);
                if (cLen <= param.MaxGroupingSize)
                {
                    this.AddUnknown(ref resultNode, cInfo, begin, begin2, begin3, nodeAllocator);
                }
                groupBegin3 = begin3;
                begin3      = tmp;
            }

            // Up to cInfo.Length further unknown candidates, each ending one char later.
            for (int i = 1; i <= cInfo.Length; i++)
            {
                if (begin3 > end)
                {
                    break;
                }
                // NOTE(review): this continue does not advance begin3, so the comparison
                // stays true for the remaining iterations - confirm this is intended.
                if (begin3 == groupBegin3)
                {
                    continue;
                }
                this.AddUnknown(ref resultNode, cInfo, begin, begin2, begin3, nodeAllocator);
                // NOTE(review): begin3 may equal end here, so this reads the char at
                // end - confirm the buffer is readable at that position.
                if (!cInfo.IsKindOf(this.property.GetCharInfo(*begin3)))
                {
                    break;
                }
                begin3 += 1;
            }

            // Guarantee at least one candidate.
            if (resultNode == null)
            {
                this.AddUnknown(ref resultNode, cInfo, begin, begin2, begin3, nodeAllocator);
            }

            return(resultNode);
        }
 /// <summary>
 /// Creates the shared tagger, using the dictionary directory stored under
 /// the <c>mecabDicPath</c> application setting.
 /// </summary>
 static JapanesePOSExtractor()
 {
     meCabParam = new MeCabParam
     {
         DicDir = ConfigurationManager.AppSettings["mecabDicPath"]
     };
     tagger = MeCabTagger.Create(meCabParam);
 }
Exemple #18
0
 /// <summary>
 /// Creates the MeCab parameter object and tagger, using the dictionary
 /// under <c>Assets/dic/ipadic</c>.
 /// </summary>
 /// <param name="dictionaryPath">
 /// NOTE(review): not read by this constructor - confirm whether it was
 /// meant to replace the hard-coded dictionary directory.
 /// </param>
 public Markov(string dictionaryPath)
 {
     mecabParam = new MeCabParam
     {
         DicDir = @"Assets/dic/ipadic"
     };
     mecabTagger = MeCabTagger.Create(mecabParam);
 }
        /// <summary>
        /// Opens the character property data, the unknown-word dictionary, the
        /// system dictionary and every user dictionary named in
        /// <paramref name="param"/>, then looks up the unknown-word tokens for
        /// each character category.
        /// </summary>
        /// <param name="param">Supplies the dictionary directory, the user dictionary names, the BOS/unknown features and the maximum grouping size.</param>
        /// <exception cref="MeCabInvalidFileException">
        /// A dictionary has an unexpected type, a user dictionary is not compatible
        /// with the system dictionary, or an unknown-word category cannot be found.
        /// </exception>
        public void Open(MeCabParam param)
        {
            // Slot 0 holds the system dictionary; user dictionaries follow.
            this.dic = new MeCabDictionary[param.UserDic.Length + 1];

            string dicDir = param.DicDir;
            this.property.Open(dicDir);

            this.unkDic.Open(Path.Combine(dicDir, UnkDicFile));
            if (this.unkDic.Type != DictionaryType.Unk)
            {
                throw new MeCabInvalidFileException("not a unk dictionary", this.unkDic.FileName);
            }

            var systemDictionary = new MeCabDictionary();
            systemDictionary.Open(Path.Combine(dicDir, SysDicFile));
            if (systemDictionary.Type != DictionaryType.Sys)
            {
                throw new MeCabInvalidFileException("not a system dictionary", systemDictionary.FileName);
            }
            this.dic[0] = systemDictionary;

            for (int userIndex = 0; userIndex < param.UserDic.Length; userIndex++)
            {
                var userDictionary = new MeCabDictionary();
                userDictionary.Open(Path.Combine(dicDir, param.UserDic[userIndex]));
                if (userDictionary.Type != DictionaryType.Usr)
                {
                    throw new MeCabInvalidFileException("not a user dictionary", userDictionary.FileName);
                }
                if (!systemDictionary.IsCompatible(userDictionary))
                {
                    throw new MeCabInvalidFileException("incompatible dictionary", userDictionary.FileName);
                }
                this.dic[userIndex + 1] = userDictionary;
            }

            // One token array per character category known to the property data.
            this.unkTokens = new Token[this.property.Size][];
            for (int category = 0; category < this.unkTokens.Length; category++)
            {
                string categoryName = this.property.Name(category);
                DoubleArray.ResultPair match = this.unkDic.ExactMatchSearch(categoryName);
                if (match.Value == -1)
                {
                    throw new MeCabInvalidFileException("cannot find UNK category: " + categoryName, this.unkDic.FileName);
                }
                this.unkTokens[category] = this.unkDic.GetToken(match);
            }

            this.space = this.property.GetCharInfo(' ');

            this.bosFeature = param.BosFeature;
            this.unkFeature = param.UnkFeature;

            // A non-positive grouping size falls back to the default.
            this.maxGroupingSize = param.MaxGroupingSize;
            if (this.maxGroupingSize <= 0)
            {
                this.maxGroupingSize = DefaltMaxGroupingSize;
            }
        }
Exemple #20
0
        /// <summary>
        /// Loads all dictionaries required for tokenizing: the character property
        /// data, the unknown-word dictionary, the system dictionary and the user
        /// dictionaries listed in <paramref name="param"/>. It also resolves the
        /// unknown-word tokens of every character category.
        /// </summary>
        /// <param name="param">Supplies <c>DicDir</c>, <c>UserDic</c>, <c>BosFeature</c>, <c>UnkFeature</c> and <c>MaxGroupingSize</c>.</param>
        /// <exception cref="MeCabInvalidFileException">
        /// Thrown when a dictionary is of the wrong type, a user dictionary is
        /// incompatible with the system dictionary, or an unknown-word category
        /// has no entry.
        /// </exception>
        public void Open(MeCabParam param)
        {
            // Index 0 is reserved for the system dictionary.
            this.dic = new MeCabDictionary[param.UserDic.Length + 1];

            string root = param.DicDir;

            this.property.Open(root);

            this.unkDic.Open(Path.Combine(root, UnkDicFile));
            if (this.unkDic.Type != DictionaryType.Unk)
            {
                throw new MeCabInvalidFileException("not a unk dictionary", this.unkDic.FileName);
            }

            MeCabDictionary system = new MeCabDictionary();
            system.Open(Path.Combine(root, SysDicFile));
            if (system.Type != DictionaryType.Sys)
            {
                throw new MeCabInvalidFileException("not a system dictionary", system.FileName);
            }
            this.dic[0] = system;

            for (int slot = 1; slot <= param.UserDic.Length; slot++)
            {
                MeCabDictionary user = new MeCabDictionary();
                user.Open(Path.Combine(root, param.UserDic[slot - 1]));
                if (user.Type != DictionaryType.Usr)
                {
                    throw new MeCabInvalidFileException("not a user dictionary", user.FileName);
                }
                if (!system.IsCompatible(user))
                {
                    throw new MeCabInvalidFileException("incompatible dictionary", user.FileName);
                }
                this.dic[slot] = user;
            }

            this.unkTokens = new Token[this.property.Size][];
            for (int c = 0; c < this.unkTokens.Length; c++)
            {
                string name = this.property.Name(c);
                DoubleArray.ResultPair found = this.unkDic.ExactMatchSearch(name);
                if (found.Value == -1)
                {
                    throw new MeCabInvalidFileException("cannot find UNK category: " + name, this.unkDic.FileName);
                }
                this.unkTokens[c] = this.unkDic.GetToken(found);
            }

            this.space = this.property.GetCharInfo(' ');

            this.bosFeature = param.BosFeature;
            this.unkFeature = param.UnkFeature;

            // Values of zero or below are replaced by the default grouping size.
            int requested = param.MaxGroupingSize;
            this.maxGroupingSize = requested > 0 ? requested : DefaltMaxGroupingSize;
        }
Exemple #21
0
 /// <summary>
 /// Copies the output format type from <paramref name="param"/>.
 /// </summary>
 /// <param name="param">Settings whose <c>OutputFormatType</c> is adopted.</param>
 public void Open(MeCabParam param)
 {
     OutputFormatType = param.OutputFormatType;
 }
Exemple #22
0
 /// <summary>
 /// Adopts the <c>OutputFormatType</c> configured on <paramref name="param"/>.
 /// </summary>
 /// <param name="param">Settings to read the output format type from.</param>
 public void Open(MeCabParam param)
 {
     var formatType = param.OutputFormatType;
     this.OutputFormatType = formatType;
 }
Exemple #23
0
 /// <summary>
 /// Creates the instance by forwarding <paramref name="mecabParam"/> to the
 /// base class constructor.
 /// </summary>
 /// <param name="mecabParam">Settings handed to the base class.</param>
 public MeCabUnidic(MeCabParam mecabParam)
     : base(mecabParam)
 {
 }
Exemple #24
0
        /// <summary>
        /// Creates a MeCab tagger from a default-constructed <c>MeCabParam</c>.
        /// </summary>
        /// <returns>The MeCab tagger instance.</returns>
        private static MeCabTagger Create()
        {
            return MeCabTagger.Create(new MeCabParam());
        }
Exemple #25
0
        /// <summary>
        /// Opens the tokenizer and the connector with <paramref name="param"/>,
        /// then takes over its cost factor, theta, lattice level, partial flag
        /// and all-morphs flag.
        /// </summary>
        /// <param name="param">Settings to open with and copy from.</param>
        public void Open(MeCabParam param)
        {
            this.tokenizer.Open(param);
            this.connector.Open(param);

            this.costFactor   = param.CostFactor;
            this.Theta        = param.Theta;
            this.LatticeLevel = param.LatticeLevel;
            this.Partial      = param.Partial;
            this.AllMorphs    = param.AllMorphs;
        }
Exemple #26
0
    /// <summary>
    /// Sample: parses "東京大学" into a lattice and prints the best solution, the
    /// second and third best solutions, the nodes per start position whose
    /// probability exceeds 0.001, and the cost of the EOS node.
    /// </summary>
    static void UseLattice()
    {
        Console.WriteLine("----------------------------------------------------------------------");
        Console.WriteLine("Example of using Lattice :");
        Console.WriteLine();

        using (var tagger = NMeCabIpaDic.CreateTagger())
        {
            var prm = new MeCabParam();
            prm.LatticeLevel = MeCabLatticeLevel.Two;
            prm.Theta = 1f / 800f / 2f;

            // Obtain the lattice.
            var lattice = tagger.ParseToLattice("東京大学", prm);

            // Print the best solution.
            foreach (var best in lattice.GetBestNodes())
            {
                Console.Write(best.Surface);
                Console.CursorLeft = 10;
                Console.Write(best.Feature);
                Console.WriteLine();
            }

            Console.WriteLine("--------");

            // Print the second and third best solutions.
            var runnersUp = lattice.GetNBestResults().Skip(1).Take(2);
            foreach (var solution in runnersUp)
            {
                foreach (var morpheme in solution)
                {
                    Console.Write(morpheme.Surface);
                    Console.CursorLeft = 10;
                    Console.Write(morpheme.Feature);
                    Console.WriteLine();
                }

                Console.WriteLine("----");
            }

            Console.WriteLine("--------");

            // Print the morphemes grouped by their start position.
            for (int position = 0; position < lattice.BeginNodeList.Length - 1; position++)
            {
                var candidate = lattice.BeginNodeList[position];
                while (candidate != null)
                {
                    // Only candidates with a probability above 0.001 are shown.
                    if (!(candidate.Prob <= 0.001f))
                    {
                        Console.CursorLeft = position * 2;
                        Console.Write(candidate.Surface);
                        Console.CursorLeft = 10;
                        Console.Write(candidate.Prob.ToString("F3"));
                        Console.CursorLeft = 16;
                        Console.Write(candidate.Feature);
                        Console.WriteLine();
                    }

                    candidate = candidate.BNext;
                }
            }

            Console.WriteLine("--------");

            // Print only the final accumulated cost.
            Console.WriteLine(lattice.EosNode.Cost);
        }
    }
Exemple #27
0
 /// <summary>
 /// Creates the instance; <paramref name="mecabParam"/> is passed straight
 /// to the base class constructor.
 /// </summary>
 /// <param name="mecabParam">Settings handed to the base class.</param>
 public MeCabIpadic(MeCabParam mecabParam)
     : base(mecabParam)
 {
 }
        /// <summary>
        /// Opens the matrix file located in the dictionary directory of
        /// <paramref name="param"/> by delegating to the file-name overload.
        /// </summary>
        /// <param name="param">Settings whose <c>DicDir</c> contains the matrix file.</param>
        public void Open(MeCabParam param)
        {
            this.Open(Path.Combine(param.DicDir, MatrixFile));
        }
Exemple #29
0
 /// <summary>
 /// Creates the helper with a default-constructed <c>MeCabParam</c>.
 /// </summary>
 public MecabHelper()
 {
     this.parameter = new MeCabParam();
 }