/// <summary>
/// Returns the low 8 bits of a double-array search result's packed value.
/// </summary>
/// <param name="n">Search result whose <c>value</c> field is unpacked.</param>
/// <returns><c>n.value</c> masked with 0xFF.</returns>
public int GetSize(DoubleArray.Word n)
{
    const int lowByteMask = 0xFF;

    return n.value & lowByteMask;
}
Esempio n. 2
0
        //---------------------------------------------------------------------------

        /// <summary>
        /// Opens the unknown-word dictionary, the system dictionary and the character
        /// property table found in <paramref name="tDirectory"/>, then builds the
        /// per-category unknown-word token list.
        /// </summary>
        /// <param name="tDirectory">Directory containing the dictionary files.</param>
        /// <returns>
        /// <c>true</c> when every resource was opened and every character category has an
        /// entry in the unknown-word dictionary; otherwise <c>false</c>.
        /// </returns>
        public bool Open(string tDirectory)
        {
            // Drop whatever a previous Open() left behind before loading again.
            Close();

            // Unknown-word dictionary.
            m_UNKDictionary = new WordDictionary();
            if (!m_UNKDictionary.Open(Path.Combine(tDirectory, UNK_DIC_FILE).Replace("\\", "/")))
            {
                return false;
            }

            //----------------------------------
            // System dictionary.

            m_Dictionaries = new List<WordDictionary>();

            WordDictionary tSystemDictionary = new WordDictionary();

            if (!tSystemDictionary.Open(Path.Combine(tDirectory, SYS_DIC_FILE).Replace("\\", "/")))
            {
                return false;
            }

            // Only a dictionary whose Type is 0 is accepted as the system dictionary.
            if (tSystemDictionary.Type != 0)
            {
                return false;
            }

            // Derive the charset identifier from the dictionary's charset string.
            m_CharsetCode = GetCharsetCode(tSystemDictionary.Charset);

            // Register the system dictionary in the dictionary list.
            m_Dictionaries.Add(tSystemDictionary);

            //----------------------------------
            // Character property table.

            m_CharProperty = new CharProperty(m_CharsetCode);
            if (!m_CharProperty.Open(tDirectory))
            {
                return false;
            }

            //----------------------------------

            // Left/right sizes are taken from the last dictionary in the list.
            int tLast = m_Dictionaries.Count - 1;

            m_LSize = (uint)m_Dictionaries[tLast].LSize;
            m_RSize = (uint)m_Dictionaries[tLast].RSize;

            //----------------------------------------------------------
            // Unknown-word tokens: one (token, size) pair per character category.

            m_UNKTokens = new List<KeyValuePair<Token, int>>();

            for (int i = 0; i < m_CharProperty.Size; ++i)
            {
                byte[] tKey = m_CharProperty.GetName(i);

                DoubleArray.Word n = m_UNKDictionary.ExactMatchSearch(tKey);

                if (n.value == -1)
                {
                    // tKey is a byte[]; concatenating it directly would only print the
                    // type name, so decode it to make the warning name the category.
                    // NOTE(review): assumes category names decode as UTF-8/ASCII - confirm.
                    Debug.LogWarning("cannot find UNK category: " + System.Text.Encoding.UTF8.GetString(tKey));
                    return false;
                }

                Token tToken = m_UNKDictionary.GetToken(n);
                int tSize = m_UNKDictionary.GetSize(n);

                m_UNKTokens.Add(new KeyValuePair<Token, int>(tToken, tSize));
            }

            //----------------------------------------------------------

            // Character info for U+0020 (space); marked "ad-hoc" by the original author.
            m_Space = m_CharProperty.GetCharInfo(0x20);

            m_BOSFeature = StringToBytes(m_CharsetCode, BOS_FEATURE);

            return true;
        }
        //---------------------------------------------------------------------------

        /// <summary>
        /// Fetches a token from the token table using a double-array search result.
        /// </summary>
        /// <param name="n">Search result; <c>n.value &gt;&gt; 8</c> is the base index into the table.</param>
        /// <param name="o">Offset added to the base index (defaults to 0).</param>
        /// <returns>The token stored at the computed index.</returns>
        public Token GetToken(DoubleArray.Word n, int o = 0)
        {
            int baseIndex = n.value >> 8;

            return m_Token[baseIndex + o];
        }
Esempio n. 4
0
        // オフセット操作
        /// <summary>
        /// Builds the candidate nodes for the byte range [tBegin, tEnd) of
        /// <paramref name="tSentence"/>: first from common-prefix matches in every loaded
        /// dictionary, then (when needed) through the ADDUNKNWON helper.
        /// Positions are byte offsets into <paramref name="tSentence"/>.
        /// </summary>
        /// <param name="tSentence">Encoded sentence bytes.</param>
        /// <param name="tBegin">Offset at which the lookup starts.</param>
        /// <param name="tEnd">End offset; clamped so at most 65535 bytes are examined.</param>
        /// <returns>
        /// Head of the candidate list chained through <c>bnext</c>, or whatever
        /// ADDUNKNWON leaves in <c>result_node</c> when no dictionary entry matched.
        /// </returns>
        public Node Lookup(byte[] tSentence, int tBegin, int tEnd)
        {
            CharInfo cinfo       = new CharInfo();
            Node     result_node = null;
            int      mblen       = 0;
            int      clen        = 0;

            // Clamp the examined range to at most 65535 bytes from tBegin.
            tEnd = (tEnd - tBegin) >= 65535 ? tBegin + 65535 : tEnd;

            // Offset arithmetic: advance from tBegin relative to m_Space; cinfo, mblen and
            // clen are filled in by the call.
            // NOTE(review): presumably this skips leading whitespace - confirm in CharProperty.
            int begin2 = m_CharProperty.SeekToOtherType(tSentence, tBegin, tEnd, m_Space, ref cinfo, ref mblen, ref clen);

            // Result buffer for the prefix search, pre-filled with fresh Word instances.
            DoubleArray.Word[] daresults = new DoubleArray.Word[kResultsSize];
            for (int i = 0; i < daresults.Length; i++)
            {
                daresults[i] = new DoubleArray.Word();
            }

            int results_size = kResultsSize;

            // Query every dictionary and create one node per token of every hit.
            for (int it_p = 0; it_p < m_Dictionaries.Count; ++it_p)
            {
                WordDictionary it = m_Dictionaries[it_p];

                int n = it.CommonPrefixSearch(tSentence, begin2, tEnd - begin2, daresults, results_size);

                for (int i = 0; i < n; ++i)
                {
                    Token tToken = it.GetToken(daresults[i]);
                    int   tSize  = it.GetSize(daresults[i]);

                    for (int j = 0; j < tSize; ++j)
                    {
                        Node tNewNode = new Node();

                        ReadNodeInfo(it, it.GetRelativeToken(tToken, j), ref tNewNode);

                        // length is the matched length; rlength additionally counts the
                        // bytes between tBegin and begin2.
                        tNewNode.length  = ( ushort )daresults[i].length;
                        tNewNode.rlength = ( ushort )(begin2 - tBegin + tNewNode.length);

                        // Offset arithmetic: the surface is the sentence buffer plus a start offset.
                        tNewNode.surface_s = tSentence;
                        tNewNode.surface_o = begin2;


                        tNewNode.stat      = MECAB_NOR_NODE;
                        tNewNode.char_type = ( byte )cinfo.DefaultType;

                        // Prepend to the result list.
                        tNewNode.bnext = result_node;
                        result_node    = tNewNode;
                    }
                }
            }

            // Dictionary hits are sufficient unless cinfo.Invoke is non-zero.
            if (result_node != null && cinfo.Invoke == 0)
            {
                return(result_node);
            }

            // Offset arithmetic: position just past the first character at begin2.
            int begin3 = begin2 + mblen;

            // Offset arithmetic: end of the grouped run, set only when cinfo.Group != 0.
            // NOTE(review): 0 doubles as the "unset" value although it is also a valid offset - confirm.
            int group_begin3 = 0;

            if (begin3 > tEnd)
            {
                //--------------------------------------------------------
                // Unknown-word helper (ADDUNKNWON).
                ADDUNKNWON(cinfo, tSentence, tBegin, begin2, begin3, ref result_node);

                //--------------------------------------------------------

                if (result_node != null)
                {
                    return(result_node);
                }
            }

            if (cinfo.Group != 0)
            {
                // Remember begin3 so it can be restored after the grouping seek.
                int tmp = begin3;

                CharInfo fail = new CharInfo();
                begin3 = m_CharProperty.SeekToOtherType(tSentence, begin3, tEnd, cinfo, ref fail, ref mblen, ref clen);
                if (clen <= DEFAULT_MAX_GROUPING_SIZE)
                {
                    // Unknown-word helper over the whole group [begin2, begin3).
                    ADDUNKNWON(cinfo, tSentence, tBegin, begin2, begin3, ref result_node);
                }
                group_begin3 = begin3;
                begin3       = tmp;
            }

            // Try candidates of 1 .. cinfo.Length characters.
            for (int i = 1; i <= cinfo.Length; ++i)
            {
                if (begin3 > tEnd)
                {
                    break;
                }

                // This end offset was already handled by the grouping branch above.
                // NOTE(review): begin3 is not advanced before `continue`, so every
                // remaining iteration takes this branch as well.
                if (begin3 == group_begin3)
                {
                    continue;
                }

                // NOTE(review): clen is assigned here but is not passed to ADDUNKNWON.
                clen = i;

                // Unknown-word helper over [begin2, begin3).
                ADDUNKNWON(cinfo, tSentence, tBegin, begin2, begin3, ref result_node);

                // Stop extending once the character at begin3 is not of cinfo's kind;
                // the call also updates mblen for that character.
                if (cinfo.isKindOf(m_CharProperty.GetCharInfo(tSentence, begin3, tEnd, ref mblen)) == false)
                {
                    break;
                }

                begin3 += mblen;
            }

            // Fallback: make one more ADDUNKNWON call if nothing was produced so far.
            if (result_node == null)
            {
                // Unknown-word helper (ADDUNKNWON).
                ADDUNKNWON(cinfo, tSentence, tBegin, begin2, begin3, ref result_node);
            }

            return(result_node);
        }