Ejemplo n.º 1
0
 /// <summary>
 /// Creates an instance whose Word, Pos and Frequency members are copied
 /// from the supplied <see cref="WordAttribute"/>.
 /// </summary>
 /// <param name="wa">Attribute object that supplies the three values.</param>
 public WordAttributeStruct(WordAttribute wa)
 {
     // Plain member-wise copy; nothing is validated or transformed.
     Word = wa.Word;
     Pos = wa.Pos;
     Frequency = wa.Frequency;
 }
Ejemplo n.º 2
0
 /// <summary>
 /// Creates a word-info object seeded with the Word, Pos and Frequency
 /// values of the supplied <see cref="WordAttribute"/>.
 /// </summary>
 /// <param name="wordAttr">Attribute object that supplies the three values.</param>
 public WordInfo(WordAttribute wordAttr)
 {
     // Plain member-wise copy; the remaining members keep their defaults.
     Word = wordAttr.Word;
     Pos = wordAttr.Pos;
     Frequency = wordAttr.Frequency;
 }
Ejemplo n.º 3
0
        // Commented-out variant of MergeEnglishSpecialWord with a different
        // signature, kept as found.
        //
        //private String MergeEnglishSpecialWord(CExtractWords extractWords, ArrayList words, int start, ref int end)
        //{
        //    StringBuilder str = new StringBuilder();

        //    int i;

        //    for (i = start; i < words.Count; i++)
        //    {
        //        string word = (string)words[i];

        //        // Stop scanning when word is empty or is a separator such as a space, carriage return or line feed
        //        if (word.Trim() == "")
        //        {
        //            break;
        //        }

        //        // Stop scanning when a Chinese character is encountered
        //        if (word[0] >= 0x4e00 && word[0] <= 0x9fa5)
        //        {
        //            break;
        //        }

        //        str.Append(word);
        //    }

        //    String mergeString = str.ToString();
        //    List<T_WordInfo> exWords = extractWords.ExtractFullText(mergeString);

        //    if (exWords.Count == 1)
        //    {
        //        T_WordInfo info = (T_WordInfo)exWords[0];
        //        if (info.Word.Length == mergeString.Length)
        //        {
        //            end = i;
        //            return mergeString;
        //        }
        //    }

        //    return null;

        //}

        /// <summary>
        /// Merges English special words.
        /// If the dictionary contains English special terms such as U.S.A, C++ or C#,
        /// the English words and symbols produced by the initial segmentation
        /// need to be merged back into one word.
        /// The node referenced by <paramref name="current"/> and the run of
        /// Symbol/English nodes directly after it are replaced by a single word
        /// when their combined text has an entry in the word dictionary.
        /// </summary>
        /// <param name="orginalText">Text from which the merged word is cut, using the nodes' Position values as offsets.</param>
        /// <param name="wordInfoList">List the merged nodes are removed from and the resulting word(s) are inserted into.</param>
        /// <param name="current">
        /// On entry, the first node of the candidate run. After a successful merge it
        /// refers to the first node behind the merged run (null when the run reached
        /// the end of the list). It is left untouched when the method returns false.
        /// </param>
        /// <returns>
        /// true when the nodes were merged; false when no Symbol/English node follows
        /// <paramref name="current"/> or the dictionary has no entry for the combined text.
        /// </returns>
        private bool MergeEnglishSpecialWord(string orginalText, SuperLinkedList <WordInfo> wordInfoList, ref SuperLinkedListNode <WordInfo> current)
        {
            // Advance over every Symbol/English node that directly follows `current`,
            // remembering the text offset at which the last of them ends.
            SuperLinkedListNode<WordInfo> stop = current.Next;
            int endOffset = -1;

            while (stop != null
                   && (stop.Value.WordType == WordType.Symbol || stop.Value.WordType == WordType.English))
            {
                endOffset = stop.Value.Position + stop.Value.Word.Length;
                stop = stop.Next;
            }

            // Nothing mergeable follows the current node.
            if (endOffset < 0)
            {
                return false;
            }

            int startOffset = current.Value.Position;
            int mergedLength = endOffset - startOffset;
            string candidate = orginalText.Substring(startOffset, mergedLength);

            WordAttribute attribute = _WordDictionary.GetWordAttr(candidate);

            // Only merge when the combined text is a known dictionary word.
            if (attribute == null)
            {
                return false;
            }

            // Drop every node of the run, from `current` up to (not including) `stop`.
            // The successor is fetched before the node is removed from the list.
            while (current != stop)
            {
                SuperLinkedListNode<WordInfo> obsolete = current;
                current = obsolete.Next;
                wordInfoList.Remove(obsolete);
            }

            WordInfo merged = new WordInfo(
                new Dict.PositionLength(startOffset, mergedLength, attribute),
                orginalText,
                _Parameters);

            merged.WordType = WordType.English;
            merged.Rank = _Parameters.EnglishRank;

            // NOTE(review): ToLower() without arguments uses the current culture;
            // confirm this matches how the dictionary lower-cases its entries.
            if (_Options.EnglishSegment)
            {
                string lowered = merged.Word.ToLower();

                // Keep the original-cased word too, but only when it differs
                // from the lower-cased form that is inserted afterwards.
                if (lowered != merged.Word)
                {
                    if (current != null)
                    {
                        wordInfoList.AddBefore(current, merged);
                    }
                    else
                    {
                        wordInfoList.AddLast(merged);
                    }
                }

                merged = new WordInfo(
                    lowered,
                    merged.Position,
                    merged.Pos,
                    merged.Frequency,
                    _Parameters.EnglishLowerRank,
                    merged.WordType,
                    merged.OriginalWordType);
            }
            else if (_Options.IgnoreCapital)
            {
                merged.Word = merged.Word.ToLower();
            }

            // Insert the resulting word where the removed run used to be.
            if (current != null)
            {
                wordInfoList.AddBefore(current, merged);
            }
            else
            {
                wordInfoList.AddLast(merged);
            }

            return true;
        }