Exemple #1
0
        /// <summary>
        /// Removes from the list every word that <c>_StopWord.IsStopWord</c> accepts, passing it the
        /// FilterEnglish/FilterNumeric options and the matching length parameters.
        /// </summary>
        /// <param name="wordInfoList">Word list filtered in place; a null list is left untouched.</param>
        private void FilterStopWord(SuperLinkedList <WordInfo> wordInfoList)
        {
            if (wordInfoList == null)
            {
                return;
            }

            for (SuperLinkedListNode <WordInfo> node = wordInfoList.First; node != null;)
            {
                bool isStopWord = _StopWord.IsStopWord(node.Value.Word,
                                                       _Options.FilterEnglish, _Parameters.FilterEnglishLength,
                                                       _Options.FilterNumeric, _Parameters.FilterNumericLength);

                // Remember the successor before the node is possibly unlinked from the list.
                SuperLinkedListNode <WordInfo> following = node.Next;

                if (isStopWord)
                {
                    wordInfoList.Remove(node);
                }

                node = following;
            }
        }
Exemple #2
0
        /// <summary>
        /// Extracts a version number: the tokens that follow the "V" token are replaced by
        /// version words inserted right after it.
        /// </summary>
        /// <param name="result">Segmentation result list; modified in place.</param>
        /// <param name="vWordNode">Node of the "V" character that introduces the version number.</param>
        /// <param name="lastNode">Node of the last word belonging to the version number.</param>
        /// <param name="versionBeginPosition">Start position of the first word of the version number.</param>
        private void Pickup(SuperLinkedList<WordInfo> result, SuperLinkedListNode<WordInfo> vWordNode,
            SuperLinkedListNode<WordInfo> lastNode, int versionBeginPosition)
        {
            // Capture the end offset and the stop node before any node is unlinked.
            int versionEndPosition = lastNode.Value.Position + lastNode.Value.Word.Length;
            SuperLinkedListNode<WordInfo> stopNode = lastNode.Next;

            // Remove everything after the "V" node up to and including lastNode.
            for (SuperLinkedListNode<WordInfo> doomed = vWordNode.Next;
                 doomed != stopNode;
                 doomed = vWordNode.Next)
            {
                result.Remove(doomed);
            }

            // An upper-case "V" is stored in lower case.
            if (vWordNode.Value.Word == "V")
            {
                vWordNode.Value.Word = "v";
            }

            string version = _Text.Substring(versionBeginPosition, versionEndPosition - versionBeginPosition);

            // Emit the prefixes that end at the second and later dots ("1.2" for "1.2.3").
            // The part before the first dot is not extracted on its own. A dot at index 0 ends the scan.
            int dotsSeen = 0;

            for (int dot = version.IndexOf('.', 0); dot > 0; dot = version.IndexOf('.', dot + 1))
            {
                if (dotsSeen > 0)
                {
                    WordInfo partialVersion = new WordInfo(version.Substring(0, dot), POS.POS_D_K, 0);
                    partialVersion.Rank = 1; // weight of a sub-version
                    partialVersion.Position = versionBeginPosition;
                    partialVersion.WordType = WordType.None;
                    result.AddAfter(vWordNode, partialVersion);
                }

                dotsSeen++;
            }

            // Emit the complete version number; it ends up directly after the "V" node.
            WordInfo fullVersion = new WordInfo(version, POS.POS_D_K, 0);
            fullVersion.Rank = 5; // weight of the complete version number
            fullVersion.Position = versionBeginPosition;
            fullVersion.WordType = WordType.None;
            result.AddAfter(vWordNode, fullVersion);
        }
Exemple #3
0
        /// <summary>
        /// Extracts a version number: removes the tokens between the "V" token and the end of the
        /// version, then inserts the version words directly after the "V" token.
        /// </summary>
        /// <param name="result">Segmentation result list; modified in place.</param>
        /// <param name="vWordNode">Node of the "V" character that introduces the version number.</param>
        /// <param name="lastNode">Node of the last word belonging to the version number.</param>
        /// <param name="versionBeginPosition">Start position of the first word of the version number.</param>
        private void Pickup(SuperLinkedList <WordInfo> result, SuperLinkedListNode <WordInfo> vWordNode,
                            SuperLinkedListNode <WordInfo> lastNode, int versionBeginPosition)
        {
            // Both values must be read before the removal loop unlinks lastNode.
            int versionLength = lastNode.Value.Position + lastNode.Value.Word.Length - versionBeginPosition;
            SuperLinkedListNode <WordInfo> boundary = lastNode.Next;

            // Keep deleting the node right after "V" until the boundary node follows it.
            while (vWordNode.Next != boundary)
            {
                result.Remove(vWordNode.Next);
            }

            // Normalise an upper-case "V" to lower case.
            if (vWordNode.Value.Word == "V")
            {
                vWordNode.Value.Word = "v";
            }

            string version = _Text.Substring(versionBeginPosition, versionLength);

            int dotIndex  = version.IndexOf('.', 0);
            int dotNumber = 0;

            // Walk the dots left to right; a dot at index 0 (or no dot at all) stops the walk.
            while (dotIndex > 0)
            {
                // The text before the first dot is not extracted; every later dot yields
                // the prefix that ends just before it.
                if (dotNumber > 0)
                {
                    WordInfo subVersion = new WordInfo(version.Substring(0, dotIndex), POS.POS_D_K, 0);
                    subVersion.Rank     = 1; // weight of a sub-version
                    subVersion.Position = versionBeginPosition;
                    subVersion.WordType = WordType.None;
                    result.AddAfter(vWordNode, subVersion);
                }

                dotNumber++;
                dotIndex = version.IndexOf('.', dotIndex + 1);
            }

            // Finally add the complete version number right after the "V" node.
            WordInfo wholeVersion = new WordInfo(version, POS.POS_D_K, 0);
            wholeVersion.Rank     = 5; // weight of the complete version number
            wholeVersion.Position = versionBeginPosition;
            wholeVersion.WordType = WordType.None;
            result.AddAfter(vWordNode, wholeVersion);
        }
Exemple #4
0
        /// <summary>
        /// Builds the initial segmentation of <paramref name="text"/> with <c>GetInitSegment</c> and
        /// passes it, together with its first node, to <c>ActionSegment</c>.
        /// </summary>
        /// <param name="text">Text to segment.</param>
        /// <returns>The list created by <c>GetInitSegment</c>, after <c>ActionSegment</c> has run on it.</returns>
        private SuperLinkedList<WordInfo> PreSegment(String text)
        {
            SuperLinkedList<WordInfo> segments = GetInitSegment(text);

            this.ActionSegment(segments, segments.First, text);

            return segments;
        }
Exemple #5
0
        /// <summary>
        /// Adds a separate "Nokia" word after every English word that contains "Nokia"
        /// (case-insensitively) but is not exactly "Nokia".
        /// </summary>
        /// <remarks>
        /// The comparison is ordinal, so the match does not depend on the current culture and the
        /// index returned by <c>IndexOf</c> is a plain character offset that can safely be added
        /// to the word position.
        /// </remarks>
        /// <param name="result">Segmentation result, modified in place. A null list is ignored.</param>
        public void AfterSegment(SuperLinkedList <WordInfo> result)
        {
            if (result == null)
            {
                return;
            }

            const string brand = "Nokia";

            SuperLinkedListNode <WordInfo> node = result.First;

            while (node != null)
            {
                if (node.Value.WordType == WordType.English)
                {
                    string word     = node.Value.Word;
                    int    position = word.IndexOf(brand, StringComparison.OrdinalIgnoreCase);

                    if (position >= 0 &&
                        !word.Equals(brand, StringComparison.OrdinalIgnoreCase))
                    {
                        // The new word copies the attributes of the containing word; only the
                        // text and the position differ.
                        WordInfo wordinfo = new WordInfo(brand, node.Value.Position + position, node.Value.Pos,
                                                         node.Value.Frequency, node.Value.Rank, node.Value.WordType, node.Value.OriginalWordType);

                        // Continue from the inserted node so that it is not examined itself.
                        node = result.AddAfter(node, wordinfo);
                    }
                }

                node = node.Next;
            }
        }
Exemple #6
0
        /// <summary>
        /// Builds the word list for <paramref name="orginalText"/> from the dictionary match candidates in
        /// <paramref name="positionLenArr"/> and then merges in the words returned by <c>GetUnknowWords</c>.
        /// </summary>
        /// <param name="positionLenArr">Position/length candidates for the text.</param>
        /// <param name="orginalText">Text being segmented.</param>
        /// <param name="count">Candidate count (presumably the number of valid entries in
        /// <paramref name="positionLenArr"/> - confirm against callers). When it is 0 the text is returned
        /// either as one word or split into single characters.</param>
        /// <returns>The list of words produced for the text.</returns>
        public SuperLinkedList <WordInfo> Match(PanGu.Dict.PositionLength[] positionLenArr, string orginalText, int count)
        {
            // Fall back to the globally configured options/parameters when none were assigned.
            if (_Options == null)
            {
                _Options = Setting.PanGuSettings.Config.MatchOptions;
            }

            if (_Parameters == null)
            {
                _Parameters = Setting.PanGuSettings.Config.Parameters;
            }

            // One mask entry per character; filled in below while the result list is built.
            int[] masks      = new int[orginalText.Length];
            int   redundancy = _Parameters.Redundancy;

            SuperLinkedList <WordInfo> result = new SuperLinkedList <WordInfo>();

            if (count == 0)
            {
                if (_Options.UnknownWordIdentify)
                {
                    // No candidates: return the whole text as a single word.
                    WordInfo wi = new WordInfo();
                    wi.Word     = orginalText;
                    wi.Position = 0;
                    wi.WordType = WordType.None;
                    wi.Rank     = 1;
                    result.AddFirst(wi);
                    return(result);
                }
                else
                {
                    // No candidates: return one word per character.
                    int position = 0;
                    foreach (char c in orginalText)
                    {
                        WordInfo wi = new WordInfo();
                        wi.Word     = c.ToString();
                        wi.Position = position++;
                        wi.WordType = WordType.None;
                        wi.Rank     = 1;
                        result.AddLast(wi);
                    }

                    return(result);
                }
            }

            Node[] leafNodeArray = GetLeafNodeArray(positionLenArr, orginalText.Length, count);

            //The next two statements are the old algorithm that does not use isolated-point splitting
            //Node[] leafNodeArray = GetLeafNodeArrayCore(positionLenArr, orginalText.Length, count);
            //Framework.QuickSort<Node>.TopSort(leafNodeArray,
            //    _LeafNodeList.Count, (int)Math.Min(TopRecord, _LeafNodeList.Count), new NodeComparer());

            int j = 0;

            // Collect the first TopRecord word sequences
            foreach (Node node in leafNodeArray)
            {
                // A null entry ends the used part of the array.
                if (leafNodeArray[j] == null)
                {
                    break;
                }

                if (j >= TopRecord || j >= leafNodeArray.Length)
                {
                    break;
                }

                Dict.PositionLength[] comb = new PanGu.Dict.PositionLength[node.AboveCount];

                int  i   = node.AboveCount - 1;
                Node cur = node;

                // Follow the Parent links from the leaf, filling the combination from back to front.
                while (i >= 0)
                {
                    comb[i] = cur.PositionLength;
                    cur     = cur.Parent;
                    i--;
                }

                _AllCombinations.Add(comb);

                j++;
            }

            //Force single word
            //(forced unigram segmentation: one entry per character)
            if (_Options.ForceSingleWord)
            {
                Dict.PositionLength[] comb = new PanGu.Dict.PositionLength[orginalText.Length];

                for (int i = 0; i < comb.Length; i++)
                {
                    PanGu.Dict.PositionLength pl = new PanGu.Dict.PositionLength(i, 1, new WordAttribute(orginalText[i].ToString(), POS.POS_UNK, 0));
                    pl.Level = 3;
                    comb[i]  = pl;
                }

                _AllCombinations.Add(comb);
            }

            if (_AllCombinations.Count > 0)
            {
                ICollection <Dict.PositionLength> positionCollection = MergeAllCombinations(redundancy);

                foreach (Dict.PositionLength pl in positionCollection)
                //for (int i = 0; i < _AllCombinations[0].Length; i++)
                {
                    //result.AddLast(new WordInfo(_AllCombinations[0][i], orginalText));
                    result.AddLast(new WordInfo(pl, orginalText, _Parameters));

                    // Mark covered characters: 2 for every character of a multi-character word,
                    // 1 for the character of a single-character word.
                    if (pl.Length > 1)
                    {
                        for (int k = pl.Position;
                             k < pl.Position + pl.Length; k++)
                        {
                            masks[k] = 2;
                        }
                    }
                    else
                    {
                        masks[pl.Position] = 1;
                    }
                }
            }

            #region 合并未登录词

            bool            needRemoveSingleWord;
            List <WordInfo> unknownWords = GetUnknowWords(masks, orginalText, out needRemoveSingleWord);

            //Merge them into the matching positions of the result sequence
            if (unknownWords.Count > 0)
            {
                SuperLinkedListNode <WordInfo> cur = result.First;

                if (needRemoveSingleWord && !_Options.ForceSingleWord)
                {
                    //Remove the single-character words that need to be removed

                    while (cur != null)
                    {
                        if (cur.Value.Word.Length == 1)
                        {
                            // NOTE(review): the value 11 is not assigned in this method; presumably
                            // GetUnknowWords writes it into masks - confirm.
                            if (masks[cur.Value.Position] == 11)
                            {
                                SuperLinkedListNode <WordInfo> removeItem = cur;

                                cur = cur.Next;

                                result.Remove(removeItem);

                                continue;
                            }
                        }

                        cur = cur.Next;
                    }
                }

                cur = result.First;

                j = 0;

                // Insert each unknown word in front of the first result word whose position is
                // not smaller than its own.
                while (cur != null)
                {
                    if (cur.Value.Position >= unknownWords[j].Position)
                    {
                        result.AddBefore(cur, unknownWords[j]);
                        j++;
                        if (j >= unknownWords.Count)
                        {
                            break;
                        }
                    }

                    // Advance only while the current word is still before the next unknown word.
                    if (cur.Value.Position < unknownWords[j].Position)
                    {
                        cur = cur.Next;
                    }
                }

                // Unknown words that were not inserted above go to the end of the list.
                while (j < unknownWords.Count)
                {
                    result.AddLast(unknownWords[j]);
                    j++;
                }
            }


            #endregion



            return(result);
        }
Exemple #7
0
        /// <summary>
        /// Post-processes a segmentation result: optionally adds synonym words after each word and
        /// wildcard words before each word, depending on <c>_Options</c>.
        /// </summary>
        /// <param name="orginalText">Original text; only referenced by the commented-out custom-rule code.</param>
        /// <param name="result">Segmentation result, modified in place.</param>
        private void ProcessAfterSegment(string orginalText, SuperLinkedList <WordInfo> result)
        {
            //Synonym matching
            if (_Options.SynonymOutput)
            {
                SuperLinkedListNode <WordInfo> node = result.First;

                while (node != null)
                {
                    List <string> synonyms = _Synonym.GetSynonyms(node.Value.Word);

                    if (synonyms != null)
                    {
                        foreach (string word in synonyms)
                        {
                            // Each synonym is inserted after the previously inserted one; because node is
                            // reassigned, later synonyms copy Position/Pos/Frequency/WordType from the
                            // synonym inserted just before them.
                            // NOTE(review): the rank passed here is _Parameters.SymbolRank - confirm that
                            // a symbol rank (rather than a synonym rank) is intended.
                            node = result.AddAfter(node, new WordInfo(word, node.Value.Position,
                                                                      node.Value.Pos, node.Value.Frequency, _Parameters.SymbolRank,
                                                                      WordType.Synonym, node.Value.WordType));
                        }
                    }

                    node = node.Next;
                }
            }

            //Wildcard matching
            if (_Options.WildcardOutput)
            {
                SuperLinkedListNode <WordInfo> node = result.First;

                while (node != null)
                {
                    List <Dict.Wildcard.WildcardInfo> wildcards =
                        _Wildcard.GetWildcards(node.Value.Word);

                    if (wildcards.Count > 0)
                    {
                        for (int i = 0; i < wildcards.Count; i++)
                        {
                            Dict.Wildcard.WildcardInfo wildcardInfo = wildcards[i];

                            // Without WildcardSegment only the first segment is used.
                            int count = wildcardInfo.Segments.Count;
                            if (!_Options.WildcardSegment)
                            {
                                count = 1;
                            }

                            for (int j = 0; j < count; j++)
                            {
                                WordInfo wi = wildcardInfo.Segments[j];

                                // Do not output a segment identical to the word itself.
                                if (wi.Word == node.Value.Word)
                                {
                                    continue;
                                }

                                // The segment position is shifted by the position of the current word.
                                wi.Rank      = _Parameters.WildcardRank;
                                wi.Position += node.Value.Position;
                                result.AddBefore(node, wi);
                            }
                        }
                    }

                    node = node.Next;

                    if (node != null)
                    {
                        //Skip the duplicate output produced by multi-way segmentation of English words
                        if (node.Previous.Value.Word.ToLower() == node.Value.Word.ToLower())
                        {
                            node = node.Next;
                        }
                    }
                }
            }

            //User-defined custom rule
            //if (_Options.CustomRule)
            //{
            //    ICustomRule rule = CustomRule.GetCustomRule(_Parameters.CustomRuleAssemblyFileName,
            //        _Parameters.CustomRuleFullClassName);

            //    if (rule != null)
            //    {
            //        rule.Text = orginalText;
            //        rule.AfterSegment(result);
            //    }

            //}
        }
Exemple #8
0
        /// <summary>
        /// Tries to merge the current word and the run of Symbol/English words that follows it into a
        /// single English word, provided the merged text is found by <c>_WordDictionary.GetWordAttr</c>.
        /// </summary>
        /// <param name="orginalText">Text the word positions refer to.</param>
        /// <param name="wordInfoList">Word list, modified in place when a merge happens.</param>
        /// <param name="current">Node to start from. After a successful merge it refers to the node that
        /// followed the merged run (null when the run reached the end of the list).</param>
        /// <returns>true when the words were merged; false when nothing was changed.</returns>
        private bool MergeEnglishSpecialWord(string orginalText, SuperLinkedList <WordInfo> wordInfoList, ref SuperLinkedListNode <WordInfo> current)
        {
            SuperLinkedListNode <WordInfo> cur = current;

            cur = cur.Next;

            // End offset of the last Symbol/English word seen; stays -1 when none follows.
            int last = -1;

            while (cur != null)
            {
                if (cur.Value.WordType == WordType.Symbol || cur.Value.WordType == WordType.English)
                {
                    last = cur.Value.Position + cur.Value.Word.Length;
                    cur  = cur.Next;
                }
                else
                {
                    break;
                }
            }


            if (last >= 0)
            {
                int first = current.Value.Position;

                string newWord = orginalText.Substring(first, last - first);

                WordAttribute wa = _WordDictionary.GetWordAttr(newWord);

                // The merged text has no dictionary attribute: leave the list untouched.
                if (wa == null)
                {
                    return(false);
                }

                // Remove the whole run, from current up to (not including) cur.
                while (current != cur)
                {
                    SuperLinkedListNode <WordInfo> removeItem = current;
                    current = current.Next;
                    wordInfoList.Remove(removeItem);
                }

                WordInfo newWordInfo = new WordInfo(new Dict.PositionLength(first, last - first,
                                                                            wa), orginalText, _Parameters);

                newWordInfo.WordType = WordType.English;
                newWordInfo.Rank     = _Parameters.EnglishRank;

                if (_Options.EnglishSegment)
                {
                    string lowerWord = newWordInfo.Word.ToLower();

                    // When the word contains upper-case letters the original spelling is inserted
                    // as well, ahead of the lower-case word created below.
                    if (lowerWord != newWordInfo.Word)
                    {
                        if (current == null)
                        {
                            wordInfoList.AddLast(newWordInfo);
                        }
                        else
                        {
                            wordInfoList.AddBefore(current, newWordInfo);
                        }
                    }

                    // The lower-case word carries EnglishLowerRank and is inserted by the code below.
                    newWordInfo = new WordInfo(lowerWord, newWordInfo.Position, newWordInfo.Pos, newWordInfo.Frequency, _Parameters.EnglishLowerRank, newWordInfo.WordType,
                                               newWordInfo.OriginalWordType);
                }
                else if (_Options.IgnoreCapital)
                {
                    newWordInfo.Word = newWordInfo.Word.ToLower();
                }

                // Insert where the removed run used to be.
                if (current == null)
                {
                    wordInfoList.AddLast(newWordInfo);
                }
                else
                {
                    wordInfoList.AddBefore(current, newWordInfo);
                }

                return(true);
            }


            return(false);
        }
Exemple #9
0
        /// <summary>
        /// Performs the first segmentation pass: starts from the list returned by <c>GetInitSegment</c>
        /// and processes every word according to its <c>WordType</c> (Chinese dictionary matching,
        /// English and numeric handling, symbol ranking).
        /// </summary>
        /// <param name="text">Text to segment.</param>
        /// <returns>The processed word list.</returns>
        private SuperLinkedList <WordInfo> PreSegment(String text)
        {
            // NOTE(review): the catch block below only rethrows, so the try/catch has no effect.
            try
            {
                SuperLinkedList <WordInfo> result = GetInitSegment(text);

                SuperLinkedListNode <WordInfo> cur = result.First;

                while (cur != null)
                {
                    // Drop space words when IgnoreSpace is set.
                    if (_Options.IgnoreSpace)
                    {
                        if (cur.Value.WordType == WordType.Space)
                        {
                            SuperLinkedListNode <WordInfo> lst = cur;
                            cur = cur.Next;
                            result.Remove(lst);
                            continue;
                        }
                    }

                    switch (cur.Value.WordType)
                    {
                    case WordType.SimplifiedChinese:

                        string inputText = cur.Value.Word;

                        WordType originalWordType = WordType.SimplifiedChinese;

                        //_Options.TraditionalChineseEnabled = true;
                        if (_Options.TraditionalChineseEnabled)
                        {
                            string simplified = WordDictionary.ToSimlifiedChinese(cur.Value.Word);

                            // If the conversion changed the text, the word is treated as traditional
                            // Chinese and the converted text is used for the dictionary lookup.
                            if (simplified != cur.Value.Word)
                            {
                                originalWordType = WordType.TraditionalChinese;
                                inputText        = simplified;
                            }
                        }

                        Framework.AppendList <Dict.PositionLength> pls = _WordDictionary.GetAllMatchs(inputText, _Options.ChineseNameIdentify);
                        Match.ChsFullTextMatch chsMatch = new Match.ChsFullTextMatch(_WordDictionary);
                        chsMatch.Options    = _Options;
                        chsMatch.Parameters = _Parameters;
                        // Note: the match runs on the original word text, not on inputText.
                        SuperLinkedList <WordInfo> chsMatchWords = chsMatch.Match(pls.Items, cur.Value.Word, pls.Count);

                        SuperLinkedListNode <WordInfo> curChsMatch = chsMatchWords.First;
                        while (curChsMatch != null)
                        {
                            WordInfo wi = curChsMatch.Value;

                            // Shift the matched word by the position of the word it came from.
                            wi.Position        += cur.Value.Position;
                            wi.OriginalWordType = originalWordType;
                            wi.WordType         = originalWordType;

                            if (_Options.OutputSimplifiedTraditional)
                            {
                                if (_Options.TraditionalChineseEnabled)
                                {
                                    string   newWord;
                                    WordType wt;

                                    // Produce the word in the other script.
                                    if (originalWordType == WordType.SimplifiedChinese)
                                    {
                                        newWord = WordDictionary.ToTraditionalChinese(wi.Word);
                                        wt      = WordType.TraditionalChinese;
                                    }
                                    else
                                    {
                                        newWord = WordDictionary.ToSimlifiedChinese(wi.Word);
                                        wt      = WordType.SimplifiedChinese;
                                    }

                                    // Add the converted form in front of the matched word when it differs.
                                    if (newWord != wi.Word)
                                    {
                                        WordInfo newWordInfo = new WordInfo(wi);
                                        newWordInfo.Word             = newWord;
                                        newWordInfo.OriginalWordType = originalWordType;
                                        newWordInfo.WordType         = wt;
                                        newWordInfo.Rank             = _Parameters.SimplifiedTraditionalRank;
                                        newWordInfo.Position         = wi.Position;
                                        chsMatchWords.AddBefore(curChsMatch, newWordInfo);
                                    }
                                }
                            }

                            curChsMatch = curChsMatch.Next;
                        }

                        // Insert the matched words after the current word, remove the current word and
                        // continue with the node that follows the one returned by AddAfter.
                        SuperLinkedListNode <WordInfo> lst        = result.AddAfter(cur, chsMatchWords);
                        SuperLinkedListNode <WordInfo> removeItem = cur;
                        cur = lst.Next;
                        result.Remove(removeItem);
                        break;

                    case WordType.English:
                        cur.Value.Rank = _Parameters.EnglishRank;
                        List <string> output;
                        cur.Value.Word = ConvertChineseCapitalToAsiic(cur.Value.Word);

                        if (_Options.EnglishSegment)
                        {
                            string lower = cur.Value.Word.ToLower();

                            // Add the lower-case form in front of the word when it differs.
                            if (lower != cur.Value.Word)
                            {
                                result.AddBefore(cur, new WordInfo(lower, cur.Value.Position, POS.POS_A_NX, 1,
                                                                   _Parameters.EnglishLowerRank, WordType.English, WordType.English));
                            }

                            string stem = GetStem(lower);

                            // Add the stem as well when there is one and it differs from the lower-case form.
                            if (!string.IsNullOrEmpty(stem))
                            {
                                if (lower != stem)
                                {
                                    result.AddBefore(cur, new WordInfo(stem, cur.Value.Position, POS.POS_A_NX, 1,
                                                                       _Parameters.EnglishStemRank, WordType.English, WordType.English));
                                }
                            }
                        }
                        else if (_Options.IgnoreCapital)
                        {
                            cur.Value.Word = cur.Value.Word.ToLower();
                        }

                        if (_Options.EnglishMultiDimensionality)
                        {
                            // Only words that contain a digit or an underscore are split.
                            bool needSplit = false;

                            foreach (char c in cur.Value.Word)
                            {
                                if ((c >= '0' && c <= '9') || (c == '_'))
                                {
                                    needSplit = true;
                                    break;
                                }
                            }

                            if (needSplit)
                            {
                                if (Framework.Regex.GetMatchStrings(cur.Value.Word, PATTERNS, true, out output))
                                {
                                    // Determine whether there is more than one non-empty part;
                                    // counting stops as soon as a second one is found.
                                    int outputCount = 0;

                                    foreach (string str in output)
                                    {
                                        if (!string.IsNullOrEmpty(str))
                                        {
                                            outputCount++;

                                            if (outputCount > 1)
                                            {
                                                break;
                                            }
                                        }
                                    }


                                    if (outputCount > 1)
                                    {
                                        int position = cur.Value.Position;

                                        foreach (string splitWord in output)
                                        {
                                            if (string.IsNullOrEmpty(splitWord))
                                            {
                                                continue;
                                            }

                                            WordInfo wi;

                                            // A part that starts with a digit becomes a Numeric word,
                                            // every other part an English word.
                                            if (splitWord[0] >= '0' && splitWord[0] <= '9')
                                            {
                                                wi                  = new WordInfo(splitWord, POS.POS_A_M, 1);
                                                wi.Position         = position;
                                                wi.Rank             = _Parameters.NumericRank;
                                                wi.OriginalWordType = WordType.English;
                                                wi.WordType         = WordType.Numeric;
                                            }
                                            else
                                            {
                                                wi                  = new WordInfo(splitWord, POS.POS_A_NX, 1);
                                                wi.Position         = position;
                                                wi.Rank             = _Parameters.EnglishRank;
                                                wi.OriginalWordType = WordType.English;
                                                wi.WordType         = WordType.English;
                                            }

                                            // Parts are placed in front of the word; the running position
                                            // advances by the length of each non-empty part.
                                            result.AddBefore(cur, wi);
                                            position += splitWord.Length;
                                        }
                                    }
                                }
                            }
                        }

                        // On a successful merge cur has already been moved by the callee (ref argument).
                        if (!MergeEnglishSpecialWord(text, result, ref cur))
                        {
                            cur = cur.Next;
                        }

                        break;

                    case WordType.Numeric:
                        cur.Value.Word = ConvertChineseCapitalToAsiic(cur.Value.Word);
                        cur.Value.Rank = _Parameters.NumericRank;

                        // On a successful merge cur has already been moved by the callee (ref argument).
                        if (!MergeEnglishSpecialWord(text, result, ref cur))
                        {
                            cur = cur.Next;
                        }

                        //cur = cur.Next;
                        break;

                    case WordType.Symbol:
                        cur.Value.Rank = _Parameters.SymbolRank;
                        cur            = cur.Next;
                        break;

                    default:
                        cur = cur.Next;
                        break;
                    }
                }


                return(result);
            }
            catch (Exception)
            {
                throw;
            }
        }
Exemple #10
0
        /// <summary>
        /// 合并英文专用词。
        /// 如果字典中有英文专用词如U.S.A, C++.C#等
        /// 需要对初步分词后的英文和字母进行合并
        /// </summary>
        /// <param name="words"></param>
        /// <param name="start"></param>
        /// <param name="end"></param>
        /// <returns></returns>
        //private String MergeEnglishSpecialWord(CExtractWords extractWords, ArrayList words, int start, ref int end)
        //{
        //    StringBuilder str = new StringBuilder();

        //    int i;

        //    for (i = start; i < words.Count; i++)
        //    {
        //        string word = (string)words[i];

        //        //word 为空或者为空格回车换行等分割符号,中断扫描
        //        if (word.Trim() == "")
        //        {
        //            break;
        //        }

        //        //如果遇到中文,中断扫描
        //        if (word[0] >= 0x4e00 && word[0] <= 0x9fa5)
        //        {
        //            break;
        //        }

        //        str.Append(word);
        //    }

        //    String mergeString = str.ToString();
        //    List<T_WordInfo> exWords = extractWords.ExtractFullText(mergeString);

        //    if (exWords.Count == 1)
        //    {
        //        T_WordInfo info = (T_WordInfo)exWords[0];
        //        if (info.Word.Length == mergeString.Length)
        //        {
        //            end = i;
        //            return mergeString;
        //        }
        //    }

        //    return null;

        //}

        // Tries to merge the current word and the run of Symbol/English words that follows it into
        // one English word. The merge only happens when _WordDictionary.GetWordAttr knows the merged
        // text. On success the run is replaced by the merged word, current is left on the node that
        // followed the run (null at the end of the list) and true is returned; otherwise the list is
        // left unchanged and false is returned.
        private bool MergeEnglishSpecialWord(string orginalText, SuperLinkedList<WordInfo> wordInfoList, ref SuperLinkedListNode<WordInfo> current)
        {
            // Scan forward over the Symbol/English words after the current node, tracking where the run ends.
            int runEnd = -1;
            SuperLinkedListNode<WordInfo> stopNode = current.Next;

            while (stopNode != null &&
                   (stopNode.Value.WordType == WordType.Symbol || stopNode.Value.WordType == WordType.English))
            {
                runEnd = stopNode.Value.Position + stopNode.Value.Word.Length;
                stopNode = stopNode.Next;
            }

            // Nothing mergeable follows the current word.
            if (runEnd < 0)
            {
                return false;
            }

            int runStart = current.Value.Position;
            int runLength = runEnd - runStart;

            WordAttribute wa = _WordDictionary.GetWordAttr(orginalText.Substring(runStart, runLength));

            if (wa == null)
            {
                return false;
            }

            // Unlink every node of the run, starting with the current one.
            while (current != stopNode)
            {
                SuperLinkedListNode<WordInfo> doomed = current;
                current = current.Next;
                wordInfoList.Remove(doomed);
            }

            PanGu.Dict.PositionLength mergedSpan = new PanGu.Dict.PositionLength(runStart, runLength, wa);
            WordInfo mergedWord = new WordInfo(mergedSpan, orginalText, _Parameters);

            mergedWord.WordType = WordType.English;
            mergedWord.Rank = _Parameters.EnglishRank;

            // Put the merged word where the removed run used to be.
            if (current != null)
            {
                wordInfoList.AddBefore(current, mergedWord);
            }
            else
            {
                wordInfoList.AddLast(mergedWord);
            }

            return true;
        }
Exemple #11
0
        /// <summary>
        /// Scans the segmentation result for a one-character English word "v"/"V" followed by numeric
        /// and "." words, and passes every such run to <c>Pickup</c> for version-number extraction.
        /// </summary>
        /// <remarks>
        /// A "v" candidate is abandoned as soon as the current word lies more than one character past
        /// it without a version having started. Without that reset a single stray "v" would keep the
        /// scanner locked on it and no later version number in the text could be detected.
        /// </remarks>
        /// <param name="result">Segmentation result, modified in place by <c>Pickup</c>. A null list is ignored.</param>
        public void AfterSegment(SuperLinkedList <WordInfo> result)
        {
            if (result == null)
            {
                return;
            }

            SuperLinkedListNode <WordInfo> node = result.First;

            SuperLinkedListNode <WordInfo> vWordNode = null;
            SuperLinkedListNode <WordInfo> lastNode  = null;
            bool isVersion            = false;
            int  versionBeginPosition = -1;

            while (node != null)
            {
                if (vWordNode == null)
                {
                    // Look for the single character "v"/"V" that may start a version number.
                    if (node.Value.WordType == WordType.English &&
                        node.Value.Word.Length == 1 &&
                        (node.Value.Word[0] == 'v' || node.Value.Word[0] == 'V'))
                    {
                        vWordNode = node;
                        lastNode  = node;
                    }

                    node = node.Next;
                    continue;
                }

                // Alternative segmentations of the "V" itself share its position: skip them.
                if (node.Value.Position == vWordNode.Value.Position)
                {
                    node = node.Next;
                    continue;
                }

                // Distance between the end of the last accepted word and the current word.
                int gap = node.Value.Position - (lastNode.Value.Position + lastNode.Value.Word.Length);

                // A number or a dot continues the version when at most one character is skipped.
                if ((node.Value.WordType == WordType.Numeric || node.Value.Word == ".") && gap <= 1)
                {
                    if (versionBeginPosition < 0)
                    {
                        versionBeginPosition = node.Value.Position;
                    }

                    isVersion = true;
                    lastNode  = node;

                    node = node.Next;
                    continue;
                }

                if (isVersion)
                {
                    // The version ended just before this word: extract it, then re-examine the
                    // current word as a possible start of another version.
                    Pickup(result, vWordNode, lastNode, versionBeginPosition);
                    vWordNode            = null;
                    lastNode             = null;
                    versionBeginPosition = -1;
                    isVersion            = false;
                    continue;
                }

                if (gap > 1)
                {
                    // The candidate "v" can no longer be continued; drop it and re-examine the
                    // current word as a possible new candidate.
                    vWordNode = null;
                    lastNode  = null;
                    continue;
                }

                // Still within one character of the candidate (e.g. a separator): keep looking.
                node = node.Next;
            }

            if (isVersion)
            {
                // The text ended inside a version number: extract it.
                Pickup(result, vWordNode, lastNode, versionBeginPosition);
            }
        }