/// <summary>
/// Initializes a new instance by copying the <c>Word</c>, <c>Pos</c> and
/// <c>Frequency</c> values of the given <see cref="WordAttribute"/>.
/// </summary>
/// <param name="wa">Source attribute whose values are copied; must not be null.</param>
public WordAttributeStruct(WordAttribute wa)
{
    Word = wa.Word;
    Pos = wa.Pos;
    Frequency = wa.Frequency;
}
/// <summary>
/// Initializes a new instance by copying the <c>Word</c>, <c>Pos</c> and
/// <c>Frequency</c> values of the given <see cref="WordAttribute"/>.
/// </summary>
/// <param name="wordAttr">Source attribute whose values are copied; must not be null.</param>
public WordInfo(WordAttribute wordAttr)
{
    Word = wordAttr.Word;
    Pos = wordAttr.Pos;
    Frequency = wordAttr.Frequency;
}
/// <summary>
/// Merges English special words.
/// If the dictionary contains English special words such as U.S.A, C++ or C#,
/// the English and symbol tokens produced by the preliminary segmentation
/// have to be merged back into a single word.
/// </summary>
/// <remarks>
/// Starting at the node after <paramref name="current"/>, the method walks over
/// consecutive nodes whose type is <c>WordType.Symbol</c> or <c>WordType.English</c>.
/// The text spanning from the position of <paramref name="current"/> to the end of
/// the last such node is looked up in the word dictionary. When the lookup fails,
/// the list is left untouched. When it succeeds, the spanned nodes are removed and
/// replaced by the merged word:
/// with <c>EnglishSegment</c> enabled, the original-case word is inserted only if it
/// differs from its lower-case form, followed by the lower-case word ranked with
/// <c>EnglishLowerRank</c>; otherwise, with <c>IgnoreCapital</c> enabled, the merged
/// word itself is lower-cased before insertion.
/// </remarks>
/// <param name="orginalText">Text the word positions refer to; the merged word is cut from it.</param>
/// <param name="wordInfoList">Word list that is modified in place on a successful merge.</param>
/// <param name="current">
/// First node of the candidate run. After a successful merge it refers to the first
/// node following the merged run, or null when the run reached the end of the list.
/// </param>
/// <returns>true if a dictionary word was found and merged; otherwise false.</returns>
private bool MergeEnglishSpecialWord(string orginalText, SuperLinkedList<WordInfo> wordInfoList, ref SuperLinkedListNode<WordInfo> current)
{
    // Find the end offset of the run of Symbol/English nodes that follows 'current'.
    SuperLinkedListNode<WordInfo> scan = current.Next;
    int endOffset = -1;

    while (scan != null)
    {
        WordType scannedType = scan.Value.WordType;

        if (scannedType != WordType.Symbol && scannedType != WordType.English)
        {
            break;
        }

        endOffset = scan.Value.Position + scan.Value.Word.Length;
        scan = scan.Next;
    }

    // Nothing mergeable follows the current node.
    if (endOffset < 0)
    {
        return false;
    }

    int startOffset = current.Value.Position;
    int mergedLength = endOffset - startOffset;
    string candidate = orginalText.Substring(startOffset, mergedLength);

    WordAttribute attribute = _WordDictionary.GetWordAttr(candidate);

    if (attribute == null)
    {
        return false;
    }

    // Drop every node of the run. The cursor is advanced before the removal so
    // that the step never depends on the state of an already removed node.
    while (current != scan)
    {
        SuperLinkedListNode<WordInfo> obsolete = current;
        current = current.Next;
        wordInfoList.Remove(obsolete);
    }

    WordInfo merged = new WordInfo(new Dict.PositionLength(startOffset, mergedLength, attribute), orginalText, _Parameters);
    merged.WordType = WordType.English;
    merged.Rank = _Parameters.EnglishRank;

    if (_Options.EnglishSegment)
    {
        string lowered = merged.Word.ToLower();

        // Keep the original spelling as well when it differs from the lower-case form.
        if (lowered != merged.Word)
        {
            InsertBeforeOrAppend(wordInfoList, current, merged);
        }

        merged = new WordInfo(
            lowered,
            merged.Position,
            merged.Pos,
            merged.Frequency,
            _Parameters.EnglishLowerRank,
            merged.WordType,
            merged.OriginalWordType);
    }
    else if (_Options.IgnoreCapital)
    {
        merged.Word = merged.Word.ToLower();
    }

    InsertBeforeOrAppend(wordInfoList, current, merged);
    return true;
}

/// <summary>
/// Inserts <paramref name="item"/> in front of <paramref name="anchor"/>, or appends it
/// to the end of <paramref name="list"/> when <paramref name="anchor"/> is null.
/// </summary>
private static void InsertBeforeOrAppend(SuperLinkedList<WordInfo> list, SuperLinkedListNode<WordInfo> anchor, WordInfo item)
{
    if (anchor == null)
    {
        list.AddLast(item);
    }
    else
    {
        list.AddBefore(anchor, item);
    }
}