/// <summary>
/// Loads the dictionary from an in-memory word list and builds the index.
/// Does nothing when a text buffer has already been loaded (load-once).
/// </summary>
/// <remarks>
/// Each entry is trimmed; entries that are empty after trimming or that start
/// with '#' are skipped. Going by the method name the input is expected to be
/// sorted and unique; this method does not verify that.
/// </remarks>
/// <param name="sortedWordList">the words to load, one word per entry</param>
/// <exception cref="NotSupportedException">
/// firstChar or lastChar has not been set, or a word is longer than
/// <see cref="byte.MaxValue"/> characters.
/// </exception>
/// <exception cref="ArgumentNullException">sortedWordList is null</exception>
public void LoadSortedUniqueWordList(IEnumerable<string> sortedWordList)
{
    //once only
    if (textBuffer != null)
    {
        return;
    }
    if (firstChar == '\0' || lastChar == '\0')
    {
        throw new NotSupportedException();
    }
    if (sortedWordList == null)
    {
        throw new ArgumentNullException("sortedWordList");
    }
    //---------------
    Dictionary<char, DevelopingWordGroup> wordGroups = new Dictionary<char, DevelopingWordGroup>();
    //fill a local buffer first and publish it to the field only after every
    //word has been accepted, so a failed load does not leave a half-filled
    //buffer that makes the load-once check above skip every later attempt
    CustomDicTextBuffer buffer = new CustomDicTextBuffer(1024);
    foreach (string line in sortedWordList)
    {
        char[] lineBuffer = line.Trim().ToCharArray();
        int lineLen = lineBuffer.Length;
        if (lineLen == 0 || lineBuffer[0] == '#')
        {
            //blank line or comment line
            continue;
        }
        //the span length is narrowed to a byte below, so a longer word must be
        //rejected in every build configuration (not only in DEBUG); otherwise
        //the cast silently truncates and records a wrong word length
        if (lineLen > byte.MaxValue)
        {
            throw new NotSupportedException(
                "A dictionary word must not be longer than " + byte.MaxValue + " characters.");
        }
        char c0 = lineBuffer[0];
        int startAt = buffer.CurrentPosition;
        buffer.AddWord(lineBuffer);
        WordSpan wordspan = new WordSpan(startAt, (byte)lineLen);
        //each word group is keyed by the first char of its words and
        //is created with a 1-char prefix span taken from its first word
        DevelopingWordGroup found;
        if (!wordGroups.TryGetValue(c0, out found))
        {
            found = new DevelopingWordGroup(new WordSpan(startAt, 1));
            wordGroups.Add(c0, found);
        }
        found.AddWordSpan(wordspan);
    }
    //------------------------------------------------------------------
    textBuffer = buffer;
    textBuffer.Freeze();
    //------------------------------------------------------------------
    //do index
    DoIndex(wordGroups);
    //clear, not used
    wordGroups.Clear();
}
/// <summary>
/// Indexes every developing word group and stores the resulting groups in
/// this.wordGroups, an array with one slot per char between firstChar and lastChar.
/// </summary>
/// <param name="wordGroups">developing word groups keyed by the first char of their words</param>
void DoIndex(Dictionary<char, DevelopingWordGroup> wordGroups)
{
    //one slot for each char in [firstChar, lastChar]
    int slotCount = this.lastChar - this.firstChar + 1;
    WordGroup[] indexedGroups = new WordGroup[slotCount];
    foreach (KeyValuePair<char, DevelopingWordGroup> entry in wordGroups)
    {
        //resolve the slot of the key char, then index the group and keep its result
        int slot = TransformCharToIndex(entry.Key);
        DevelopingWordGroup developing = entry.Value;
        developing.DoIndex(this.textBuffer, this);
        indexedGroups[slot] = developing.ResultWordGroup;
    }
    //slots for chars that have no group stay null
    this.wordGroups = indexedGroups;
}
//assigned at the end of DoIndex()
WordGroup _resultWordGroup;

/// <summary>
/// Splits the word spans of this group into sub groups by the char that follows
/// the current prefix, indexes each sub group, and stores the outcome in
/// _resultWordGroup. This method is recursive.
/// </summary>
/// <param name="textBuffer">the buffer that the word spans point into</param>
/// <param name="owner">the dictionary; its FirstChar and LastChar define the sub group slots</param>
internal void DoIndex(CustomDicTextBuffer textBuffer, CustomDic owner)
{
    //a prefix longer than 7 chars is not split any further
    if (this.PrefixLen > 7)
    {
        DoIndexOfSmallAmount(textBuffer);
#if DEBUG
        dbugDataState = debugDataState.TooLongPrefix;
#endif
        return;
    }
    //-----------------------------------------------
    if (subGroups == null)
    {
        //one slot for each char in [FirstChar, LastChar]
        subGroups = new DevelopingWordGroup[owner.LastChar - owner.FirstChar + 1];
    }
    //--------------------------------
    //1. distribute each span by the char found right after the prefix
    bool prefixMatched = false;
    int spanCount = wordSpanList.Count;
    int splitAt = this.PrefixLen;
    for (int n = 0; n < spanCount; ++n)
    {
        WordSpan span = wordSpanList[n];
        if (span.len <= splitAt)
        {
            //no char after the prefix: check (until the first match)
            //whether this span has the same text as the prefix itself
            if (!prefixMatched && span.SameTextContent(this.prefixSpan, textBuffer))
            {
                prefixMatched = true;
                this.PrefixIsWord = true;
            }
            continue;
        }

        char nextChar = span.GetChar(splitAt, textBuffer);
        int slot = nextChar - owner.FirstChar;
        DevelopingWordGroup child = subGroups[slot];
        if (child == null)
        {
            //first span for this char: the new sub group gets a prefix one char longer
            child = new DevelopingWordGroup(new WordSpan(span.startAt, (byte)(splitAt + 1)));
            subGroups[slot] = child;
        }
        child.AddWordSpan(span);
    }
#if DEBUG
    this.dbugDataState = debugDataState.Indexed;
#endif
    //every span has been handed over (or dropped), release the list
    wordSpanList.Clear();
    wordSpanList = null;
    //--------------------------------
    //2. index the sub groups
    bool anyChild = false;
    foreach (DevelopingWordGroup child in this.subGroups)
    {
        if (child == null)
        {
            continue;
        }
        anyChild = true;
        //performance factor:
        //a sub group with more than 2 members is indexed recursively,
        //a smaller one is handled by DoIndexOfSmallAmount();
        //the original note says an un-indexed group needs a linear search,
        //while more indexing may increase dictionary-building time
        if (child.WordSpanListCount > 2)
        {
            child.DoIndex(textBuffer, owner);
        }
        else
        {
#if DEBUG
            child.dbugDataState = debugDataState.SmallAmountOfMembers;
#endif
            child.DoIndexOfSmallAmount(textBuffer);
        }
    }
    //--------------------------------
#if DEBUG
    this.dbugDataState = debugDataState.Indexed;
#endif
    //3. collect the results of the sub groups, slot by slot
    WordGroup[] resultSubGroups = null;
    if (anyChild)
    {
        resultSubGroups = new WordGroup[subGroups.Length];
        for (int n = 0; n < subGroups.Length; ++n)
        {
            DevelopingWordGroup child = subGroups[n];
            if (child != null)
            {
                resultSubGroups[n] = child.ResultWordGroup;
            }
        }
    }
    else
    {
        //no sub group was created, drop the empty array
        subGroups = null;
    }
    //--------------------------------
    this._resultWordGroup = new WordGroup(
        this.prefixSpan,
        resultSubGroups,
        null,
        this.PrefixIsWord);
}
/// <summary>
/// Loads the dictionary from a text file (one word per line) and builds the index.
/// Does nothing when a text buffer has already been loaded (load-once).
/// </summary>
/// <remarks>
/// Each line is trimmed; lines that are empty after trimming or that start
/// with '#' are skipped.
/// </remarks>
/// <param name="filename">path of the word list file</param>
/// <exception cref="NotSupportedException">
/// firstChar or lastChar has not been set, or a word is longer than
/// <see cref="byte.MaxValue"/> characters.
/// </exception>
public void LoadFromTextfile(string filename)
{
    //once only
    if (textBuffer != null)
    {
        return;
    }
    if (firstChar == '\0' || lastChar == '\0')
    {
        throw new NotSupportedException();
    }
    //---------------
    Dictionary<char, DevelopingWordGroup> wordGroups = new Dictionary<char, DevelopingWordGroup>();
    //open for reading only: FileStream(path, mode) asks for read/write access,
    //which fails on a read-only file although the file is never written here
    using (FileStream fs = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (StreamReader reader = new StreamReader(fs))
    {
        //init with filesize;
        //fill a local buffer first and publish it to the field only after every
        //line has been accepted, so a failed load does not leave a half-filled
        //buffer that makes the load-once check above skip every later attempt
        //NOTE(review): LoadSortedUniqueWordList creates a CustomDicTextBuffer here - confirm TextBuffer is intended
        var buffer = new TextBuffer((int)fs.Length);
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            char[] lineBuffer = line.Trim().ToCharArray();
            int lineLen = lineBuffer.Length;
            if (lineLen == 0 || lineBuffer[0] == '#')
            {
                //blank line or comment line
                continue;
            }
            //the span length is narrowed to a byte below, so a longer word must be
            //rejected in every build configuration (not only in DEBUG); otherwise
            //the cast silently truncates and records a wrong word length
            if (lineLen > byte.MaxValue)
            {
                throw new NotSupportedException(
                    "A dictionary word must not be longer than " + byte.MaxValue + " characters.");
            }
            char c0 = lineBuffer[0];
            int startAt = buffer.CurrentPosition;
            buffer.AddWord(lineBuffer);
            WordSpan wordspan = new WordSpan(startAt, (byte)lineLen);
            //each word group is keyed by the first char of its words and
            //is created with a 1-char prefix span taken from its first word
            DevelopingWordGroup found;
            if (!wordGroups.TryGetValue(c0, out found))
            {
                found = new DevelopingWordGroup(new WordSpan(startAt, 1));
                wordGroups.Add(c0, found);
            }
            found.AddWordSpan(wordspan);
        }
        textBuffer = buffer;
        //reader and stream are closed by the using blocks
    }
    //------------------------------------------------------------------
    textBuffer.Freeze();
    //------------------------------------------------------------------
    //do index
    DoIndex(wordGroups);
    //clear, not used
    wordGroups.Clear();
}