/// <summary>
/// Builds the Thai, Lao and English-abbreviation dictionaries that have not been
/// built yet, using the word lists returned by <c>s_dicProvider</c>, and then
/// sets <c>s_dicInit</c>.
/// </summary>
/// <remarks>
/// Returns immediately when <c>s_dicInit</c> is already set or when
/// <c>s_dicProvider</c> is null. Every dictionary is stored in its static field
/// only after its word list has been loaded, so an exception thrown while loading
/// leaves that field null and the load is attempted again on the next call.
/// </remarks>
static void InitAllDics()
{
    // Nothing to do once initialized; nothing can be done without a provider.
    if (s_dicInit || s_dicProvider == null)
    {
        return;
    }

    if (s_thaiDic == null)
    {
        var thaiDic = new CustomDic();
        thaiDic.SetCharRange(ThaiDictionaryBreakingEngine.FirstChar, ThaiDictionaryBreakingEngine.LastChar);
        thaiDic.LoadSortedUniqueWordList(s_dicProvider.GetSortedUniqueWordList("thai"));
        s_thaiDic = thaiDic;
    }

    if (s_laoDic == null)
    {
        var laoDic = new CustomDic();
        laoDic.SetCharRange(LaoDictionaryBreakingEngine.FirstChar, LaoDictionaryBreakingEngine.LastChar);
        laoDic.LoadSortedUniqueWordList(s_dicProvider.GetSortedUniqueWordList("lao"));
        s_laoDic = laoDic;
    }

    if (s_enAbbrvDic == null)
    {
        // Load into a local first (same pattern as the Thai/Lao dictionaries) so a
        // failed load does not leave a non-null dictionary that is never reloaded.
        var abbrvDic = new CustomAbbrvDic();
        abbrvDic.LoadSortedUniqueWordList(s_dicProvider.GetSortedUniqueWordList("abbrv-en"));
        s_enAbbrvDic = abbrvDic;
    }

    s_dicInit = true;
}
/// <summary>
/// Creates the Thai and Lao dictionary breaking engines that do not exist yet and
/// loads their dictionaries from the streams returned by <c>s_dataProvider</c>
/// ("thaidict.txt" and "laodict.txt").
/// </summary>
/// <remarks>
/// Each engine is stored in its static field only after its dictionary has been
/// loaded. If opening or reading the stream throws, the field stays null, so the
/// next call retries instead of keeping an engine whose dictionary never finished loading.
/// </remarks>
static void InitAllDics()
{
    if (thaiDicBreakingEngine == null)
    {
        var customDic = new CustomDic();
        var thaiEngine = new ThaiDictionaryBreakingEngine();
        thaiEngine.SetDictionaryData(customDic); // attach the dictionary to the breaker
        customDic.SetCharRange(thaiEngine.FirstUnicodeChar, thaiEngine.LastUnicodeChar);
        using (Stream data = s_dataProvider.GetDataStream("thaidict.txt"))
        {
            customDic.LoadFromDataStream(data);
        }
        // Publish last: the null check above is the only "already initialized" test.
        thaiDicBreakingEngine = thaiEngine;
    }

    if (laoDicBreakingEngine == null)
    {
        var customDic = new CustomDic();
        var laoEngine = new LaoDictionaryBreakingEngine();
        laoEngine.SetDictionaryData(customDic); // attach the dictionary to the breaker
        customDic.SetCharRange(laoEngine.FirstUnicodeChar, laoEngine.LastUnicodeChar);
        using (Stream data = s_dataProvider.GetDataStream("laodict.txt"))
        {
            customDic.LoadFromDataStream(data);
        }
        // Publish last, for the same reason as the Thai engine.
        laoDicBreakingEngine = laoEngine;
    }
}
/// <summary>
/// Creates the Thai and Lao dictionary breaking engines that do not exist yet and
/// fills their dictionaries from the sorted unique word lists ("thai" and "lao")
/// returned by <c>s_dicProvider</c>.
/// </summary>
/// <remarks>
/// Each engine is stored in its static field only after its word list has been
/// loaded. If fetching or loading the list throws, the field stays null, so the
/// next call retries instead of keeping an engine whose dictionary never finished loading.
/// </remarks>
static void InitAllDics()
{
    if (thaiDicBreakingEngine == null)
    {
        var customDic = new CustomDic();
        var thaiEngine = new ThaiDictionaryBreakingEngine();
        thaiEngine.SetDictionaryData(customDic); // attach the dictionary to the breaker
        customDic.SetCharRange(thaiEngine.FirstUnicodeChar, thaiEngine.LastUnicodeChar);
        customDic.LoadSortedUniqueWordList(s_dicProvider.GetSortedUniqueWordList("thai"));
        // Publish last: the null check above is the only "already initialized" test.
        thaiDicBreakingEngine = thaiEngine;
    }

    if (laoDicBreakingEngine == null)
    {
        var customDic = new CustomDic();
        var laoEngine = new LaoDictionaryBreakingEngine();
        laoEngine.SetDictionaryData(customDic); // attach the dictionary to the breaker
        customDic.SetCharRange(laoEngine.FirstUnicodeChar, laoEngine.LastUnicodeChar);
        customDic.LoadSortedUniqueWordList(s_dicProvider.GetSortedUniqueWordList("lao"));
        // Publish last, for the same reason as the Thai engine.
        laoDicBreakingEngine = laoEngine;
    }
}
WordGroup _resultWordGroup; // assigned by DoIndex()

/// <summary>
/// Distributes this group's word spans into per-character sub groups, keyed by the
/// character found at index <c>PrefixLen</c> of each span, indexes every sub group,
/// and stores the resulting <c>WordGroup</c> in <c>_resultWordGroup</c>.
/// </summary>
/// <param name="textBuffer">Buffer passed to the word spans when reading or comparing their characters.</param>
/// <param name="owner">Dictionary whose <c>FirstChar</c>/<c>LastChar</c> size the sub-group table and offset its indices.</param>
/// <remarks>
/// When <c>PrefixLen</c> is greater than 7 the work is handed to
/// <c>DoIndexOfSmallAmount</c> and no sub groups are built here.
/// The method recurses into sub groups that hold more than two word spans.
/// </remarks>
internal void DoIndex(CustomDicTextBuffer textBuffer, CustomDic owner)
{
    // Stop splitting once the prefix is longer than 7.
    if (this.PrefixLen > 7)
    {
        DoIndexOfSmallAmount(textBuffer);
#if DEBUG
        dbugDataState = debugDataState.TooLongPrefix;
#endif
        return;
    }

    // One slot per character in [owner.FirstChar, owner.LastChar].
    if (subGroups == null)
    {
        subGroups = new DevelopingWordGroup[owner.LastChar - owner.FirstChar + 1];
    }

    int spanCount = wordSpanList.Count;
    int splitPos = this.PrefixLen;
    bool prefixMatched = false;

    for (int n = 0; n < spanCount; ++n)
    {
        WordSpan span = wordSpanList[n];

        if (span.len <= splitPos)
        {
            // The span has no character at splitPos; the first such span whose text
            // equals the prefix marks the prefix itself as a word.
            if (!prefixMatched && span.SameTextContent(this.prefixSpan, textBuffer))
            {
                prefixMatched = true;
                this.PrefixIsWord = true;
            }
            continue;
        }

        char splitChar = span.GetChar(splitPos, textBuffer);
        int slot = splitChar - owner.FirstChar;
        DevelopingWordGroup bucket = subGroups[slot];
        if (bucket == null)
        {
            // First span for this character: create its group with a prefix one char longer.
            bucket = new DevelopingWordGroup(new WordSpan(span.startAt, (byte)(splitPos + 1)));
            subGroups[slot] = bucket;
        }
        bucket.AddWordSpan(span);
    }

#if DEBUG
    this.dbugDataState = debugDataState.Indexed;
#endif
    // The spans now live in the sub groups; drop this group's own list.
    wordSpanList.Clear();
    wordSpanList = null;

    // Index every populated sub group.
    // Original author's note: this threshold is a performance trade-off; groups that
    // are not indexed further need a linear search, while indexing more deeply may
    // increase dictionary-building time.
    bool anySubGroup = false;
    foreach (DevelopingWordGroup child in this.subGroups)
    {
        if (child == null)
        {
            continue;
        }

        anySubGroup = true;
        if (child.WordSpanListCount <= 2)
        {
#if DEBUG
            child.dbugDataState = debugDataState.SmallAmountOfMembers;
#endif
            child.DoIndexOfSmallAmount(textBuffer);
        }
        else
        {
            child.DoIndex(textBuffer, owner);
        }
    }

#if DEBUG
    this.dbugDataState = debugDataState.Indexed;
#endif
    if (!anySubGroup)
    {
        // No slot was populated: discard the empty table.
        subGroups = null;
    }

    // Collect each sub group's result into a parallel array (null where the slot is empty).
    WordGroup[] resultSubGroups = null;
    if (subGroups != null)
    {
        resultSubGroups = new WordGroup[subGroups.Length];
        int k = subGroups.Length - 1;
        while (k >= 0)
        {
            DevelopingWordGroup child = subGroups[k];
            if (child != null)
            {
                resultSubGroups[k] = child.ResultWordGroup;
            }
            --k;
        }
    }

    this._resultWordGroup = new WordGroup(
        this.prefixSpan,
        resultSubGroups,
        null,
        this.PrefixIsWord);
}
/// <summary>
/// Stores the given dictionary in <c>_customDic</c>, replacing any previously stored one.
/// </summary>
/// <param name="customDic">The dictionary to store; it is not validated or copied.</param>
public void SetDictionaryData(CustomDic customDic)
{
    this._customDic = customDic;
}