/// <summary>
/// Stores the classifier flags and builds the helper objects derived from them:
/// the line-iterator factory, the document-to-sentence processor, and the
/// optional dictionaries.
/// </summary>
/// <param name="flags">
/// Configuration to read from. Its <c>normalizationTable</c>, <c>dictionary</c>,
/// <c>serializedDictionary</c>, <c>dictionary2</c> and <c>expandMidDot</c> members
/// are consulted here.
/// </param>
public virtual void Init(SeqClassifierFlags flags)
{
    this.flags = flags;
    factory = LineIterator.GetFactory(new Sighan2005DocumentReaderAndWriter.CTBDocumentParser(this));

    // Historical note (pichuan, Thu Apr 24 11:10:42 2008): flags.normalizationTable
    // is null here; the author believed it had been replaced by some Java class.
    cdtos = new ChineseDocumentToSentenceProcessor(flags.normalizationTable);

    // Primary dictionary given as a comma-separated list.
    if (flags.dictionary != null)
    {
        cdict = new ChineseDictionary(flags.dictionary.Split(","), cdtos, flags.expandMidDot);
    }

    // A serialized dictionary, when configured, replaces whatever cdict was set above.
    if (flags.serializedDictionary != null)
    {
        cdict = new ChineseDictionary(flags.serializedDictionary, cdtos, flags.expandMidDot);
    }

    // Secondary dictionary, also a comma-separated list.
    if (flags.dictionary2 != null)
    {
        cdict2 = new ChineseDictionary(flags.dictionary2.Split(","), cdtos, flags.expandMidDot);
    }
}
/// <summary>
/// Annotates every label in <paramref name="lwi"/> with three dictionary-match
/// features computed over <paramref name="nonspaceLine"/>. For each character
/// position it records the length of the longest dictionary entry that begins
/// there, that covers it as an interior position, and that ends there.
/// </summary>
/// <param name="dict">Dictionary queried with every candidate substring.</param>
/// <param name="lbeginFieldName">Annotation key receiving the "begins here" length.</param>
/// <param name="lmiddleFieldName">Annotation key receiving the "interior" length.</param>
/// <param name="lendFieldName">Annotation key receiving the "ends here" length.</param>
/// <param name="nonspaceLine">
/// Text to scan; must contain exactly one character per element of <paramref name="lwi"/>.
/// </param>
/// <param name="lwi">Labels to annotate, index-aligned with <paramref name="nonspaceLine"/>.</param>
/// <exception cref="ArgumentException">
/// The number of labels differs from the length of <paramref name="nonspaceLine"/>.
/// </exception>
private static void AddDictionaryFeatures(ChineseDictionary dict, Type lbeginFieldName, Type lmiddleFieldName, Type lendFieldName, string nonspaceLine, IList<CoreLabel> lwi)
{
    int lwiSize = lwi.Count;
    if (lwiSize != nonspaceLine.Length)
    {
        throw new ArgumentException(
            "Label count (" + lwiSize + ") does not match line length (" + nonspaceLine.Length + ").",
            "lwi");
    }

    // Array elements are default-initialized to 0, i.e. "no match".
    int[] lbegin = new int[lwiSize];
    int[] lmiddle = new int[lwiSize];
    int[] lend = new int[lwiSize];

    for (int start = 0; start < lwiSize; start++)
    {
        // Only try lengths that fit in the remaining text.
        int longestCandidate = Math.Min(ChineseDictionary.MaxLexiconLength, lwiSize - start);
        for (int length = longestCandidate; length >= 1; length--)
        {
            if (!dict.Contains(nonspaceLine.Substring(start, length)))
            {
                continue;
            }

            bool isMaxLength = length == ChineseDictionary.MaxLexiconLength;
            int lastIndex = start + length - 1;

            if (length > lbegin[start])
            {
                lbegin[start] = length;
            }

            // For a maximum-length match the final character is marked as interior
            // rather than as an end (presumably because the real word may continue
            // past the length cap -- TODO confirm).
            int middleLimit = isMaxLength ? lastIndex + 1 : lastIndex;
            for (int mid = start + 1; mid < middleLimit; mid++)
            {
                if (length > lmiddle[mid])
                {
                    lmiddle[mid] = length;
                }
            }

            if (!isMaxLength && length > lend[lastIndex])
            {
                lend[lastIndex] = length;
            }
        }
    }

    for (int i = 0; i < lwiSize; i++)
    {
        CoreLabel label = lwi[i];
        label.Set(lbeginFieldName, FormatLexiconLength(lbegin[i]));
        label.Set(lmiddleFieldName, FormatLexiconLength(lmiddle[i]));
        label.Set(lendFieldName, FormatLexiconLength(lend[i]));
    }
}

/// <summary>
/// Renders a match length as decimal text, appending "+" when it equals
/// <c>ChineseDictionary.MaxLexiconLength</c>.
/// </summary>
private static string FormatLexiconLength(int length)
{
    string text = length.ToString(System.Globalization.CultureInfo.InvariantCulture);
    return length == ChineseDictionary.MaxLexiconLength ? text + "+" : text;
}