/// <summary>
/// Builds a gazetteer term from its textual definition.
/// </summary>
/// <remarks>
/// Every part of <paramref name="termDef"/> matched by mConstraintRegex is passed to
/// ParseGazetteerSettings (which receives the three settings by reference) and is removed from
/// the definition. The remaining text is split into micro-tokens with mGazetteerMicroTokenRegex.
/// A micro-token that splits on '/' into exactly two parts contributes a token and a POS
/// constraint; any other micro-token is kept whole with a null constraint.
/// </remarks>
/// <param name="termDef">Textual term definition, optionally containing inline settings.</param>
/// <param name="gazetteer">Gazetteer the term belongs to; forwarded to InitializeInstance.</param>
/// <param name="defaultCaseMatchingType">Case matching type used unless the definition overrides it.</param>
/// <param name="defaultLemmatizeFlag">Lemmatization flag used unless the definition overrides it.</param>
/// <param name="defaultEnabledFlag">Enabled flag used unless the definition overrides it.</param>
public GazetteerTerm(string termDef, Gazetteer gazetteer, CaseMatchingType defaultCaseMatchingType, bool defaultLemmatizeFlag, bool defaultEnabledFlag)
{
    // Start from the caller-supplied defaults; the settings parser may overwrite them.
    bool enabled = defaultEnabledFlag;
    bool lemmatize = defaultLemmatizeFlag;
    CaseMatchingType caseMatchingType = defaultCaseMatchingType;

    // Consume the inline settings and strip them from the definition.
    MatchEvaluator settingsEvaluator = delegate(Match settingsMatch)
    {
        ParseGazetteerSettings(settingsMatch.Value, ref caseMatchingType, ref lemmatize, ref enabled);
        return "";
    };
    string strippedDef = mConstraintRegex.Replace(termDef, settingsEvaluator);

    // Collect tokens and their (possibly null) POS constraints in parallel lists.
    ArrayList<string> tokens = new ArrayList<string>();
    ArrayList<string> posConstraints = new ArrayList<string>();
    for (Match microToken = mGazetteerMicroTokenRegex.Match(strippedDef); microToken.Success; microToken = microToken.NextMatch())
    {
        string[] parts = microToken.Value.Split('/');
        bool hasPosConstraint = parts.Length == 2;
        tokens.Add(hasPosConstraint ? parts[0] : microToken.Value);
        posConstraints.Add(hasPosConstraint ? parts[1] : null);
    }

    InitializeInstance(tokens, posConstraints, lemmatize, caseMatchingType, enabled, gazetteer);
}
/// <summary>
/// Loads every subject of type C_GAZETTEER from the RDF store into mGazetteers.
/// </summary>
/// <remarks>
/// Works in three passes over the same entity array:
/// 1. create a Gazetteer per entity, register it under its URI and read its stop words;
/// 2. import gazetteers and read conditions (both calls receive the full mGazetteers map);
/// 3. read the terms.
/// </remarks>
public void LoadGazetteers()
{
    mLogger.Info("LoadGazetteers", "Loading gazetteers ...");
    Entity[] entities = mRdfStore.SelectSubjects(P_TYPE, C_GAZETTEER);
    mLogger.Info("LoadGazetteers", "Found {0} gazetteers.", entities.Length);

    // Pass 1: create and register the gazetteer objects, read stop words.
    for (int i = 0; i < entities.Length; i++)
    {
        Gazetteer created = new Gazetteer(entities[i].Uri);
        mGazetteers.Add(entities[i].Uri, created);
        created.ReadStopWords(mRdfStore);
    }

    // Pass 2: import gazetteers and read conditions.
    for (int i = 0; i < entities.Length; i++)
    {
        mGazetteers[entities[i].Uri].ImportGazetteers(mRdfStore, mGazetteers);
        mGazetteers[entities[i].Uri].ReadConditions(mRdfStore, mGazetteers);
    }

    // Pass 3: read terms.
    for (int i = 0; i < entities.Length; i++)
    {
        mGazetteers[entities[i].Uri].ReadTerms(mRdfStore);
    }
}
/// <summary>
/// Tests whether this term occurs in <paramref name="words"/> starting exactly at
/// <paramref name="startIdx"/>.
/// </summary>
/// <param name="words">Words to match against.</param>
/// <param name="startIdx">Index in <paramref name="words"/> where the term's first word must occur.</param>
/// <param name="len">
/// On success, the number of entries of <paramref name="words"/> covered by the match
/// (skipped stop words included); 0 when the method returns false.
/// </param>
/// <param name="gazetteer">Supplies the ignore-case flag (mIgnoreCase) and the stop-word test.</param>
/// <returns>true if all words of the term were matched; otherwise false.</returns>
public bool Match(string[] words, int startIdx, out int len, Gazetteer gazetteer)
{
    len = 0;

    // Guard the indexers below: an out-of-range start position or an empty term is
    // reported as "no match" instead of surfacing as an index exception.
    if (words == null || startIdx < 0 || startIdx >= words.Length || mWords.Count == 0)
    {
        return false;
    }

    int idx = startIdx;

    // The first word must match at startIdx itself; stop words are not skipped here.
    if (string.Compare(words[idx], mWords[0], gazetteer.mIgnoreCase) != 0)
    {
        return false;
    }

    // *** only for the demo: the first word must have one of the case types ABC, Abc or AbC
    Utils.CaseType caseType = Utils.GetCaseType(words[idx]);
    if (!(caseType == Utils.CaseType.ABC || caseType == Utils.CaseType.Abc || caseType == Utils.CaseType.AbC))
    {
        return false;
    }
    idx++;

    for (int i = 1; i < mWords.Count; i++)
    {
        // Skip stop words that appear between two words of the term.
        while (idx < words.Length && gazetteer.IsStopWord(words[idx].ToLower()))
        {
            idx++;
        }
        if (idx == words.Length)
        {
            return false; // ran out of input before the term was complete
        }
        if (string.Compare(words[idx++], mWords[i], gazetteer.mIgnoreCase) != 0)
        {
            return false;
        }
    }

    len = idx - startIdx;
    return true;
}
/// <summary>
/// Reads every subject of type C_GAZETTEER from the RDF store into mGazetteers.
/// </summary>
/// <remarks>
/// Pass 1 creates and registers the gazetteer objects and reads their stop words and settings,
/// pass 2 imports gazetteers (the call receives the full mGazetteers map), pass 3 reads the terms.
/// </remarks>
public void ReadGazetteers()
{
    mLogger.Info("ReadGazetteers", "Reading gazetteers ...");
    Entity[] entities = mRdfStore.SelectSubjects(P_TYPE, C_GAZETTEER);
    mLogger.Info("ReadGazetteers", "Found {0} gazetteers.", entities.Length);

    // Pass 1: gazetteer objects, stop words, settings.
    foreach (Entity entity in entities)
    {
        Gazetteer created = new Gazetteer();
        created.mUri = entity.Uri;
        mGazetteers.Add(entity.Uri, created);
        created.ReadStopWords(mRdfStore);
        created.ReadSettings(mRdfStore);
    }

    // Pass 2: imported gazetteers.
    foreach (Entity entity in entities)
    {
        Gazetteer registered = mGazetteers[entity.Uri];
        registered.ImportGazetteers(mRdfStore, mGazetteers);
    }

    // Pass 3: terms.
    foreach (Entity entity in entities)
    {
        Gazetteer registered = mGazetteers[entity.Uri];
        registered.ReadTerms(mRdfStore);
    }
}
/// <summary>
/// Checks whether any term in mTerms matches <paramref name="tokens"/> at <paramref name="startIdx"/>.
/// </summary>
/// <param name="tokens">Tokens to match against.</param>
/// <param name="startIdx">Position at which a term must start.</param>
/// <param name="len">
/// Value written by the last Term.Match call that ran; equals <paramref name="startIdx"/>
/// when mTerms is empty.
/// </param>
/// <param name="gazetteer">Gazetteer forwarded to each term's Match call.</param>
/// <returns>true as soon as one term matches; false if none does.</returns>
public bool IsMatch(string[] tokens, int startIdx, out int len, Gazetteer gazetteer)
{
    // Definite assignment for the empty-terms case; every Term.Match call overwrites it.
    len = startIdx;
    bool matched = false;
    foreach (Term candidate in mTerms)
    {
        matched = candidate.Match(tokens, startIdx, out len, gazetteer);
        if (matched)
        {
            break;
        }
    }
    return matched;
}
/// <summary>
/// Tags this word with the gazetteer's name on the NER layer when the word, or the part of the
/// word before its first apostrophe, is contained in the gazetteer and the parent's data is
/// named "NNP".
/// </summary>
/// <param name="gazetteer">Gazetteer to look the word up in; its name becomes the NER layer value.</param>
/// <param name="word">Surface form to check.</param>
public void CheckGazetteer(Gazetteer gazetteer, string word)
{
    // Whole-word lookup.
    if (gazetteer.Contains(word) && GetParent().GetData().GetName().Equals("NNP"))
    {
        GetLayerInfo().SetLayerData(ViewLayerType.NER, gazetteer.GetName());
    }

    // Lookup of the part before the first apostrophe. A single ordinal char search replaces the
    // former Contains("'") + IndexOf("'") pair: Contains(string) is ordinal while IndexOf(string)
    // is culture-sensitive, so the two could disagree and hand Substring a negative length.
    int apostropheIdx = word.IndexOf('\'');
    if (apostropheIdx >= 0
        && gazetteer.Contains(word.Substring(0, apostropheIdx))
        && GetParent().GetData().GetName().Equals("NNP"))
    {
        GetLayerInfo().SetLayerData(ViewLayerType.NER, gazetteer.GetName());
    }
}
/// <summary>
/// Handles GET: loads the gazetteer with the given id into the Gazetteer property.
/// </summary>
/// <param name="id">Id of the gazetteer to show.</param>
/// <returns>
/// NotFound when <paramref name="id"/> is null or no gazetteer has that id; otherwise the page.
/// </returns>
public async Task<IActionResult> OnGetAsync(long? id)
{
    if (!id.HasValue)
    {
        return NotFound();
    }

    Gazetteer = await _context.Gazetteer.FirstOrDefaultAsync(m => m.GazetteerId == id);

    if (Gazetteer != null)
    {
        return Page();
    }
    return NotFound();
}
/// <summary>
/// Matches <paramref name="tokens"/> at <paramref name="startIdx"/> against this gazetteer's own
/// terms first and then, recursively, against each imported gazetteer.
/// </summary>
/// <param name="tokens">Tokens to match against.</param>
/// <param name="startIdx">Position at which a term must start.</param>
/// <param name="len">Value written by the last IsMatch/Match call that ran.</param>
/// <param name="gazetteer">
/// Gazetteer passed through unchanged to IsMatch and to every imported gazetteer's Match.
/// </param>
/// <returns>true if this gazetteer or one of its imports matches; otherwise false.</returns>
public bool Match(string[] tokens, int startIdx, out int len, Gazetteer gazetteer)
{
    bool matched = IsMatch(tokens, startIdx, out len, gazetteer);
    if (!matched)
    {
        foreach (Gazetteer imported in mImportedGazetteers)
        {
            matched = imported.Match(tokens, startIdx, out len, gazetteer);
            if (matched)
            {
                break;
            }
        }
    }
    return matched;
}
/// <summary>
/// Handles POST: deletes the gazetteer with the given id, if it exists.
/// </summary>
/// <param name="id">Id of the gazetteer to delete.</param>
/// <returns>
/// NotFound when <paramref name="id"/> is null; otherwise a redirect to "./Index", whether or
/// not a gazetteer was found and removed.
/// </returns>
public async Task<IActionResult> OnPostAsync(long? id)
{
    if (!id.HasValue)
    {
        return NotFound();
    }

    Gazetteer = await _context.Gazetteer.FindAsync(id);

    bool exists = Gazetteer != null;
    if (exists)
    {
        _context.Gazetteer.Remove(Gazetteer);
        await _context.SaveChangesAsync();
    }

    return RedirectToPage("./Index");
}
/// <summary>
/// Finds all occurrences of the enabled terms of <paramref name="gazetteer"/> in this
/// sentence's tokens (mTokens).
/// </summary>
/// <param name="gazetteer">Gazetteer whose terms are searched for and whose stop words are skipped.</param>
/// <param name="spans">
/// Receives one pair (first token index, last token index) per occurrence whose character
/// length is at least the term's mMinLen.
/// </param>
public void Match(Gazetteer gazetteer, out ArrayList<Pair<int, int>> spans)
{
    spans = new ArrayList<Pair<int, int>>();
    foreach (GazetteerTerm term in gazetteer.mTerms)
    {
        if (term.mEnabled)
        {
            int termLength = term.mTokens.Count;
            int lastStart = mTokens.Count - termLength;
            for (int start = 0; start <= lastStart; start++)
            {
                int end = start;
                bool found = false;
                int k = 0;
                // Walk the term token by token; stop at the first token that does not match.
                while (k < termLength && Match(term.mTokens[k], mTokens[end], term.mCaseMatchingType, /*firstToken=*/ k == 0))
                {
                    if (k == termLength - 1)
                    {
                        found = true; // last term token matched at sentence token 'end'
                        break;
                    }
                    // Advance to the next sentence token that is not a stop word.
                    end++;
                    while (end < mTokens.Count && gazetteer.IsStopWord(mTokens[end].mTokenStr.ToLower()))
                    {
                        end++;
                    }
                    if (end >= mTokens.Count)
                    {
                        break; // sentence exhausted before the term was complete
                    }
                    k++;
                }
                if (found)
                {
                    // *** this counts all chars in the annotation (incl. spaces and non-token chars)
                    int charLength = mTokens[end].mSpanEnd - mTokens[start].mSpanStart + 1;
                    if (charLength >= term.mMinLen)
                    {
                        spans.Add(new Pair<int, int>(start, end));
                    }
                }
            }
        }
    }
}
public void TestContains()
{
    // Each entry is checked in lower-case and upper-case form against the location gazetteer.
    var gazetteer = new Gazetteer("location", "gazetteer-location.txt");
    string[] expectedEntries =
    {
        "bağdat", "BAĞDAT",
        "belçika", "BELÇİKA",
        "körfez", "KÖRFEZ",
        "küba", "KÜBA",
        "varşova", "VARŞOVA",
        "krallık", "KRALLIK",
        "berlin", "BERLİN"
    };
    foreach (string entry in expectedEntries)
    {
        Assert.True(gazetteer.Contains(entry));
    }
}
/// <summary>
/// Creates a condition that refers to <paramref name="gazetteer"/> at the given <paramref name="level"/>.
/// </summary>
/// <param name="gazetteer">Gazetteer stored in mGazetteer.</param>
/// <param name="level">Level stored in mLevel.</param>
public Condition(Gazetteer gazetteer, Level level)
{
    this.mLevel = level;
    this.mGazetteer = gazetteer;
}
/// <summary>
/// Initializes the term from parallel sequences of tokens and POS constraints.
/// </summary>
/// <remarks>
/// The two sequences are consumed in lock-step and iteration stops when either one ends. Each
/// token is normalized; tokens that the gazetteer reports as stop words (tested in lower case)
/// are dropped. If at least one token remains, PrepareTokens is called.
/// </remarks>
/// <param name="tokens">Token strings of the term.</param>
/// <param name="posConstraints">POS constraint for each token, in the same order.</param>
/// <param name="lemmatize">When true, a lemma is computed for each kept token via mLemmatizer.</param>
/// <param name="caseMatchingType">Stored in mCaseMatchingType and passed to PrepareTokens.</param>
/// <param name="enabled">Stored in mEnabled.</param>
/// <param name="gazetteer">Supplies the stop-word test.</param>
private void InitializeInstance(IEnumerable<string> tokens, IEnumerable<string> posConstraints, bool lemmatize, CaseMatchingType caseMatchingType, bool enabled, Gazetteer gazetteer)
{
    mCaseMatchingType = caseMatchingType;
    mEnabled = enabled;

    // IEnumerator<T> is IDisposable; the using blocks release both enumerators even if
    // normalization or lemmatization throws.
    using (IEnumerator<string> enumTokens = tokens.GetEnumerator())
    using (IEnumerator<string> enumPosConstraints = posConstraints.GetEnumerator())
    {
        while (enumTokens.MoveNext() && enumPosConstraints.MoveNext())
        {
            string tokenStr = Normalize(enumTokens.Current);
            string posConstraint = enumPosConstraints.Current;
            if (gazetteer.IsStopWord(tokenStr.ToLower()))
            {
                continue; // stop words never become part of the term
            }

            string lemma = null;
            if (lemmatize)
            {
                lemma = mLemmatizer.GetStem(tokenStr);
                // Fall back to the surface form when the lemmatizer yields nothing usable
                // (empty string; a null result is treated the same way).
                if (string.IsNullOrEmpty(lemma))
                {
                    lemma = tokenStr;
                }
            }
            mTokens.Add(new GazetteerToken(tokenStr, posConstraint, lemma));
        }
    }

    if (mTokens.Count > 0)
    {
        PrepareTokens(caseMatchingType, lemmatize);
    }
}
/// <summary>
/// Finds gazetteer matches in all sentences of this document and returns those whose
/// gazetteer conditions are satisfied.
/// </summary>
/// <remarks>
/// Phase 1 matches every gazetteer of <paramref name="e"/> against every sentence. Phase 2
/// records which gazetteers produced matches per text block and in the whole document.
/// Phase 3 keeps the matches of a gazetteer only if each of its conditions finds the
/// referenced gazetteer at the required level (document, block or sentence).
/// </remarks>
/// <param name="e">Engine whose mGazetteers are matched.</param>
/// <param name="spans">Receives the spans of the accepted matches, as reported by Sentence.Match.</param>
/// <returns>One gazetteer URI per accepted match.</returns>
public ArrayList<string> DiscoverEntities(EntityRecognitionEngine e, out ArrayList<Pair<int, int>> spans)
{
    Dictionary<Sentence, Dictionary<Gazetteer, ArrayList<Pair<int, int>>>> hitsBySentence
        = new Dictionary<Sentence, Dictionary<Gazetteer, ArrayList<Pair<int, int>>>>();
    Dictionary<TextBlock, Set<Gazetteer>> gazetteersByBlock = new Dictionary<TextBlock, Set<Gazetteer>>();
    Set<Gazetteer> gazetteersInDocument = new Set<Gazetteer>();

    // Phase 1: look for gazetteer terms.
    foreach (KeyValuePair<string, Gazetteer> entry in e.mGazetteers)
    {
        Gazetteer current = entry.Value;
        foreach (TextBlock block in mTextBlocks)
        {
            foreach (Sentence sentence in block.mSentences)
            {
                ArrayList<Pair<int, int>> hits;
                sentence.Match(current, out hits);
                if (hits.Count == 0)
                {
                    continue;
                }
                Dictionary<Gazetteer, ArrayList<Pair<int, int>>> perGazetteer;
                if (!hitsBySentence.TryGetValue(sentence, out perGazetteer))
                {
                    perGazetteer = new Dictionary<Gazetteer, ArrayList<Pair<int, int>>>();
                    hitsBySentence.Add(sentence, perGazetteer);
                }
                perGazetteer.Add(current, hits);
            }
        }
    }

    // Phase 2: propagate discovered entities to block and document level.
    foreach (KeyValuePair<Sentence, Dictionary<Gazetteer, ArrayList<Pair<int, int>>>> sentenceEntry in hitsBySentence)
    {
        TextBlock owningBlock = sentenceEntry.Key.mTextBlock;
        foreach (KeyValuePair<Gazetteer, ArrayList<Pair<int, int>>> gazetteerEntry in sentenceEntry.Value)
        {
            gazetteersInDocument.Add(gazetteerEntry.Key);
            Set<Gazetteer> blockSet;
            if (!gazetteersByBlock.TryGetValue(owningBlock, out blockSet))
            {
                blockSet = new Set<Gazetteer>();
                gazetteersByBlock.Add(owningBlock, blockSet);
            }
            blockSet.Add(gazetteerEntry.Key);
        }
    }

    // Phase 3: check conditions.
    spans = new ArrayList<Pair<int, int>>();
    ArrayList<string> discoveredEntities = new ArrayList<string>(); // gazetteer URIs
    foreach (KeyValuePair<Sentence, Dictionary<Gazetteer, ArrayList<Pair<int, int>>>> sentenceEntry in hitsBySentence)
    {
        foreach (KeyValuePair<Gazetteer, ArrayList<Pair<int, int>>> gazetteerEntry in sentenceEntry.Value)
        {
            Gazetteer gazetteer = gazetteerEntry.Key;
            Set<Gazetteer> blockGazetteers = gazetteersByBlock[sentenceEntry.Key.mTextBlock];

            bool valid = true;
            foreach (Condition condition in gazetteer.mConditions)
            {
                switch (condition.mLevel)
                {
                    case Condition.Level.Document:
                        valid = gazetteersInDocument.Contains(condition.mGazetteer);
                        break;
                    case Condition.Level.Block:
                        valid = blockGazetteers.Contains(condition.mGazetteer);
                        break;
                    case Condition.Level.Sentence:
                        valid = sentenceEntry.Value.ContainsKey(condition.mGazetteer);
                        break;
                }
                if (!valid)
                {
                    break; // one failed condition rejects all matches of this gazetteer here
                }
            }
            if (!valid)
            {
                continue;
            }

            // One URI per span, so both output lists stay index-aligned.
            foreach (Pair<int, int> acceptedSpan in gazetteerEntry.Value)
            {
                discoveredEntities.Add(gazetteer.mUri);
                spans.Add(acceptedSpan);
            }
        }
    }
    return discoveredEntities;
}
/// <summary>
/// Initializes a condition from the gazetteer it refers to and the level it applies at.
/// </summary>
/// <param name="gazetteer">Value assigned to mGazetteer.</param>
/// <param name="level">Value assigned to mLevel.</param>
public Condition(Gazetteer gazetteer, Level level)
{
    this.mGazetteer = gazetteer;
    this.mLevel = level;
}
/// <summary>
/// Finds all occurrences of the enabled terms of <paramref name="gazetteer"/> in this
/// sentence's tokens (mTokens).
/// </summary>
/// <param name="gazetteer">Gazetteer whose terms are searched for and whose stop words are skipped.</param>
/// <param name="spans">
/// Receives one pair per occurrence: mSpanStart of the first matched token and mSpanEnd of the
/// last matched token.
/// </param>
public void Match(Gazetteer gazetteer, out ArrayList<Pair<int, int>> spans)
{
    spans = new ArrayList<Pair<int, int>>();
    foreach (GazetteerTerm term in gazetteer.mTerms)
    {
        if (!term.mEnabled)
        {
            continue;
        }
        int termLength = term.mTokens.Count;
        int lastStart = mTokens.Count - termLength;
        for (int first = 0; first <= lastStart; first++)
        {
            int last = first;
            bool complete = false;
            for (int k = 0; k < termLength; k++)
            {
                bool tokenMatches = Match(term.mTokens[k], mTokens[last], term.mCaseMatchingType, /*firstToken=*/k == 0);
                if (!tokenMatches)
                {
                    break;
                }
                if (k == termLength - 1)
                {
                    complete = true; // whole term matched, ending at sentence token 'last'
                    break;
                }
                // Move to the next sentence token, skipping stop words.
                do
                {
                    last++;
                }
                while (last < mTokens.Count && gazetteer.IsStopWord(mTokens[last].mTokenStr.ToLower()));
                if (last >= mTokens.Count)
                {
                    break; // sentence exhausted before the term was complete
                }
            }
            if (complete)
            {
                // gazetteer term found (starting at micro-token first, ending at micro-token last)
                spans.Add(new Pair<int, int>(mTokens[first].mSpanStart, mTokens[last].mSpanEnd));
            }
        }
    }
}
/// <summary>
/// Finds gazetteer matches in all sentences of this document and returns those that satisfy
/// their gazetteer's conditions and are not nested inside another match.
/// </summary>
/// <remarks>
/// Phase 1 matches every gazetteer of <paramref name="e"/> against every sentence; the spans
/// are used below as indices into the sentence's mTokens. Phase 2 records which gazetteers
/// produced matches per text block and in the whole document. Phase 3 evaluates each
/// gazetteer's conditions (Document, Block, Sentence, FollowedBy). A FollowedBy condition
/// additionally narrows the gazetteer's span list IN PLACE to the spans that are immediately
/// followed by a span of the condition's gazetteer; later iterations over the same sentence
/// see the narrowed list. Accepted spans that lie inside a different span of the same sentence
/// are dropped.
/// </remarks>
/// <param name="e">Engine whose mGazetteers are matched.</param>
/// <param name="spans">
/// Receives, for each reported entity, the pair (mSpanStart of the first token, mSpanEnd of
/// the last token).
/// </param>
/// <returns>One gazetteer URI per reported entity, index-aligned with <paramref name="spans"/>.</returns>
public ArrayList<string> DiscoverEntities(EntityRecognitionEngine e, out ArrayList<Pair<int, int>> spans)
{
    Dictionary<Sentence, Dictionary<Gazetteer, ArrayList<Pair<int, int>>>> sentenceEntityInfo
        = new Dictionary<Sentence, Dictionary<Gazetteer, ArrayList<Pair<int, int>>>>();
    Dictionary<TextBlock, Set<Gazetteer>> textBlockEntityInfo = new Dictionary<TextBlock, Set<Gazetteer>>();
    Set<Gazetteer> documentEntityInfo = new Set<Gazetteer>();

    // look for gazetteer terms
    foreach (KeyValuePair<string, Gazetteer> gazetteer in e.mGazetteers)
    {
        foreach (TextBlock textBlock in mTextBlocks)
        {
            foreach (Sentence sentence in textBlock.mSentences)
            {
                ArrayList<Pair<int, int>> sentenceSpans;
                sentence.Match(gazetteer.Value, out sentenceSpans);
                if (sentenceSpans.Count > 0)
                {
                    Dictionary<Gazetteer, ArrayList<Pair<int, int>>> sentenceInfo;
                    if (sentenceEntityInfo.TryGetValue(sentence, out sentenceInfo))
                    {
                        sentenceInfo.Add(gazetteer.Value, sentenceSpans);
                    }
                    else
                    {
                        sentenceInfo = new Dictionary<Gazetteer, ArrayList<Pair<int, int>>>();
                        sentenceInfo.Add(gazetteer.Value, sentenceSpans);
                        sentenceEntityInfo.Add(sentence, sentenceInfo);
                    }
                }
            }
        }
    }

    // propagate discovered entities
    foreach (KeyValuePair<Sentence, Dictionary<Gazetteer, ArrayList<Pair<int, int>>>> sentenceInfo in sentenceEntityInfo)
    {
        foreach (KeyValuePair<Gazetteer, ArrayList<Pair<int, int>>> gazetteerInfo in sentenceInfo.Value)
        {
            documentEntityInfo.Add(gazetteerInfo.Key);
            TextBlock textBlock = sentenceInfo.Key.mTextBlock;
            Set<Gazetteer> textBlockInfo;
            if (textBlockEntityInfo.TryGetValue(textBlock, out textBlockInfo))
            {
                textBlockInfo.Add(gazetteerInfo.Key);
            }
            else
            {
                textBlockInfo = new Set<Gazetteer>(new Gazetteer[] { gazetteerInfo.Key });
                textBlockEntityInfo.Add(textBlock, textBlockInfo);
            }
        }
    }

    // check conditions
    spans = new ArrayList<Pair<int, int>>();
    ArrayList<string> discoveredEntities = new ArrayList<string>(); // gazetteer URIs
    foreach (KeyValuePair<Sentence, Dictionary<Gazetteer, ArrayList<Pair<int, int>>>> sentenceInfo in sentenceEntityInfo)
    {
        foreach (KeyValuePair<Gazetteer, ArrayList<Pair<int, int>>> gazetteerInfo in sentenceInfo.Value)
        {
            Gazetteer gazetteer = gazetteerInfo.Key;
            Set<Gazetteer> textBlockGazetteers = textBlockEntityInfo[sentenceInfo.Key.mTextBlock];
            bool valid = true;
            foreach (Condition condition in gazetteer.mConditions)
            {
                if (condition.mType == Condition.Type.Document)
                {
                    if (!documentEntityInfo.Contains(condition.mGazetteer)) { valid = false; break; }
                }
                else if (condition.mType == Condition.Type.Block)
                {
                    if (!textBlockGazetteers.Contains(condition.mGazetteer)) { valid = false; break; }
                }
                else if (condition.mType == Condition.Type.Sentence)
                {
                    if (!sentenceInfo.Value.ContainsKey(condition.mGazetteer)) { valid = false; break; }
                }
                else if (condition.mType == Condition.Type.FollowedBy)
                {
                    // fast check: the condition's gazetteer must have matched in this sentence
                    // (single lookup instead of ContainsKey followed by the indexer)
                    ArrayList<Pair<int, int>> condSpans;
                    if (!sentenceInfo.Value.TryGetValue(condition.mGazetteer, out condSpans)) { valid = false; break; }

                    // thorough check: keep only spans whose last token directly precedes the
                    // first token of some span of the condition's gazetteer
                    ArrayList<Pair<int, int>> tmp = new ArrayList<Pair<int, int>>();
                    foreach (Pair<int, int> span in gazetteerInfo.Value)
                    {
                        foreach (Pair<int, int> condSpan in condSpans)
                        {
                            if (span.Second == condSpan.First - 1) // span is valid
                            {
                                tmp.Add(span);
                                // Stop at the first follower: several condition spans can start
                                // at the same token, and adding the span once per follower
                                // would report the same entity more than once.
                                break;
                            }
                        }
                    }
                    if (tmp.Count == 0) { valid = false; break; }

                    // Narrow the span list in place. Only the list content changes, not the
                    // dictionary being enumerated, so the enclosing foreach stays valid.
                    gazetteerInfo.Value.Clear();
                    gazetteerInfo.Value.AddRange(tmp);
                }
            }

            if (valid)
            {
                for (int i = 0; i < gazetteerInfo.Value.Count; i++)
                {
                    // check if inside another span (of any gazetteer matched in this sentence)
                    bool skip = false;
                    Pair<int, int> span = gazetteerInfo.Value[i];
                    foreach (KeyValuePair<Gazetteer, ArrayList<Pair<int, int>>> gazInfo in sentenceInfo.Value)
                    {
                        foreach (Pair<int, int> otherSpan in gazInfo.Value)
                        {
                            if (span.First >= otherSpan.First && span.Second <= otherSpan.Second && span != otherSpan)
                            {
                                skip = true;
                                break;
                            }
                        }
                        if (skip) { break; }
                    }
                    if (!skip)
                    {
                        discoveredEntities.Add(gazetteer.mUri);
                        // Convert token indices to the tokens' span start/end values.
                        spans.Add(new Pair<int, int>(sentenceInfo.Key.mTokens[span.First].mSpanStart, sentenceInfo.Key.mTokens[span.Second].mSpanEnd));
                    }
                }
            }
        }
    }
    return discoveredEntities;
}
/// <summary>
/// Fills mTokens from parallel sequences of token strings and POS constraints and stores the
/// term's settings.
/// </summary>
/// <remarks>
/// Both sequences are advanced together; processing ends as soon as either is exhausted.
/// Tokens are normalized, and tokens the gazetteer reports as stop words (tested in lower
/// case) are skipped. PrepareTokens runs only if at least one token was kept.
/// </remarks>
/// <param name="tokens">Token strings of the term.</param>
/// <param name="posConstraints">POS constraint for each token, in the same order.</param>
/// <param name="lemmatize">When true, mLemmatizer.GetStem supplies a lemma for each kept token.</param>
/// <param name="caseMatchingType">Stored in mCaseMatchingType and passed to PrepareTokens.</param>
/// <param name="enabled">Stored in mEnabled.</param>
/// <param name="gazetteer">Supplies the stop-word test.</param>
private void InitializeInstance(IEnumerable<string> tokens, IEnumerable<string> posConstraints, bool lemmatize, CaseMatchingType caseMatchingType, bool enabled, Gazetteer gazetteer)
{
    mCaseMatchingType = caseMatchingType;
    mEnabled = enabled;

    // Enumerators obtained by hand must be disposed by hand; try/finally guarantees that
    // even when a token fails to normalize or lemmatize.
    IEnumerator<string> enumTokens = tokens.GetEnumerator();
    try
    {
        IEnumerator<string> enumPosConstraints = posConstraints.GetEnumerator();
        try
        {
            while (enumTokens.MoveNext() && enumPosConstraints.MoveNext())
            {
                string tokenStr = Normalize(enumTokens.Current);
                string posConstraint = enumPosConstraints.Current;
                if (!gazetteer.IsStopWord(tokenStr.ToLower()))
                {
                    string lemma = null;
                    if (lemmatize)
                    {
                        lemma = mLemmatizer.GetStem(tokenStr);
                        // No usable stem (empty, or null): use the surface form instead.
                        if (string.IsNullOrEmpty(lemma))
                        {
                            lemma = tokenStr;
                        }
                    }
                    GazetteerToken token = new GazetteerToken(tokenStr, posConstraint, lemma);
                    mTokens.Add(token);
                }
            }
        }
        finally
        {
            enumPosConstraints.Dispose();
        }
    }
    finally
    {
        enumTokens.Dispose();
    }

    if (mTokens.Count > 0)
    {
        PrepareTokens(caseMatchingType, lemmatize);
    }
}
/// <summary>
/// Populates mGazetteers from the subjects of type C_GAZETTEER found in the RDF store.
/// </summary>
/// <remarks>
/// The entity array is traversed three times: first to create and register each gazetteer and
/// read its stop words, then to import gazetteers and read conditions (both calls are given
/// the complete mGazetteers map), and finally to read the terms.
/// </remarks>
public void LoadGazetteers()
{
    mLogger.Info("LoadGazetteers", "Loading gazetteers ...");
    Entity[] found = mRdfStore.SelectSubjects(P_TYPE, C_GAZETTEER);
    mLogger.Info("LoadGazetteers", "Found {0} gazetteers.", found.Length);

    // create gazetteer objects and read their stop words
    foreach (Entity entity in found)
    {
        Gazetteer fresh = new Gazetteer(entity.Uri);
        mGazetteers.Add(entity.Uri, fresh);
        fresh.ReadStopWords(mRdfStore);
    }

    // import gazetteers and read conditions
    foreach (Entity entity in found)
    {
        Gazetteer registered = mGazetteers[entity.Uri];
        registered.ImportGazetteers(mRdfStore, mGazetteers);
        registered = mGazetteers[entity.Uri];
        registered.ReadConditions(mRdfStore, mGazetteers);
    }

    // read terms
    foreach (Entity entity in found)
    {
        Gazetteer registered = mGazetteers[entity.Uri];
        registered.ReadTerms(mRdfStore);
    }
}
/// <summary>
/// Constructor for creating Person, Organization, and Location gazetteers in automatic
/// Named Entity Recognition.
/// </summary>
public AutoNER()
{
    // Each gazetteer is created from a name and a file name.
    this.personGazetteer = new Gazetteer("PERSON", "gazetteer-person.txt");
    this.organizationGazetteer = new Gazetteer("ORGANIZATION", "gazetteer-organization.txt");
    this.locationGazetteer = new Gazetteer("LOCATION", "gazetteer-location.txt");
}
/// <summary>
/// Creates a condition of the given <paramref name="type"/> that refers to <paramref name="gazetteer"/>.
/// </summary>
/// <param name="gazetteer">Gazetteer stored in mGazetteer.</param>
/// <param name="type">Condition type stored in mType.</param>
public Condition(Gazetteer gazetteer, Type type)
{
    this.mType = type;
    this.mGazetteer = gazetteer;
}
/// <summary>
/// Initializes a condition from the gazetteer it refers to and its type.
/// </summary>
/// <param name="gazetteer">Value assigned to mGazetteer.</param>
/// <param name="type">Value assigned to mType.</param>
public Condition(Gazetteer gazetteer, Type type)
{
    this.mGazetteer = gazetteer;
    this.mType = type;
}
/// <summary>
/// Tries this gazetteer's own terms first (IsMatch) and then each imported gazetteer in turn.
/// </summary>
/// <param name="tokens">Tokens to match against.</param>
/// <param name="startIdx">Position at which a term must start.</param>
/// <param name="len">Value written by the last IsMatch/Match call that ran.</param>
/// <param name="gazetteer">
/// Gazetteer handed on unchanged to IsMatch and to the imported gazetteers' Match calls.
/// </param>
/// <returns>true on the first successful match; false if nothing matches.</returns>
public bool Match(string[] tokens, int startIdx, out int len, Gazetteer gazetteer)
{
    bool ownTermMatched = IsMatch(tokens, startIdx, out len, gazetteer);
    if (ownTermMatched)
    {
        return true;
    }

    foreach (Gazetteer imported in mImportedGazetteers)
    {
        if (!imported.Match(tokens, startIdx, out len, gazetteer))
        {
            continue;
        }
        return true;
    }

    return false;
}
/// <summary>
/// Reports whether at least one term in mTerms matches <paramref name="tokens"/> at
/// <paramref name="startIdx"/>.
/// </summary>
/// <param name="tokens">Tokens to match against.</param>
/// <param name="startIdx">Position at which a term must start.</param>
/// <param name="len">
/// Set to <paramref name="startIdx"/> up front, then overwritten by every Term.Match call
/// that runs.
/// </param>
/// <param name="gazetteer">Gazetteer forwarded to each term's Match call.</param>
/// <returns>true if a term matches; otherwise false.</returns>
public bool IsMatch(string[] tokens, int startIdx, out int len, Gazetteer gazetteer)
{
    len = startIdx;
    foreach (Term candidate in mTerms)
    {
        if (!candidate.Match(tokens, startIdx, out len, gazetteer))
        {
            continue;
        }
        return true;
    }
    return false;
}
/// <summary>
/// Populates mGazetteers from the subjects of type C_GAZETTEER found in the RDF store.
/// </summary>
/// <remarks>
/// The entity array is traversed three times: to create and register each gazetteer (reading
/// stop words and settings), to import gazetteers (the call is given the complete mGazetteers
/// map), and to read the terms.
/// </remarks>
public void ReadGazetteers()
{
    mLogger.Info("ReadGazetteers", "Reading gazetteers ...");
    Entity[] found = mRdfStore.SelectSubjects(P_TYPE, C_GAZETTEER);
    mLogger.Info("ReadGazetteers", "Found {0} gazetteers.", found.Length);

    // gazetteer objects, with stop words and settings
    for (int i = 0; i < found.Length; i++)
    {
        Gazetteer fresh = new Gazetteer();
        fresh.mUri = found[i].Uri;
        mGazetteers.Add(found[i].Uri, fresh);
        fresh.ReadStopWords(mRdfStore);
        fresh.ReadSettings(mRdfStore);
    }

    // imported gazetteers
    for (int i = 0; i < found.Length; i++)
    {
        mGazetteers[found[i].Uri].ImportGazetteers(mRdfStore, mGazetteers);
    }

    // terms
    for (int i = 0; i < found.Length; i++)
    {
        mGazetteers[found[i].Uri].ReadTerms(mRdfStore);
    }
}
/// <summary>
/// Checks whether the words of this term (mWords) occur in <paramref name="words"/> beginning
/// at <paramref name="startIdx"/>, allowing stop words between the term's words.
/// </summary>
/// <param name="words">Words to match against.</param>
/// <param name="startIdx">Index of the word that must equal the term's first word.</param>
/// <param name="len">
/// Number of entries of <paramref name="words"/> spanned by the match (stop words included)
/// on success; 0 on failure.
/// </param>
/// <param name="gazetteer">Provides mIgnoreCase for the comparisons and the stop-word test.</param>
/// <returns>true when the complete term was found; otherwise false.</returns>
public bool Match(string[] words, int startIdx, out int len, Gazetteer gazetteer)
{
    len = 0;

    // Nothing can match without input, with a start position outside the array, or with a
    // term that has no words; answer "no match" rather than failing on the indexers below.
    bool startInRange = words != null && startIdx >= 0 && startIdx < words.Length;
    if (!startInRange || mWords.Count == 0)
    {
        return false;
    }

    string firstWord = words[startIdx];
    if (string.Compare(firstWord, mWords[0], gazetteer.mIgnoreCase) != 0)
    {
        return false; // first word must match
    }

    // *** only for the demo: accept only the case types ABC, Abc and AbC for the first word
    Utils.CaseType caseType = Utils.GetCaseType(firstWord);
    bool acceptedCase = caseType == Utils.CaseType.ABC
        || caseType == Utils.CaseType.Abc
        || caseType == Utils.CaseType.AbC;
    if (!acceptedCase)
    {
        return false;
    }

    int idx = startIdx + 1;
    for (int i = 1; i < mWords.Count; i++)
    {
        // skip stop words
        while (idx < words.Length && gazetteer.IsStopWord(words[idx].ToLower()))
        {
            idx++;
        }
        if (idx == words.Length)
        {
            return false; // input ended before all words of the term were seen
        }
        if (string.Compare(words[idx], mWords[i], gazetteer.mIgnoreCase) != 0)
        {
            return false;
        }
        idx++;
    }

    len = idx - startIdx;
    return true;
}
/// <summary>
/// Creates a gazetteer term by parsing its textual definition.
/// </summary>
/// <remarks>
/// Parts of <paramref name="termDef"/> matched by mConstraintRegex are handed to
/// ParseGazetteerSettings, which receives the case matching type, lemmatize flag and enabled
/// flag by reference, and are then removed. What remains is tokenized with
/// mGazetteerMicroTokenRegex; a micro-token that splits on '/' into exactly two parts yields a
/// token plus a POS constraint, any other micro-token is kept as is with a null constraint.
/// </remarks>
/// <param name="termDef">Textual term definition, optionally containing inline settings.</param>
/// <param name="gazetteer">Gazetteer the term belongs to; forwarded to InitializeInstance.</param>
/// <param name="defaultCaseMatchingType">Initial case matching type.</param>
/// <param name="defaultLemmatizeFlag">Initial lemmatize flag.</param>
/// <param name="defaultEnabledFlag">Initial enabled flag.</param>
public GazetteerTerm(string termDef, Gazetteer gazetteer, CaseMatchingType defaultCaseMatchingType, bool defaultLemmatizeFlag, bool defaultEnabledFlag)
{
    // default settings
    CaseMatchingType caseMatchingType = defaultCaseMatchingType;
    bool lemmatize = defaultLemmatizeFlag;
    bool enabled = defaultEnabledFlag;

    // parse term settings and cut them out of the definition
    string definitionWithoutSettings = mConstraintRegex.Replace(termDef, delegate(Match settings)
    {
        ParseGazetteerSettings(settings.Value, ref caseMatchingType, ref lemmatize, ref enabled);
        return "";
    });

    // split the rest into micro-tokens with optional POS constraints
    ArrayList<string> tokens = new ArrayList<string>();
    ArrayList<string> posConstraints = new ArrayList<string>();
    foreach (Match microToken in mGazetteerMicroTokenRegex.Matches(definitionWithoutSettings))
    {
        string[] parts = microToken.Value.Split('/');
        if (parts.Length == 2)
        {
            tokens.Add(parts[0]);
            posConstraints.Add(parts[1]);
        }
        else
        {
            tokens.Add(microToken.Value);
            posConstraints.Add(null);
        }
    }

    InitializeInstance(tokens, posConstraints, lemmatize, caseMatchingType, enabled, gazetteer);
}