/// <summary>
/// Walks the inflection tries registered in <c>index</c> for the given parts of speech,
/// consuming <paramref name="text"/> one character at a time from <paramref name="position"/>,
/// and collects an <see cref="InflectionState"/> for every offset at which some trie node has forms.
/// </summary>
/// <param name="canUseKatakana">
/// When true, each character read from <paramref name="text"/> is passed through
/// <c>TextUtils.katakanaToHiraganaChar</c> before it is used as a trie key.
/// </param>
/// <param name="POS">
/// Part-of-speech keys used to pick the starting tries from <c>index</c>; keys missing from
/// <c>index</c> are ignored. May be null, in which case no trie is walked.
/// </param>
/// <param name="text">Text to scan.</param>
/// <param name="position">Index in <paramref name="text"/> where the candidate suffix starts.</param>
/// <returns>
/// At most one state per scanned offset (suffixes ending in "てい", "でい" or "るた" are dropped),
/// followed by an empty-suffix state when no empty-suffix state was produced and
/// <paramref name="POS"/> is null or not entirely contained in <c>knownPOS</c>.
/// </returns>
internal IEnumerable<InflectionState> findMatching(bool canUseKatakana, List<string> POS, string text, int position)
{
    List<InflectionState> results = new List<InflectionState>();
    List<Tuple<InflectionTrie, InflectionState, string>> cur = new List<Tuple<InflectionTrie, InflectionState, string>>();

    // The fallback check at the end accepts a null POS list, so it must not be enumerated blindly here.
    if (POS != null)
    {
        foreach (string pos in POS)
        {
            InflectionTrie trie;
            if (index.TryGetValue(pos, out trie))
            {
                cur.Add(Tuple.Create<InflectionTrie, InflectionState, string>(trie, null, pos));
            }
        }
    }

    int offset = 0;
    bool hasEmptySuf = false;
    while (cur.Count > 0 && position + offset <= text.Length)
    {
        // Nodes examined at this offset: the current nodes first, then nodes reached through link forms.
        List<Tuple<InflectionTrie, InflectionState, string>> candidates = new List<Tuple<InflectionTrie, InflectionState, string>>(cur);
        List<Tuple<InflectionTrie, InflectionState, string>> next = new List<Tuple<InflectionTrie, InflectionState, string>>();
        HashSet<string> addedPOS = new HashSet<string>();

        // Follow link forms two levels deep (links of the current nodes, then links of those links).
        // Each linked type is entered at most once per offset; index[...] throws if the type is unknown.
        List<Tuple<InflectionTrie, InflectionState, string>> frontier = cur;
        for (int depth = 0; depth < 2; depth++)
        {
            List<Tuple<InflectionTrie, InflectionState, string>> linkedNodes = new List<Tuple<InflectionTrie, InflectionState, string>>();
            foreach (var it in frontier)
            {
                foreach (var link in it.Item1.linkForms)
                {
                    if (addedPOS.Add(link.NextType))
                    {
                        InflectionState linked = new InflectionState("", link, it.Item2 == null ? null : it.Item2.tense);
                        linkedNodes.Add(Tuple.Create(index[link.NextType], linked, it.Item3));
                    }
                }
            }
            candidates.AddRange(linkedNodes);
            frontier = linkedNodes;
        }

        // Character used to descend into child nodes; '\0' stands in for "end of text".
        char c;
        if (position + offset < text.Length)
        {
            c = text[position + offset];
            if (canUseKatakana)
            {
                c = TextUtils.katakanaToHiraganaChar(c);
            }
        }
        else
        {
            c = '\0';
        }

        // All forms found at this offset are merged into a single state.
        InflectionState newState = null;
        foreach (var it in candidates)
        {
            foreach (var form in it.Item1.forms)
            {
                if (newState == null)
                {
                    newState = new InflectionState(text.Substring(position, offset), form, it.Item2 == null ? null : it.Item2.tense);
                }
                else
                {
                    newState.updateTense(form.Tense);
                }
                newState.addPOS(it.Item3);
            }

            InflectionTrie nextTrie;
            if (it.Item1.children.TryGetValue(c, out nextTrie))
            {
                next.Add(Tuple.Create(nextTrie, it.Item2, it.Item3));
            }
        }

        if (newState != null)
        {
            // HACK kept from the original: suffixes with these endings are never reported.
            // Ordinal comparison keeps the check independent of the current culture.
            bool rejected = newState.suffix.EndsWith("てい", StringComparison.Ordinal)
                || newState.suffix.EndsWith("でい", StringComparison.Ordinal)
                || newState.suffix.EndsWith("るた", StringComparison.Ordinal);
            if (!rejected)
            {
                if (newState.suffix == "")
                {
                    hasEmptySuf = true;
                }
                results.Add(newState);
            }
        }

        cur = next;
        offset += 1;
    }

    // Fall back to a bare empty-suffix state when none was produced and the POS list is
    // absent or contains something outside knownPOS.
    if (!hasEmptySuf && (POS == null || !knownPOS.IsSupersetOf(POS)))
    {
        results.Add(new InflectionState(""));
    }
    return results;
}
/// <summary>
/// Walks the inflection tries registered in <c>index</c> for the given parts of speech,
/// consuming <paramref name="text"/> one character at a time from <paramref name="position"/>,
/// and collects an <see cref="InflectionState"/> for every offset at which some trie node has forms.
/// </summary>
/// <param name="canUseKatakana">
/// When true, each character read from <paramref name="text"/> is passed through
/// <c>TextUtils.katakanaToHiraganaChar</c> before it is used as a trie key.
/// </param>
/// <param name="POS">
/// Part-of-speech keys used to pick the starting tries from <c>index</c>; keys missing from
/// <c>index</c> are ignored. May be null, in which case no trie is walked.
/// </param>
/// <param name="text">Text to scan.</param>
/// <param name="position">Index in <paramref name="text"/> where the candidate suffix starts.</param>
/// <returns>
/// At most one state per scanned offset (suffixes ending in "てい", "でい" or "るた" are dropped),
/// followed by an empty-suffix state when no empty-suffix state was produced and
/// <paramref name="POS"/> is null or not entirely contained in <c>knownPOS</c>.
/// </returns>
internal IEnumerable<InflectionState> findMatching(bool canUseKatakana, List<string> POS, string text, int position)
{
    List<InflectionState> results = new List<InflectionState>();
    List<Tuple<InflectionTrie, InflectionState, string>> cur = new List<Tuple<InflectionTrie, InflectionState, string>>();

    // The fallback check at the end accepts a null POS list, so it must not be enumerated blindly here.
    if (POS != null)
    {
        foreach (string pos in POS)
        {
            InflectionTrie trie;
            if (index.TryGetValue(pos, out trie))
            {
                cur.Add(Tuple.Create<InflectionTrie, InflectionState, string>(trie, null, pos));
            }
        }
    }

    int offset = 0;
    bool hasEmptySuf = false;
    while (cur.Count > 0 && position + offset <= text.Length)
    {
        // Nodes examined at this offset: the current nodes first, then nodes reached through link forms.
        List<Tuple<InflectionTrie, InflectionState, string>> candidates = new List<Tuple<InflectionTrie, InflectionState, string>>(cur);
        List<Tuple<InflectionTrie, InflectionState, string>> next = new List<Tuple<InflectionTrie, InflectionState, string>>();
        HashSet<string> addedPOS = new HashSet<string>();

        // Follow link forms two levels deep (links of the current nodes, then links of those links).
        // Each linked type is entered at most once per offset; index[...] throws if the type is unknown.
        List<Tuple<InflectionTrie, InflectionState, string>> frontier = cur;
        for (int depth = 0; depth < 2; depth++)
        {
            List<Tuple<InflectionTrie, InflectionState, string>> linkedNodes = new List<Tuple<InflectionTrie, InflectionState, string>>();
            foreach (var it in frontier)
            {
                foreach (var link in it.Item1.linkForms)
                {
                    if (addedPOS.Add(link.NextType))
                    {
                        InflectionState linked = new InflectionState("", link, it.Item2 == null ? null : it.Item2.tense);
                        linkedNodes.Add(Tuple.Create(index[link.NextType], linked, it.Item3));
                    }
                }
            }
            candidates.AddRange(linkedNodes);
            frontier = linkedNodes;
        }

        // Character used to descend into child nodes; '\0' stands in for "end of text".
        char c;
        if (position + offset < text.Length)
        {
            c = text[position + offset];
            if (canUseKatakana)
            {
                c = TextUtils.katakanaToHiraganaChar(c);
            }
        }
        else
        {
            c = '\0';
        }

        // All forms found at this offset are merged into a single state.
        InflectionState newState = null;
        foreach (var it in candidates)
        {
            foreach (var form in it.Item1.forms)
            {
                if (newState == null)
                {
                    newState = new InflectionState(text.Substring(position, offset), form, it.Item2 == null ? null : it.Item2.tense);
                }
                else
                {
                    newState.updateTense(form.Tense);
                }
                newState.addPOS(it.Item3);
            }

            InflectionTrie nextTrie;
            if (it.Item1.children.TryGetValue(c, out nextTrie))
            {
                next.Add(Tuple.Create(nextTrie, it.Item2, it.Item3));
            }
        }

        if (newState != null)
        {
            // HACK kept from the original: suffixes with these endings are never reported.
            // Ordinal comparison keeps the check independent of the current culture.
            bool rejected = newState.suffix.EndsWith("てい", StringComparison.Ordinal)
                || newState.suffix.EndsWith("でい", StringComparison.Ordinal)
                || newState.suffix.EndsWith("るた", StringComparison.Ordinal);
            if (!rejected)
            {
                if (newState.suffix == "")
                {
                    hasEmptySuf = true;
                }
                results.Add(newState);
            }
        }

        cur = next;
        offset += 1;
    }

    // Fall back to a bare empty-suffix state when none was produced and the POS list is
    // absent or contains something outside knownPOS.
    if (!hasEmptySuf && (POS == null || !knownPOS.IsSupersetOf(POS)))
    {
        results.Add(new InflectionState(""));
    }
    return results;
}