Exemple #1
0
 /// <summary>
 /// Walks the inflection tries selected by <paramref name="POS"/> along <paramref name="text"/>,
 /// starting at <paramref name="position"/>, and collects at most one state per consumed length.
 /// </summary>
 /// <param name="canUseKatakana">When true, every character read from the text is passed through
 /// <c>TextUtils.katakanaToHiraganaChar</c> before it is looked up in a trie.</param>
 /// <param name="POS">Keys used to pick the starting tries from <c>index</c>; keys that are not in
 /// <c>index</c> are skipped. May be null, which is handled like an empty list.</param>
 /// <param name="text">Text to scan.</param>
 /// <param name="position">Index in <paramref name="text"/> at which scanning starts.</param>
 /// <returns>
 /// The collected states. A state built from an empty string is appended when no state with an
 /// empty suffix was collected and <paramref name="POS"/> is null or
 /// <c>knownPOS.IsSupersetOf(POS)</c> is false.
 /// </returns>
 internal IEnumerable<InflectionState> findMatching(bool canUseKatakana, List<string> POS, string text, int position)
 {
     List<InflectionState> results = new List<InflectionState>();
     List<Tuple<InflectionTrie, InflectionState, string>> cur = new List<Tuple<InflectionTrie, InflectionState, string>>();
     // The fallback at the end of this method explicitly supports POS == null, so the list must
     // not be dereferenced unconditionally here.
     if (POS != null) {
         foreach (string pos in POS) {
             InflectionTrie trie;
             if (index.TryGetValue(pos, out trie)) {
                 cur.Add(Tuple.Create<InflectionTrie, InflectionState, string>(trie, null, pos));
             }
         }
     }
     int offset = 0;
     bool hasEmptySuf = false;
     // position + offset == text.Length is still processed so that forms ending exactly at the
     // end of the text are reported; the lookup character is '\0' in that iteration.
     while (cur.Count > 0 && position + offset <= text.Length) {
         List<Tuple<InflectionTrie, InflectionState, string>> added = new List<Tuple<InflectionTrie, InflectionState, string>>();
         List<Tuple<InflectionTrie, InflectionState, string>> added2 = new List<Tuple<InflectionTrie, InflectionState, string>>();
         List<Tuple<InflectionTrie, InflectionState, string>> next = new List<Tuple<InflectionTrie, InflectionState, string>>();
         // Each link target type is expanded at most once per offset.
         HashSet<string> addedPOS = new HashSet<string>();
         // First expansion level: tries reachable through a link form of a current node.
         foreach (var it in cur) {
             foreach (var link in it.Item1.linkForms) {
                 if (addedPOS.Add(link.NextType)) {
                     InflectionState linked = new InflectionState("", link, it.Item2 == null ? null : it.Item2.tense);
                     // NOTE(review): index[...] presumably throws for an unknown NextType - confirm the data guarantees it exists.
                     added.Add(Tuple.Create(index[link.NextType], linked, it.Item3));
                 }
             }
         }
         // Second expansion level: tries reachable through a link form of a first-level addition.
         foreach (var it in added) {
             foreach (var link in it.Item1.linkForms) {
                 if (addedPOS.Add(link.NextType)) {
                     InflectionState linked = new InflectionState("", link, it.Item2 == null ? null : it.Item2.tense);
                     added2.Add(Tuple.Create(index[link.NextType], linked, it.Item3));
                 }
             }
         }
         InflectionState newState = null;
         char c;
         if (position + offset < text.Length) {
             c = text[position + offset];
             if (canUseKatakana) {
                 c = TextUtils.katakanaToHiraganaChar(c);
             }
         } else {
             c = '\0';
         }
         foreach (var it in cur.Concat(added).Concat(added2)) {
             foreach (var form in it.Item1.forms) {
                 // The first form found at this offset creates the state; later forms only
                 // update its tense. Every form contributes its originating POS.
                 if (newState == null) {
                     newState = new InflectionState(text.Substring(position, offset), form, it.Item2 == null ? null : it.Item2.tense);
                 } else {
                     newState.updateTense(form.Tense);
                 }
                 newState.addPOS(it.Item3);
             }
             InflectionTrie nextTrie;
             if (it.Item1.children.TryGetValue(c, out nextTrie)) {
                 next.Add(Tuple.Create(nextTrie, it.Item2, it.Item3));
             }
         }
         if (newState != null) {
             // HACK (kept from the original): states whose suffix ends with one of these
             // sequences are dropped. Ordinal comparison keeps the check culture-independent.
             bool rejected = newState.suffix.EndsWith("てい", StringComparison.Ordinal)
                 || newState.suffix.EndsWith("でい", StringComparison.Ordinal)
                 || newState.suffix.EndsWith("るた", StringComparison.Ordinal);
             if (!rejected) {
                 if (newState.suffix == "") {
                     hasEmptySuf = true;
                 }
                 results.Add(newState);
             }
         }
         cur = next;
         offset += 1;
     }
     if (!hasEmptySuf && (POS == null || !knownPOS.IsSupersetOf(POS))) {
         InflectionState state = new InflectionState("");
         results.Add(state);
     }
     return results;
 }
Exemple #2
0
        /// <summary>
        /// Follows the inflection tries chosen by <paramref name="POS"/> through
        /// <paramref name="text"/> from <paramref name="position"/> onwards and gathers the
        /// inflection states found at each consumed length.
        /// </summary>
        /// <param name="canUseKatakana">If true, characters read from the text are converted with
        /// <c>TextUtils.katakanaToHiraganaChar</c> before the trie lookup.</param>
        /// <param name="POS">Keys looked up in <c>index</c> to obtain the starting tries; keys not
        /// present in <c>index</c> are ignored. Null is accepted and behaves like an empty list.</param>
        /// <param name="text">Text to scan.</param>
        /// <param name="position">Index in <paramref name="text"/> where the scan begins.</param>
        /// <returns>
        /// The gathered states. When none of them has an empty suffix and <paramref name="POS"/>
        /// is null or <c>knownPOS.IsSupersetOf(POS)</c> is false, a state constructed from an
        /// empty string is appended.
        /// </returns>
        internal IEnumerable<InflectionState> findMatching(bool canUseKatakana, List<string> POS, string text, int position)
        {
            var results = new List<InflectionState>();
            var cur = new List<Tuple<InflectionTrie, InflectionState, string>>();

            // POS == null is a supported input (see the fallback below), so guard the seeding loop
            // instead of letting foreach throw a NullReferenceException.
            if (POS != null)
            {
                foreach (string pos in POS)
                {
                    InflectionTrie trie;
                    if (index.TryGetValue(pos, out trie))
                    {
                        cur.Add(Tuple.Create<InflectionTrie, InflectionState, string>(trie, null, pos));
                    }
                }
            }

            bool hasEmptySuf = false;

            // The bound is inclusive: at position + offset == text.Length the loop runs once more
            // with '\0' as lookup character so forms ending at the end of the text are reported.
            for (int offset = 0; cur.Count > 0 && position + offset <= text.Length; offset++)
            {
                // Nodes inspected at this offset: the current nodes first, then the nodes reached
                // through one link form, then the nodes reached through two link forms.
                var candidates = new List<Tuple<InflectionTrie, InflectionState, string>>(cur);
                var frontier = cur;
                // A link target type is expanded only once per offset, across both levels.
                var addedPOS = new HashSet<string>();
                for (int depth = 0; depth < 2; depth++)
                {
                    var expanded = new List<Tuple<InflectionTrie, InflectionState, string>>();
                    foreach (var it in frontier)
                    {
                        foreach (var link in it.Item1.linkForms)
                        {
                            if (!addedPOS.Add(link.NextType))
                            {
                                continue;
                            }
                            InflectionState linked = new InflectionState("", link, it.Item2 == null ? null : it.Item2.tense);
                            // NOTE(review): index[...] presumably throws for an unknown NextType - confirm the data guarantees it exists.
                            expanded.Add(Tuple.Create(index[link.NextType], linked, it.Item3));
                        }
                    }
                    candidates.AddRange(expanded);
                    frontier = expanded;
                }

                int at = position + offset;
                char c = '\0';
                if (at < text.Length)
                {
                    c = text[at];
                    if (canUseKatakana)
                    {
                        c = TextUtils.katakanaToHiraganaChar(c);
                    }
                }

                InflectionState newState = null;
                var next = new List<Tuple<InflectionTrie, InflectionState, string>>();
                foreach (var it in candidates)
                {
                    foreach (var form in it.Item1.forms)
                    {
                        // Only the first form at this offset creates the state; the others merge
                        // their tense into it. Each form records the POS it came from.
                        if (newState == null)
                        {
                            newState = new InflectionState(text.Substring(position, offset), form, it.Item2 == null ? null : it.Item2.tense);
                        }
                        else
                        {
                            newState.updateTense(form.Tense);
                        }
                        newState.addPOS(it.Item3);
                    }

                    InflectionTrie nextTrie;
                    if (it.Item1.children.TryGetValue(c, out nextTrie))
                    {
                        next.Add(Tuple.Create(nextTrie, it.Item2, it.Item3));
                    }
                }

                // HACK (kept from the original): a state whose suffix ends with one of these
                // sequences is discarded. Ordinal comparison avoids culture-dependent matching.
                if (newState != null
                    && !newState.suffix.EndsWith("てい", StringComparison.Ordinal)
                    && !newState.suffix.EndsWith("でい", StringComparison.Ordinal)
                    && !newState.suffix.EndsWith("るた", StringComparison.Ordinal))
                {
                    if (newState.suffix == "")
                    {
                        hasEmptySuf = true;
                    }
                    results.Add(newState);
                }

                cur = next;
            }

            if (!hasEmptySuf && (POS == null || !knownPOS.IsSupersetOf(POS)))
            {
                results.Add(new InflectionState(""));
            }
            return results;
        }