SyntaxTreeNode.GetVersionEntryID, SolarixGrammarEngineNET C# (CSharp) Exemples de code

Exemple #1

0

Afficher le fichier

Fichier : CommonClasses.cs Projet : zaharPonimash/GrammarEngine

    /// <summary>
    /// Tests one specific version (reading) of a token against this matcher.
    /// </summary>
    /// <remarks>
    /// The criteria are evaluated in a fixed order. A lexeme constraint, when present, decides the
    /// result on its own; otherwise a lemma constraint, when present, decides the result on its own.
    /// Only when neither is set are the part-of-speech set and the coordinate pairs consulted, and
    /// every one of those that is present must hold.
    /// </remarks>
    /// <param name="proj">Token whose version is inspected.</param>
    /// <param name="iver">Index of the token version to test.</param>
    /// <param name="gren">Grammar engine used to look up the class of a dictionary entry.</param>
    /// <returns><c>true</c> when the version satisfies the active constraints.</returns>
    public bool Match(SolarixGrammarEngineNET.SyntaxTreeNode proj, int iver, SolarixGrammarEngineNET.GrammarEngine2 gren)
    {
        // Lexeme constraint: compare the surface word, ignoring case.
        if (lexeme != null)
        {
            return proj.GetWord().Equals(lexeme, StringComparison.InvariantCultureIgnoreCase);
        }

        // Lemma constraint: the entry id of this version must be one of the listed ids.
        if (id_lemma != null)
        {
            return id_lemma.Contains(proj.GetVersionEntryID(iver));
        }

        if (pos != null)
        {
            int entry_id = proj.GetVersionEntryID(iver);

            // An entry id of -1 is never looked up in the engine and fails the constraint.
            if (entry_id == -1 || !pos.Contains(gren.GetEntryClass(entry_id)))
            {
                return false;
            }
        }

        if (pairs != null)
        {
            // Every required coordinate pair must be present in this version.
            foreach (SolarixGrammarEngineNET.CoordPair required in pairs)
            {
                if (!proj.VersionContains(iver, required))
                {
                    return false;
                }
            }
        }

        return true;
    }

Exemple #2

0

Afficher le fichier

    /// <summary>
    /// Returns the lower-cased lemma for one version of a token.
    /// </summary>
    /// <param name="version_index">Index of the token version whose dictionary entry is used.</param>
    /// <param name="token">Token to take the version (and, as a fallback, the word) from.</param>
    /// <returns>
    /// The lower-cased entry name of the version, or the lower-cased surface word when
    /// <c>IsUnknownLexem</c> reports the entry name as unknown.
    /// </returns>
    string GetTokenVersionLemma(int version_index, SolarixGrammarEngineNET.SyntaxTreeNode token)
    {
        string entry_name = gren.GetEntryName(token.GetVersionEntryID(version_index));

        // The surface word is only fetched when the entry name cannot serve as the lemma.
        string lemma_source = IsUnknownLexem(entry_name) ? token.GetWord() : entry_name;

        return lemma_source.ToLower();
    }

Exemple #3

0

Afficher le fichier

Fichier : Builder_RuleInference.cs Projet : zaharPonimash/GrammarEngine

    /// <summary>
    /// Tests whether a token satisfies this recognizer, accepting a match on any of its versions.
    /// </summary>
    /// <remarks>
    /// A non-empty lexeme decides the result on its own (case-insensitive comparison with the
    /// token word). Otherwise each version is tried in turn and the first one that matches the
    /// entry id, the entry class, or all coordinate pairs makes the token match.
    /// </remarks>
    /// <param name="token">Token to test.</param>
    /// <param name="gren">Grammar engine used to look up the class of a dictionary entry.</param>
    /// <returns>
    /// <c>true</c> when the lexeme matches or some version satisfies one of the criteria;
    /// <c>false</c> when no version does.
    /// </returns>
    public bool Match(SolarixGrammarEngineNET.SyntaxTreeNode token, SolarixGrammarEngineNET.GrammarEngine2 gren)
    {
        if (!string.IsNullOrEmpty(lexem))
        {
            return lexem.Equals(token.GetWord(), StringComparison.CurrentCultureIgnoreCase);
        }

        // A match with any single version of the token is accepted.
        int nver = token.VersionCount();

        for (int iver = 0; iver < nver; ++iver)
        {
            int version_ekey = token.GetVersionEntryID(iver);

            if (id_entry != -1 && version_ekey == id_entry)
            {
                return true;
            }

            if (id_class != -1 && gren.GetEntryClass(version_ekey) == id_class)
            {
                return true;
            }

            // NOTE(review): when pairs is empty this check succeeds trivially, so the first
            // version is accepted regardless of id_entry / id_class - confirm this is intended.
            bool pairs_matched = true;
            foreach (SolarixGrammarEngineNET.CoordPair p in pairs)
            {
                if (!token.VersionContains(iver, p))
                {
                    pairs_matched = false;
                    break;
                }
            }

            if (pairs_matched)
            {
                return true;
            }
        }

        // No version satisfied any criterion. The method used to return true here as well,
        // which made every path succeed and the loop above pointless.
        return false;
    }

Exemple #4

0

Afficher le fichier

    /// <summary>
    /// Analyzes the line twice (complete analysis, then tokenization only) and passes every token
    /// to <c>GetTokenSuffix</c> and the entry name of every token version to <c>GetFormSuffix</c>.
    /// </summary>
    /// <remarks>
    /// The values returned by the suffix helpers are not used here.
    /// NOTE(review): presumably the helpers are called for their side effects - confirm.
    /// </remarks>
    /// <param name="line">Sentence text to analyze.</param>
    /// <returns>Always <c>true</c>.</returns>
    public bool ProcessSample(string line)
    {
        // Pass 1: complete morphological analysis.
        using (SolarixGrammarEngineNET.AnalysisResults tokens = gren.AnalyzeMorphology(line, LanguageID, SolarixGrammarEngineNET.GrammarEngine.MorphologyFlags.SOL_GREN_COMPLETE_ONLY))
        {
            int final_index = tokens.Count - 1;
            for (int word_index = 0; word_index <= final_index; ++word_index)
            {
                SolarixGrammarEngineNET.SyntaxTreeNode node = tokens[word_index];
                GetTokenSuffix(word_index, final_index, node);

                int version_count = node.VersionCount();
                for (int v = 0; v < version_count; ++v)
                {
                    string entry_name = gren.GetEntryName(node.GetVersionEntryID(v));
                    GetFormSuffix(word_index, final_index, entry_name);
                }
            }
        }

        // Pass 2: tokenization only, visited in exactly the same way.
        using (SolarixGrammarEngineNET.AnalysisResults projs = gren.AnalyzeMorphology(line, LanguageID, SolarixGrammarEngineNET.GrammarEngine.MorphologyFlags.SOL_GREN_TOKENIZE_ONLY))
        {
            int final_index = projs.Count - 1;
            for (int word_index = 0; word_index <= final_index; ++word_index)
            {
                SolarixGrammarEngineNET.SyntaxTreeNode node = projs[word_index];
                GetTokenSuffix(word_index, final_index, node);

                int version_count = node.VersionCount();
                for (int v = 0; v < version_count; ++v)
                {
                    string entry_name = gren.GetEntryName(node.GetVersionEntryID(v));
                    GetFormSuffix(word_index, final_index, entry_name);
                }
            }
        }

        return true;
    }

Exemple #5

0

Afficher le fichier

Fichier : Builder_RuleInference.cs Projet : zaharPonimash/GrammarEngine

    /// <summary>
    /// Feeds one context window of a sample to the existing recognizers and, when the homonymous
    /// token resolves to a single dictionary entry, adds new recognizers generated from the window.
    /// </summary>
    /// <param name="line">Source sentence; passed on to <c>MatchedContextInfo</c>.</param>
    /// <param name="tokens">Analysis results of the sentence.</param>
    /// <param name="left_i">Index of the leftmost token of the context window.</param>
    /// <param name="gren">Grammar engine passed on to the recognizers and the context enumerator.</param>
    public void ProcessSample(string line, SolarixGrammarEngineNET.AnalysisResults tokens, int left_i, SolarixGrammarEngineNET.GrammarEngine2 gren)
    {
        // The context consists of the words from left_i through left_i + ctx.len.

        // Step 1: report the outcome of this sample to every recognizer collected so far.
        for (int r = 0; r < recognizers.Count; ++r)
        {
            ContextRecognizer known = recognizers[r];

            switch (known.Match(tokens, left_i, gren))
            {
                case 1:
                    // The rule recognized the context and resolved the homonymy correctly.
                    known.Success(new MatchedContextInfo(line, tokens, left_i, ctx.len));
                    break;

                case 0:
                    // The rule recognized the context but resolved the homonymy incorrectly.
                    known.Fail(new MatchedContextInfo(line, tokens, left_i, ctx.len));
                    break;
            }
        }

        // Step 2: the sample is used for rule generation only if all versions of the
        // homonymous token share one dictionary entry.
        SolarixGrammarEngineNET.SyntaxTreeNode omonym_token = tokens[left_i + ctx.position];
        int version_count = omonym_token.VersionCount();

        int first_ekey = -1;
        for (int v = 0; v < version_count; ++v)
        {
            int ekey = omonym_token.GetVersionEntryID(v);

            if (first_ekey == -1)
            {
                first_ekey = ekey;
                continue;
            }

            if (first_ekey != ekey)
            {
                // The token was recognized as several different lemmas: no rules for it.
                return;
            }
        }

        // Step 3: enumerate context recognizers, from the most specific (lexemes of the
        // non-homonymous words) to the most general (their parts of speech).
        OmonymContextEnumerator candidates = new OmonymContextEnumerator(tokens, ctx.position, left_i, ctx.len, gren);

        for (int c = 0; c < candidates.Count; ++c)
        {
            ContextRecognizer candidate = candidates[c];

            // Skip the candidate if an equivalent recognizer is already registered.
            bool already_known = false;
            foreach (ContextRecognizer existing in recognizers)
            {
                if (existing.EqualTokens(candidate))
                {
                    already_known = true;
                    break;
                }
            }

            if (already_known)
            {
                continue;
            }

            candidate.Success(new MatchedContextInfo(line, tokens, left_i, ctx.len));
            recognizers.Add(candidate);
        }
    }

Exemple #6

0

Afficher le fichier

    /// <summary>
    /// Evaluates suffix-tag selection on one sentence, before and after Viterbi decoding over the
    /// <c>PA</c> / <c>PB</c> tables, and accumulates the results into the supplied counters.
    /// </summary>
    /// <remarks>
    /// The sentence is analyzed twice: a complete analysis supplies the expected values, and a
    /// tokenize-only analysis supplies the candidate tags for each word. The sample is skipped
    /// without touching the counters when the two analyses yield a different number of tokens or
    /// when some word has no candidate tag at all.
    /// </remarks>
    /// <param name="line">Sentence text to analyze.</param>
    /// <param name="total_word_count">Increased by the token count minus the two boundary tokens.</param>
    /// <param name="error_count_no_filter">
    /// Increased by the number of inner words whose first candidate tag differs from the id
    /// returned by <c>GetLemmaSuffix</c>.
    /// </param>
    /// <param name="error_count_with_model">
    /// Increased by the number of inner words whose model-selected suffix differs from the suffix
    /// of the entry name chosen by the complete analysis.
    /// </param>
    public void Check(
        string line,
        ref int total_word_count,
        ref int error_count_no_filter,
        ref int error_count_with_model
        )
    {
        // Morphological analysis.
        using (SolarixGrammarEngineNET.AnalysisResults tokens = gren.AnalyzeMorphology(line, LanguageID, SolarixGrammarEngineNET.GrammarEngine.MorphologyFlags.SOL_GREN_COMPLETE_ONLY))
        {
            List<List<int>> word2tags     = new List<List<int>>();
            List<int>       selected_tags = new List<int>();

            // Tokenization without applying syntactic rules.
            using (SolarixGrammarEngineNET.AnalysisResults projs = gren.AnalyzeMorphology(line, LanguageID,
                                                                                          SolarixGrammarEngineNET.GrammarEngine.MorphologyFlags.SOL_GREN_TOKENIZE_ONLY))
            {
                if (tokens.Count != projs.Count)
                {
                    return;
                }

                // Convert all projections of every word into candidate tags.

                List<int> tag_set = new List<int>();

                int start_tag = -1, end_tag = -1;

                List<int> suffices        = new List<int>();
                int       last_word_index = tokens.Count - 1;

                for (int i = 0; i < tokens.Count; ++i)
                {
                    SolarixGrammarEngineNET.SyntaxTreeNode token = tokens[i];

                    // Observed parameter: the ending of the grammatical form.
                    int suffix_id = GetTokenSuffix(i, last_word_index, token);
                    suffices.Add(suffix_id);

                    SolarixGrammarEngineNET.SyntaxTreeNode proj = projs[i];
                    List<int> wt = new List<int>();

                    for (int k = 0; k < proj.VersionCount(); ++k)
                    {
                        int    ever      = proj.GetVersionEntryID(k);
                        string ver_entry = gren.GetEntryName(ever);

                        int id_tag = GetFormSuffix(i, last_word_index, ver_entry);

                        if (!wt.Contains(id_tag))
                        {
                            wt.Add(id_tag);
                        }

                        if (!tag_set.Contains(id_tag))
                        {
                            tag_set.Add(id_tag);
                        }

                        // The tag of the last version seen wins for the boundary words.
                        if (i == 0)
                        {
                            start_tag = id_tag;
                        }
                        else if (i == last_word_index)
                        {
                            end_tag = id_tag;
                        }
                    }

                    if (wt.Count == 0)
                    {
                        // The word produced no candidate tag, which is a codebook error: skip the
                        // sample. This test used to sit inside the loop above, right after an
                        // unconditional wt.Add, so it could never fire and wt[0] below threw
                        // for a word with zero versions.
                        return;
                    }

                    word2tags.Add(wt);
                    selected_tags.Add(wt[0]);
                }

                // -----------------------------------------
                // Count the errors before the model is applied.
                // -----------------------------------------
                int n_err = 0;

                for (int iword = 1; iword < tokens.Count - 1; ++iword)
                {
                    SolarixGrammarEngineNET.SyntaxTreeNode token = tokens[iword];
                    int required_suffix_id = GetLemmaSuffix(iword, last_word_index, token);

                    int got_suffix_id = selected_tags[iword];
                    if (required_suffix_id != got_suffix_id)
                    {
                        n_err++;
                    }
                }

                error_count_no_filter += n_err;
                total_word_count      += (tokens.Count - 2);

                int Nword  = tokens.Count; // number of sequential steps: the words including the left and right boundaries
                int Nstate = tag_set.Count;

                // Viterbi trellis.

                // Scores of the states; a freshly allocated array is already zero-filled.
                double[,] V = new double[Nword, Nstate];

                // Backpointers used to trace the best path.
                int[,] BACKPOINTER = new int[Nword, Nstate];
                for (int t = 0; t < Nword; ++t)
                {
                    for (int s = 0; s < Nstate; ++s)
                    {
                        BACKPOINTER[t, s] = -1; // a default start-to-end path may be needed here for the case when no best path is found
                    }
                }
                V[0, tag_set.IndexOf(start_tag)] = 1.0; // initial state: decoding starts from this state

                for (int t = 1; t < Nword; ++t)
                {
                    // Fill in the scores of the states at step t from the values of the previous step.

                    for (int s2 = 0; s2 < Nstate; ++s2) // states at step t
                    {
                        double max_v           = 0.0;
                        int    best_prev_state = 0;

                        int id_tag2 = tag_set[s2];

                        // PB factor for (tag, observed suffix); stays 0.0 when either key is missing.
                        double b = 0.0;

                        Dictionary<int, double> bx;
                        if (PB.TryGetValue(id_tag2, out bx))
                        {
                            bx.TryGetValue(suffices[t], out b);
                        }

                        for (int s1 = 0; s1 < Nstate; ++s1) // states at step t-1
                        {
                            int id_tag1 = tag_set[s1];

                            double vt = V[t - 1, s1] * PA[id_tag1, id_tag2] * b;

                            if (vt > max_v)
                            {
                                max_v           = vt;
                                best_prev_state = s1;
                            }
                        }

                        V[t, s2]           = max_v;
                        BACKPOINTER[t, s2] = best_prev_state;
                    }
                }

                // Walk backwards through the states recorded in BACKPOINTER.

                int best_state = tag_set.IndexOf(end_tag);

                for (int t = Nword - 1; t > 0; --t)
                {
                    int best_prev_state = BACKPOINTER[t, best_state];

                    int selected_tag = tag_set[best_prev_state];

                    // Gross tag-selection errors are not allowed: the decoded tag is accepted only
                    // if it is one of the tags obtained when the word itself was recognized;
                    // otherwise the previously selected tag is kept.
                    if (word2tags[t - 1].Contains(selected_tag))
                    {
                        selected_tags[t - 1] = selected_tag;
                    }

                    best_state = best_prev_state;
                }

                // Now count the errors in the lemma choice.
                for (int iword = 1; iword < tokens.Count - 1; ++iword)
                {
                    // This is what should have been obtained.
                    SolarixGrammarEngineNET.SyntaxTreeNode token = tokens[iword];

                    int    ekey1        = token.GetEntryID();
                    string ename        = gren.GetEntryName(ekey1);
                    string lemma_suffix = GetSuffix(ename);

                    // And this is what the model produced.
                    int tag = selected_tags[iword];
                    if (tag != -1)
                    {
                        string model_suffix = GetSuffixById(tag);
                        if (model_suffix != lemma_suffix)
                        {
                            error_count_with_model++;
                        }
                    }
                }
            }
        }

        return;
    }

Exemple #7

0

Afficher le fichier

Fichier : CommonClasses.cs Projet : zaharPonimash/GrammarEngine

    /// <summary>
    /// Tests a token against this matcher, looking across all of the token's versions.
    /// </summary>
    /// <remarks>
    /// A lexeme constraint, when present, decides the result on its own; otherwise a lemma
    /// constraint, when present, decides the result on its own. Failing both, the part-of-speech
    /// set must be satisfied by some version, and a non-empty list of coordinate pairs must be
    /// fully contained in some version. The two checks are independent, so they may be satisfied
    /// by different versions.
    /// </remarks>
    /// <param name="token">Token to test.</param>
    /// <param name="gren">Grammar engine used to look up the class of a dictionary entry.</param>
    /// <returns><c>true</c> when the token satisfies the active constraints.</returns>
    public bool Match(SolarixGrammarEngineNET.SyntaxTreeNode token, SolarixGrammarEngineNET.GrammarEngine2 gren)
    {
        // Lexeme constraint: compare the surface word, ignoring case.
        if (lexeme != null)
        {
            return token.GetWord().Equals(lexeme, StringComparison.InvariantCultureIgnoreCase);
        }

        // Lemma constraint: some version must refer to one of the listed entries.
        if (id_lemma != null)
        {
            for (int v = 0; v < token.VersionCount(); ++v)
            {
                if (id_lemma.Contains(token.GetVersionEntryID(v)))
                {
                    return true;
                }
            }

            return false;
        }

        if (pos != null)
        {
            // Stop scanning as soon as a version with an accepted class is found.
            bool class_found = false;

            for (int v = 0; !class_found && v < token.VersionCount(); ++v)
            {
                int entry_id = token.GetVersionEntryID(v);

                // Versions with an entry id of -1 are skipped without a class lookup.
                if (entry_id != -1)
                {
                    class_found = pos.Contains(gren.GetEntryClass(entry_id));
                }
            }

            if (!class_found)
            {
                return false;
            }
        }

        if (pairs != null && pairs.Count > 0)
        {
            // The pair list decides the final result: one version must contain all pairs.
            for (int v = 0; v < token.VersionCount(); ++v)
            {
                bool all_present = true;

                foreach (SolarixGrammarEngineNET.CoordPair required in pairs)
                {
                    if (!token.VersionContains(v, required))
                    {
                        all_present = false;
                        break;
                    }
                }

                if (all_present)
                {
                    return true;
                }
            }

            return false;
        }

        return true;
    }

Exemple #8

0

Afficher le fichier

Fichier : Builder_BayesSuffix.cs Projet : zaharPonimash/GrammarEngine

    public void Check(
        string line,
        ref int total_word_count,
        ref int error_count_no_filter,
        ref int error_count_with_model
        )
    {
        // Морфологический разбор
        using (SolarixGrammarEngineNET.AnalysisResults tokens = gren.AnalyzeMorphology(line, LanguageID, SolarixGrammarEngineNET.GrammarEngine.MorphologyFlags.SOL_GREN_COMPLETE_ONLY))
        {
            // Токенизация без использования синтаксических правил
            using (SolarixGrammarEngineNET.AnalysisResults projs = gren.AnalyzeMorphology(line, LanguageID,
                                                                                          SolarixGrammarEngineNET.GrammarEngine.MorphologyFlags.SOL_GREN_TOKENIZE_ONLY /*| SolarixGrammarEngineNET.GrammarEngine.SOL_GREN_DISABLE_FILTERS*/))
            {
                if (tokens.Count != projs.Count)
                {
                    return;
                }


                int last_word_index = projs.Count - 1;

                // -----------------------------------------
                // Посчитаем ошибки до применения модели
                // -----------------------------------------
                int n_err = 0;
                for (int iword = 1; iword < last_word_index; ++iword)
                {
                    SolarixGrammarEngineNET.SyntaxTreeNode proj = projs[iword];
                    int ekey0     = proj.GetEntryID();
                    int id_class0 = gren.GetEntryClass(ekey0);

                    // Совпадает с точным значением?
                    SolarixGrammarEngineNET.SyntaxTreeNode token = tokens[iword];
                    int ekey1     = token.GetEntryID();
                    int id_class1 = gren.GetEntryClass(ekey1);

                    if (id_class0 != id_class1)
                    {
                        n_err++;
                    }
                }

                error_count_no_filter += n_err;
                total_word_count      += (tokens.Count - 2);



                List <int> n_pos     = new List <int>(); // кол-во разных частей речи для каждого токена
                List <int> word2tags = new List <int>();

                // Преобразуем все проекции каждого слова в варианты распознавания тегов
                for (int i = 0; i < tokens.Count; ++i)
                {
                    SolarixGrammarEngineNET.SyntaxTreeNode token = tokens[i];
                    int index = GetTokenSuffix(i, last_word_index, token);

                    word2tags.Add(index);

                    // На сколько разных частей речи проецируется данное слово
                    List <int> pos_list = new List <int>();
                    for (int k = 0; k < projs[i].VersionCount(); ++k)
                    {
                        int ekey0     = projs[i].GetVersionEntryID(k);
                        int id_class0 = gren.GetEntryClass(ekey0);
                        if (!pos_list.Contains(id_class0))
                        {
                            pos_list.Add(id_class0);
                        }
                    }

                    n_pos.Add(pos_list.Count);
                }


                List <Dictionary <int, int> > pos_score = new List <Dictionary <int, int> >();
                List <int> pos_score_order = new List <int>();

                // Инициализируем вектор частей речи значениями, которые соответствуют
                // чамым частотным словоформам.
                for (int iword = 0; iword < tokens.Count - 1; ++iword)
                {
                    SolarixGrammarEngineNET.SyntaxTreeNode proj = projs[iword];

                    Dictionary <int, int> p = new Dictionary <int, int>();

                    for (int iproj = 0; iproj < proj.VersionCount(); ++iproj)
                    {
                        int ekey     = proj.GetVersionEntryID(iproj);
                        int id_class = gren.GetEntryClass(ekey);

                        if (!p.ContainsKey(id_class))
                        {
                            if (iproj == 0)
                            {
                                p.Add(id_class, 1);
                            }
                            else
                            {
                                p.Add(id_class, 0);
                            }
                        }
                    }

                    pos_score.Add(p);
                    pos_score_order.Add(1);
                }


                // ---------------------------------
                // теперь применим модель
                // ---------------------------------
                bool use_4grams = true;
                bool use_3grams = true;
                bool use_2grams = true;

                for (int iword = 1; iword < tokens.Count - 1; ++iword)
                {
                    string word = tokens[iword].GetWord();

                    bool applied = false;

                    // ==============
                    // ТЕТРАГРАММЫ
                    // ==============

                    if (use_4grams && !applied && iword > 2)
                    {
                        if (n_pos[iword] > 1) // Выбираем POS для iword на основе iword-3,iword-2,iword-1
                        {
                            int tag0 = word2tags[iword - 3];
                            int tag1 = word2tags[iword - 2];
                            int tag2 = word2tags[iword - 1];

                            List <NGram4> n4_list;
                            Int3          k = new Int3(tag0, tag1, tag2);

                            if (tag0_2_ngram4.TryGetValue(k, out n4_list))
                            {
                                // Перебираем варианты, которые вытекают из наличия тегов tag0,tag1,tag2 и прибавляем очки соответствующим частям речи.
                                foreach (NGram4 n4_probe in n4_list)
                                {
                                    int        tag3 = n4_probe.tags3;
                                    TagMatcher m    = selectors[tag3];

                                    List <KeyValuePair <int, int> > changes = new List <KeyValuePair <int, int> >();

                                    Dictionary <int, int> pos2score = pos_score[iword];

                                    foreach (KeyValuePair <int, int> p in pos2score)
                                    {
                                        if (m.MatchPartOfSpeech(p.Key))
                                        {
                                            int m_freq = ngrams4[n4_probe];
                                            changes.Add(new KeyValuePair <int, int>(p.Key, m_freq));
                                            applied = true;
                                        }
                                    }

                                    foreach (var kv in changes)
                                    {
                                        pos2score[kv.Key] = pos2score[kv.Key] + kv.Value;
                                    }

                                    pos_score_order[iword] = 4;
                                }
                            }
                        }
                        else if (n_pos[iword - 1] > 1 && pos_score_order[iword - 1] < 4) // Выбираем POS для iword-1 на основе iword-3,iword-2,iword
                        {
                            int tag0 = word2tags[iword - 3];
                            int tag1 = word2tags[iword - 2];
                            int tag3 = word2tags[iword];

                            List <NGram4> n4_list;
                            Int3          k = new Int3(tag0, tag1, tag3);

                            if (tag1_2_ngram4.TryGetValue(k, out n4_list))
                            {
                                // Перебираем варианты, которые вытекают из наличия тегов tag0,tag1,tag2 и прибавляем очки соответствующим частям речи.
                                foreach (NGram4 n4_probe in n4_list)
                                {
                                    int        tag2 = n4_probe.tags2;
                                    TagMatcher m    = selectors[tag2];

                                    List <KeyValuePair <int, int> > changes = new List <KeyValuePair <int, int> >();

                                    Dictionary <int, int> pos2score = pos_score[iword];

                                    foreach (KeyValuePair <int, int> p in pos2score)
                                    {
                                        if (m.MatchPartOfSpeech(p.Key))
                                        {
                                            int m_freq = ngrams4_1[n4_probe];
                                            changes.Add(new KeyValuePair <int, int>(p.Key, m_freq));
                                            applied = true;
                                        }
                                    }

                                    foreach (var kv in changes)
                                    {
                                        pos2score[kv.Key] = pos2score[kv.Key] + kv.Value;
                                    }

                                    pos_score_order[iword - 1] = 4;
                                }
                            }
                        }
                    }



                    // ==============
                    // ТРИГРАММЫ
                    // ==============

                    if (use_3grams && !applied && iword > 1)
                    {
                        if (n_pos[iword] > 1) // Выбираем POS для iword на основе iword-2,iword-1
                        {
                            {
                                int tag0 = word2tags[iword - 2];
                                int tag1 = word2tags[iword - 1];

                                List <NGram3> n3_list;
                                Int2          k = new Int2(tag0, tag1);

                                if (tag0_2_ngram3.TryGetValue(k, out n3_list))
                                {
                                    // Перебираем варианты, которые вытекают из наличия тегов tag0,tag1, и прибавляем очки соответствующим частям речи.
                                    foreach (NGram3 n3_probe in n3_list)
                                    {
                                        int        tag2 = n3_probe.tags2;
                                        TagMatcher m    = selectors[tag2];

                                        List <KeyValuePair <int, int> > changes = new List <KeyValuePair <int, int> >();

                                        Dictionary <int, int> pos2score = pos_score[iword];

                                        foreach (KeyValuePair <int, int> p in pos2score)
                                        {
                                            if (m.MatchPartOfSpeech(p.Key))
                                            {
                                                int m_freq = ngrams3[n3_probe];
                                                changes.Add(new KeyValuePair <int, int>(p.Key, m_freq));
                                                applied = true;
                                            }
                                        }

                                        foreach (var kv in changes)
                                        {
                                            pos2score[kv.Key] = pos2score[kv.Key] + kv.Value;
                                        }
                                    }
                                }
                            }


                            if (iword < last_word_index)
                            {
                                // iword-1 --> iword <-- iword+1

                                int tag0 = word2tags[iword - 1];
                                int tag2 = word2tags[iword + 1];

                                List <NGram3> n3_list;
                                Int2          k = new Int2(tag0, tag2);

                                if (tag1_2_ngram3.TryGetValue(k, out n3_list))
                                {
                                    // Перебираем варианты, которые вытекают из наличия тегов tag0,tag2, и прибавляем очки соответствующим частям речи.
                                    foreach (NGram3 n3_probe in n3_list)
                                    {
                                        int        tag1 = n3_probe.tags1;
                                        TagMatcher m    = selectors[tag1];

                                        List <KeyValuePair <int, int> > changes = new List <KeyValuePair <int, int> >();

                                        Dictionary <int, int> pos2score = pos_score[iword];

                                        foreach (KeyValuePair <int, int> p in pos2score)
                                        {
                                            if (m.MatchPartOfSpeech(p.Key))
                                            {
                                                int m_freq = ngrams3_1[n3_probe];
                                                changes.Add(new KeyValuePair <int, int>(p.Key, m_freq));
                                                applied = true;
                                            }
                                        }

                                        foreach (var kv in changes)
                                        {
                                            pos2score[kv.Key] = pos2score[kv.Key] + kv.Value;
                                        }

                                        pos_score_order[iword] = 3;
                                    }
                                }
                            }
                        }
                        else if (n_pos[iword - 1] > 1 && pos_score_order[iword - 1] < 3) // Выбираем POS для iword-1 на основе iword-2,iword
                        {
                            int tag0 = word2tags[iword - 2];
                            int tag2 = word2tags[iword];

                            List <NGram3> n3_list;
                            Int2          k = new Int2(tag0, tag2);

                            if (tag1_2_ngram3.TryGetValue(k, out n3_list))
                            {
                                // Перебираем варианты, которые вытекают из наличия тегов tag0,tag2, и прибавляем очки соответствующим частям речи.
                                foreach (NGram3 n3_probe in n3_list)
                                {
                                    int        tag1 = n3_probe.tags1;
                                    TagMatcher m    = selectors[tag1];

                                    List <KeyValuePair <int, int> > changes = new List <KeyValuePair <int, int> >();

                                    Dictionary <int, int> pos2score = pos_score[iword];

                                    foreach (KeyValuePair <int, int> p in pos2score)
                                    {
                                        if (m.MatchPartOfSpeech(p.Key))
                                        {
                                            int m_freq = ngrams3_1[n3_probe];
                                            changes.Add(new KeyValuePair <int, int>(p.Key, m_freq));
                                            applied = true;
                                        }
                                    }

                                    foreach (var kv in changes)
                                    {
                                        pos2score[kv.Key] = pos2score[kv.Key] + kv.Value;
                                    }

                                    pos_score_order[iword] = 3;
                                }
                            }
                        }
                    }


                    // ==============
                    // ДИГРАММЫ
                    // ==============

                    if (use_2grams && !applied && iword > 1)
                    {
                        if (n_pos[iword] > 1) // Выбираем POS для iword на основе iword-1
                        {
                            {
                                int tag0 = word2tags[iword - 1];

                                List <NGram2> n2_list;
                                if (tag0_2_ngram2.TryGetValue(tag0, out n2_list))
                                {
                                    // Перебираем варианты, которые вытекают из наличия тега tag0, и прибавляем очки соответствующим частям речи.
                                    foreach (NGram2 n2_probe in n2_list)
                                    {
                                        int        tag1 = n2_probe.tags1;
                                        TagMatcher m    = selectors[tag1];

                                        List <KeyValuePair <int, int> > changes = new List <KeyValuePair <int, int> >();

                                        Dictionary <int, int> pos2score = pos_score[iword];

                                        foreach (KeyValuePair <int, int> p in pos2score)
                                        {
                                            if (m.MatchPartOfSpeech(p.Key))
                                            {
                                                int m_freq = ngrams2[n2_probe];
                                                changes.Add(new KeyValuePair <int, int>(p.Key, m_freq));
                                                applied = true;
                                            }
                                        }

                                        foreach (var kv in changes)
                                        {
                                            pos2score[kv.Key] = pos2score[kv.Key] + kv.Value;
                                        }

                                        pos_score_order[iword] = 2;
                                    }
                                }
                            }

                            if (iword < last_word_index)
                            {
                                // iword <-- iword+1
                                int tag1 = word2tags[iword + 1];

                                List <NGram2> n2_list;
                                if (tag1_2_ngram2.TryGetValue(tag1, out n2_list))
                                {
                                    // Перебираем варианты, которые вытекают из наличия тега tag1, и прибавляем очки соответствующим частям речи.
                                    foreach (NGram2 n2_probe in n2_list)
                                    {
                                        int        tag0 = n2_probe.tags0;
                                        TagMatcher m    = selectors[tag0];

                                        List <KeyValuePair <int, int> > changes = new List <KeyValuePair <int, int> >();

                                        Dictionary <int, int> pos2score = pos_score[iword];

                                        foreach (KeyValuePair <int, int> p in pos2score)
                                        {
                                            if (m.MatchPartOfSpeech(p.Key))
                                            {
                                                int m_freq = ngrams2_1[n2_probe];
                                                changes.Add(new KeyValuePair <int, int>(p.Key, m_freq));
                                                applied = true;
                                            }
                                        }

                                        foreach (var kv in changes)
                                        {
                                            pos2score[kv.Key] = pos2score[kv.Key] + kv.Value;
                                        }

                                        pos_score_order[iword] = 2;
                                    }
                                }
                            }
                        }
                        else if (n_pos[iword - 1] > 1 && pos_score_order[iword - 1] == 1) // Выбираем POS для iword-1 на основе iword
                        {
                            int tag1 = word2tags[iword];

                            List <NGram2> n2_list;
                            if (tag1_2_ngram2.TryGetValue(tag1, out n2_list))
                            {
                                // Перебираем варианты, которые вытекают из наличия тега tag1, и прибавляем очки соответствующим частям речи.
                                foreach (NGram2 n2_probe in n2_list)
                                {
                                    int        tag0 = n2_probe.tags0;
                                    TagMatcher m    = selectors[tag0];

                                    List <KeyValuePair <int, int> > changes = new List <KeyValuePair <int, int> >();

                                    Dictionary <int, int> pos2score = pos_score[iword - 1];

                                    foreach (KeyValuePair <int, int> p in pos2score)
                                    {
                                        if (m.MatchPartOfSpeech(p.Key))
                                        {
                                            int m_freq = ngrams2_1[n2_probe];
                                            changes.Add(new KeyValuePair <int, int>(p.Key, m_freq));
                                            applied = true;
                                        }
                                    }

                                    foreach (var kv in changes)
                                    {
                                        pos2score[kv.Key] = pos2score[kv.Key] + kv.Value;
                                    }

                                    pos_score_order[iword - 1] = 2;
                                }
                            }
                        }
                    }
                }



                // Все вероятности перехода учтены.

                // Совпадает ли selected_id_class с требуемыми значениями?
                for (int iword = 1; iword < projs.Count - 1; ++iword)
                {
                    Dictionary <int, int> word_pos_scores = pos_score[iword];
                    int best_score = 0;
                    int best_pos   = -1;
                    foreach (KeyValuePair <int, int> k in word_pos_scores)
                    {
                        if (k.Value > best_score)
                        {
                            best_score = k.Value;
                            best_pos   = k.Key;
                        }
                    }

                    SolarixGrammarEngineNET.SyntaxTreeNode token = tokens[iword];
                    int ekey1     = token.GetEntryID();
                    int id_class1 = gren.GetEntryClass(ekey1);

                    if (best_pos != id_class1)
                    {
                        error_count_with_model++;
                    }
                }
            }
        }


        return;
    }

C# (CSharp) SolarixGrammarEngineNET SyntaxTreeNode.GetVersionEntryID Exemples

GetVersionEntryID() méthode publique

public GetVersionEntryID ( int version_index ) : int
version_index	int
Résultat	int