Example #1
    void AddDistance(SolarixGrammarEngineNET.SyntaxTreeNode root, SolarixGrammarEngineNET.SyntaxTreeNode node, int distance)
    {
        WordentryDistance x = new WordentryDistance();

        // Normalize the pair so that the smaller entry id always goes first.
        x.id_entry1 = root.GetEntryID();
        x.id_entry2 = node.GetEntryID();
        if (x.id_entry1 > x.id_entry2)
        {
            x.id_entry2 = root.GetEntryID();
            x.id_entry1 = node.GetEntryID();
        }

        WordentryDistance y;

        if (distance_matrix.TryGetValue(x, out y))
        {
            // The pair has been seen before: accumulate the statistics.
            y.sum_distance  += distance;
            y.sum_distance2 += distance * distance;
            y.N++;
        }
        else
        {
            // First occurrence of this pair.
            x.N             = 1;
            x.sum_distance  = distance;
            x.sum_distance2 = distance * distance;
            distance_matrix.Add(x, x);
        }

        return;
    }
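
The dictionary lookup in AddDistance only finds an existing pair if WordentryDistance compares by the normalized (id_entry1, id_entry2) key. A minimal sketch of such a key/accumulator class, assuming the fields used above; it is not part of the original listing:

    // Hypothetical sketch: WordentryDistance serves both as dictionary key and as accumulator.
    // Equality and hashing are based on the normalized entry-id pair so that
    // distance_matrix.TryGetValue(x, out y) can locate previously stored pairs.
    class WordentryDistance
    {
        public int id_entry1;
        public int id_entry2;
        public int N;
        public long sum_distance;
        public long sum_distance2;

        public override bool Equals(object obj)
        {
            WordentryDistance other = obj as WordentryDistance;
            return other != null && other.id_entry1 == id_entry1 && other.id_entry2 == id_entry2;
        }

        public override int GetHashCode()
        {
            return id_entry1 ^ (id_entry2 << 16);
        }
    }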
    private void SetLabel(SolarixGrammarEngineNET.SyntaxTreeNode node, string label, bool recursive)
    {
        if (recursive)
        {
            List <int> indeces = new List <int>();
            CollectSubtreeNodeIndeces(node, indeces);

            foreach (int index in indeces.OrderBy(z => z))
            {
                labels[index + 1].Insert(0, label);
            }
        }
        else
        {
            int index = node.GetWordPosition();
            labels[index + 1].Insert(0, label);
        }

        return;
    }
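
SetLabel indexes a labels container at word position + 1. A minimal sketch of that assumed structure (one slot per token, including the boundary tokens, each holding labels in insertion order); the field and its initialization are not shown in the original listing:

    // Hypothetical sketch of the labels container assumed by SetLabel.
    List<List<string>> labels;

    void InitLabels(int token_count)
    {
        labels = new List<List<string>>();
        for (int i = 0; i < token_count + 2; ++i)
        {
            labels.Add(new List<string>());
        }
    }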
Example #3
    public bool ProcessSample_WordEntryOnly(SampleData sample)
    {
        if (sample.morphology == null)
        {
            sample.morphology = gren.AnalyzeMorphology(sample.sample, LanguageID, SolarixGrammarEngineNET.GrammarEngine.MorphologyFlags.SOL_GREN_TOKENIZE_ONLY);
        }

        for (int iword = 1; iword < sample.morphology.Count - 1; ++iword)
        {
            SolarixGrammarEngineNET.SyntaxTreeNode token = sample.morphology[iword];
            string word = token.GetWord().ToLower();

            int id_entry = token.GetEntryID();

            int f;
            if (wordentry_stat.TryGetValue(id_entry, out f))
            {
                wordentry_stat[id_entry] = f + 1;
            }
            else
            {
                wordentry_stat.Add(id_entry, 1);
            }
        }

        return(true);
    }
    public int Match(SolarixGrammarEngineNET.AnalysisResults tokens, int left_i, SolarixGrammarEngineNET.GrammarEngine2 gren)
    {
        // Check that the non-homonymous terms match.
        bool m = true;

        for (int iterm = 0; iterm < points.Count; ++iterm)
        {
            if (points[iterm] != null)
            {
                SolarixGrammarEngineNET.SyntaxTreeNode token = tokens[left_i + iterm];
                if (!points[iterm].Match(token, gren))
                {
                    m = false;
                    break;
                }
            }
        }

        if (m)
        {
            // It remains to check whether the homonymy was resolved correctly.
            SolarixGrammarEngineNET.SyntaxTreeNode omonym_token = tokens[left_i + omonym_point.GetPosition()];
            return(omonym_point.Match(omonym_token) ? 1 : 0);
        }

        return(-1);
    }
Example #5
    static int GetPOS(SolarixGrammarEngineNET.GrammarEngine2 gren, SolarixGrammarEngineNET.SyntaxTreeNode node)
    {
        int id_entry = node.GetEntryID();
        int pos_id   = gren.GetEntryClass(id_entry);

        return(pos_id);
    }
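
A short usage sketch for GetPOS (assuming gren and node are already in scope); predicates such as IsVerb_1s and IsPronoun_1s_nom elsewhere in this listing build on the same call:

    // Minimal usage sketch: test whether the node's dictionary entry is a verb.
    bool is_verb = GetPOS(gren, node) == SolarixGrammarEngineNET.GrammarEngineAPI.VERB_ru;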
Example #6
    public bool ProcessSample(SampleData sample, bool train_sample, bool test_sample)
    {
        if (wrt_train == null)
        {
            wrt_train = new System.IO.StreamWriter("syntax_neuro_train.txt");
            wrt_test  = new System.IO.StreamWriter("syntax_neuro_test.txt");
        }

        System.IO.StreamWriter wrt = train_sample ? wrt_train : wrt_test;

        if (sample.syntax_tree.Count == 3)
        {
            SolarixGrammarEngineNET.SyntaxTreeNode root = sample.syntax_tree[1];

            Dictionary <int, int>    node2parent = new Dictionary <int, int>();
            Dictionary <int, string> node2word   = new Dictionary <int, string>();

            node2parent.Add(root.GetWordPosition(), -1);

            CollectEdges(root, node2parent, node2word);

            foreach (int index in node2word.Select(z => z.Key).OrderBy(z => z))
            {
                wrt.WriteLine("{0}\t{1}\t{2}", index, node2word[index], node2parent[index]);
            }

            wrt.WriteLine("");
        }

        return(true);
    }
Example #7
    private void AssembleEdges(SolarixGrammarEngineNET.SyntaxTreeNode node, List <Edge4Stat> edges, Dictionary <string, NodeLeafCount> leaves)
    {
        string        uword = node.GetWord().ToUpper();
        NodeLeafCount word_info;

        if (!leaves.TryGetValue(uword, out word_info))
        {
            word_info = new NodeLeafCount();
            leaves.Add(uword, word_info);
        }

        word_info.total_count++;
        if (node.leafs.Count == 0)
        {
            word_info.leaf_count++;
        }

        foreach (SolarixGrammarEngineNET.SyntaxTreeNode leaf in node.leafs)
        {
            Edge4Stat edge = new Edge4Stat();
            edge.from = node;
            edge.to   = leaf;

            edges.Add(edge);

            AssembleEdges(leaf, edges, leaves);
        }

        return;
    }
Example #8
 static bool IsPronoun_1s_nom(SolarixGrammarEngineNET.GrammarEngine2 gren, SolarixGrammarEngineNET.SyntaxTreeNode node)
 {
     return(GetPOS(gren, node) == SolarixGrammarEngineNET.GrammarEngineAPI.PRONOUN_ru &&
            node.GetCoordState(SolarixGrammarEngineNET.GrammarEngineAPI.PERSON_ru) == SolarixGrammarEngineNET.GrammarEngineAPI.PERSON_1_ru &&
            node.GetCoordState(SolarixGrammarEngineNET.GrammarEngineAPI.CASE_ru) == SolarixGrammarEngineNET.GrammarEngineAPI.NOMINATIVE_CASE_ru &&
            node.GetCoordState(SolarixGrammarEngineNET.GrammarEngineAPI.NUMBER_ru) == SolarixGrammarEngineNET.GrammarEngineAPI.SINGULAR_NUMBER_ru);
 }
Example #9
    public void Check(SampleData sample)
    {
        n_test_samples++;

        for (int iword = 1; iword < sample.morphology.Count - 1; ++iword)
        {
            SolarixGrammarEngineNET.SyntaxTreeNode token = sample.morphology[iword];

            string wordform = token.GetWord().ToLower();
            string lemma    = gren.GetEntryName(token.GetEntryID());
            if (IsUnknownLexem(lemma) || IsNumword(lemma))
            {
                continue;
            }

            CheckData d = new CheckData();
            d.POS_tag  = tags.MatchTags(token, gren);
            d.wordform = wordform;
            d.lemma    = lemma;

            check_data_list.Add(d);
        }


        return;
    }
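
CheckData itself is not included in the listing; a minimal sketch of the record assumed by Check, holding exactly the fields it fills:

    // Hypothetical sketch of the CheckData record used by Check.
    class CheckData
    {
        public int POS_tag;
        public string wordform;
        public string lemma;
    }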
Example #10
    public bool ProcessSample(SampleData sample)
    {
        n_learn_samples++;

        for (int iword = 1; iword < sample.morphology.Count - 1; ++iword)
        {
            SolarixGrammarEngineNET.SyntaxTreeNode token = sample.morphology[iword];

            string wordform = token.GetWord().ToLower();

            if (wordform.Contains("  "))
            {
                System.Text.RegularExpressions.Regex rx = new System.Text.RegularExpressions.Regex("[ ]{2,}");
                wordform = rx.Replace(wordform, " ");
            }

            string lemma = gren.GetEntryName(token.GetEntryID());
            if (IsUnknownLexem(lemma) || IsNumword(lemma))
            {
                continue;
            }

            int POS_tag = tags.MatchTags(token, gren);

            table.Store(POS_tag, wordform, lemma);
            n_learn_wordforms++;
        }


        return(true);
    }
Example #11
    public bool Sample2Patterns(SampleData sample, List <SVM_ResultPatterns> patterns)
    {
        // Morphological analysis.
        // For every word except the first and the last token...

        for (int word_index = 1; word_index < sample.morphology.Count - 1; ++word_index)
        {
            // Collect the context around the word.
            SVM.Node[] Xi = new SVM.Node[x_len];
            for (int k = 0; k < x_len; ++k)
            {
                Xi[k] = new SVM.Node(k + 1, 0.0);
            }

            int idx = 0;
            for (int ctx_index = word_index - context_span; ctx_index <= word_index + context_span; ++ctx_index, ++idx)
            {
                if (ctx_index >= 1 && ctx_index < sample.morphology.Count - 1)
                {
                    SolarixGrammarEngineNET.SyntaxTreeNode token = sample.morphology[ctx_index];
                    ConvertToken2X(token, idx, Xi);
                }
                else
                {
                    ConvertToken2X(null, idx, Xi);
                }
            }

            SolarixGrammarEngineNET.SyntaxTreeNode token0 = sample.morphology[word_index];
            string word = token0.GetWord().ToLower();

            // Decision: determine which Y class this token belongs to.
            foreach (SVM_X_Picker y in y_picker)
            {
                if (y.Match(token0, gren))
                {
                    int y_index = y.GetIndex();

                    patterns[y_index].Add(Xi, 1.0);

                    // for the remaining Y's, add the same pattern as "not-Yi"
                    for (int j = 0; j < y_picker.Count; ++j)
                    {
                        if (j != y_index)
                        {
                            patterns[j].Add(Xi, 0.0);
                        }
                    }

                    break;
                }
            }
        }

        return(true);
    }
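
Sample2Patterns indexes the patterns list by the classifier index returned from each SVM_X_Picker, so the list must be pre-sized to y_picker.Count. A hedged usage sketch, assuming SVM_ResultPatterns has a parameterless constructor and a SampleData instance named sample is available:

    // Hypothetical usage sketch: one SVM_ResultPatterns bucket per output class.
    List<SVM_ResultPatterns> patterns = new List<SVM_ResultPatterns>();
    for (int i = 0; i < y_picker.Count; ++i)
    {
        patterns.Add(new SVM_ResultPatterns());
    }

    Sample2Patterns(sample, patterns);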
Example #12
    public bool Match(SolarixGrammarEngineNET.SyntaxTreeNode proj, int iver, SolarixGrammarEngineNET.GrammarEngine2 gren)
    {
        if (lexeme != null)
        {
            return(proj.GetWord().Equals(lexeme, StringComparison.InvariantCultureIgnoreCase));
        }

        if (id_lemma != null)
        {
            int ekey = proj.GetVersionEntryID(iver);
            if (id_lemma.Contains(ekey))
            {
                return(true);
            }

            return(false);
        }

        if (pos != null)
        {
            bool pos_matched = false;

            int ekey = proj.GetVersionEntryID(iver);
            if (ekey != -1)
            {
                int id_class = gren.GetEntryClass(ekey);
                pos_matched = pos.Contains(id_class);
            }

            if (!pos_matched)
            {
                return(false);
            }
        }

        if (pairs != null)
        {
            bool contains_all_required_pairs = true;
            foreach (SolarixGrammarEngineNET.CoordPair p in pairs)
            {
                if (!proj.VersionContains(iver, p))
                {
                    contains_all_required_pairs = false;
                    break;
                }
            }

            if (!contains_all_required_pairs)
            {
                return(false);
            }
        }

        return(true);
    }
Example #13
    static void GetChunkNodes(SolarixGrammarEngineNET.SyntaxTreeNode node,
                              int chunk_index,
                              Dictionary <int /*word_index*/, int /*chunk_index*/> labels)
    {
        labels[node.GetWordPosition()] = chunk_index;

        for (int i = 0; i < node.leafs.Count; ++i)
        {
            GetChunkNodes(node.leafs[i], chunk_index, labels);
        }
    }
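
A hedged usage sketch for GetChunkNodes, assuming a hypothetical chunk_roots list holding one subtree root per chunk:

    // Hypothetical usage: label every word with the index of the chunk whose subtree contains it.
    Dictionary<int, int> word2chunk = new Dictionary<int, int>();
    for (int ichunk = 0; ichunk < chunk_roots.Count; ++ichunk)
    {
        GetChunkNodes(chunk_roots[ichunk], ichunk, word2chunk);
    }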
Example #14
    private void CollectEdges(SolarixGrammarEngineNET.SyntaxTreeNode node, Dictionary <int, int> node2parent, Dictionary <int, string> node2word)
    {
        node2word.Add(node.GetWordPosition(), node.GetWord());

        for (int i = 0; i < node.leafs.Count; ++i)
        {
            SolarixGrammarEngineNET.SyntaxTreeNode child = node.leafs[i];
            node2parent.Add(child.GetWordPosition(), node.GetWordPosition());

            CollectEdges(child, node2parent, node2word);
        }
    }
Example #15
        public AnalysisResults( GrammarEngine2 gren, IntPtr _hPack, bool release_handle )
        {
            hPack = new AnalysisResultsSafeHandle( _hPack, release_handle );
            nodes = new List<SyntaxTreeNode>();

            int n = SolarixGrammarEngineNET.GrammarEngine.sol_CountRoots( hPack.DangerousGetHandle(), 0 );
            for( int i = 0; i < n; ++i )
            {
                SyntaxTreeNode node = new SyntaxTreeNode( gren, SolarixGrammarEngineNET.GrammarEngine.sol_GetRoot( hPack.DangerousGetHandle(), 0, i ) );
                nodes.Add( node );
            }
        }
Example #16
    static string ChangePronounTo(SolarixGrammarEngineNET.GrammarEngine2 gren, SolarixGrammarEngineNET.SyntaxTreeNode node, string to_person)
    {
        List <int> coords = new List <int>();
        List <int> states = new List <int>();

        if (to_person == "1s")
        {
            coords.Add(SolarixGrammarEngineNET.GrammarEngineAPI.NUMBER_ru);
            states.Add(SolarixGrammarEngineNET.GrammarEngineAPI.SINGULAR_NUMBER_ru);

            coords.Add(SolarixGrammarEngineNET.GrammarEngineAPI.PERSON_ru);
            states.Add(SolarixGrammarEngineNET.GrammarEngineAPI.PERSON_1_ru);
        }
        else if (to_person == "2s")
        {
            coords.Add(SolarixGrammarEngineNET.GrammarEngineAPI.NUMBER_ru);
            states.Add(SolarixGrammarEngineNET.GrammarEngineAPI.SINGULAR_NUMBER_ru);

            coords.Add(SolarixGrammarEngineNET.GrammarEngineAPI.PERSON_ru);
            states.Add(SolarixGrammarEngineNET.GrammarEngineAPI.PERSON_2_ru);
        }
        else if (to_person == "3s")
        {
            coords.Add(SolarixGrammarEngineNET.GrammarEngineAPI.NUMBER_ru);
            states.Add(SolarixGrammarEngineNET.GrammarEngineAPI.SINGULAR_NUMBER_ru);

            coords.Add(SolarixGrammarEngineNET.GrammarEngineAPI.PERSON_ru);
            states.Add(SolarixGrammarEngineNET.GrammarEngineAPI.PERSON_3_ru);
        }
        else
        {
            throw new ArgumentException("to_person");
        }


        coords.Add(SolarixGrammarEngineNET.GrammarEngineAPI.CASE_ru);
        states.Add(SolarixGrammarEngineNET.GrammarEngineAPI.NOMINATIVE_CASE_ru);

        string        new_word = "";
        List <string> fx       = SolarixGrammarEngineNET.GrammarEngine.sol_GenerateWordformsFX(gren.GetEngineHandle(), node.GetEntryID(), coords, states);

        if (fx != null && fx.Count > 0)
        {
            new_word = fx[0].ToLower();
        }
        else
        {
            new_word = null;
        }

        return(new_word);
    }
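
A short usage sketch for ChangePronounTo, assuming a hypothetical pronoun_node has already been located in the parse:

    // Hypothetical usage: regenerate the pronoun as 1st person singular nominative.
    string new_form = ChangePronounTo(gren, pronoun_node, "1s");
    if (new_form != null)
    {
        // substitute new_form for the original wordform
    }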
Example #17
    int FindDepth(SolarixGrammarEngineNET.SyntaxTreeNode node)
    {
        int d = 1;
        int max_child_depth = 0;

        foreach (SolarixGrammarEngineNET.SyntaxTreeNode subnode in node.leafs)
        {
            int dd = FindDepth(subnode);
            max_child_depth = System.Math.Max(max_child_depth, dd);
        }

        return(max_child_depth + d);
    }
Example #18
    private static string TermToString(SolarixGrammarEngineNET.GrammarEngine2 gren, SolarixGrammarEngineNET.SyntaxTreeNode term)
    {
        int id_entry = term.GetEntryID();

        if (gren.GetEntryName(id_entry) == "???")
        {
            return(term.GetWord());
        }

        string res_word = gren.RestoreCasing(id_entry, term.GetWord());

        return(res_word);
    }
Example #19
    void AssembleChildren(SolarixGrammarEngineNET.SyntaxTreeNode root, List <NodeDistance> children, int distance)
    {
        foreach (SolarixGrammarEngineNET.SyntaxTreeNode subnode in root.leafs)
        {
            NodeDistance n = new NodeDistance();
            n.distance = distance;
            n.token    = subnode;
            children.Add(n);
            AssembleChildren(subnode, children, distance + 1);
        }

        return;
    }
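
NodeDistance is not part of the listing; a minimal sketch of the pair collected here and consumed by FillDistanceMatrix below:

    // Hypothetical sketch of the NodeDistance pair used by AssembleChildren and FillDistanceMatrix.
    class NodeDistance
    {
        public SolarixGrammarEngineNET.SyntaxTreeNode token;
        public int distance;
    }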
Example #20
    private static List <SolarixGrammarEngineNET.SyntaxTreeNode> GetTerms(SolarixGrammarEngineNET.SyntaxTreeNode n)
    {
        List <SolarixGrammarEngineNET.SyntaxTreeNode> res = new List <SolarixGrammarEngineNET.SyntaxTreeNode>();

        res.Add(n);

        foreach (var child in n.leafs)
        {
            res.AddRange(GetTerms(child));
        }

        return(res);
    }
Example #21
    static bool IsVerb_1s(SolarixGrammarEngineNET.GrammarEngine2 gren, SolarixGrammarEngineNET.SyntaxTreeNode node)
    {
        if (GetPOS(gren, node) == SolarixGrammarEngineNET.GrammarEngineAPI.VERB_ru)
        {
            if (node.GetCoordState(SolarixGrammarEngineNET.GrammarEngineAPI.PERSON_ru) == SolarixGrammarEngineNET.GrammarEngineAPI.PERSON_1_ru &&
                node.GetCoordState(SolarixGrammarEngineNET.GrammarEngineAPI.NUMBER_ru) == SolarixGrammarEngineNET.GrammarEngineAPI.SINGULAR_NUMBER_ru &&
                node.GetCoordState(SolarixGrammarEngineNET.GrammarEngineAPI.VERB_FORM_ru) == SolarixGrammarEngineNET.GrammarEngineAPI.VB_INF_ru)
            {
                return(true);
            }
        }

        return(false);
    }
Example #22
    bool IsPreposition(SolarixGrammarEngineNET.SyntaxTreeNode token)
    {
        if (token.VersionCount() == 1)
        {
            int id_entry = token.GetEntryID();
            int pos      = gren.GetEntryClass(id_entry);
            if (pos == SolarixGrammarEngineNET.GrammarEngineAPI.PREPOS_ru)
            {
                return(true);
            }
        }

        return(false);
    }
    public OmonymContextEnumerator(SolarixGrammarEngineNET.AnalysisResults tokens, int _omonym_position, int _left_i, int _len, SolarixGrammarEngineNET.GrammarEngine2 _gren)
    {
        gren            = _gren;
        left_i          = _left_i;
        len             = _len;
        omonym_position = _omonym_position;

        recognizers = new List <ContextRecognizer>();

        // For every token except the homonymous form, generate the list of variants.

        // ... for now, only the code for contexts of length 2
        if (len == 2)
        {
            SolarixGrammarEngineNET.SyntaxTreeNode omonym_token = tokens[left_i + omonym_position];

            OmonymTokenRecognizer omonym_point = new OmonymTokenRecognizer(omonym_position, omonym_token);

            if (omonym_position == 0)
            {
                TokenTagsEnumerator tte = new TokenTagsEnumerator(IsBoundaryToken(tokens, left_i + 1), tokens[left_i + 1], gren);

                int n = tte.Count;
                for (int i = 0; i < n; ++i)
                {
                    List <TokenRecognizer> points = new List <TokenRecognizer>();
                    points.Add(null); // this is the homonymous form
                    points.Add(tte[i]);
                    ContextRecognizer ctx_recognizer = new ContextRecognizer(points, omonym_point, gren);
                    recognizers.Add(ctx_recognizer);
                }
            }
            else
            {
                TokenTagsEnumerator tte = new TokenTagsEnumerator(IsBoundaryToken(tokens, left_i), tokens[left_i], gren);

                int n = tte.Count;
                for (int i = 0; i < n; ++i)
                {
                    List <TokenRecognizer> points = new List <TokenRecognizer>();
                    points.Add(tte[i]);
                    points.Add(null); // this is the homonymous form
                    ContextRecognizer ctx_recognizer = new ContextRecognizer(points, omonym_point, gren);
                    recognizers.Add(ctx_recognizer);
                }
            }
        }
    }
Example #24
    void FillDistanceMatrix(SolarixGrammarEngineNET.SyntaxTreeNode node)
    {
        List <List <NodeDistance> > node_clouds = new List <List <NodeDistance> >();

        foreach (SolarixGrammarEngineNET.SyntaxTreeNode subnode in node.leafs)
        {
            List <NodeDistance> d = new List <NodeDistance>();

            NodeDistance n = new NodeDistance();
            n.token    = subnode;
            n.distance = 1;
            d.Add(n);

            AssembleChildren(subnode, d, 2);
            node_clouds.Add(d);

            foreach (NodeDistance nn in d)
            {
                AddDistance(node, nn.token, nn.distance);
            }
        }

        for (int i = 0; i < node_clouds.Count() - 1; ++i)
        {
            for (int j = i + 1; j < node_clouds.Count(); ++j)
            {
                List <NodeDistance> cloud1 = node_clouds[i];
                List <NodeDistance> cloud2 = node_clouds[j];

                foreach (NodeDistance d1 in cloud1)
                {
                    foreach (NodeDistance d2 in cloud2)
                    {
                        AddDistance(d1.token, d2.token, d1.distance + d2.distance);
                    }
                }
            }
        }

        foreach (SolarixGrammarEngineNET.SyntaxTreeNode subnode in node.leafs)
        {
            FillDistanceMatrix(subnode);
        }

        return;
    }
    private void CollectSubtreeNodeIndeces(SolarixGrammarEngineNET.SyntaxTreeNode node, List <int> word_index)
    {
        if (node.GetWord() == ".")
        {
            return;
        }

        word_index.Add(node.GetWordPosition());

        for (int ileaf = 0; ileaf < node.leafs.Count; ++ileaf)
        {
            SolarixGrammarEngineNET.SyntaxTreeNode leaf = node.leafs[ileaf];
            CollectSubtreeNodeIndeces(leaf, word_index);
        }

        return;
    }
Example #26
    void TraverseEdges(SolarixGrammarEngineNET.SyntaxTreeNode token)
    {
        foreach (SolarixGrammarEngineNET.SyntaxTreeNode leaf in token.leafs)
        {
            int distance = System.Math.Abs(leaf.GetWordPosition() - token.GetWordPosition());
            if (edge_len2count.ContainsKey(distance))
            {
                edge_len2count[distance] = edge_len2count[distance] + 1;
            }
            else
            {
                edge_len2count.Add(distance, 1);
            }

            TraverseEdges(leaf);
        }
    }
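
TraverseEdges accumulates an edge-length histogram in the edge_len2count field, which is not shown in the listing. A hedged sketch of that field and of dumping the histogram after a corpus pass:

    // Hypothetical sketch: edge length (head-to-child word distance) -> frequency.
    Dictionary<int, int> edge_len2count = new Dictionary<int, int>();

    void DumpEdgeLengthHistogram(System.IO.TextWriter wrt)
    {
        foreach (var kv in edge_len2count.OrderBy(z => z.Key))
        {
            wrt.WriteLine("{0}\t{1}", kv.Key, kv.Value);
        }
    }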
Example #27
    string GetTokenVersionLemma(int version_index, SolarixGrammarEngineNET.SyntaxTreeNode token)
    {
        string lemma = string.Empty;
        int    ekey  = token.GetVersionEntryID(version_index);
        string ename = gren.GetEntryName(ekey);

        if (IsUnknownLexem(ename))
        {
            lemma = token.GetWord().ToLower();
        }
        else
        {
            lemma = ename.ToLower();
        }

        return(lemma);
    }
Example #28
    public int MatchTags(SolarixGrammarEngineNET.SyntaxTreeNode token, SolarixGrammarEngineNET.GrammarEngine2 gren)
    {
        foreach (TagMatcher m in matchers)
        {
            if (m.Match(token, gren))
            {
                return(m.GetId());
            }
        }

        int    entry_id       = token.GetEntryID();
        int    pos_id         = gren.GetEntryClass(entry_id);
        string part_of_speech = gren.GetClassName(pos_id);
        string tags           = string.Join(" ", token.GetPairs().Select(z => string.Format("{0}={1}", gren.GetCoordName(z.CoordID), gren.GetCoordStateName(z.CoordID, z.StateID))).ToArray());
        string msg            = string.Format("Can not find tag for {0} {{ {1} {2} }}", token.GetWord(), part_of_speech, tags);

        throw new ApplicationException(msg);
    }
    public void ProcessSample(string line, SolarixGrammarEngineNET.AnalysisResults tokens, int LanguageID, SolarixGrammarEngineNET.GrammarEngine2 gren)
    {
        int sample_len = tokens.Count;

        for (int i = 0; i < sample_len; ++i)
        {
            if (tokens[i].GetWord().Equals(word, StringComparison.CurrentCultureIgnoreCase))
            {
                int position = i;

                // Was the homonym recognized unambiguously?
                SolarixGrammarEngineNET.SyntaxTreeNode omonym_token = tokens[position];

                // Create all possible contexts containing the homonym - of different lengths and at different positions.
                int MIN_CONTEXT_LEN = 2, MAX_CONTEXT_LEN = 2;
                for (int left_i = position - MAX_CONTEXT_LEN + 1; left_i <= position; ++left_i)
                {
                    if (left_i >= 0)
                    {
                        int min_len = Math.Max(position - left_i + 1, MIN_CONTEXT_LEN);
                        for (int ctx_len = min_len; ctx_len <= MAX_CONTEXT_LEN; ++ctx_len)
                        {
                            OmonymContext ctx;
                            ctx.len      = ctx_len;
                            ctx.position = position - left_i;

                            if (rules.ContainsKey(ctx))
                            {
                                rules[ctx].ProcessSample(line, tokens, left_i, gren);
                            }
                            else
                            {
                                OmonymRule rule = new OmonymRule(ctx);
                                rules.Add(ctx, rule);
                                rule.ProcessSample(line, tokens, left_i, gren);
                            }
                        }
                    }
                }
            }
        }

        return;
    }
Example #30
    public void ProcessSample(string line)
    {
        if (samples.Contains(line))
        {
            return;
        }

        samples.Add(line);

        bool complete = false;

        using (SolarixGrammarEngineNET.AnalysisResults tokens = gren.AnalyzeSyntax(line, LanguageID, SolarixGrammarEngineNET.GrammarEngine.MorphologyFlags.SOL_GREN_COMPLETE_ONLY, 0))
        {
            if (tokens.Count == 3)
            {
                complete = true;
                TraverseNode(tokens[1]);
            }
        }

        if (!complete)
        {
            // Morphological analysis.
            using (SolarixGrammarEngineNET.AnalysisResults tokens = gren.AnalyzeMorphology(line, LanguageID, SolarixGrammarEngineNET.GrammarEngine.MorphologyFlags.SOL_GREN_COMPLETE_ONLY))
            {
                for (int iword = 1; iword < tokens.Count - 2; ++iword)
                {
                    SolarixGrammarEngineNET.SyntaxTreeNode token  = tokens[iword];
                    SolarixGrammarEngineNET.SyntaxTreeNode token2 = tokens[iword + 1];

                    if (IsPreposition(token) && IsNoun(token2))
                    {
                        Store_Prepos_Noun(token, token2);
                    }
                    else if (IsVerb(token) && IsPreposition(token2))
                    {
                        Store_Verb_Prepos(token, token2);
                    }
                }
            }
        }

        return;
    }
 private string GetTokenSuffix(int pos, int last_word_index, SolarixGrammarEngineNET.SyntaxTreeNode token)
 {
     if (pos == 0)
     {
         string sfx = "~~BEGIN~~";
         return(sfx);
     }
     else if (pos == last_word_index)
     {
         string sfx = "~~END~~";
         return(sfx);
     }
     else
     {
         string word   = token.GetWord().ToLower();
         string suffix = GetSuffix(word);
         return(suffix);
     }
 }
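
GetSuffix is referenced by GetTokenSuffix but not included in the listing; a minimal sketch under the assumption that it keeps the last few characters of the lower-cased word:

    // Hypothetical sketch of GetSuffix: the last 3 characters, or the whole word if shorter.
    private string GetSuffix(string word)
    {
        const int n = 3;
        return word.Length <= n ? word : word.Substring(word.Length - n);
    }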