Example #1
0
    /// <summary>
    /// Checks every tag reference found in <c>template</c> and logs an error for each
    /// one that cannot be resolved to a <c>WordTags</c> value.
    /// </summary>
    /// <remarks>
    /// A tag reference is the text captured by group 1 of <c>regexPattern</c>. Parsing is
    /// case-insensitive. Validation continues after a failure so that every bad reference
    /// in the template is reported.
    /// </remarks>
    public void Validate()
    {
        MatchCollection matches = Regex.Matches(template, regexPattern);

        foreach (Match m in matches)
        {
            string tagText = m.Groups[1].Value;

            // TryParse reports an unknown name by returning false; Enum.Parse would throw
            // ArgumentException here and the error below would never be logged.
            WordTags tags;
            bool parsed = System.Enum.TryParse<WordTags>(tagText, true, out tags);

            // Numeric text parses even when no member has that value, so a successful parse
            // is still checked: the value must be a defined member, or render as a
            // comma-separated list of members (a combination of flags).
            bool recognised = parsed
                              && (System.Enum.IsDefined(typeof(WordTags), tags) || tags.ToString().Contains(","));

            if (!recognised)
            {
                Debug.LogError("{" + template + "} could not parse " + tagText);
            }
        }
    }
    /// <summary>
    /// Appends one pair feature, built from the <c>common</c> values of two tokens, to <paramref name="b"/>.
    /// </summary>
    /// <param name="b">Buffer that receives the feature text.</param>
    /// <param name="token_features">Per-token tag records for the sentence.</param>
    /// <param name="ifocus">Index of the token the features are being generated for.</param>
    /// <param name="offset1">Offset of the first token relative to <paramref name="ifocus"/>.</param>
    /// <param name="offset2">Offset of the second token relative to <paramref name="ifocus"/>.</param>
    void PullFeatures2(System.Text.StringBuilder b, List <WordTags> token_features, int ifocus, int offset1, int offset2)
    {
        int first  = ifocus + offset1;
        int second = ifocus + offset2;

        // Nothing is appended unless both positions fall inside the list.
        if (first < 0 || first >= token_features.Count || second < 0 || second >= token_features.Count)
        {
            return;
        }

        WordTags firstTags  = token_features[first];
        WordTags secondTags = token_features[second];

        b.AppendFormat("\tF[{0},{1}]={2},{3}", offset1, offset2, firstTags.common, secondTags.common);
    }
    /// <summary>
    /// Appends one triple feature, built from the <c>common</c> values of three tokens, to <paramref name="b"/>.
    /// </summary>
    /// <param name="b">Buffer that receives the feature text.</param>
    /// <param name="token_features">Per-token tag records for the sentence.</param>
    /// <param name="ifocus">Index of the token the features are being generated for.</param>
    /// <param name="offset1">Offset of the first token relative to <paramref name="ifocus"/>.</param>
    /// <param name="offset2">Offset of the second token relative to <paramref name="ifocus"/>.</param>
    /// <param name="offset3">Offset of the third token relative to <paramref name="ifocus"/>.</param>
    void PullFeatures3(System.Text.StringBuilder b, List <WordTags> token_features, int ifocus, int offset1, int offset2, int offset3)
    {
        int first  = ifocus + offset1;
        int second = ifocus + offset2;
        int third  = ifocus + offset3;

        // Nothing is appended unless all three positions fall inside the list.
        if (first < 0 || first >= token_features.Count
            || second < 0 || second >= token_features.Count
            || third < 0 || third >= token_features.Count)
        {
            return;
        }

        WordTags firstTags  = token_features[first];
        WordTags secondTags = token_features[second];
        WordTags thirdTags  = token_features[third];

        b.AppendFormat("\ttag[{0},{1},{2}]={3},{4},{5}", offset1, offset2, offset3, firstTags.common, secondTags.common, thirdTags.common);
    }
    /// <summary>
    /// Appends the single-token features of the token at
    /// <paramref name="ifocus"/> + <paramref name="offset"/> to <paramref name="b"/>.
    /// </summary>
    /// <remarks>
    /// Always emits <c>F[offset]</c> with the token's <c>common</c> value. Emits
    /// <c>M[offset]</c> with its <c>modality</c> and <c>V[offset]</c> with its
    /// <c>valency</c> only when those values are not -1. Appends nothing when the
    /// position lies outside <paramref name="token_features"/>.
    /// </remarks>
    /// <param name="b">Buffer that receives the feature text.</param>
    /// <param name="token_features">Per-token tag records for the sentence.</param>
    /// <param name="ifocus">Index of the token the features are being generated for.</param>
    /// <param name="offset">Offset of the inspected token relative to <paramref name="ifocus"/>.</param>
    void PullFeatures1(System.Text.StringBuilder b, List <WordTags> token_features, int ifocus, int offset)
    {
        int iword = ifocus + offset;

        if (iword < 0 || iword >= token_features.Count)
        {
            return;
        }

        WordTags f = token_features[iword];
        b.AppendFormat("\tF[{0}]={1}", offset, f.common);

        // Each optional feature carries its own value; writing f.common here would only
        // duplicate the F feature under a different name.
        if (f.modality != -1)
        {
            b.AppendFormat("\tM[{0}]={1}", offset, f.modality);
        }

        if (f.valency != -1)
        {
            b.AppendFormat("\tV[{0}]={1}", offset, f.valency);
        }
    }
Example #5
0
    /// <summary>
    /// Collects the words of every entry in <c>words</c> whose tags are compatible with the given card and word tags.
    /// </summary>
    /// <param name="cardTags">Tags of the card; must include all of an entry's <c>requiredTags</c> and, when the
    /// entry has any <c>categoryTags</c>, share at least one of them.</param>
    /// <param name="wordTags">Word tags that an entry's <c>wordTags</c> must all include.</param>
    /// <param name="excludeWords">Optional words to leave out of the result; <c>null</c> excludes nothing.</param>
    /// <returns>A new list with the matching words, in the order the entries are enumerated.</returns>
    public List <string> GetWordsFromTags(CardTags cardTags, WordTags wordTags, List <string> excludeWords = null)
    {
        List <string> matching = new List <string>();

        foreach (WordBankEntry candidate in words)
        {
            bool excluded = excludeWords != null && excludeWords.Contains(candidate.word);
            if (excluded)
            {
                continue;
            }

            bool hasRequiredTags = (cardTags & candidate.requiredTags) == candidate.requiredTags;
            bool inCategory      = candidate.categoryTags == CardTags.NONE || (cardTags & candidate.categoryTags) != CardTags.NONE;
            bool hasWordTags     = (wordTags & candidate.wordTags) == wordTags;

            if (hasRequiredTags && inCategory && hasWordTags)
            {
                matching.Add(candidate.word);
            }
        }

        return matching;
    }
    /// <summary>
    /// Analyses one sentence and writes one feature row per token to <c>crf_file</c>,
    /// plus a parallel word/label/tags row to <c>visual_file</c>.
    /// </summary>
    /// <remarks>
    /// The sentence is bracketed by <c>&lt;START&gt;</c> and <c>&lt;END&gt;</c> rows labelled <c>O</c>.
    /// Features come from a window of offsets -2..2 around each token: single tokens,
    /// adjacent pairs and adjacent triples. An empty line terminates the sentence in both files.
    /// </remarks>
    /// <param name="line">Sentence text handed to the grammar engine.</param>
    /// <returns>
    /// <c>false</c>, with nothing written, when the <c>TreeLookup</c> built from the analyses
    /// reports <c>ok</c> as false; otherwise <c>true</c>.
    /// </returns>
    public bool Sentence2Features(string line)
    {
        // Syntactic parse into a tree.
        using (SolarixGrammarEngineNET.AnalysisResults trees = gren.AnalyzeSyntax(line, LanguageID, SolarixGrammarEngineNET.GrammarEngine.MorphologyFlags.SOL_GREN_COMPLETE_ONLY, 0))
        {
            // Morphological analysis.
            using (SolarixGrammarEngineNET.AnalysisResults tokens = gren.AnalyzeMorphology(line, LanguageID, SolarixGrammarEngineNET.GrammarEngine.MorphologyFlags.SOL_GREN_COMPLETE_ONLY))
            {
                TreeLookup syntax = new TreeLookup();
                syntax.Collect(tokens, trees, gren);

                if (!syntax.ok)
                {
                    return false;
                }

                // Three parallel lists, one slot per output row.
                List <WordTags> tag_index = new List <WordTags>();
                List <string>   words     = new List <string>();
                List <string>   labels    = new List <string>();

                WordTags start_t = new WordTags();
                start_t.common = START_id;
                tag_index.Add(start_t);
                words.Add("<START>");
                labels.Add("O");

                // The first and last entries of tokens are skipped; the <START>/<END> rows
                // take their slots. NOTE(review): presumably they are the engine's own
                // sentence-boundary tokens - confirm.
                for (int iword = 1; iword < tokens.Count - 1; ++iword)
                {
                    SolarixGrammarEngineNET.SyntaxTreeNode token = tokens[iword];
                    string word = token.GetWord().ToLower();

                    WordTags t = new WordTags();
                    t.common   = tags.MatchTags(token, gren);
                    t.modality = tags_modality.MatchTags(token, gren);
                    t.valency  = tags_valency.MatchTags(token, gren);
                    tag_index.Add(t);

                    // Spaces inside a token are replaced so the word stays a single field.
                    words.Add(word.Replace(" ", "_"));

                    labels.Add(syntax.GetTokenLabel(iword));
                }

                WordTags end_t = new WordTags();
                end_t.common = END_id;
                tag_index.Add(end_t);
                words.Add("<END>");
                labels.Add("O");

                System.Text.StringBuilder b = new System.Text.StringBuilder();

                for (int iword = 0; iword < tokens.Count; ++iword)
                {
                    b.Length = 0; // reuse the buffer for every row

                    string output_label = labels[iword];
                    string word         = words[iword];

                    // Single-token features at offsets -2..2.
                    for (int offset = -2; offset <= 2; ++offset)
                    {
                        PullFeatures1(b, tag_index, iword, offset);
                    }

                    // Adjacent pairs (-2,-1) .. (1,2).
                    for (int offset = -2; offset <= 1; ++offset)
                    {
                        PullFeatures2(b, tag_index, iword, offset, offset + 1);
                    }

                    // Adjacent triples (-2,-1,0) .. (0,1,2).
                    for (int offset = -2; offset <= 0; ++offset)
                    {
                        PullFeatures3(b, tag_index, iword, offset, offset + 1, offset + 2);
                    }

                    crf_file.WriteLine("{0}{1}", output_label, b.ToString());

                    visual_file.WriteLine("{0}\t{1}\t{2}", word, output_label, tag_index[iword]);
                }

                // A blank line separates sentences in both files.
                crf_file.WriteLine("");
                visual_file.WriteLine("");
            }
        }

        return true;
    }