/// <summary>
/// Checks every match of <c>regexPattern</c> in <c>template</c> and logs an error for each
/// match whose first capture group cannot be interpreted as a <c>WordTags</c> value.
/// </summary>
/// <remarks>
/// A parsed value is accepted when it is a defined enum member, or when its string form
/// contains a comma (the way .NET renders a combination of several named flag members).
/// Parsing is case-insensitive.
/// </remarks>
public void Validate()
{
    foreach (Match m in Regex.Matches(template, regexPattern))
    {
        string tagText = m.Groups[1].Value;
        bool recognized;

        try
        {
            WordTags tags = (WordTags)System.Enum.Parse(typeof(WordTags), tagText, true);
            recognized = System.Enum.IsDefined(typeof(WordTags), tags) || tags.ToString().Contains(",");
        }
        catch (System.ArgumentException)
        {
            // Enum.Parse throws for empty text or a name that is not a member of the enum.
            // Report it like any other unparseable tag instead of aborting validation.
            recognized = false;
        }
        catch (System.OverflowException)
        {
            // Numeric text outside the range of the enum's underlying type.
            recognized = false;
        }

        if (!recognized)
        {
            Debug.LogError("{" + template + "} could not parse " + tagText);
        }
    }
}
/// <summary>
/// Appends a two-token feature to <paramref name="b"/>, built from the <c>common</c> values of
/// the entries at <c>ifocus + offset1</c> and <c>ifocus + offset2</c>.
/// </summary>
/// <param name="b">Buffer that receives the tab-prefixed feature text.</param>
/// <param name="token_features">Per-token tag records, indexed by token position.</param>
/// <param name="ifocus">Position of the token the features are generated for.</param>
/// <param name="offset1">Relative position of the first token.</param>
/// <param name="offset2">Relative position of the second token.</param>
void PullFeatures2(System.Text.StringBuilder b, List<WordTags> token_features, int ifocus, int offset1, int offset2)
{
    int first = ifocus + offset1;
    int second = ifocus + offset2;

    // Emit nothing unless both positions fall inside the list.
    if (first < 0 || first >= token_features.Count || second < 0 || second >= token_features.Count)
    {
        return;
    }

    WordTags left = token_features[first];
    WordTags right = token_features[second];
    b.AppendFormat("\tF[{0},{1}]={2},{3}", offset1, offset2, left.common, right.common);
}
/// <summary>
/// Appends a three-token feature to <paramref name="b"/>, built from the <c>common</c> values of
/// the entries at <c>ifocus + offset1</c>, <c>ifocus + offset2</c> and <c>ifocus + offset3</c>.
/// </summary>
/// <param name="b">Buffer that receives the tab-prefixed feature text.</param>
/// <param name="token_features">Per-token tag records, indexed by token position.</param>
/// <param name="ifocus">Position of the token the features are generated for.</param>
/// <param name="offset1">Relative position of the first token.</param>
/// <param name="offset2">Relative position of the second token.</param>
/// <param name="offset3">Relative position of the third token.</param>
void PullFeatures3(System.Text.StringBuilder b, List<WordTags> token_features, int ifocus, int offset1, int offset2, int offset3)
{
    int first = ifocus + offset1;
    int second = ifocus + offset2;
    int third = ifocus + offset3;

    // Emit nothing unless all three positions fall inside the list.
    if (first < 0 || first >= token_features.Count)
    {
        return;
    }

    if (second < 0 || second >= token_features.Count)
    {
        return;
    }

    if (third < 0 || third >= token_features.Count)
    {
        return;
    }

    WordTags a = token_features[first];
    WordTags m = token_features[second];
    WordTags z = token_features[third];
    b.AppendFormat("\ttag[{0},{1},{2}]={3},{4},{5}", offset1, offset2, offset3, a.common, m.common, z.common);
}
/// <summary>
/// Appends the single-token features of the entry at <c>ifocus + offset</c> to
/// <paramref name="b"/>: always an <c>F[offset]</c> feature carrying the <c>common</c> value,
/// plus <c>M[offset]</c> / <c>V[offset]</c> features when <c>modality</c> / <c>valency</c> are
/// not -1. Nothing is appended when the position falls outside the list.
/// </summary>
/// <param name="b">Buffer that receives the tab-prefixed feature text.</param>
/// <param name="token_features">Per-token tag records, indexed by token position.</param>
/// <param name="ifocus">Position of the token the features are generated for.</param>
/// <param name="offset">Relative position of the token whose tags are emitted.</param>
void PullFeatures1(System.Text.StringBuilder b, List<WordTags> token_features, int ifocus, int offset)
{
    int iword = ifocus + offset;
    if (iword < 0 || iword >= token_features.Count)
    {
        return;
    }

    WordTags f = token_features[iword];
    b.AppendFormat("\tF[{0}]={1}", offset, f.common);

    // -1 is treated as "no value" for the optional tags.
    if (f.modality != -1)
    {
        // Emit the modality value itself; the previous code repeated f.common here.
        b.AppendFormat("\tM[{0}]={1}", offset, f.modality);
    }

    if (f.valency != -1)
    {
        // Emit the valency value itself; the previous code repeated f.common here.
        b.AppendFormat("\tV[{0}]={1}", offset, f.valency);
    }
}
/// <summary>
/// Collects the <c>word</c> of every entry in <c>words</c> that is compatible with the given
/// card and word tags.
/// </summary>
/// <param name="cardTags">Tags of the card; must include all of an entry's <c>requiredTags</c>
/// and, when the entry has <c>categoryTags</c>, share at least one of them.</param>
/// <param name="wordTags">Tags the entry's <c>wordTags</c> must all include.</param>
/// <param name="excludeWords">Optional words to leave out of the result.</param>
/// <returns>A new list with the matching words, in the order of <c>words</c>.</returns>
public List<string> GetWordsFromTags(CardTags cardTags, WordTags wordTags, List<string> excludeWords = null)
{
    List<string> matches = new List<string>();

    foreach (WordBankEntry candidate in words)
    {
        bool excluded = excludeWords != null && excludeWords.Contains(candidate.word);
        if (excluded)
        {
            continue;
        }

        // Every tag the entry requires must be present on the card.
        if ((cardTags & candidate.requiredTags) != candidate.requiredTags)
        {
            continue;
        }

        // An entry with category tags needs at least one of them on the card.
        if (candidate.categoryTags != CardTags.NONE && (cardTags & candidate.categoryTags) == CardTags.NONE)
        {
            continue;
        }

        // The entry must carry every requested word tag.
        if ((wordTags & candidate.wordTags) != wordTags)
        {
            continue;
        }

        matches.Add(candidate.word);
    }

    return matches;
}
/// <summary>
/// Analyzes one sentence with the grammar engine and writes one feature row per token to
/// <c>crf_file</c> and one readable row per token to <c>visual_file</c>, each followed by an
/// empty line that terminates the sentence.
/// </summary>
/// <param name="line">Sentence text handed to the syntactic and morphological analyzers.</param>
/// <returns>
/// <c>false</c> when the <c>TreeLookup</c> reports <c>ok == false</c> after collecting the
/// analyses (nothing is written in that case); otherwise <c>true</c>.
/// </returns>
public bool Sentence2Features(string line)
{
    // Syntactic parse into a tree.
    using (SolarixGrammarEngineNET.AnalysisResults trees = gren.AnalyzeSyntax(line, LanguageID, SolarixGrammarEngineNET.GrammarEngine.MorphologyFlags.SOL_GREN_COMPLETE_ONLY, 0))
    {
        // Morphological analysis.
        using (SolarixGrammarEngineNET.AnalysisResults tokens = gren.AnalyzeMorphology(line, LanguageID, SolarixGrammarEngineNET.GrammarEngine.MorphologyFlags.SOL_GREN_COMPLETE_ONLY))
        {
            TreeLookup syntax = new TreeLookup();
            syntax.Collect(tokens, trees, gren);
            if (!syntax.ok)
            {
                return false;
            }

            // Three parallel lists, one entry per row that will be written.
            List<WordTags> tag_index = new List<WordTags>();
            List<string> words = new List<string>();
            List<string> labels = new List<string>();

            // Sentinel row standing in for the first token.
            WordTags start_t = new WordTags();
            start_t.common = START_id;
            tag_index.Add(start_t);
            words.Add("<START>");
            labels.Add("O");

            // The first and last entries of tokens are skipped; only the ones in between are tagged.
            for (int iword = 1; iword < tokens.Count - 1; ++iword)
            {
                SolarixGrammarEngineNET.SyntaxTreeNode token = tokens[iword];

                WordTags t = new WordTags();
                t.common = tags.MatchTags(token, gren);
                t.modality = tags_modality.MatchTags(token, gren);
                t.valency = tags_valency.MatchTags(token, gren);
                tag_index.Add(t);

                // Spaces inside a token are replaced so the word stays a single column.
                string crf_word = token.GetWord().ToLower().Replace(" ", "_");
                words.Add(crf_word);

                labels.Add(syntax.GetTokenLabel(iword));
            }

            // Sentinel row standing in for the last token.
            WordTags end_t = new WordTags();
            end_t.common = END_id;
            tag_index.Add(end_t);
            words.Add("<END>");
            labels.Add("O");

            System.Text.StringBuilder b = new System.Text.StringBuilder();

            for (int iword = 0; iword < tokens.Count; ++iword)
            {
                // Reuse one buffer for every row.
                b.Length = 0;

                string output_label = labels[iword];
                string word = words[iword];

                // Single-token features in a window of -2..+2 around the focus token.
                // (Offsets -3 and +3 are disabled.)
                PullFeatures1(b, tag_index, iword, -2);
                PullFeatures1(b, tag_index, iword, -1);
                PullFeatures1(b, tag_index, iword, 0);
                PullFeatures1(b, tag_index, iword, 1);
                PullFeatures1(b, tag_index, iword, 2);

                // Adjacent pairs inside the same window.
                // (Pairs (-3,-2) and (3,4) are disabled.)
                PullFeatures2(b, tag_index, iword, -2, -1);
                PullFeatures2(b, tag_index, iword, -1, 0);
                PullFeatures2(b, tag_index, iword, 0, 1);
                PullFeatures2(b, tag_index, iword, 1, 2);

                // Adjacent triples that contain the focus token.
                // (Triples (-3,-2,-1) and (1,2,3) are disabled.)
                PullFeatures3(b, tag_index, iword, -2, -1, 0);
                PullFeatures3(b, tag_index, iword, -1, 0, 1);
                PullFeatures3(b, tag_index, iword, 0, 1, 2);

                crf_file.WriteLine("{0}{1}", output_label, b.ToString());
                visual_file.WriteLine("{0}\t{1}\t{2}", word, output_label, tag_index[iword]);
            }

            // An empty line separates sentences in both outputs.
            crf_file.WriteLine("");
            visual_file.WriteLine("");
        }
    }

    return true;
}