/// <summary>
/// Runs <paramref name="tgrepPattern"/> over <paramref name="tree"/> and hands the named nodes of
/// every match to <c>AddFoundPair</c>.
/// </summary>
/// <remarks>
/// For each match the nodes named <c>m1</c> and <c>m2</c> are passed, together with the matched
/// subtree, to <c>AddFoundPair</c>. When the pattern text contains <c>m3</c> and that node is bound,
/// the (m2, m3) combination is passed as well.
/// </remarks>
/// <param name="tree">Tree to search.</param>
/// <param name="tgrepPattern">Pattern expected to name its nodes <c>m1</c>, <c>m2</c> and optionally <c>m3</c>.</param>
/// <param name="foundPairs">Collection forwarded to <c>AddFoundPair</c>.</param>
/// <exception cref="System.Exception">Wraps any exception raised while matching; the cause is kept as the inner exception.</exception>
private void FindTreePattern(Tree tree, TregexPattern tgrepPattern, ICollection <Pair <int, int> > foundPairs)
 {
     try
     {
         TregexMatcher m = tgrepPattern.Matcher(tree);
         // The pattern text is the same for every match, so look for the optional third node name once.
         bool patternNamesThirdNode = tgrepPattern.Pattern().Contains("m3");
         while (m.Find())
         {
             Tree t   = m.GetMatch();
             Tree np1 = m.GetNode("m1");
             Tree np2 = m.GetNode("m2");
             Tree np3 = patternNamesThirdNode ? m.GetNode("m3") : null;
             AddFoundPair(np1, np2, t, foundPairs);
             if (np3 != null)
             {
                 AddFoundPair(np2, np3, t, foundPairs);
             }
         }
     }
     catch (Exception e)
     {
         // shouldn't happen....
         // System.Exception has no (Exception) constructor; pass the cause as the inner exception
         // so the original stack trace stays reachable.
         throw new Exception(e.Message, e);
     }
 }
Esempio n. 2
0
        /// <summary>
        /// Creates a <c>Mention</c> for every node of the sentence's parse tree matched by
        /// <c>npOrPrpMentionPattern</c> whose token span has not been recorded yet.
        /// </summary>
        /// <remarks>
        /// A span is skipped when it is already in <paramref name="mentionSpanSet"/>, or when the
        /// language is not Chinese and <c>InsideNE</c> returns true for it. Accepted spans are added to
        /// both <paramref name="mentions"/> and <paramref name="mentionSpanSet"/>.
        /// </remarks>
        /// <param name="s">Sentence annotation providing tokens, parse tree and dependency graphs.</param>
        /// <param name="mentions">Receives the newly created mentions.</param>
        /// <param name="mentionSpanSet">Spans already covered by a mention; updated in place.</param>
        /// <param name="namedEntitySpanSet">Spans forwarded to <c>InsideNE</c>.</param>
        public virtual void ExtractNPorPRP(ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
        {
            IList <CoreLabel> tokens = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            Tree parse = s.Get(typeof(TreeCoreAnnotations.TreeAnnotation));

            // Leaf indices are read back below through IndexAnnotation.
            parse.IndexLeaves();

            SemanticGraph basicGraph    = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            SemanticGraph enhancedGraph = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
            if (enhancedGraph == null)
            {
                // No enhanced graph available: use the basic dependencies in its place.
                enhancedGraph = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            }

            TregexMatcher matcher = npOrPrpMentionPattern.Matcher(parse);
            while (matcher.Find())
            {
                Tree         matched = matcher.GetMatch();
                IList <Tree> leaves  = matched.GetLeaves();

                // Leaf indices are converted to a span with a zero-based start and an exclusive end.
                CoreLabel firstLeaf = (CoreLabel)leaves[0].Label();
                int       beginIdx  = firstLeaf.Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
                CoreLabel lastLeaf  = (CoreLabel)leaves[leaves.Count - 1].Label();
                int       endIdx    = lastLeaf.Get(typeof(CoreAnnotations.IndexAnnotation));

                // Note: a step that shrank spans ending in "," used to live here and is disabled.
                IntPair span = new IntPair(beginIdx, endIdx);
                if (mentionSpanSet.Contains(span))
                {
                    continue;
                }
                if (lang != Locale.Chinese && InsideNE(span, namedEntitySpanSet))
                {
                    continue;
                }

                int     placeholderId = -1;
                Mention mention       = new Mention(placeholderId, beginIdx, endIdx, tokens, basicGraph, enhancedGraph, new List <CoreLabel>(tokens.SubList(beginIdx, endIdx)), matched);
                mentions.Add(mention);
                mentionSpanSet.Add(span);
            }
        }
        /// <summary>
        /// Runs <c>relabel n X</c> against a pattern that uses the back reference <c>~n</c> and checks
        /// the resulting tree.
        /// </summary>
        public virtual void TestBackReference()
        {
            TregexPattern   pattern = TregexPattern.Compile("__ <1 B=n <2 ~n");
            TsurgeonPattern relabel = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel n X");

            const string input    = "(A (B w) (B w))";
            const string expected = "(A (X w) (B w))";
            RunTest(pattern, relabel, input, expected);
        }
Esempio n. 4
0
 /// <summary>
 /// Compiles the fixed list of tregex patterns used to spot pleonastic pronouns.
 /// </summary>
 /// <returns>One compiled <c>TregexPattern</c> per source string, in declaration order.</returns>
 private static TregexPattern[] GetPleonasticPatterns()
 {
     string[] sources =
     {
         "@NP < (PRP=m1 < it|IT|It) $.. (@VP < (/^V.*/ < /^(?i:is|was|be|becomes|become|became)$/ $.. (@VP < (VBN $.. @S|SBAR))))",
         "NP < (PRP=m1) $.. (VP < ((/^V.*/ < /^(?:is|was|become|became)/) $.. (ADJP $.. (/S|SBAR/))))",
         "NP < (PRP=m1) $.. (VP < ((/^V.*/ < /^(?:is|was|become|became)/) $.. (ADJP < (/S|SBAR/))))",
         "NP < (PRP=m1) $.. (VP < ((/^V.*/ < /^(?:is|was|become|became)/) $.. (NP < /S|SBAR/)))",
         "NP < (PRP=m1) $.. (VP < ((/^V.*/ < /^(?:is|was|become|became)/) $.. (NP $.. ADVP $.. /S|SBAR/)))",
         "NP < (PRP=m1) $.. (VP < (MD $.. (VP < ((/^V.*/ < /^(?:be|become)/) $.. (VP < (VBN $.. /S|SBAR/))))))",
         "NP < (PRP=m1) $.. (VP < (MD $.. (VP < ((/^V.*/ < /^(?:be|become)/) $.. (ADJP $.. (/S|SBAR/))))))",
         "NP < (PRP=m1) $.. (VP < (MD $.. (VP < ((/^V.*/ < /^(?:be|become)/) $.. (ADJP < (/S|SBAR/))))))",
         "NP < (PRP=m1) $.. (VP < (MD $.. (VP < ((/^V.*/ < /^(?:be|become)/) $.. (NP < /S|SBAR/)))))",
         "NP < (PRP=m1) $.. (VP < (MD $.. (VP < ((/^V.*/ < /^(?:be|become)/) $.. (NP $.. ADVP $.. /S|SBAR/)))))",
         "NP < (PRP=m1) $.. (VP < ((/^V.*/ < /^(?:seems|appears|means|follows)/) $.. /S|SBAR/))",
         "NP < (PRP=m1) $.. (VP < ((/^V.*/ < /^(?:turns|turned)/) $.. PRT $.. /S|SBAR/))"
     };

     // Historical notes carried over from the original source (they refer to individual patterns above
     // and to alternatives that were tried and rejected):
     // cdm 2013: I spent a while on these patterns. I fixed a syntax error in five patterns ($.. split with space), so it now shouldn't exception in checkPleonastic. This gave 0.02% on CoNLL11 dev
     // I tried some more precise patterns but they didn't help. Indeed, they tended to hurt vs. the higher recall patterns.
     //"NP < (PRP=m1) $.. (VP < ((/^V.*/ < /^(?:is|was|become|became)/) $.. (VP < (VBN $.. /S|SBAR/))))", // overmatches
     // "@NP < (PRP=m1 < it|IT|It) $.. (@VP < (/^V.*/ < /^(?i:is|was|be|becomes|become|became)$/ $.. (@VP < (VBN < expected|hoped $.. @SBAR))))",  // this one seems more accurate, but ...
     // in practice, go with this one (best results)
     // "@NP < (PRP=m1 < it|IT|It) $.. (@VP < (/^V.*/ < /^(?i:is|was|be|becomes|become|became)$/ $.. (@ADJP < (/^(?:JJ|VB)/ < /^(?i:(?:hard|tough|easi)(?:er|est)?|(?:im|un)?(?:possible|interesting|worthwhile|likely|surprising|certain)|disappointing|pointless|easy|fine|okay)$/) [ < @S|SBAR | $.. (@S|SBAR !< (IN !< for|For|FOR|that|That|THAT)) ] )))", // does worse than above 2 on CoNLL11 dev
     // "@NP < (PRP=m1 < it|IT|It) $.. (@VP < (/^V.*/ < /^(?i:is|was|be|becomes|become|became)$/ $.. (@NP $.. @ADVP $.. @SBAR)))", // cleft examples, generalized to not need ADVP; but gave worse CoNLL12 dev numbers....
     // these next 5 had buggy space in "$ ..", which I fixed
     // extraposed. OK 1/2 correct; need non-adverbial case
     // OK: 3/3 good matches on dev; but 3/4 wrong on WSJ
     // certain can be either but relatively likely pleonastic with it ... be
     // "@NP < (PRP=m1 < it|IT|It) $.. (@VP < (MD $.. (@VP < ((/^V.*/ < /^(?:be|become)/) $.. (@ADJP < (/^JJ/ < /^(?i:(?:hard|tough|easi)(?:er|est)?|(?:im|un)?(?:possible|interesting|worthwhile|likely|surprising|certain)|disappointing|pointless|easy|fine|okay))$/) [ < @S|SBAR | $.. (@S|SBAR !< (IN !< for|For|FOR|that|That|THAT)) ] )))))", // GOOD REPLACEMENT ; 2nd clause is for extraposed ones

     TregexPattern[] compiled = new TregexPattern[sources.Length];
     int slot = 0;
     foreach (string source in sources)
     {
         compiled[slot] = TregexPattern.Compile(source);
         slot++;
     }
     return compiled;
 }
        /// <summary>
        /// Exercises the tsurgeon <c>replace</c> operation with literal replacement trees: a single
        /// replacement, a two-node replacement, and a replacement whose leaf is a word.
        /// </summary>
        public virtual void TestReplaceTree()
        {
            TsurgeonPattern replaceOp  = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("replace foo (BAR 1)");
            TregexPattern   fooPattern = TregexPattern.Compile("B=foo");

            RunTest(fooPattern, replaceOp, "(A (B 0) (B 1) (C 2))", "(A (BAR 1) (BAR 1) (C 2))");

            // A one-for-one replacement is allowed even when the matched node is the root.
            RunTest(fooPattern, replaceOp, "(B (C 1))", "(BAR 1)");

            replaceOp = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("replace foo (BAR 1) (BAZ 2)");
            RunTest(fooPattern, replaceOp, "(A (B 0) (B 1) (C 2))", "(A (BAR 1) (BAZ 2) (BAR 1) (BAZ 2) (C 2))");

            // Replacing the root node with two nodes is expected to fail at runtime.
            bool rootReplacementRejected = false;
            try
            {
                RunTest(fooPattern, replaceOp, "(B 0)", "(B 0)");
            }
            catch (TsurgeonRuntimeException)
            {
                rootReplacementRejected = true;
            }
            if (!rootReplacementRejected)
            {
                throw new Exception("Expected a failure");
            }

            // Numbers can work while words do not if the tsurgeon parser is incorrect,
            // so repeat the check with a word leaf.
            replaceOp  = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("replace foo (BAR blah)");
            fooPattern = TregexPattern.Compile("B=foo");
            RunTest(fooPattern, replaceOp, "(A (B 0) (B 1) (C 2))", "(A (BAR blah) (BAR blah) (C 2))");
        }
Esempio n. 6
0
        /// <summary>
        /// Creates mentions for the <c>m1</c> and <c>m2</c> nodes of every match of
        /// <c>enumerationsMentionPattern</c> in the sentence's parse tree.
        /// </summary>
        /// <remarks>
        /// Spans are first collected into a span-to-subtree map (a later subtree with the same span
        /// overwrites an earlier one). A mention is then created for each span that is not yet in
        /// <paramref name="mentionSpanSet"/> and for which <c>InsideNE</c> returns false.
        /// </remarks>
        /// <param name="s">Sentence annotation providing tokens, parse tree and dependency graph.</param>
        /// <param name="mentions">Receives the newly created mentions.</param>
        /// <param name="mentionSpanSet">Spans already covered by a mention; updated in place.</param>
        /// <param name="namedEntitySpanSet">Spans forwarded to <c>InsideNE</c>.</param>
        protected internal static void ExtractEnumerations(ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
        {
            IList <CoreLabel> tokens = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            Tree          parse      = s.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
            SemanticGraph graph      = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
            TregexMatcher matcher    = enumerationsMentionPattern.Matcher(parse);
            IDictionary <IntPair, Tree> subTreeBySpan = Generics.NewHashMap();

            while (matcher.Find())
            {
                matcher.GetMatch();
                Tree[] members = new Tree[] { matcher.GetNode("m1"), matcher.GetNode("m2") };
                foreach (Tree member in members)
                {
                    IList <Tree> leaves = member.GetLeaves();
                    // Leaf indices are converted to a span with a zero-based start and an exclusive end.
                    CoreLabel firstLeaf = (CoreLabel)leaves[0].Label();
                    int       beginIdx  = firstLeaf.Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
                    CoreLabel lastLeaf  = (CoreLabel)leaves[leaves.Count - 1].Label();
                    int       endIdx    = lastLeaf.Get(typeof(CoreAnnotations.IndexAnnotation));
                    subTreeBySpan[new IntPair(beginIdx, endIdx)] = member;
                }
            }

            foreach (IntPair span in subTreeBySpan.Keys)
            {
                if (mentionSpanSet.Contains(span) || InsideNE(span, namedEntitySpanSet))
                {
                    continue;
                }
                int     placeholderId = -1;
                Mention mention       = new Mention(placeholderId, span.Get(0), span.Get(1), graph, new List <CoreLabel>(tokens.SubList(span.Get(0), span.Get(1))), subTreeBySpan[span]);
                mentions.Add(mention);
                mentionSpanSet.Add(span);
            }
        }
        /// <summary>
        /// Exercises the tsurgeon <c>insert</c> operation at several positions (<c>$+</c>, <c>$-</c>,
        /// <c>&gt;0</c>), with a named node as the inserted tree, and with <c>=</c> in the new node's label.
        /// </summary>
        public virtual void TestInsert()
        {
            // Every case below starts from the same tree.
            const string input = "(A (B 0) (C 1))";

            TsurgeonPattern insertOp = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("insert (D (E 6)) $+ bar");
            TregexPattern   target   = TregexPattern.Compile("B=bar !$ D");
            RunTest(target, insertOp, input, "(A (D (E 6)) (B 0) (C 1))");

            insertOp = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("insert (D (E 6)) $- bar");
            RunTest(target, insertOp, input, "(A (B 0) (D (E 6)) (C 1))");

            insertOp = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("insert (D (E 6)) >0 bar");
            target   = TregexPattern.Compile("B=bar !<D");
            RunTest(target, insertOp, input, "(A (B (D (E 6)) 0) (C 1))");

            insertOp = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("insert foo >0 bar");
            target   = TregexPattern.Compile("B=bar !<C $C=foo");
            RunTest(target, insertOp, input, "(A (B (C 1) 0) (C 1))");

            // An unescaped "=name" suffix on the new node is cut off.
            insertOp = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("insert (D (E=blah 6)) >0 bar");
            target   = TregexPattern.Compile("B=bar !<D");
            RunTest(target, insertOp, input, "(A (B (D (E 6)) 0) (C 1))");

            // With the "=" escaped, the suffix stays and the "=" comes out unescaped.
            insertOp = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("insert (D (E\\=blah 6)) >0 bar");
            target   = TregexPattern.Compile("B=bar !<D");
            RunTest(target, insertOp, input, "(A (B (D (E=blah 6)) 0) (C 1))");

            // An escaped backslash before "=" cuts the name off again and leaves a \ at the end of the new node.
            insertOp = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("insert (D (E\\\\=blah 6)) >0 bar");
            target   = TregexPattern.Compile("B=bar !<D");
            RunTest(target, insertOp, input, "(A (B (D (E\\ 6)) 0) (C 1))");
        }
Esempio n. 8
0
        /// <summary>
        /// Creates a <c>Mention</c> for every node of the sentence's parse tree matched by
        /// <c>npOrPrpMentionPattern</c> whose token span has not been recorded yet.
        /// </summary>
        /// <remarks>
        /// A span whose last token is "," is shortened by one token. A span is skipped when it is
        /// already in <paramref name="mentionSpanSet"/> or when <c>InsideNE</c> returns true for it.
        /// </remarks>
        /// <param name="s">Sentence annotation providing tokens, parse tree and dependency graph.</param>
        /// <param name="mentions">Receives the newly created mentions.</param>
        /// <param name="mentionSpanSet">Spans already covered by a mention; updated in place.</param>
        /// <param name="namedEntitySpanSet">Spans forwarded to <c>InsideNE</c>.</param>
        protected internal static void ExtractNPorPRP(ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
        {
            IList <CoreLabel> tokens = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            Tree parse = s.Get(typeof(TreeCoreAnnotations.TreeAnnotation));

            // Leaf indices are read back below through IndexAnnotation.
            parse.IndexLeaves();

            SemanticGraph graph   = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
            TregexMatcher matcher = npOrPrpMentionPattern.Matcher(parse);

            while (matcher.Find())
            {
                Tree         matched = matcher.GetMatch();
                IList <Tree> leaves  = matched.GetLeaves();

                // Leaf indices are converted to a span with a zero-based start and an exclusive end.
                CoreLabel firstLeaf = (CoreLabel)leaves[0].Label();
                int       beginIdx  = firstLeaf.Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
                CoreLabel lastLeaf  = (CoreLabel)leaves[leaves.Count - 1].Label();
                int       endIdx    = lastLeaf.Get(typeof(CoreAnnotations.IndexAnnotation));

                // Try not to produce a span that ends with a comma.
                bool endsWithComma = ",".Equals(tokens[endIdx - 1].Word());
                if (endsWithComma)
                {
                    endIdx -= 1;
                }

                IntPair span = new IntPair(beginIdx, endIdx);
                if (mentionSpanSet.Contains(span) || InsideNE(span, namedEntitySpanSet))
                {
                    continue;
                }

                int     placeholderId = -1;
                Mention mention       = new Mention(placeholderId, beginIdx, endIdx, graph, new List <CoreLabel>(tokens.SubList(beginIdx, endIdx)), matched);
                mentions.Add(mention);
                mentionSpanSet.Add(span);
            }
        }
Esempio n. 9
0
        /// <summary>
        /// Parses <c>editStr</c> into (tregex pattern, tsurgeon operation) pairs.
        /// </summary>
        /// <remarks>
        /// The text is read line by line: one line is compiled as a tregex pattern, then the following
        /// lines are parsed as tsurgeon operations for as long as <c>Continuing</c> accepts them. The line
        /// that ends an operation run is consumed and not reused. A pattern with no operations produces
        /// no pair. An <c>IOException</c> is printed and whatever was parsed so far is returned.
        /// </remarks>
        /// <returns>The parsed pairs, in input order.</returns>
        private IList <Pair <TregexPattern, TsurgeonPattern> > LoadOps()
        {
            IList <Pair <TregexPattern, TsurgeonPattern> > loaded = new List <Pair <TregexPattern, TsurgeonPattern> >();

            try
            {
                BufferedReader          reader  = new BufferedReader(new StringReader(editStr));
                IList <TsurgeonPattern> pending = new List <TsurgeonPattern>();

                // Outer loop: each iteration starts on a line holding a tregex pattern.
                for (string text = reader.ReadLine(); text != null; text = reader.ReadLine())
                {
                    TregexPattern matchPattern = TregexPattern.Compile(text);
                    pending.Clear();

                    // Inner loop: gather the operations that belong to this pattern.
                    for (text = reader.ReadLine(); Continuing(text); text = reader.ReadLine())
                    {
                        pending.Add(Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation(text));
                    }

                    if (pending.IsEmpty())
                    {
                        continue;
                    }
                    TsurgeonPattern combined = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.CollectOperations(pending);
                    loaded.Add(new Pair <TregexPattern, TsurgeonPattern>(matchPattern, combined));
                }
            }
            catch (IOException readFailure)
            {
                Sharpen.Runtime.PrintStackTrace(readFailure);
            }
            return loaded;
        }
        /// <summary>
        /// Checks that node labels containing non-ASCII characters work in both the tregex pattern
        /// and the tsurgeon <c>relabel</c> operation.
        /// </summary>
        public virtual void TestForeign()
        {
            TregexPattern   pattern = TregexPattern.Compile("atentát=test");
            TsurgeonPattern relabel = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel test perform_atentát");

            const string input    = "(foo atentát)";
            const string expected = "(foo perform_atentát)";
            RunTest(pattern, relabel, input, expected);
        }
        /// <summary>
        /// Checks that the operation keyword <c>relabel</c> can itself be used as the new label.
        /// </summary>
        public virtual void TestKeyword()
        {
            // Both lines should compile/parse successfully if keyword parsing is correct.
            TregexPattern   pattern = TregexPattern.Compile("A=foo << B=bar << C=baz");
            TsurgeonPattern relabel = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel foo relabel");

            const string input    = "(A (B foo) (C foo) (C bar))";
            const string expected = "(relabel (B foo) (C foo) (C bar))";
            RunTest(pattern, relabel, input, expected);
        }
        /// <summary>Right now this outputs trees in PTB format.</summary>
        /// <remarks>
        /// Right now this outputs trees in PTB format.  It outputs one tree
        /// at a time until we have output enough trees to fill the given
        /// file, then moves on to the next file.  Trees are output in the
        /// order given in the <code>ids</code> list.
        /// <br />
        /// Ids missing from <code>treeMap</code> and trees matching either
        /// "bad tree" pattern are logged and skipped.  Leaves are rewritten by
        /// <c>MungeLeaves</c> when <c>LemmasAsLeaves</c> or
        /// <c>AddMorphoToLeaves</c> is set.
        /// <br />
        /// File names and sizes are taken, in order, from <c>fNames</c> and
        /// <c>fSizes</c>.  The current writer is always closed before this
        /// method returns, including when it exits with an exception.
        /// </remarks>
        /// <param name="ids">Tree ids in the order the trees should be written.</param>
        /// <param name="treeMap">Lookup from id to tree.</param>
        /// <exception cref="System.IO.IOException"/>
        public static void OutputSplits(IList <string> ids, IDictionary <string, Tree> treeMap)
        {
            IQueue <int>     fSizeQueue = new LinkedList <int>(Arrays.AsList(fSizes));
            IQueue <string>  fNameQueue = new LinkedList <string>(Arrays.AsList(fNames));
            TregexPattern    pBadTree   = TregexPattern.Compile("@SENT <: @PUNC");
            TregexPattern    pBadTree2  = TregexPattern.Compile("@SENT <1 @PUNC <2 @PUNC !<3 __");
            ITreeTransformer tt         = new FTBCorrector();
            int    size     = fSizeQueue.Remove();
            string filename = fNameQueue.Remove();

            log.Info("Outputing " + filename);
            PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filename), "UTF-8")));

            try
            {
                int outputCount = 0;

                foreach (string id in ids)
                {
                    if (!treeMap.Contains(id))
                    {
                        log.Info("Missing id: " + id);
                        continue;
                    }
                    Tree          tree = treeMap[id];
                    TregexMatcher m    = pBadTree.Matcher(tree);
                    TregexMatcher m2   = pBadTree2.Matcher(tree);
                    if (m.Find() || m2.Find())
                    {
                        log.Info("Discarding tree: " + tree.ToString());
                        continue;
                    }
                    // Punctuation normalization, etc.
                    Tree backupCopy = tree.DeepCopy();
                    tree = tt.TransformTree(tree);
                    if (tree.FirstChild().Children().Length == 0)
                    {
                        // Some trees have only punctuation. Tregex will mangle these. Don't throw those away.
                        log.Info("Saving tree: " + tree.ToString());
                        log.Info("Backup: " + backupCopy.ToString());
                        tree = backupCopy;
                    }
                    if (LemmasAsLeaves || AddMorphoToLeaves)
                    {
                        MungeLeaves(tree, LemmasAsLeaves, AddMorphoToLeaves);
                    }
                    ReplacePOSTags(tree);
                    writer.Println(tree.ToString());
                    ++outputCount;
                    if (outputCount == size)
                    {
                        // Current file is full: move on to the next (size, name) pair.
                        // NOTE(review): Remove() presumably fails once the queues are exhausted, i.e. when
                        // the last configured file fills up exactly — confirm against fSizes/fNames.
                        outputCount = 0;
                        size        = fSizeQueue.Remove();
                        filename    = fNameQueue.Remove();
                        log.Info("Outputing " + filename);
                        writer.Close();
                        // Cleared so the finally block does not close the same writer again if opening
                        // the next file fails.
                        writer = null;
                        writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filename), "UTF-8")));
                    }
                }
            }
            finally
            {
                // Release the open file even when a tree fails to process or no further file is configured.
                if (writer != null)
                {
                    writer.Close();
                }
            }
        }
        /// <summary>
        /// Checks that <c>replace</c> works on a tree with Chinese labels, splitting the leaf
        /// <c>她{</c> under <c>PU</c> into a <c>PN</c> node and a <c>PU</c> node.
        /// </summary>
        public virtual void TestChineseReplaceTree()
        {
            string before = "(IP (IP (PP (P 像) (NP (NP (NR 赖斯) (PU ,) (NR 赖斯)) (NP (PN 本身)))) (PU 她{) (NP (NN breath)) (PU }) (IJ 呃) (VP (VV 担任) (NP (NN 国务卿)) (VP (ADVP (AD 比较)) (VP (VA 晚))))))";
            string after  = "(IP (IP (PP (P 像) (NP (NP (NR 赖斯) (PU ,) (NR 赖斯)) (NP (PN 本身)))) (PN 她) (PU {) (NP (NN breath)) (PU }) (IJ 呃) (VP (VV 担任) (NP (NN 国务卿)) (VP (ADVP (AD 比较)) (VP (VA 晚))))))";

            TregexPattern   puncPattern = TregexPattern.Compile("PU=punc < 她{");
            TsurgeonPattern replaceOp   = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("replace punc (PN 她) (PU {)");

            RunTest(puncPattern, replaceOp, before, after);
        }
        /// <summary>
        /// Checks <c>replace</c> on a tree with several identical comma children, of which only the
        /// one immediately followed by <c>CC</c> matches the pattern.
        /// </summary>
        public virtual void TestReplaceWithRepeats()
        {
            TregexPattern   commaPattern = TregexPattern.Compile("@NP < (/^,/=comma $+ CC)");
            TsurgeonPattern replaceOp    = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("replace comma (COMMA)");

            const string input    = "(NP NP , NP , NP , CC NP)";
            const string expected = "(NP NP , NP , NP COMMA CC NP)";
            RunTest(commaPattern, replaceOp, input, expected);
        }
        /// <summary>
        /// Exercises the tsurgeon <c>createSubtree</c> operation: two-endpoint and one-argument forms,
        /// malformed operations that must fail to parse, and endpoints with different parents that
        /// must fail at runtime.
        /// </summary>
        public virtual void TestCreateSubtrees()
        {
            TsurgeonPattern subtreeOp = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("createSubtree FOO left right");
            TregexPattern   endpoints = TregexPattern.Compile("A < B=left < C=right");

            // Only the two endpoint nodes are present.
            RunTest(endpoints, subtreeOp, "(A (B 1) (C 2))", "(A (FOO (B 1) (C 2)))");
            // Endpoints given in reverse order are accepted too.
            RunTest(endpoints, subtreeOp, "(A (C 1) (B 2))", "(A (FOO (C 1) (B 2)))");
            // Nodes between the endpoints are wrapped as well.
            RunTest(endpoints, subtreeOp, "(A (B 1) (D 3) (C 2))", "(A (FOO (B 1) (D 3) (C 2)))");
            // Nodes outside the span stay where they are.
            RunTest(endpoints, subtreeOp, "(A (D 3) (B 1) (C 2))", "(A (D 3) (FOO (B 1) (C 2)))");
            RunTest(endpoints, subtreeOp, "(A (B 1) (C 2) (D 3))", "(A (FOO (B 1) (C 2)) (D 3))");
            RunTest(endpoints, subtreeOp, "(A (D 3) (B 1) (C 2) (E 4))", "(A (D 3) (FOO (B 1) (C 2)) (E 4))");

            // Both endpoints resolve to the same node.
            endpoints = TregexPattern.Compile("A < B=left < B=right");
            RunTest(endpoints, subtreeOp, "(A (B 1) (C 2))", "(A (FOO (B 1)) (C 2))");
            // Double operation: two FOO nodes are created and then the process stops.
            RunTest(endpoints, subtreeOp, "(A (B 1) (B 2))", "(A (FOO (B 1)) (FOO (B 2)))");

            // createSubtree with a single node argument.
            subtreeOp = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("createSubtree FOO child");
            endpoints = TregexPattern.Compile("A < B=child");
            RunTest(endpoints, subtreeOp, "(A (B 1) (C 2))", "(A (FOO (B 1)) (C 2))");
            RunTest(endpoints, subtreeOp, "(A (B 1) (B 2))", "(A (FOO (B 1)) (FOO (B 2)))");

            // Incorrectly formatted operations must not parse: no node arguments at all ...
            bool tooFewArgsRejected = false;
            try
            {
                subtreeOp = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("createSubtree FOO");
            }
            catch (TsurgeonParseException)
            {
                tooFewArgsRejected = true;
            }
            if (!tooFewArgsRejected)
            {
                throw new AssertionError("Expected to fail parsing");
            }

            // ... and too many node arguments.
            bool tooManyArgsRejected = false;
            try
            {
                subtreeOp = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("createSubtree FOO a b c");
            }
            catch (TsurgeonParseException)
            {
                tooManyArgsRejected = true;
            }
            if (!tooManyArgsRejected)
            {
                throw new AssertionError("Expected to fail parsing");
            }

            // Endpoints with different parents must fail when the operation runs.
            subtreeOp = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("createSubtree FOO left right");
            endpoints = TregexPattern.Compile("A << B=left << C=right");
            bool differentParentsRejected = false;
            try
            {
                RunTest(endpoints, subtreeOp, "(A (B 1) (D (C 2)))", "(A (B 1) (D (C 2)))");
            }
            catch (TsurgeonRuntimeException)
            {
                differentParentsRejected = true;
            }
            if (!differentParentsRejected)
            {
                throw new AssertionError("Expected a runtime failure");
            }
        }
Esempio n. 16
0
        //Delete sentence-initial punctuation
        //Delete sentence final punctuation that is preceded by punctuation (first time)
        //Delete sentence final punctuation that is preceded by punctuation (second time)
        //Convert remaining sentence-final punctuation to either . if it is not [.!?]
        //Delete medial, sentence-final punctuation
        //Now move the sentence-final mark under SENT
        //For those trees that lack a sentence-final punc, add one.
        //Finally, delete these punctuation marks, which I can't seem to kill otherwise...
        //A bad MWADV tree in the training set
        // Not sure why this got a label of X.  Similar trees suggest it
        // should be A instead
        // This also seems to be mislabeled
        /// <summary>
        /// Reads trees from the file named on the command line, discards the ones matching the two
        /// "bad tree" patterns, runs the rest through <c>FTBCorrector</c> and prints them to standard output.
        /// </summary>
        /// <remarks>
        /// The number of trees actually printed is reported on standard error. I/O and pattern-parse
        /// exceptions are printed rather than propagated.
        /// </remarks>
        /// <param name="args">Exactly one element: the path of the tree file to read (UTF-8).</param>
        public static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                log.Info("Usage: java " + typeof(Edu.Stanford.Nlp.International.French.Pipeline.FTBCorrector).FullName + " filename\n");
                System.Environment.Exit(-1);
            }
            ITreeTransformer tt = new Edu.Stanford.Nlp.International.French.Pipeline.FTBCorrector();
            File             f  = new File(args[0]);

            try
            {
                //These bad trees in the Candito training set should be thrown out:
                //  (ROOT (SENT (" ") (. .)))
                //  (ROOT (SENT (. .)))
                TregexPattern  pBadTree  = TregexPattern.Compile("@SENT <: @PUNC");
                TregexPattern  pBadTree2 = TregexPattern.Compile("@SENT <1 @PUNC <2 @PUNC !<3 __");
                BufferedReader br        = new BufferedReader(new InputStreamReader(new FileInputStream(f), "UTF-8"));
                ITreeReader    tr        = null;
                int nWritten = 0;
                try
                {
                    ITreeReaderFactory trf = new FrenchTreeReaderFactory();
                    tr = trf.NewTreeReader(br);
                    Tree t;
                    while ((t = tr.ReadTree()) != null)
                    {
                        TregexMatcher m  = pBadTree.Matcher(t);
                        TregexMatcher m2 = pBadTree2.Matcher(t);
                        if (m.Find() || m2.Find())
                        {
                            log.Info("Discarding tree: " + t.ToString());
                        }
                        else
                        {
                            Tree fixedT = tt.TransformTree(t);
                            System.Console.Out.WriteLine(fixedT.ToString());
                            // Count only trees that were really written, so the summary below is accurate.
                            nWritten++;
                        }
                    }
                }
                finally
                {
                    // Close the input even when reading or transforming a tree fails. A failure of
                    // Close() itself is still handled by the catch clauses below.
                    if (tr != null)
                    {
                        tr.Close();
                    }
                    else
                    {
                        // The tree reader was never created, so the underlying reader is closed directly.
                        br.Close();
                    }
                }
                System.Console.Error.WriteLine("Wrote {0} trees", nWritten);
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (TregexParseException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
Esempio n. 17
0
        /**
         * Parses a tsurgeon script text input and compiles a tregex pattern and a list
         * of tsurgeon operations into a pair.
         *
         * @param reader Reader to read patterns from
         * @return A pair of a tregex and tsurgeon pattern read from a file, or <code>null</code>
         *    when the operations in the Reader have been exhausted
         * @throws IOException If any IO problem
         */
        /*public static Tuple<TregexPattern, TsurgeonPattern> getOperationFromReader(BufferedReader reader, TregexPatternCompiler compiler) /*throws IOException#1# {
         * string patternString = getTregexPatternFromReader(reader);
         * if ("".equals(patternString)) {
         * return null;
         * }
         * TregexPattern matchPattern = compiler.compile(patternString);
         *
         * TsurgeonPattern collectedPattern = getTsurgeonOperationsFromReader(reader);
         * return new Pair<TregexPattern,TsurgeonPattern>(matchPattern,collectedPattern);
         * }*/

        /**
         * Assumes that we are at the beginning of a tsurgeon script file and gets the string for the
         * tregex pattern leading the file
         * @return tregex pattern string
         */
        /*public static string getTregexPatternFromReader(BufferedReader reader) throws IOException {
         * StringBuilder matchString = new StringBuilder();
         * for (string thisLine; (thisLine = reader.readLine()) != null; ) {
         * if (matchString.length() > 0 && emptyLinePattern.matcher(thisLine).matches()) {
         * // A blank line after getting some real content (not just comments or nothing)
         * break;
         * }
         * Matcher m = commentPattern.matcher(thisLine);
         * if (m.matches()) {
         * // delete it
         * thisLine = m.replaceFirst("");
         * }
         * if ( ! emptyLinePattern.matcher(thisLine).matches()) {
         * matchString.append(thisLine);
         * }
         * }
         * return matchString.ToString();
         * }*/

        /**
         * Assumes the given reader has only tsurgeon operations (not a tregex pattern), and parses
         * these out, collecting them into one operation.  Stops on a whitespace line.
         *
         * @throws IOException
         */
        /*public static TsurgeonPattern getTsurgeonOperationsFromReader(BufferedReader reader) throws IOException {
         * List<TsurgeonPattern> operations = new ArrayList<TsurgeonPattern>();
         * for (string thisLine; (thisLine = reader.readLine()) != null; ) {
         * if (emptyLinePattern.matcher(thisLine).matches()) {
         * break;
         * }
         * thisLine = removeComments(thisLine);
         * if (emptyLinePattern.matcher(thisLine).matches()) {
         * continue;
         * }
         * operations.add(parseOperation(thisLine));
         * }
         *
         * if (operations.size() == 0)
         * throw new TsurgeonParseException("No Tsurgeon operation provided.");
         *
         * return collectOperations(operations);
         * }*/


        /*private static string removeComments(string line) {
         *  Matcher m = commentPattern.matcher(line);
         *  line = m.replaceFirst("");
         *  Matcher m1 = escapedCommentCharacterPattern.matcher(line);
         *  line = m1.replaceAll(commentIntroducingCharacter);
         *  return line;
         * }*/


        /**
         * Assumes the given reader has only tsurgeon operations (not a tregex pattern), and returns
         * them as a String, mirroring the way the strings appear in the file. This is helpful
         * for lazy evaluation of the operations, as in a GUI,
         * because you do not parse the operations on load.  Comments are still excised.
         * @throws IOException
         */
        /*public static string getTsurgeonTextFromReader(BufferedReader reader) throws IOException {
         * StringBuilder sb = new StringBuilder();
         * for (string thisLine; (thisLine = reader.readLine()) != null; ) {
         * thisLine = removeComments(thisLine);
         * if (emptyLinePattern.matcher(thisLine).matches()) {
         * continue;
         * }
         * sb.append(thisLine);
         * sb.append('\n');
         * }
         * return sb.ToString();
         * }*/

        /**
         * Parses a tsurgeon script file and compiles all operations in the file into a list
         * of pairs of tregex and tsurgeon patterns.
         *
         * @param filename file containing the tsurgeon script
         * @return A pair of a tregex and tsurgeon pattern read from a file
         * @throws IOException If there is any I/O problem
         */
        /*public static List<Pair<TregexPattern, TsurgeonPattern>> getOperationsFromFile(string filename, string encoding, TregexPatternCompiler compiler) throws IOException {
         * List<Pair<TregexPattern,TsurgeonPattern>> operations = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
         * BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(filename), encoding));
         * for ( ; ; ) {
         * Pair<TregexPattern, TsurgeonPattern> operation = getOperationFromReader(reader, compiler);
         * if (operation == null) {
         * break;
         * }
         * operations.add(operation);
         * }
         * reader.close();
         * return operations;
         * }*/

        /// <summary>
        /// Runs <c>ProcessPattern</c> over each tree of a list and collects the outcomes.
        /// </summary>
        /// <param name="matchPattern">The tregex pattern each tree is matched against.</param>
        /// <param name="p">The tsurgeon pattern handed to <c>ProcessPattern</c> for every tree.</param>
        /// <param name="inputTrees">The trees to process, visited in list order.</param>
        /// <returns>
        /// A new list holding, in input order, whatever <c>ProcessPattern</c> returned
        /// for each tree.
        /// </returns>
        public static List <Tree> ProcessPatternOnTrees(TregexPattern matchPattern, TsurgeonPattern p,
                                                        List <Tree> inputTrees)
        {
            var transformed = new List <Tree>();

            for (int i = 0; i < inputTrees.Count; i++)
            {
                Tree outcome = ProcessPattern(matchPattern, p, inputTrees[i]);
                transformed.Add(outcome);
            }

            return transformed;
        }
        /// <summary>
        /// Exercises the <c>coindex</c> operation on three named nodes and checks the
        /// index suffix that ends up on their labels.
        /// </summary>
        public virtual void TestCoindex()
        {
            TregexPattern   nestedPattern = TregexPattern.Compile("A=foo << B=bar << C=baz");
            TsurgeonPattern coindexOp     = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("coindex foo bar baz");

            // Simple chain: all three named nodes receive the "-1" suffix.
            RunTest(nestedPattern, coindexOp, "(A (B (C foo)))", "(A-1 (B-1 (C-1 foo)))");

            // The indexing does not happen a second time, since the labels are
            // changed by the first application.
            RunTest(nestedPattern, coindexOp, "(A (B foo) (C foo) (C bar))", "(A-1 (B-1 foo) (C-1 foo) (C bar))");

            // An existing "-1" label in the tree makes the operation index at 2 instead of 1.
            RunTest(nestedPattern, coindexOp, "(A (B foo) (C-1 bar) (C baz))", "(A-2 (B-2 foo) (C-1 bar) (C-2 baz))");
        }
Esempio n. 19
0
        /// <summary>
        /// Adds a <c>Mention</c> for every subtree of the sentence parse that matches
        /// <c>npOrPrpMentionPattern</c> and whose token span has not been recorded yet.
        /// </summary>
        /// <remarks>
        /// A match is skipped when its span is already in <paramref name="mentionSpanSet"/>,
        /// or when <c>InsideNE</c> reports it for <paramref name="namedEntitySpanSet"/> and the
        /// matched node's value does not start with "PRP". Tag and label prefixes are compared
        /// ordinally so the result does not depend on the current thread culture.
        /// </remarks>
        /// <param name="s">Sentence map supplying the tokens, parse tree and dependency graphs.</param>
        /// <param name="mentions">Receives each newly created mention.</param>
        /// <param name="mentionSpanSet">Spans already covered by a mention; every new span is added.</param>
        /// <param name="namedEntitySpanSet">
        /// Spans treated as named entities; a new multi-token mention whose tokens all carry a
        /// tag starting with "NNP" is added to it.
        /// </param>
        private static void ExtractNPorPRP(ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
        {
            IList <CoreLabel> sent = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            Tree tree = s.Get(typeof(TreeCoreAnnotations.TreeAnnotation));

            // The leaf index annotations read below are assigned here.
            tree.IndexLeaves();
            SemanticGraph basicDependency    = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            SemanticGraph enhancedDependency = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));

            if (enhancedDependency == null)
            {
                // No enhanced graph on this sentence: use the basic dependencies for both.
                enhancedDependency = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            }
            TregexPattern tgrepPattern = npOrPrpMentionPattern;
            TregexMatcher matcher      = tgrepPattern.Matcher(tree);

            while (matcher.Find())
            {
                Tree         t       = matcher.GetMatch();
                IList <Tree> mLeaves = t.GetLeaves();

                // Span is [beginIdx, endIdx) over sent; the "- 1" presumably converts a
                // 1-based leaf index to a 0-based token offset (TODO confirm against IndexLeaves).
                int beginIdx = ((CoreLabel)mLeaves[0].Label()).Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
                int endIdx   = ((CoreLabel)mLeaves[mLeaves.Count - 1].Label()).Get(typeof(CoreAnnotations.IndexAnnotation));

                // try not to have span that ends with ,
                if (",".Equals(sent[endIdx - 1].Word()))
                {
                    endIdx--;
                }

                IntPair mSpan = new IntPair(beginIdx, endIdx);

                if (mentionSpanSet.Contains(mSpan))
                {
                    continue;
                }
                // Spans inside a named entity are only kept for PRP* nodes.
                if (InsideNE(mSpan, namedEntitySpanSet) && !t.Value().StartsWith("PRP", System.StringComparison.Ordinal))
                {
                    continue;
                }

                int     dummyMentionId = -1;
                Mention m = new Mention(dummyMentionId, beginIdx, endIdx, sent, basicDependency, enhancedDependency, new List <CoreLabel>(sent.SubList(beginIdx, endIdx)), t);
                mentions.Add(m);
                mentionSpanSet.Add(mSpan);

                if (m.originalSpan.Count > 1)
                {
                    // A multi-token mention made up solely of NNP* tokens is recorded as a named entity.
                    bool isNE = true;
                    foreach (CoreLabel cl in m.originalSpan)
                    {
                        if (!cl.Tag().StartsWith("NNP", System.StringComparison.Ordinal))
                        {
                            isNE = false;
                            break; // one non-NNP token settles the answer
                        }
                    }
                    if (isNE)
                    {
                        namedEntitySpanSet.Add(mSpan);
                    }
                }
            }
        }
        /// <summary>
        /// Checks scripts made of several bracketed operations, including relabel
        /// operations whose replacement refers to other named nodes.
        /// </summary>
        public virtual void TestMultiplePatterns()
        {
            const string twoCInput = "(A (B foo) (C foo) (C bar))";

            TregexPattern   threeNodePattern = TregexPattern.Compile("A=foo < B=bar < C=baz");
            TsurgeonPattern relabelThenMove  = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("[relabel baz BAZ] [move baz >-1 bar]");

            RunTest(threeNodePattern, relabelThenMove, twoCInput, "(A (B foo (BAZ foo) (BAZ bar)))");

            // Same shape, but the new label is assembled from the bar and baz node labels.
            TsurgeonPattern regexRelabelThenMove = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("[relabel baz /^.*$/={bar}={baz}FOO/] [move baz >-1 bar]");

            RunTest(threeNodePattern, regexRelabelThenMove, twoCInput, "(A (B foo (BCFOO foo) (BCFOO bar)))");

            // This in particular was a problem until we required "/" to be escaped
            TregexPattern   fourNodePattern = TregexPattern.Compile("A=foo < B=bar < C=baz < D=biff");
            TsurgeonPattern doubleRelabel   = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("[relabel baz /^.*$/={bar}={baz}/] [relabel biff /^.*$/={bar}={biff}/]");

            RunTest(fourNodePattern, doubleRelabel, "(A (B foo) (C bar) (D baz))", "(A (B foo) (BC bar) (BD baz))");
        }
 /// <summary>Find syntactic pattern in a sentence by tregex</summary>
 /// <remarks>
 /// Compiles <paramref name="tregex"/> and delegates to the
 /// <c>FindTreePattern(Tree, TregexPattern, ICollection)</c> overload.
 /// </remarks>
 /// <param name="tree">The tree to search.</param>
 /// <param name="tregex">Tregex pattern source text to compile.</param>
 /// <param name="foundPairs">Collection handed to the pattern-based overload to receive results.</param>
 /// <exception cref="InvalidOperationException">
 /// Wraps any exception raised while compiling or matching; the original is kept
 /// as <c>InnerException</c>.
 /// </exception>
 private void FindTreePattern(Tree tree, string tregex, ICollection <Pair <int, int> > foundPairs)
 {
     try
     {
         TregexPattern tgrepPattern = TregexPattern.Compile(tregex);
         FindTreePattern(tree, tgrepPattern, foundPairs);
     }
     catch (Exception e)
     {
         // shouldn't happen....
         // System.Exception has no constructor that takes only an Exception, so wrap
         // with a message and keep the cause as the inner exception.
         throw new InvalidOperationException("Failed to find tregex pattern: " + tregex, e);
     }
 }
Esempio n. 22
0
        /// <summary>
        /// Parses a tsurgeon script text input and compiles a tregex pattern and a list
        /// of tsurgeon operations into a pair.
        /// </summary>
        /// <remarks>
        /// The tregex text is read first via <c>GetTregexPatternFromReader</c>; only when it is
        /// non-empty are the tsurgeon operations read via <c>GetTsurgeonOperationsFromReader</c>.
        /// </remarks>
        /// <param name="reader">Reader to read patterns from</param>
        /// <param name="compiler">Compiler used to turn the tregex text into a <c>TregexPattern</c>.</param>
        /// <returns>
        /// A pair of a tregex and tsurgeon pattern read from the reader, or
        /// <see langword="null"/>
        /// when no tregex pattern text could be read (the operations present in the
        /// Reader have been exhausted)
        /// </returns>
        /// <exception cref="System.IO.IOException">If any IO problem</exception>
        public static Pair <TregexPattern, TsurgeonPattern> GetOperationFromReader(BufferedReader reader, TregexPatternCompiler compiler)
        {
            string patternString = GetTregexPatternFromReader(reader);

            // log.info("Read tregex pattern: " + patternString);
            // Null-safe BCL check; a null or empty pattern text both mean "nothing left".
            if (string.IsNullOrEmpty(patternString))
            {
                return(null);
            }
            TregexPattern   matchPattern     = compiler.Compile(patternString);
            TsurgeonPattern collectedPattern = GetTsurgeonOperationsFromReader(reader);

            return(new Pair <TregexPattern, TsurgeonPattern>(matchPattern, collectedPattern));
        }
        /// <summary>Exercises the <c>excise</c> operation on nested and root-level matches.</summary>
        public virtual void TestExcise()
        {
            // TODO: needs more meat to this test
            TregexPattern   repeatedNodePattern = TregexPattern.Compile("__=repeat <: (~repeat < __)");
            TsurgeonPattern exciseRepeat        = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("excise repeat repeat");

            RunTest(repeatedNodePattern, exciseRepeat, "(A (B (B foo)))", "(A (B foo))");

            // Test that if a deleted root is excised down to a level that has
            // just one child, that one child gets returned as the new tree
            RunTest(repeatedNodePattern, exciseRepeat, "(B (B foo))", "(B foo)");

            // Excising a root with two children: the expected result is null.
            TregexPattern   rootPattern = TregexPattern.Compile("A=root");
            TsurgeonPattern exciseRoot  = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("excise root root");

            RunTest(rootPattern, exciseRoot, "(A (B bar) (C foo))", null);
        }
        /// <summary>
        /// Checks that a node named inside an adjoined auxiliary tree can be the target
        /// of a later <c>insert</c> in the same script.
        /// </summary>
        public virtual void TestAdjoinWithNamedNode()
        {
            const string barNotUnderD = "B=bar !>> D";
            const string input        = "(A (B C))";

            // Insert as the left sister of the named internal node.
            TsurgeonPattern script  = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("[adjoinF (D (E=target foot@)) bar] " + "[insert (G 1) $+ target]");
            TregexPattern   pattern = TregexPattern.Compile(barNotUnderD);

            RunTest(pattern, script, input, "(A (D (G 1) (E (B C))))");

            // Insert as the first child of the named internal node.
            script  = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("[adjoinF (D (E=target foot@)) bar] " + "[insert (G 1) >0 target]");
            pattern = TregexPattern.Compile(barNotUnderD);
            RunTest(pattern, script, input, "(A (D (E (G 1) (B C))))");

            // Named leaf
            script  = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("[adjoinF (D (E foot@) F=target) bar] " + "[insert (G 1) >0 target]");
            pattern = TregexPattern.Compile(barNotUnderD);
            RunTest(pattern, script, input, "(A (D (E (B C)) (F (G 1))))");
        }
        /// <summary>
        /// Checks that a node named inside an inserted tree can be the target of a
        /// later <c>insert</c> in the same script.
        /// </summary>
        public virtual void TestInsertWithNamedNode()
        {
            const string barWithoutDBefore = "B=bar !$- D";
            const string input             = "(A (B C))";

            // Second insert goes in as the first child of the named node.
            TsurgeonPattern script  = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("[insert (D=target E) $+ bar] " + "[insert (F 1) >0 target]");
            TregexPattern   pattern = TregexPattern.Compile(barWithoutDBefore);

            RunTest(pattern, script, input, "(A (D (F 1) E) (B C))");

            // Second insert goes in as the left sister of the named node.
            script  = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("[insert (D=target E) $+ bar] " + "[insert (F 1) $+ target]");
            pattern = TregexPattern.Compile(barWithoutDBefore);
            RunTest(pattern, script, input, "(A (F 1) (D E) (B C))");

            // Named leaf
            script  = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("[insert (D E=target) $+ bar] " + "[insert (F 1) $+ target]");
            pattern = TregexPattern.Compile(barWithoutDBefore);
            RunTest(pattern, script, input, "(A (D (F 1) E) (B C))");
        }
        /// <summary>
        /// Exercises the <c>relabel</c> operation: plain labels, regex replacement with
        /// <c>$1</c> groups, <c>={name}</c> node references and <c>%{name}</c> variable references.
        /// </summary>
        public virtual void TestRelabel()
        {
            const string curlewTree = "(curlew (avocet 0))";
            const string fooFooTree = "(foofoo (curlew 0) (avocet 1))";

            // Replacement mixing a captured group, literal text and a node reference.
            TregexPattern   pattern = TregexPattern.Compile("/^((?!_head).)*$/=preTerminal < (__=terminal !< __)");
            TsurgeonPattern relabel = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel preTerminal /^(.*)$/$1_head=={terminal}/");

            RunTest(pattern, relabel, "($ $)", "($_head=$ $)");

            // Plain new label.
            relabel = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel foo blah");
            pattern = TregexPattern.Compile("B=foo");
            RunTest(pattern, relabel, "(A (B 0) (C 1))", "(A (blah 0) (C 1))");
            RunTest(pattern, relabel, "(A (B 0) (B 1))", "(A (blah 0) (blah 1))");

            // Escaped slash as the new label.
            relabel = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel foo /\\//");
            pattern = TregexPattern.Compile("B=foo");
            RunTest(pattern, relabel, "(A (B 0) (C 1))", "(A (/ 0) (C 1))");
            RunTest(pattern, relabel, "(A (B 0) (B 1))", "(A (/ 0) (/ 1))");

            // Regex with a capture group used as the whole replacement.
            relabel = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel foo /.*(voc.*)/$1/");
            pattern = TregexPattern.Compile("/^a.*t/=foo");
            RunTest(pattern, relabel, "(A (avocet 0) (C 1))", "(A (vocet 0) (C 1))");
            RunTest(pattern, relabel, "(A (avocet 0) (advocate 1))", "(A (vocet 0) (vocate 1))");

            // Node references, variable references and groups, alone and combined.
            pattern = TregexPattern.Compile("curlew=baz < /^a(.*)t/#1%bar=foo");
            relabel = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel baz /cu(rle)w/={foo}/");
            RunTest(pattern, relabel, curlewTree, "(avocet (avocet 0))");
            relabel = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel baz /cu(rle)w/%{bar}/");
            RunTest(pattern, relabel, curlewTree, "(voce (avocet 0))");
            relabel = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel baz /cu(rle)w/$1/");
            RunTest(pattern, relabel, curlewTree, "(rle (avocet 0))");
            relabel = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel baz /cu(rle)w/$1={foo}/");
            RunTest(pattern, relabel, curlewTree, "(rleavocet (avocet 0))");
            relabel = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel baz /cu(rle)w/%{bar}$1={foo}/");
            RunTest(pattern, relabel, curlewTree, "(vocerleavocet (avocet 0))");

            // Two node references concatenated.
            pattern = TregexPattern.Compile("A=baz < /curlew.*/=foo < /avocet.*/=bar");
            relabel = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel baz /^.*$/={foo}={bar}/");
            RunTest(pattern, relabel, "(A (curlewfoo 0) (avocetzzz 1))", "(curlewfooavocetzzz (curlewfoo 0) (avocetzzz 1))");

            // With braces the references are expanded; without braces they stay literal text.
            pattern = TregexPattern.Compile("A=baz < /curle.*/=foo < /avo(.*)/#1%bar");
            relabel = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel baz /^(.*)$/={foo}$1%{bar}/");
            RunTest(pattern, relabel, "(A (curlew 0) (avocet 1))", "(curlewAcet (curlew 0) (avocet 1))");
            relabel = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel baz /^(.*)$/=foo$1%bar/");
            RunTest(pattern, relabel, "(A (curlew 0) (avocet 1))", "(=fooA%bar (curlew 0) (avocet 1))");

            // Every occurrence of the regex within the label is replaced.
            pattern = TregexPattern.Compile("/foo/=foo");
            relabel = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel foo /foo/bar/");
            RunTest(pattern, relabel, fooFooTree, "(barbar (curlew 0) (avocet 1))");
            pattern = TregexPattern.Compile("/foo/=foo < /cur.*/=bar");
            relabel = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel foo /foo/={bar}/");
            RunTest(pattern, relabel, fooFooTree, "(curlewcurlew (curlew 0) (avocet 1))");

            // Regex running to the end of the label, with a group.
            pattern = TregexPattern.Compile("/^foo(.*)$/=foo");
            relabel = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel foo /foo(.*)$/bar$1/");
            RunTest(pattern, relabel, fooFooTree, "(barfoo (curlew 0) (avocet 1))");
        }
        /// <summary>
        /// Exercises the <c>prune</c> operation against the same four trees, first
        /// targeting B nodes and then C nodes.
        /// </summary>
        public virtual void TestPrune()
        {
            const string onlyBranch    = "(A (B (C 1)))";
            const string fooAndB       = "(A (foo 1) (B (C 1)))";
            const string twoBs         = "(A (B 1) (B (C 1)))";
            const string fooAndBarOnly = "(A (foo 1) (bar (C 1)))";

            TsurgeonPattern pruneBob = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("prune bob");

            // Target: nodes labelled B.
            TregexPattern bPattern = TregexPattern.Compile("B=bob");

            RunTest(bPattern, pruneBob, onlyBranch, null);
            RunTest(bPattern, pruneBob, fooAndB, "(A (foo 1))");
            RunTest(bPattern, pruneBob, twoBs, null);
            RunTest(bPattern, pruneBob, fooAndBarOnly, "(A (foo 1) (bar (C 1)))");

            // Target: nodes labelled C.
            TregexPattern cPattern = TregexPattern.Compile("C=bob");

            RunTest(cPattern, pruneBob, onlyBranch, null);
            RunTest(cPattern, pruneBob, fooAndB, "(A (foo 1))");
            RunTest(cPattern, pruneBob, twoBs, "(A (B 1))");
            RunTest(cPattern, pruneBob, fooAndBarOnly, "(A (foo 1))");
        }
 /// <summary>
 /// Builds a normalizer on top of an <c>ArabicTreebankLanguagePack</c>, storing the
 /// option flags and compiling the patterns the instance keeps in its fields.
 /// </summary>
 /// <param name="retainNPTmp">Stored in the <c>retainNPTmp</c> field.</param>
 /// <param name="markPRDverb">Stored in the <c>markPRDverb</c> field.</param>
 /// <param name="changeNoLabels">Stored in the <c>changeNoLabels</c> field.</param>
 /// <param name="retainNPSbj">Stored in the <c>retainNPSbj</c> field.</param>
 /// <param name="retainPPClr">Stored in the <c>retainPPClr</c> field.</param>
 public ArabicTreeNormalizer(bool retainNPTmp, bool markPRDverb, bool changeNoLabels, bool retainNPSbj, bool retainPPClr)
     : base(new ArabicTreebankLanguagePack())
 {
     // Option flags, copied as given (in parameter order).
     this.retainNPTmp    = retainNPTmp;
     this.markPRDverb    = markPRDverb;
     this.changeNoLabels = changeNoLabels;
     this.retainNPSbj    = retainNPSbj;
     this.retainPPClr    = retainPPClr;

     // Root label comes from the language pack's start symbol.
     rootLabel = tlp.StartSymbol();

     // Patterns used for -PRD handling.
     prdVerbPattern = TregexPattern.Compile("/^V[^P]/ > VP $ /-PRD$/=prd");
     prdPattern     = Pattern.Compile("^[A-Z]+-PRD");

     //Marks NP subjects that *do not* occur in verb-initial clauses
     npSbjPattern = TregexPattern.Compile("/^NP-SBJ/ !> @VP");

     emptyFilter = new ArabicTreeNormalizer.ArabicEmptyFilter();
 }
Esempio n. 29
0
        /// <summary>
        /// Repeatedly matches a tregex pattern against a tree and applies a tsurgeon
        /// pattern to each match.
        /// </summary>
        /// <remarks>
        /// After every evaluation a fresh matcher is created on the resulting tree, so
        /// matching restarts from that result. The loop ends when the pattern no longer
        /// matches or when an evaluation yields <see langword="null"/>.
        /// </remarks>
        /// <param name="matchPattern">The tregex pattern to match against the tree.</param>
        /// <param name="p">The tsurgeon pattern whose matcher performs the surgery.</param>
        /// <param name="t">The tree to match against and perform surgery on.</param>
        /// <returns>
        /// The tree produced by the last evaluation, the input tree when nothing matched,
        /// or <see langword="null"/> when an evaluation returned <see langword="null"/>.
        /// </returns>
        public static Tree ProcessPattern(TregexPattern matchPattern, TsurgeonPattern p, Tree t)
        {
            TregexMatcher   finder  = matchPattern.Matcher(t);
            TsurgeonMatcher surgeon = p.Matcher();
            Tree            current = t;

            while (true)
            {
                if (!finder.Find())
                {
                    return current;
                }

                current = surgeon.Evaluate(current, finder);
                if (current == null)
                {
                    return null;
                }

                // Restart matching on the tree that came out of the surgery.
                finder = matchPattern.Matcher(current);
            }
        }
        /// <summary>
        /// Runs an insert step followed by a delete step, as a two-entry surgery list,
        /// over a single sentence tree.
        /// </summary>
        public virtual void TestInsertDelete()
        {
            // The same bug as the Replace bug, but for a sequence of
            // insert/delete operations
            string before = "( (S (FILLER (NP-SBJ-1 (NNP Koito))) (VP (VBZ has) (VP (VBN refused) (S (NP-SBJ (-NONE- *-1)) (VP (TO to) (VP (VB grant) (NP (NNP Mr.) (NNP Pickens)) (NP (NP (NNS seats)) (PP-LOC (IN on) (NP (PRP$ its) (NN board))))))) (, ,) (S-ADV (NP-SBJ (-NONE- *-1)) (VP (VBG asserting) (SBAR (-NONE- 0) (S (NP-SBJ (PRP he)) (VP (VBZ is) (NP-PRD (NP (DT a) (NN greenmailer)) (VP (VBG trying) (S (NP-SBJ (-NONE- *)) (VP (TO to) (VP (VB pressure) (NP (NP (NNP Koito) (POS 's)) (JJ other) (NNS shareholders)) (PP-CLR (IN into) (S-NOM (NP-SBJ (-NONE- *)) (VP (VBG buying) (NP (PRP him)) (PRT (RP out)) (PP-MNR (IN at) (NP (DT a) (NN profit)))))))))))))))))) (. .)))";
            string after  = "( (S (FILLER (NP-SBJ-1 (NNP Koito))) (VP (VBZ has) (VP (VBN refused) (S (NP-SBJ (NP-SBJ-1 (NNP Koito))) (VP (TO to) (VP (VB grant) (NP (NNP Mr.) (NNP Pickens)) (NP (NP (NNS seats)) (PP-LOC (IN on) (NP (PRP$ its) (NN board))))))) (, ,) (S-ADV (NP-SBJ (NP-SBJ-1 (NNP Koito))) (VP (VBG asserting) (SBAR (-NONE- 0) (S (NP-SBJ (PRP he)) (VP (VBZ is) (NP-PRD (NP (DT a) (NN greenmailer)) (VP (VBG trying) (S (NP-SBJ (-NONE- *)) (VP (TO to) (VP (VB pressure) (NP (NP (NNP Koito) (POS 's)) (JJ other) (NNS shareholders)) (PP-CLR (IN into) (S-NOM (NP-SBJ (-NONE- *)) (VP (VBG buying) (NP (PRP him)) (PRT (RP out)) (PP-MNR (IN at) (NP (DT a) (NN profit)))))))))))))))))) (. .)))";

            IList <Pair <TregexPattern, TsurgeonPattern> > steps = new List <Pair <TregexPattern, TsurgeonPattern> >();

            // Step 1: insert the src node before each matching dest node.
            TregexPattern   insertPattern = TregexPattern.Compile("(/-([0-9]+)$/#1%i=src > /^FILLER$/) : (/^-NONE-/=dest <: /-([0-9]+)$/#1%i !$ ~src)");
            TsurgeonPattern insertOp      = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("insert src $+ dest");

            steps.Add(new Pair <TregexPattern, TsurgeonPattern>(insertPattern, insertOp));

            // Step 2: delete the dest nodes.
            TregexPattern   deletePattern = TregexPattern.Compile("(/-([0-9]+)$/#1%i=src > /^FILLER$/) : (/^-NONE-/=dest <: /-([0-9]+)$/#1%i)");
            TsurgeonPattern deleteOp      = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("delete dest");

            steps.Add(new Pair <TregexPattern, TsurgeonPattern>(deletePattern, deleteOp));

            RunTest(steps, before, after);
        }
Esempio n. 31
0
        /**
        * Parses a tsurgeon script text input and compiles a tregex pattern and a list
        * of tsurgeon operations into a pair.
        *
        * @param reader Reader to read patterns from
        * @return A pair of a tregex and tsurgeon pattern read from a file, or <code>null</code>
        *    when the operations in the Reader have been exhausted
        * @throws IOException If any IO problem
        */
            /*public static Tuple<TregexPattern, TsurgeonPattern> getOperationFromReader(BufferedReader reader, TregexPatternCompiler compiler) /*throws IOException#1# {
        string patternString = getTregexPatternFromReader(reader);
        if ("".equals(patternString)) {
            return null;
        }
        TregexPattern matchPattern = compiler.compile(patternString);

        TsurgeonPattern collectedPattern = getTsurgeonOperationsFromReader(reader);
        return new Pair<TregexPattern,TsurgeonPattern>(matchPattern,collectedPattern);
        }*/

        /**
        * Assumes that we are at the beginning of a tsurgeon script file and gets the string for the
        * tregex pattern leading the file
        * @return tregex pattern string
        */
            /*public static string getTregexPatternFromReader(BufferedReader reader) throws IOException {
        StringBuilder matchString = new StringBuilder();
        for (string thisLine; (thisLine = reader.readLine()) != null; ) {
            if (matchString.length() > 0 && emptyLinePattern.matcher(thisLine).matches()) {
            // A blank line after getting some real content (not just comments or nothing)
            break;
            }
            Matcher m = commentPattern.matcher(thisLine);
            if (m.matches()) {
            // delete it
            thisLine = m.replaceFirst("");
            }
            if ( ! emptyLinePattern.matcher(thisLine).matches()) {
            matchString.append(thisLine);
            }
        }
        return matchString.ToString();
        }*/

        /**
        * Assumes the given reader has only tsurgeon operations (not a tregex pattern), and parses
        * these out, collecting them into one operation.  Stops on a whitespace line.
        *
        * @throws IOException
        */
            /*public static TsurgeonPattern getTsurgeonOperationsFromReader(BufferedReader reader) throws IOException {
        List<TsurgeonPattern> operations = new ArrayList<TsurgeonPattern>();
        for (string thisLine; (thisLine = reader.readLine()) != null; ) {
            if (emptyLinePattern.matcher(thisLine).matches()) {
            break;
            }
            thisLine = removeComments(thisLine);
            if (emptyLinePattern.matcher(thisLine).matches()) {
            continue;
            }
            operations.add(parseOperation(thisLine));
        }

        if (operations.size() == 0)
            throw new TsurgeonParseException("No Tsurgeon operation provided.");

        return collectOperations(operations);
        }*/


        /*private static string removeComments(string line) {
            Matcher m = commentPattern.matcher(line);
            line = m.replaceFirst("");
            Matcher m1 = escapedCommentCharacterPattern.matcher(line);
            line = m1.replaceAll(commentIntroducingCharacter);
            return line;
          }*/


        /**
       * Assumes the given reader has only tsurgeon operations (not a tregex pattern), and returns
       * them as a String, mirroring the way the strings appear in the file. This is helpful
       * for lazy evaluation of the operations, as in a GUI,
       * because you do not parse the operations on load.  Comments are still excised.
       * @throws IOException
       */
            /*public static string getTsurgeonTextFromReader(BufferedReader reader) throws IOException {
        StringBuilder sb = new StringBuilder();
        for (string thisLine; (thisLine = reader.readLine()) != null; ) {
          thisLine = removeComments(thisLine);
          if (emptyLinePattern.matcher(thisLine).matches()) {
            continue;
          }
          sb.append(thisLine);
          sb.append('\n');
        }
        return sb.ToString();
      }*/

        /**
       * Parses a tsurgeon script file and compiles all operations in the file into a list
       * of pairs of tregex and tsurgeon patterns.
       *
       * @param filename file containing the tsurgeon script
       * @return A pair of a tregex and tsurgeon pattern read from a file
       * @throws IOException If there is any I/O problem
       */
            /*public static List<Pair<TregexPattern, TsurgeonPattern>> getOperationsFromFile(string filename, string encoding, TregexPatternCompiler compiler) throws IOException {
        List<Pair<TregexPattern,TsurgeonPattern>> operations = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(filename), encoding));
        for ( ; ; ) {
          Pair<TregexPattern, TsurgeonPattern> operation = getOperationFromReader(reader, compiler);
          if (operation == null) {
            break;
          }
          operations.add(operation);
        }
        reader.close();
        return operations;
      }*/

        /// <summary>
        /// Applies <c>ProcessPattern</c> to each tree in a list.
        /// </summary>
        /// <param name="matchPattern">The tregex pattern each tree is matched against.</param>
        /// <param name="p">The tsurgeon pattern passed to <c>ProcessPattern</c> for every tree.</param>
        /// <param name="inputTrees">The trees to process, enumerated in list order.</param>
        /// <returns>
        /// A new list with one entry per input tree, in the same order, holding the value
        /// <c>ProcessPattern</c> returned for it.
        /// </returns>
        public static List<Tree> ProcessPatternOnTrees(TregexPattern matchPattern, TsurgeonPattern p,
            List<Tree> inputTrees)
        {
            var outputs = new List<Tree>();
            using (List<Tree>.Enumerator cursor = inputTrees.GetEnumerator())
            {
                while (cursor.MoveNext())
                {
                    outputs.Add(ProcessPattern(matchPattern, p, cursor.Current));
                }
            }
            return outputs;
        }
Esempio n. 32
0
 /// <summary>
 /// Matches a tregex pattern against a tree and, for as long as it keeps matching,
 /// applies the tsurgeon pattern to the match.
 /// </summary>
 /// <remarks>
 /// A new matcher is built on the tree returned by each evaluation, so the search
 /// restarts from that result. Processing stops when no match is found or when an
 /// evaluation returns <c>null</c>.
 /// </remarks>
 /// <param name="matchPattern">The tregex pattern to match against the tree.</param>
 /// <param name="p">The tsurgeon pattern whose matcher performs the surgery.</param>
 /// <param name="t">The tree to match against and perform surgery on.</param>
 /// <returns>
 /// The tree from the last evaluation, the input tree if nothing matched, or
 /// <c>null</c> if an evaluation returned <c>null</c>.
 /// </returns>
 public static Tree ProcessPattern(TregexPattern matchPattern, TsurgeonPattern p, Tree t)
 {
     TregexMatcher finder = matchPattern.Matcher(t);
     TsurgeonMatcher surgeon = p.GetMatcher();
     Tree current = t;
     for (;;)
     {
         if (!finder.Find())
         {
             return current;
         }
         current = surgeon.Evaluate(current, finder);
         if (current == null)
         {
             return null;
         }
         // Restart matching on the tree that came out of the surgery.
         finder = matchPattern.Matcher(current);
     }
 }