public virtual void TestInsert()
        {
            // Every case in this test starts from the same two-child tree.
            const string input = "(A (B 0) (C 1))";

            // "$+": the new subtree ends up immediately to the left of the matched node.
            TsurgeonPattern operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("insert (D (E 6)) $+ bar");
            TregexPattern   pattern   = TregexPattern.Compile("B=bar !$ D");
            RunTest(pattern, operation, input, "(A (D (E 6)) (B 0) (C 1))");

            // "$-": the new subtree ends up immediately to the right of the matched node.
            operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("insert (D (E 6)) $- bar");
            RunTest(pattern, operation, input, "(A (B 0) (D (E 6)) (C 1))");

            // ">0": the new subtree becomes the first child of the matched node.
            operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("insert (D (E 6)) >0 bar");
            pattern   = TregexPattern.Compile("B=bar !<D");
            RunTest(pattern, operation, input, "(A (B (D (E 6)) 0) (C 1))");

            // Inserting a named node: the expected tree contains C both under B and in its old place.
            operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("insert foo >0 bar");
            pattern   = TregexPattern.Compile("B=bar !<C $C=foo");
            RunTest(pattern, operation, input, "(A (B (C 1) 0) (C 1))");

            // An unescaped "=name" suffix is treated as a node name and cut off the label.
            operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("insert (D (E=blah 6)) >0 bar");
            pattern   = TregexPattern.Compile("B=bar !<D");
            RunTest(pattern, operation, input, "(A (B (D (E 6)) 0) (C 1))");

            // An escaped "=" is kept (unescaped) as part of the label, so nothing is cut off.
            operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("insert (D (E\\=blah 6)) >0 bar");
            pattern   = TregexPattern.Compile("B=bar !<D");
            RunTest(pattern, operation, input, "(A (B (D (E=blah 6)) 0) (C 1))");

            // An escaped backslash followed by "=": the name is cut off again and the
            // label of the new node ends with a backslash.
            operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("insert (D (E\\\\=blah 6)) >0 bar");
            pattern   = TregexPattern.Compile("B=bar !<D");
            RunTest(pattern, operation, input, "(A (B (D (E\\ 6)) 0) (C 1))");
        }
        public virtual void TestReplaceTree()
        {
            const string twoMatches = "(A (B 0) (B 1) (C 2))";

            // Replace every matched node with a single literal subtree.
            TsurgeonPattern operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("replace foo (BAR 1)");
            TregexPattern   pattern   = TregexPattern.Compile("B=foo");
            RunTest(pattern, operation, twoMatches, "(A (BAR 1) (BAR 1) (C 2))");

            // A single-tree replacement is allowed even when the match is the root.
            RunTest(pattern, operation, "(B (C 1))", "(BAR 1)");

            // Replace every matched node with two literal subtrees.
            operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("replace foo (BAR 1) (BAZ 2)");
            RunTest(pattern, operation, twoMatches, "(A (BAR 1) (BAZ 2) (BAR 1) (BAZ 2) (C 2))");

            try
            {
                RunTest(pattern, operation, "(B 0)", "(B 0)");
                throw new Exception("Expected a failure");
            }
            catch (TsurgeonRuntimeException)
            {
                // Good: replacing the root node with two nodes is expected to fail.
            }

            // Numbers can work while words do not if the tsurgeon parser is
            // incorrect, so also replace with a word leaf.
            operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("replace foo (BAR blah)");
            pattern   = TregexPattern.Compile("B=foo");
            RunTest(pattern, operation, twoMatches, "(A (BAR blah) (BAR blah) (C 2))");
        }
        public virtual void TestForeign()
        {
            // Non-ASCII characters must survive both the tregex and the tsurgeon parser.
            TregexPattern   pattern   = TregexPattern.Compile("atentát=test");
            TsurgeonPattern operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel test perform_atentát");
            const string    input     = "(foo atentát)";
            const string    expected  = "(foo perform_atentát)";

            RunTest(pattern, operation, input, expected);
        }
Beispiel #4
0
 /// <summary>
 /// Builds a relabel operation for <paramref name="child"/>.
 /// </summary>
 /// <remarks>
 /// When <paramref name="newLabel"/> matches <c>substPattern</c> the node works in
 /// <c>Regex</c> mode: group 1 is compiled as the label regex and group 2 is the
 /// replacement text, pre-split into pieces.  Otherwise it works in <c>Fixed</c>
 /// mode with a literal new label.  The patterns themselves are declared outside
 /// this constructor.
 /// </remarks>
 /// <param name="child">The operation child handed to the base constructor.</param>
 /// <param name="newLabel">Either a substitution expression or the new label.</param>
 public RelabelNode(TsurgeonPattern child, string newLabel)
     : base("relabel", new TsurgeonPattern[] { child })
 {
     Java.Util.Regex.Matcher substitution = substPattern.Matcher(newLabel);
     if (!substitution.Matches())
     {
         // Fixed mode: the label is used literally.
         mode = RelabelNode.RelabelMode.Fixed;
         Java.Util.Regex.Matcher slashed = regexPattern.Matcher(newLabel);
         if (!slashed.Matches())
         {
             // Plain node name to relabel to.
             this.newLabel = newLabel;
         }
         else
         {
             // Fixed relabel written between regex slashes: group 1 still
             // contains the escape backslashes, which are removed here.
             string escapedLabel = slashed.Group(1);
             this.newLabel = RemoveEscapeSlashes(escapedLabel);
         }
         this.replacementString = null;
         this.replacementPieces = null;
         this.labelRegex        = null;
         return;
     }

     // Regex mode: group 1 is the pattern to match, group 2 the replacement text.
     mode                   = RelabelNode.RelabelMode.Regex;
     this.labelRegex        = Pattern.Compile(substitution.Group(1));
     this.replacementString = substitution.Group(2);
     replacementPieces      = new List <string>();

     // Cut the replacement text into alternating pieces: text between matches of
     // oneGeneralReplacementPattern and the matches themselves.
     Java.Util.Regex.Matcher pieceMatcher = oneGeneralReplacementPattern.Matcher(substitution.Group(2));
     int consumed = 0;
     while (pieceMatcher.Find())
     {
         int matchStart = pieceMatcher.Start();
         if (matchStart > consumed)
         {
             replacementPieces.Add(Sharpen.Runtime.Substring(replacementString, consumed, matchStart));
         }
         consumed = pieceMatcher.End();
         string piece = pieceMatcher.Group();
         // Empty matches contribute no piece.
         if (!piece.Equals(string.Empty))
         {
             replacementPieces.Add(piece);
         }
     }
     // Whatever follows the last match is the final piece.
     if (consumed < replacementString.Length)
     {
         replacementPieces.Add(Sharpen.Runtime.Substring(replacementString, consumed));
     }
     this.newLabel = null;
 }
        public virtual void TestBackReference()
        {
            // The tregex pattern refers back to the named node n with "~n".
            TregexPattern   pattern   = TregexPattern.Compile("__ <1 B=n <2 ~n");
            TsurgeonPattern operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel n X");
            const string    input     = "(A (B w) (B w))";

            // Only the first B is expected to be relabeled.
            RunTest(pattern, operation, input, "(A (X w) (B w))");
        }
        public virtual void TestKeyword()
        {
            // "relabel" is used here both as the operation and as the new label;
            // parsing should succeed if keyword handling is correct.
            TregexPattern   pattern   = TregexPattern.Compile("A=foo << B=bar << C=baz");
            TsurgeonPattern operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("relabel foo relabel");
            const string    input     = "(A (B foo) (C foo) (C bar))";
            const string    expected  = "(relabel (B foo) (C foo) (C bar))";

            RunTest(pattern, operation, input, expected);
        }
        public virtual void TestChineseReplaceTree()
        {
            // The matched PU node is replaced by two literal nodes (PN and PU);
            // Chinese text and full-width braces must pass through the parser unchanged.
            TregexPattern   pattern   = TregexPattern.Compile("PU=punc < 她{");
            TsurgeonPattern operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("replace punc (PN 她) (PU {)");

            string before = "(IP (IP (PP (P 像) (NP (NP (NR 赖斯) (PU ,) (NR 赖斯)) (NP (PN 本身)))) (PU 她{) (NP (NN breath)) (PU }) (IJ 呃) (VP (VV 担任) (NP (NN 国务卿)) (VP (ADVP (AD 比较)) (VP (VA 晚))))))";
            string after  = "(IP (IP (PP (P 像) (NP (NP (NR 赖斯) (PU ,) (NR 赖斯)) (NP (PN 本身)))) (PN 她) (PU {) (NP (NN breath)) (PU }) (IJ 呃) (VP (VV 担任) (NP (NN 国务卿)) (VP (ADVP (AD 比较)) (VP (VA 晚))))))";

            RunTest(pattern, operation, before, after);
        }
 /// <summary>
 /// Creates an adjoin operation that stores <paramref name="t"/> as the adjunction tree.
 /// </summary>
 /// <param name="name">Operation name handed to the base constructor.</param>
 /// <param name="t">The auxiliary tree to adjoin; must not be null.</param>
 /// <param name="p">The child pattern handed to the base constructor; must not be null.</param>
 /// <exception cref="System.ArgumentNullException">
 /// If <paramref name="t"/> or <paramref name="p"/> is null.
 /// </exception>
 public AdjoinNode(string name, AuxiliaryTree t, TsurgeonPattern p)
     : base(name, new TsurgeonPattern[] { p })
 {
     if (t == null || p == null)
     {
         // ArgumentNullException(string) treats its only argument as the
         // parameter name, so the diagnostic text has to go through the
         // (paramName, message) overload to end up in the message.
         string offendingParameter = (t == null) ? "t" : "p";
         throw new ArgumentNullException(offendingParameter, "AdjoinNode: illegal null argument, t=" + t + ", p=" + p);
     }
     adjunctionTree = t;
 }
        public virtual void TestCreateSubtrees()
        {
            TsurgeonPattern operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("createSubtree FOO left right");
            TregexPattern   pattern   = TregexPattern.Compile("A < B=left < C=right");

            // Only the two endpoint nodes are present.
            RunTest(pattern, operation, "(A (B 1) (C 2))", "(A (FOO (B 1) (C 2)))");

            // The endpoints may also appear in reverse order.
            RunTest(pattern, operation, "(A (C 1) (B 2))", "(A (FOO (C 1) (B 2)))");

            // Nodes between the endpoints are wrapped as well.
            RunTest(pattern, operation, "(A (B 1) (D 3) (C 2))", "(A (FOO (B 1) (D 3) (C 2)))");

            // Nodes outside the span stay where they are.
            RunTest(pattern, operation, "(A (D 3) (B 1) (C 2))", "(A (D 3) (FOO (B 1) (C 2)))");
            RunTest(pattern, operation, "(A (B 1) (C 2) (D 3))", "(A (FOO (B 1) (C 2)) (D 3))");
            RunTest(pattern, operation, "(A (D 3) (B 1) (C 2) (E 4))", "(A (D 3) (FOO (B 1) (C 2)) (E 4))");

            // Both endpoints name the same node.
            pattern = TregexPattern.Compile("A < B=left < B=right");
            RunTest(pattern, operation, "(A (B 1) (C 2))", "(A (FOO (B 1)) (C 2))");

            // Double operation: two FOO nodes should be created, then it should stop.
            RunTest(pattern, operation, "(A (B 1) (B 2))", "(A (FOO (B 1)) (FOO (B 2)))");

            // createSubtree with a single node argument.
            operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("createSubtree FOO child");
            pattern   = TregexPattern.Compile("A < B=child");
            RunTest(pattern, operation, "(A (B 1) (C 2))", "(A (FOO (B 1)) (C 2))");
            RunTest(pattern, operation, "(A (B 1) (B 2))", "(A (FOO (B 1)) (FOO (B 2)))");

            // Malformed operations must be rejected by the parser: no node argument...
            try
            {
                operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("createSubtree FOO");
                throw new AssertionError("Expected to fail parsing");
            }
            catch (TsurgeonParseException)
            {
                // expected
            }

            // ...and too many node arguments.
            try
            {
                operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("createSubtree FOO a b c");
                throw new AssertionError("Expected to fail parsing");
            }
            catch (TsurgeonParseException)
            {
                // expected
            }

            // The operation must fail at run time when the endpoints have different parents.
            operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("createSubtree FOO left right");
            pattern   = TregexPattern.Compile("A << B=left << C=right");
            try
            {
                RunTest(pattern, operation, "(A (B 1) (D (C 2)))", "(A (B 1) (D (C 2)))");
                throw new AssertionError("Expected a runtime failure");
            }
            catch (TsurgeonRuntimeException)
            {
                // expected
            }
        }
        public virtual void TestCoindex()
        {
            TregexPattern   pattern   = TregexPattern.Compile("A=foo << B=bar << C=baz");
            TsurgeonPattern operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("coindex foo bar baz");

            // All three named nodes receive the same index.
            RunTest(pattern, operation, "(A (B (C foo)))", "(A-1 (B-1 (C-1 foo)))");

            // The indexing does not happen a second time, because the labels
            // have changed after the first application.
            RunTest(pattern, operation, "(A (B foo) (C foo) (C bar))", "(A-1 (B-1 foo) (C-1 foo) (C bar))");

            // Index 1 is already used in the input, so the new index is 2.
            RunTest(pattern, operation, "(A (B foo) (C-1 bar) (C baz))", "(A-2 (B-2 foo) (C-1 bar) (C-2 baz))");
        }
        public virtual void TestMultiplePatterns()
        {
            const string input = "(A (B foo) (C foo) (C bar))";

            // Two bracketed operations in one script: relabel, then move.
            TregexPattern   pattern   = TregexPattern.Compile("A=foo < B=bar < C=baz");
            TsurgeonPattern operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("[relabel baz BAZ] [move baz >-1 bar]");
            RunTest(pattern, operation, input, "(A (B foo (BAZ foo) (BAZ bar)))");

            // Same combination, with a regex relabel that refers to other named nodes.
            operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("[relabel baz /^.*$/={bar}={baz}FOO/] [move baz >-1 bar]");
            RunTest(pattern, operation, input, "(A (B foo (BCFOO foo) (BCFOO bar)))");

            // Two regex relabels in a row: this in particular was a problem
            // until "/" was required to be escaped.
            pattern   = TregexPattern.Compile("A=foo < B=bar < C=baz < D=biff");
            operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("[relabel baz /^.*$/={bar}={baz}/] [relabel biff /^.*$/={bar}={biff}/]");
            RunTest(pattern, operation, "(A (B foo) (C bar) (D baz))", "(A (B foo) (BC bar) (BD baz))");
        }
Beispiel #12
0
 /// <summary>
 /// Creates a matcher for <paramref name="pattern"/> together with one child
 /// matcher per entry of <c>pattern.children</c>.
 /// </summary>
 /// <param name="pattern">The operation whose children get their own matchers.</param>
 /// <param name="newNodeNames">Stored on this matcher and passed on to every child matcher.</param>
 /// <param name="coindexer">Stored on this matcher and passed on to every child matcher.</param>
 public TsurgeonMatcher(TsurgeonPattern pattern, IDictionary <string, Tree> newNodeNames, CoindexationGenerator coindexer)
 {
     // TODO: ideally the tree and the tregex matcher would be part of this
     // object as well; that would mean moving some of the functionality of
     // Tsurgeon.java into it.
     this.newNodeNames = newNodeNames;
     this.coindexer    = coindexer;

     int childCount = pattern.children.Length;
     this.childMatcher = new Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.TsurgeonMatcher[childCount];
     for (int index = 0; index != childCount; index++)
     {
         // Every child shares the same name map and coindexer.
         this.childMatcher[index] = pattern.children[index].Matcher(newNodeNames, coindexer);
     }
 }
Beispiel #13
0
        /// <summary>
        /// Reads one tregex pattern followed by its tsurgeon operations from a
        /// reader and returns both, compiled, as a pair.
        /// </summary>
        /// <param name="reader">Reader to read the pattern and the operations from</param>
        /// <param name="compiler">Compiler used to turn the pattern text into a tregex pattern</param>
        /// <returns>
        /// A pair of the tregex pattern and the tsurgeon pattern read from the reader, or
        /// <see langword="null"/>
        /// when the pattern text read is empty, i.e. the operations in the reader have been exhausted
        /// </returns>
        /// <exception cref="System.IO.IOException">If any IO problem</exception>
        public static Pair <TregexPattern, TsurgeonPattern> GetOperationFromReader(BufferedReader reader, TregexPatternCompiler compiler)
        {
            string tregexText = GetTregexPatternFromReader(reader);

            // log.info("Read tregex pattern: " + tregexText);
            if (!tregexText.IsEmpty())
            {
                // The pattern is compiled before the operations are read from the reader.
                TregexPattern   compiledPattern = compiler.Compile(tregexText);
                TsurgeonPattern operations      = GetTsurgeonOperationsFromReader(reader);

                return(new Pair <TregexPattern, TsurgeonPattern>(compiledPattern, operations));
            }
            // Empty pattern text: nothing more to read.
            return(null);
        }
        public virtual void TestAdjoinWithNamedNode()
        {
            const string input = "(A (B C))";

            // A node named inside the adjoined tree serves as sibling anchor for a later insert.
            TsurgeonPattern operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("[adjoinF (D (E=target foot@)) bar] " + "[insert (G 1) $+ target]");
            TregexPattern   pattern   = TregexPattern.Compile("B=bar !>> D");
            RunTest(pattern, operation, input, "(A (D (G 1) (E (B C))))");

            // The same named node serves as parent for the inserted subtree.
            operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("[adjoinF (D (E=target foot@)) bar] " + "[insert (G 1) >0 target]");
            pattern   = TregexPattern.Compile("B=bar !>> D");
            RunTest(pattern, operation, input, "(A (D (E (G 1) (B C))))");

            // Named leaf
            operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("[adjoinF (D (E foot@) F=target) bar] " + "[insert (G 1) >0 target]");
            pattern   = TregexPattern.Compile("B=bar !>> D");
            RunTest(pattern, operation, input, "(A (D (E (B C)) (F (G 1))))");
        }
        public virtual void TestInsertWithNamedNode()
        {
            const string input = "(A (B C))";

            // The root of the first inserted tree is named and becomes the parent for the second insert.
            TsurgeonPattern operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("[insert (D=target E) $+ bar] " + "[insert (F 1) >0 target]");
            TregexPattern   pattern   = TregexPattern.Compile("B=bar !$- D");
            RunTest(pattern, operation, input, "(A (D (F 1) E) (B C))");

            // The same named root serves as sibling anchor for the second insert.
            operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("[insert (D=target E) $+ bar] " + "[insert (F 1) $+ target]");
            pattern   = TregexPattern.Compile("B=bar !$- D");
            RunTest(pattern, operation, input, "(A (F 1) (D E) (B C))");

            // Named leaf
            operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("[insert (D E=target) $+ bar] " + "[insert (F 1) $+ target]");
            pattern   = TregexPattern.Compile("B=bar !$- D");
            RunTest(pattern, operation, input, "(A (D (F 1) E) (B C))");
        }
        public virtual void TestExcise()
        {
            // TODO: needs more meat to this test
            TregexPattern   pattern   = TregexPattern.Compile("__=repeat <: (~repeat < __)");
            TsurgeonPattern operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("excise repeat repeat");

            // A node whose only child has the same label is collapsed.
            RunTest(pattern, operation, "(A (B (B foo)))", "(A (B foo))");

            // If a deleted root is excised down to a level that has just one
            // child, that one child is returned as the new tree.
            RunTest(pattern, operation, "(B (B foo))", "(B foo)");

            // Excising a root that has two children is expected to yield no tree at all.
            pattern   = TregexPattern.Compile("A=root");
            operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("excise root root");
            RunTest(pattern, operation, "(A (B bar) (C foo))", null);
        }
        public virtual void TestPrune()
        {
            // The four input trees are shared by both halves of the test.
            const string onlyChain   = "(A (B (C 1)))";
            const string fooAndChain = "(A (foo 1) (B (C 1)))";
            const string twoBs       = "(A (B 1) (B (C 1)))";
            const string fooAndBar   = "(A (foo 1) (bar (C 1)))";

            TsurgeonPattern operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("prune bob");

            // Pruning B nodes; a null expectation means no tree is left.
            TregexPattern pattern = TregexPattern.Compile("B=bob");
            RunTest(pattern, operation, onlyChain, null);
            RunTest(pattern, operation, fooAndChain, "(A (foo 1))");
            RunTest(pattern, operation, twoBs, null);
            RunTest(pattern, operation, fooAndBar, "(A (foo 1) (bar (C 1)))");

            // Pruning C nodes on the same inputs.
            pattern = TregexPattern.Compile("C=bob");
            RunTest(pattern, operation, onlyChain, null);
            RunTest(pattern, operation, fooAndChain, "(A (foo 1))");
            RunTest(pattern, operation, twoBs, "(A (B 1))");
            RunTest(pattern, operation, fooAndBar, "(A (foo 1))");
        }
        public virtual void TestInsertDelete()
        {
            // The same bug as the Replace bug, but for a sequence of
            // insert/delete operations
            string input    = "( (S (FILLER (NP-SBJ-1 (NNP Koito))) (VP (VBZ has) (VP (VBN refused) (S (NP-SBJ (-NONE- *-1)) (VP (TO to) (VP (VB grant) (NP (NNP Mr.) (NNP Pickens)) (NP (NP (NNS seats)) (PP-LOC (IN on) (NP (PRP$ its) (NN board))))))) (, ,) (S-ADV (NP-SBJ (-NONE- *-1)) (VP (VBG asserting) (SBAR (-NONE- 0) (S (NP-SBJ (PRP he)) (VP (VBZ is) (NP-PRD (NP (DT a) (NN greenmailer)) (VP (VBG trying) (S (NP-SBJ (-NONE- *)) (VP (TO to) (VP (VB pressure) (NP (NP (NNP Koito) (POS 's)) (JJ other) (NNS shareholders)) (PP-CLR (IN into) (S-NOM (NP-SBJ (-NONE- *)) (VP (VBG buying) (NP (PRP him)) (PRT (RP out)) (PP-MNR (IN at) (NP (DT a) (NN profit)))))))))))))))))) (. .)))";
            string expected = "( (S (FILLER (NP-SBJ-1 (NNP Koito))) (VP (VBZ has) (VP (VBN refused) (S (NP-SBJ (NP-SBJ-1 (NNP Koito))) (VP (TO to) (VP (VB grant) (NP (NNP Mr.) (NNP Pickens)) (NP (NP (NNS seats)) (PP-LOC (IN on) (NP (PRP$ its) (NN board))))))) (, ,) (S-ADV (NP-SBJ (NP-SBJ-1 (NNP Koito))) (VP (VBG asserting) (SBAR (-NONE- 0) (S (NP-SBJ (PRP he)) (VP (VBZ is) (NP-PRD (NP (DT a) (NN greenmailer)) (VP (VBG trying) (S (NP-SBJ (-NONE- *)) (VP (TO to) (VP (VB pressure) (NP (NP (NNP Koito) (POS 's)) (JJ other) (NNS shareholders)) (PP-CLR (IN into) (S-NOM (NP-SBJ (-NONE- *)) (VP (VBG buying) (NP (PRP him)) (PRT (RP out)) (PP-MNR (IN at) (NP (DT a) (NN profit)))))))))))))))))) (. .)))";

            IList <Pair <TregexPattern, TsurgeonPattern> > script = new List <Pair <TregexPattern, TsurgeonPattern> >();

            // Step 1: insert the src node before each dest node that has no sibling matching src.
            TregexPattern   pattern   = TregexPattern.Compile("(/-([0-9]+)$/#1%i=src > /^FILLER$/) : (/^-NONE-/=dest <: /-([0-9]+)$/#1%i !$ ~src)");
            TsurgeonPattern operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("insert src $+ dest");
            script.Add(new Pair <TregexPattern, TsurgeonPattern>(pattern, operation));

            // Step 2: delete the dest nodes.
            pattern   = TregexPattern.Compile("(/-([0-9]+)$/#1%i=src > /^FILLER$/) : (/^-NONE-/=dest <: /-([0-9]+)$/#1%i)");
            operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("delete dest");
            script.Add(new Pair <TregexPattern, TsurgeonPattern>(pattern, operation));

            RunTest(script, input, expected);
        }
Beispiel #19
0
        /// <summary>
        /// Applies a tsurgeon operation to a tree for as long as the tregex pattern keeps matching.
        /// </summary>
        /// <remarks>
        /// After every successful edit a fresh matcher is created over the edited tree before
        /// the next match is looked for.  If an edit returns
        /// <see langword="null"/>
        /// , processing stops and
        /// <see langword="null"/>
        /// is returned.
        /// </remarks>
        /// <param name="matchPattern">
        /// The
        /// <see cref="Edu.Stanford.Nlp.Trees.Tregex.TregexPattern"/>
        /// to match against the
        /// <see cref="Edu.Stanford.Nlp.Trees.Tree"/>
        /// .
        /// </param>
        /// <param name="p">
        /// The
        /// <see cref="TsurgeonPattern"/>
        /// to apply on every match.
        /// </param>
        /// <param name="t">
        /// The
        /// <see cref="Edu.Stanford.Nlp.Trees.Tree"/>
        /// to match against and perform surgery on.
        /// </param>
        /// <returns>The result of the last edit, or t itself when nothing matched.</returns>
        public static Tree ProcessPattern(TregexPattern matchPattern, TsurgeonPattern p, Tree t)
        {
            TregexMatcher   finder  = matchPattern.Matcher(t);
            TsurgeonMatcher surgeon = p.Matcher();

            while (finder.Find())
            {
                t = surgeon.Evaluate(t, finder);
                if (t == null)
                {
                    // The edit left no tree behind; there is nothing more to match.
                    return(null);
                }
                // Restart matching on the edited tree.
                finder = matchPattern.Matcher(t);
            }
            return(t);
        }
        public virtual void TestReplaceNode()
        {
            // Replace one named node with another named node.
            TsurgeonPattern operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("replace foo blah");
            TregexPattern   pattern   = TregexPattern.Compile("B=foo : C=blah");
            RunTest(pattern, operation, "(A (B 0) (C 1))", "(A (C 1) (C 1))");

            // Regression test for a user-reported bug: only one of the -NONE-
            // nodes was being replaced, because the replace reused existing
            // tree nodes instead of creating new ones, which caused tregex
            // to fail to find the second replacement.
            string input    = "( (S (FILLER (NP-SBJ-1 (NNP Koito))) (VP (VBZ has) (VP (VBN refused) (S (NP-SBJ (-NONE- *-1)) (VP (TO to) (VP (VB grant) (NP (NNP Mr.) (NNP Pickens)) (NP (NP (NNS seats)) (PP-LOC (IN on) (NP (PRP$ its) (NN board))))))) (, ,) (S-ADV (NP-SBJ (-NONE- *-1)) (VP (VBG asserting) (SBAR (-NONE- 0) (S (NP-SBJ (PRP he)) (VP (VBZ is) (NP-PRD (NP (DT a) (NN greenmailer)) (VP (VBG trying) (S (NP-SBJ (-NONE- *)) (VP (TO to) (VP (VB pressure) (NP (NP (NNP Koito) (POS 's)) (JJ other) (NNS shareholders)) (PP-CLR (IN into) (S-NOM (NP-SBJ (-NONE- *)) (VP (VBG buying) (NP (PRP him)) (PRT (RP out)) (PP-MNR (IN at) (NP (DT a) (NN profit)))))))))))))))))) (. .)))";
            string expected = "( (S (FILLER (NP-SBJ-1 (NNP Koito))) (VP (VBZ has) (VP (VBN refused) (S (NP-SBJ (NP-SBJ-1 (NNP Koito))) (VP (TO to) (VP (VB grant) (NP (NNP Mr.) (NNP Pickens)) (NP (NP (NNS seats)) (PP-LOC (IN on) (NP (PRP$ its) (NN board))))))) (, ,) (S-ADV (NP-SBJ (NP-SBJ-1 (NNP Koito))) (VP (VBG asserting) (SBAR (-NONE- 0) (S (NP-SBJ (PRP he)) (VP (VBZ is) (NP-PRD (NP (DT a) (NN greenmailer)) (VP (VBG trying) (S (NP-SBJ (-NONE- *)) (VP (TO to) (VP (VB pressure) (NP (NP (NNP Koito) (POS 's)) (JJ other) (NNS shareholders)) (PP-CLR (IN into) (S-NOM (NP-SBJ (-NONE- *)) (VP (VBG buying) (NP (PRP him)) (PRT (RP out)) (PP-MNR (IN at) (NP (DT a) (NN profit)))))))))))))))))) (. .)))";

            operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("replace dest src");
            pattern   = TregexPattern.Compile("(/-([0-9]+)$/#1%i=src > /^FILLER$/) : (/^-NONE-/=dest <: /-([0-9]+)$/#1%i)");
            RunTest(pattern, operation, input, expected);
        }
        /// <summary>
        /// Debugging aid: prints the operation, the pattern, whether the pattern matches the
        /// input tree, and the tree produced by applying the operation.
        /// </summary>
        /// <param name="tregex">The pattern to match.</param>
        /// <param name="tsurgeon">The operation to apply.</param>
        /// <param name="input">Text of the tree, parsed with TreeFromString.</param>
        public static void OutputResults(TregexPattern tregex, TsurgeonPattern tsurgeon, string input)
        {
            System.Console.Out.WriteLine("Tsurgeon: " + tsurgeon);
            System.Console.Out.WriteLine("Tregex: " + tregex);

            // The match verdict is written to standard error, everything else to standard output.
            TregexMatcher probe   = tregex.Matcher(TreeFromString(input));
            string        verdict = probe.Find() ? " Matched" : " Did not match";
            System.Console.Error.WriteLine(verdict);

            // The surgery runs on a freshly parsed copy of the input.
            Tree transformed = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ProcessPattern(tregex, tsurgeon, TreeFromString(input));
            System.Console.Out.WriteLine(transformed);
        }
        /// <summary>
        /// Applies one tregex/tsurgeon pair to the tree parsed from <paramref name="input"/> and
        /// asserts the outcome, then repeats the check through the list-based RunTest overload.
        /// </summary>
        /// <param name="tregex">The pattern to match.</param>
        /// <param name="tsurgeon">The operation to apply on every match.</param>
        /// <param name="input">Text of the tree to operate on.</param>
        /// <param name="expected">
        /// Expected text of the resulting tree, or
        /// <see langword="null"/>
        /// when the surgery is expected to return no tree.
        /// </param>
        public static void RunTest(TregexPattern tregex, TsurgeonPattern tsurgeon, string input, string expected)
        {
            Tree result = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ProcessPattern(tregex, tsurgeon, TreeFromString(input));

            if (expected == null)
            {
                NUnit.Framework.Assert.AreEqual(null, result);
            }
            else
            {
                // Report an unexpected null result as an assertion failure
                // instead of letting result.ToString() throw a NullReferenceException.
                NUnit.Framework.Assert.IsNotNull(result, "Expected " + expected + " but the surgery returned no tree");
                NUnit.Framework.Assert.AreEqual(expected, result.ToString());
            }
            // run the test on both a list and as a single pattern just to
            // make sure the underlying code works for both
            Pair <TregexPattern, TsurgeonPattern> surgery = new Pair <TregexPattern, TsurgeonPattern>(tregex, tsurgeon);

            RunTest(Java.Util.Collections.SingletonList(surgery), input, expected);
        }
        public virtual void TestAdjoinH()
        {
            TsurgeonPattern operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("adjoinH (FOO (BAR@)) foo");
            TregexPattern   pattern   = TregexPattern.Compile("B=foo !< BAR");

            // The matched node keeps its label and gains BAR between itself and its children.
            RunTest(pattern, operation, "(A (B 1 2))", "(A (B (BAR 1 2)))");
            // No B node: the tree is unchanged.
            RunTest(pattern, operation, "(A (C 1 2))", "(A (C 1 2))");
            // Nested B nodes are both processed.
            RunTest(pattern, operation, "(A (B (B 1 2)))", "(A (B (BAR (B (BAR 1 2)))))");

            // Drive the matcher and the operation by hand for a single step.
            Tree          original = TreeFromString("(A (B 1 2))");
            TregexMatcher finder   = pattern.Matcher(original);

            NUnit.Framework.Assert.IsTrue(finder.Find());
            NUnit.Framework.Assert.AreEqual("(B 1 2)", finder.GetNode("foo").ToString());

            Tree edited = operation.Matcher().Evaluate(original, finder);

            NUnit.Framework.Assert.AreEqual("(A (B (BAR 1 2)))", edited.ToString());
            // After the edit the node named foo prints as the adjoined subtree.
            NUnit.Framework.Assert.AreEqual("(B (BAR 1 2))", finder.GetNode("foo").ToString());
            NUnit.Framework.Assert.IsFalse(finder.Find());
        }
        public virtual void TestIfExists()
        {
            // This should successfully compile, assuming the keyword parsing is correct
            TregexPattern   tregex   = TregexPattern.Compile("A=foo [ << B=bar | << C=baz ]");
            TsurgeonPattern tsurgeon = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("if exists bar relabel bar BAR");

            RunTest(tregex, tsurgeon, "(A (B foo))", "(A (BAR foo))");

            // Each bracketed operation only runs when its named node was bound by the match.
            tsurgeon = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("[if exists bar relabel bar BAR] [if exists baz relabel baz BAZ]");
            RunTest(tregex, tsurgeon, "(A (B foo))", "(A (BAR foo))");
            RunTest(tregex, tsurgeon, "(A (C foo))", "(A (BAZ foo))");
            RunTest(tregex, tsurgeon, "(A (B foo) (C foo))", "(A (BAR foo) (BAZ foo))");

            // Plain literals: the converted code wrapped these in "new string(...)", a
            // leftover of Java's "new String(...)"; string has no constructor taking a string.
            string tree     = "(ROOT (INTJ (CC But) (S (NP (DT the) (NNP RTC)) (ADVP (RB also)) (VP (VBZ requires) (`` ``) (S (FRAG (VBG working) ('' '') (NP (NP (NN capital)) (S (VP (TO to) (VP (VB maintain) (SBAR (S (NP (NP (DT the) (JJ bad) (NNS assets)) (PP (IN of) (NP (NP (NNS thrifts)) (SBAR (WHNP (WDT that)) (S (VP (VBP are) (VBN sold) (, ,) (PP (IN until) (NP (DT the) (NNS assets))))))))) (VP (MD can) (VP (VB be) (VP (VBN sold) (ADVP (RB separately))))))))))))))) (S (VP (. .)))))";
            string expected = "(ROOT (INTJ (CC But) (S (NP (DT the) (NNP RTC)) (ADVP (RB also)) (VP (VBZ requires) (`` ``) (S (FRAG (VBG working) ('' '') (NP (NP (NN capital)) (S (VP (TO to) (VP (VB maintain) (SBAR (S (NP (NP (DT the) (JJ bad) (NNS assets)) (PP (IN of) (NP (NP (NNS thrifts)) (SBAR (WHNP (WDT that)) (S (VP (VBP are) (VBN sold) (, ,) (PP (IN until) (NP (DT the) (NNS assets))))))))) (VP (MD can) (VP (VB be) (VP (VBN sold) (ADVP (RB separately))))))))))))))) (. .)))";

            // Move the final punctuation up under the top node and prune the
            // single-child wrapper it leaves behind, if there is one.
            tregex   = TregexPattern.Compile("__ !> __ <- (__=top <- (__ <<- (/[.]|PU/=punc < /[.!?。!?]/ ?> (__=single <: =punc))))");
            tsurgeon = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("[move punc >-1 top] [if exists single prune single]");
            RunTest(tregex, tsurgeon, tree, expected);
        }
        public virtual void TestAdjoin()
        {
            TsurgeonPattern operation = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ParseOperation("adjoin (FOO (BAR@)) foo");
            TregexPattern   pattern   = TregexPattern.Compile("B=foo");

            // The matched node is replaced by the adjoined tree; its children end up under BAR.
            RunTest(pattern, operation, "(A (B 1 2))", "(A (FOO (BAR 1 2)))");
            // No B node: the tree is unchanged.
            RunTest(pattern, operation, "(A (C 1 2))", "(A (C 1 2))");
            // Nested B nodes are both processed.
            RunTest(pattern, operation, "(A (B (B 1 2)))", "(A (FOO (BAR (FOO (BAR 1 2)))))");

            // Drive the matcher and the operation by hand for a single step.
            Tree          original = TreeFromString("(A (B 1 2))");
            TregexMatcher finder   = pattern.Matcher(original);

            NUnit.Framework.Assert.IsTrue(finder.Find());
            NUnit.Framework.Assert.AreEqual("(B 1 2)", finder.GetNode("foo").ToString());

            Tree edited = operation.Matcher().Evaluate(original, finder);

            NUnit.Framework.Assert.AreEqual("(A (FOO (BAR 1 2)))", edited.ToString());
            // TODO: do we want the tsurgeon to implicitly update the matched node?
            // System.err.println(matcher.getNode("foo"));
            NUnit.Framework.Assert.IsFalse(finder.Find());
        }
Beispiel #26
0
        /// <summary>
        /// Applies ProcessPattern to every tree of a collection.
        /// </summary>
        /// <param name="matchPattern">
        /// A
        /// <see cref="Edu.Stanford.Nlp.Trees.Tregex.TregexPattern"/>
        /// to be matched against a
        /// <see cref="Edu.Stanford.Nlp.Trees.Tree"/>
        /// .
        /// </param>
        /// <param name="p">
        /// A
        /// <see cref="TsurgeonPattern"/>
        /// to apply.
        /// </param>
        /// <param name="inputTrees">The input trees to be processed</param>
        /// <returns>
        /// A List of the transformed trees, in the iteration order of the input collection
        /// </returns>
        public static IList <Tree> ProcessPatternOnTrees(TregexPattern matchPattern, TsurgeonPattern p, ICollection <Tree> inputTrees)
        {
            // The converted stream pipeline passed null as the mapping function and
            // never invoked ProcessPattern; an explicit loop performs the mapping.
            IList <Tree> result = new List <Tree>(inputTrees.Count);

            foreach (Tree tree in inputTrees)
            {
                result.Add(ProcessPattern(matchPattern, p, tree));
            }
            return(result);
        }
 /// <summary>
 /// Creates an "adjoinF" operation by forwarding both arguments, together with
 /// that fixed operation name, to the base constructor.
 /// </summary>
 /// <param name="t">The auxiliary tree passed on to the base constructor.</param>
 /// <param name="p">The child pattern passed on to the base constructor.</param>
 public AdjoinToFootNode(AuxiliaryTree t, TsurgeonPattern p) : base("adjoinF", t, p)
 {
     // Nothing to set up beyond what the base constructor does.
 }
Beispiel #28
0
        // = false;
        // not an instantiable class
        /// <summary>Usage: java edu.stanford.nlp.trees.tregex.tsurgeon.Tsurgeon [-s] -treeFile file-with-trees [-po matching-pattern operation] operation-file-1 operation-file-2 ...</summary>
        /// <remarks>
        /// Usage: java edu.stanford.nlp.trees.tregex.tsurgeon.Tsurgeon [-s] -treeFile file-with-trees [-po matching-pattern operation] operation-file-1 operation-file-2 ... operation-file-n
        /// <h4>Arguments:</h4>
        /// Each argument should be the name of a transformation file that contains a list of pattern
        /// and transformation operation list pairs.  That is, it is a sequence of pairs of a
        /// <see cref="Edu.Stanford.Nlp.Trees.Tregex.TregexPattern"/>
        /// pattern on one or more lines, then a
        /// blank line (empty or whitespace), then a list of transformation operations one per line
        /// (as specified by <b>Legal operation syntax</b> below) to apply when the pattern is matched,
        /// and then another blank line (empty or whitespace).
        /// Note the need for blank lines: The code crashes if they are not present as separators
        /// (although the blank line at the end of the file can be omitted).
        /// The script file can include comment lines, either whole comment lines or
        /// trailing comments introduced by %, which extend to the end of line.  A needed percent
        /// mark can be escaped by a preceding backslash.
        /// <p>
        /// For example, if you want to excise an SBARQ node whenever it is the parent of an SQ node,
        /// and relabel the SQ node to S, your transformation file would look like this:
        /// <blockquote>
        /// <code>
        /// SBARQ=n1 &lt; SQ=n2<br />
        /// <br />
        /// excise n1 n1<br />
        /// relabel n2 S
        /// </code>
        /// </blockquote>
        /// <h4>Options:</h4>
        /// <ul>
        /// <li>
        /// <c>-treeFile &lt;filename&gt;</c>
        /// specify the name of the file that has the trees you want to transform.
        /// <li>
        /// <c>-po &lt;matchPattern&gt; &lt;operation&gt;</c>
        /// Apply a single operation to every tree using the specified match pattern and the specified operation.  Use this option
        /// when you want to quickly try the effect of one pattern/surgery combination, and are too lazy to write a transformation file.
        /// <li>
        /// <c>-s</c>
        /// Print each output tree on one line (default is pretty-printing).
        /// <li>
        /// <c>-m</c>
        /// For every tree that had a matching pattern, print "before" (prepended as "Operated on:") and "after" (prepended as "Result:").  Unoperated on trees just pass through the transducer as usual.
        /// <li>
        /// <c>-encoding X</c>
        /// Uses character set X for input and output of trees.
        /// <li>
        /// <c>-macros &lt;filename&gt;</c>
        /// A file of macros to use on the tregex pattern.  Macros should be one per line, with original and replacement separated by tabs.
        /// <li>
        /// <c>-hf &lt;headFinder-class-name&gt;</c>
        /// use the specified
        /// <see cref="Edu.Stanford.Nlp.Trees.IHeadFinder"/>
        /// class to determine headship relations.
        /// <li>
        /// <c>-hfArg &lt;string&gt;</c>
        /// pass a string argument in to the
        /// <see cref="Edu.Stanford.Nlp.Trees.IHeadFinder"/>
        /// class's constructor.
        /// <c>-hfArg</c>
        /// can be used multiple times to pass in multiple arguments.
        /// <li>
        /// <c>-trf &lt;TreeReaderFactory-class-name&gt;</c>
        /// use the specified
        /// <see cref="Edu.Stanford.Nlp.Trees.ITreeReaderFactory"/>
        /// class to read trees from files.
        /// </ul>
        /// <h4>Legal operation syntax:</h4>
        /// <ul>
        /// <li>
        /// <c>delete &lt;name&gt;</c>
        /// deletes the node and everything below it.
        /// <li>
        /// <c>prune &lt;name&gt;</c>
        /// Like delete, but if, after the pruning, the parent has no children anymore, the parent is pruned too.  Pruning continues to affect all ancestors until one is found with remaining children.  This may result in a null tree.
        /// <li>
        /// <c>excise &lt;name1&gt; &lt;name2&gt;</c>
        /// The name1 node should either dominate or be the same as the name2 node.  This excises out everything from
        /// name1 to name2.  All the children of name2 go into the parent of name1, where name1 was.
        /// <li>
        /// <c>relabel &lt;name&gt; &lt;new-label&gt;</c>
        /// Relabels the node to have the new label. <br />
        /// There are three possible forms: <br />
        /// <c>relabel nodeX VP</c>
        /// - for changing a node label to an
        /// alphanumeric string <br />
        /// <c>relabel nodeX /''/</c>
        /// - for relabeling a node to
        /// something that isn't a valid identifier without quoting <br />
        /// <c>relabel nodeX /^VB(.*)$/verb\\/$1/</c>
        /// - for regular
        /// expression based relabeling. In this case, all matches of the
        /// regular expression against the node label are replaced with the
        /// replacement String.  This has the semantics of Java/Perl's
        /// replaceAll: you may use capturing groups and put them in
        /// replacements with $n. For example, if the pattern is /foo/bar/
        /// and the node matched is "foo", the replaceAll semantics result in
        /// "barbar".  If the pattern is /^foo(.*)$/bar$1/ and node matched is
        /// "foofoo", relabel will result in "barfoo".  <br />
        /// When using the regex replacement method, you can also use the
        /// sequences ={node} and %{var} in the replacement string to use
        /// captured nodes or variable strings in the replacement string.
        /// For example, if the Tregex pattern was "duck=bar" and the relabel
        /// is /foo/={bar}/, "foofoo" will be replaced with "duckduck". <br />
        /// To concatenate two nodes named in the tregex pattern, for
        /// example, you can use the pattern /^.*$/={foo}={bar}/.  Note that
        /// the ^.*$ is necessary to make sure the regex pattern only matches
        /// and replaces once on the entire node name. <br />
        /// To get an "=" or a "%" in the replacement, use \ escaping.
        /// Also, as in the example you can escape a slash in the middle of
        /// the second and third forms with \\/ and \\\\. <br />
        /// <li>
        /// <c>insert &lt;name&gt; &lt;position&gt;</c>
        /// or
        /// <c>insert &lt;tree&gt; &lt;position&gt;</c>
        /// inserts the named node or tree into the position specified.
        /// <li>
        /// <c>move &lt;name&gt; &lt;position&gt;</c>
        /// moves the named node into the specified position.
        /// <p>Right now the only ways to specify position are:
        /// <p>
        /// <c>$+ &lt;name&gt;</c>
        /// the left sister of the named node<br />
        /// <c>$- &lt;name&gt;</c>
        /// the right sister of the named node<br />
        /// <c>&gt;i &lt;name&gt;</c>
        /// the i_th daughter of the named node<br />
        /// <c>&gt;-i &lt;name&gt;</c>
        /// the i_th daughter, counting from the right, of the named node.
        /// <li>
        /// <c>replace &lt;name1&gt; &lt;name2&gt;</c>
        /// deletes name1 and inserts a copy of name2 in its place.
        /// <li>
        /// <c>replace &lt;name&gt; &lt;tree&gt; &lt;tree2&gt;...</c>
        /// deletes name and inserts the new tree(s) in its place.  If
        /// more than one replacement tree is given, each of the new
        /// subtrees will be added in order where the old tree was.
        /// Multiple subtrees at the root is an illegal operation and
        /// will throw an exception.
        /// <li>
        /// <c>createSubtree &lt;auxiliary-tree-or-label&gt; &lt;name1&gt; [&lt;name2&gt;]</c>
        /// Create a subtree out of all the nodes from
        /// <c>&lt;name1&gt;</c>
        /// through
        /// <c>&lt;name2&gt;</c>
        /// . The subtree is moved to the foot of the given
        /// auxiliary tree, and the tree is inserted where the nodes of
        /// the subtree used to reside. If a simple label is provided as
        /// the first argument, the subtree is given a single parent with
        /// a name corresponding to the label.  To limit the operation to
        /// just one node, elide
        /// <c>&lt;name2&gt;</c>
        /// .
        /// <li>
        /// <c>adjoin &lt;auxiliary_tree&gt; &lt;name&gt;</c>
        /// Adjoins the specified auxiliary tree into the named node.
        /// The daughters of the target node will become the daughters of the foot of the auxiliary tree.
        /// <li>
        /// <c>adjoinH &lt;auxiliary_tree&gt; &lt;name&gt;</c>
        /// Similar to adjoin, but preserves the target node
        /// and makes it the root of
        /// <c>&lt;tree&gt;</c>
        /// . (It is still accessible as
        /// <c>name</c>
        /// .  The root of the
        /// auxiliary tree is ignored.)
        /// <li>
        /// <c>adjoinF &lt;auxiliary_tree&gt; &lt;name&gt;</c>
        /// Similar to adjoin,
        /// but preserves the target node and makes it the foot of
        /// <c>&lt;tree&gt;</c>
        /// .
        /// (It is still accessible as
        /// <c>name</c>
        /// , and retains its status as parent of its children.
        /// The root of the auxiliary tree is ignored.)
        /// <li>
        /// <c>coindex &lt;name1&gt; &lt;name2&gt; ... &lt;nameM&gt;</c>
        /// Puts a (Penn Treebank style)
        /// coindexation suffix of the form "-N" on each of nodes name_1 through name_m.  The value of N will be
        /// automatically generated in reference to the existing coindexations in the tree, so that there is never
        /// an accidental clash of indices across things that are not meant to be coindexed.
        /// </ul>
        /// <p>
        /// In the context of
        /// <c>adjoin</c>
        /// ,
        /// <c>adjoinH</c>
        /// ,
        /// <c>adjoinF</c>
        /// , and
        /// <c>createSubtree</c>
        /// , an auxiliary
        /// tree is a tree in Penn Treebank format with
        /// <c>@</c>
        /// on
        /// exactly one of the leaves denoting the foot of the tree.
        /// The operations which use the foot use the labeled node.
        /// For example:
        /// </p>
        /// <blockquote>
        /// Tsurgeon:
        /// <c>adjoin (FOO (BAR@)) foo</c>
        /// <br />
        /// Tregex:
        /// <c>B=foo</c>
        /// <br />
        /// Input:
        /// <c>(A (B 1 2))</c>
        /// Output:
        /// <c>(A (FOO (BAR 1 2)))</c>
        /// </blockquote>
        /// <p>
        /// Tsurgeon applies the same operation to the same tree for as long
        /// as the given tregex operation matches.  This means that infinite
        /// loops are very easy to cause.  One common situation where this comes up
        /// is with an insert operation, which will repeat infinitely many times
        /// unless you add an expression to the tregex that matches against
        /// the inserted pattern.  For example, this pattern will infinite loop:
        /// </p>
        /// <blockquote>
        /// <code>
        /// TregexPattern tregex = TregexPattern.compile("S=node &lt;&lt; NP"); <br />
        /// TsurgeonPattern tsurgeon = Tsurgeon.parseOperation("insert (NP foo) &gt;-1 node");
        /// </code>
        /// </blockquote>
        /// <p>
        /// This pattern, though, will terminate:
        /// </p>
        /// <blockquote>
        /// <code>
        /// TregexPattern tregex = TregexPattern.compile("S=node &lt;&lt; NP !&lt;&lt; foo"); <br />
        /// TsurgeonPattern tsurgeon = Tsurgeon.parseOperation("insert (NP foo) &gt;-1 node");
        /// </code>
        /// </blockquote>
        /// <p>
        /// Tsurgeon has (very) limited support for conditional statements.
        /// If a pattern is prefaced with
        /// <c>if exists &lt;name&gt;</c>
        /// ,
        /// the rest of the pattern will only execute if
        /// the named node was found in the corresponding TregexMatcher.
        /// </p>
        /// </remarks>
        /// <param name="args">
        /// a list of names of files each of which contains a single tregex matching pattern plus a list, one per line,
        /// of transformation operations to apply to the matched pattern.
        /// </param>
        /// <exception cref="System.Exception">If an I/O or pattern syntax error occurs</exception>
        public static void Main(string[] args)
        {
            // Without any arguments there is nothing to do: print the usage line and stop.
            if (args.Length == 0)
            {
                log.Info("Usage: java edu.stanford.nlp.trees.tregex.tsurgeon.Tsurgeon [-s] -treeFile <file-with-trees> [-po <matching-pattern> <operation>] <operation-file-1> <operation-file-2> ... <operation-file-n>");
                System.Environment.Exit(0);
            }

            // Names of the recognised command-line flags.
            const string headFinderOption       = "-hf";
            const string headFinderArgOption    = "-hfArg";
            const string encodingOption         = "-encoding";
            const string singleLineOption       = "-s";
            const string verboseOption          = "-v";
            // if set, then print original form of trees that are matched & thus operated on
            const string matchedOption          = "-m";
            const string patternOperationOption = "-po";
            const string treeFileOption         = "-treeFile";
            const string trfOption              = "-trf";
            const string macroOption            = "-macros";

            // Number of values registered for each flag before the arguments are parsed.
            IDictionary <string, int> flagArity = Generics.NewHashMap();

            flagArity[patternOperationOption] = 2;
            flagArity[treeFileOption]         = 1;
            flagArity[trfOption]              = 1;
            flagArity[singleLineOption]       = 0;
            flagArity[encodingOption]         = 1;
            flagArity[headFinderOption]       = 1;
            flagArity[macroOption]            = 1;
            IDictionary <string, string[]> parsedArgs = StringUtils.ArgsToMap(args, flagArity);

            // The entries under the null key are used further down as operation-file names.
            string[] operationFiles = parsedArgs[null];

            string   headFinderClassName = parsedArgs.Contains(headFinderOption) ? parsedArgs[headFinderOption][0] : null;
            string[] headFinderArgs      = parsedArgs.Contains(headFinderArgOption) ? parsedArgs[headFinderArgOption] : null;

            if (parsedArgs.Contains(verboseOption))
            {
                verbose = true;
            }
            string treePrintFormats = parsedArgs.Contains(singleLineOption) ? "oneline," : "penn,";
            string encoding         = parsedArgs.Contains(encodingOption) ? parsedArgs[encodingOption][0] : "UTF-8";
            string macroFilename    = parsedArgs.Contains(macroOption) ? parsedArgs[macroOption][0] : string.Empty;

            // Output side: tree printer plus an auto-flushing writer over standard output.
            TreePrint   treePrinter = new TreePrint(treePrintFormats, new PennTreebankLanguagePack());
            PrintWriter output      = new PrintWriter(new OutputStreamWriter(System.Console.Out, encoding), true);

            // Input side: a user-supplied reader factory (loaded by class name) or the default one.
            ITreeReaderFactory readerFactory;

            if (parsedArgs.Contains(trfOption))
            {
                string readerFactoryClass = parsedArgs[trfOption][0];
                readerFactory = ReflectionLoading.LoadByReflection(readerFactoryClass);
            }
            else
            {
                readerFactory = new TregexPattern.TRegexTreeReaderFactory();
            }
            Treebank treebank = new DiskTreebank(readerFactory, encoding);

            if (parsedArgs.Contains(treeFileOption))
            {
                treebank.LoadPath(parsedArgs[treeFileOption][0]);
            }
            if (treebank.IsEmpty())
            {
                log.Info("Warning: No trees specified to operate on.  Use -treeFile path option.");
            }

            // Pattern compiler, optionally configured with a head finder loaded by class name.
            TregexPatternCompiler patternCompiler;

            if (headFinderClassName != null)
            {
                IHeadFinder headFinder;
                if (headFinderArgs != null)
                {
                    headFinder = ReflectionLoading.LoadByReflection(headFinderClassName, (object[])headFinderArgs);
                }
                else
                {
                    headFinder = ReflectionLoading.LoadByReflection(headFinderClassName);
                }
                patternCompiler = new TregexPatternCompiler(headFinder);
            }
            else
            {
                patternCompiler = new TregexPatternCompiler();
            }
            Macros.AddAllMacros(patternCompiler, macroFilename, encoding);

            // Collect the (match pattern, operation) pairs: either the single -po pair
            // or everything read from the operation files.
            IList <Pair <TregexPattern, TsurgeonPattern> > operations = new List <Pair <TregexPattern, TsurgeonPattern> >();

            if (parsedArgs.Contains(patternOperationOption))
            {
                string[]        patternAndOperation = parsedArgs[patternOperationOption];
                TregexPattern   matchPattern        = patternCompiler.Compile(patternAndOperation[0]);
                TsurgeonPattern operation           = ParseOperation(patternAndOperation[1]);
                operations.Add(new Pair <TregexPattern, TsurgeonPattern>(matchPattern, operation));
            }
            else
            {
                foreach (string operationFile in operationFiles)
                {
                    foreach (Pair <TregexPattern, TsurgeonPattern> loaded in GetOperationsFromFile(operationFile, encoding, patternCompiler))
                    {
                        if (verbose)
                        {
                            log.Info(loaded.Second());
                        }
                        operations.Add(loaded);
                    }
                }
            }

            // Run every collected operation over every tree and print the outcome.
            bool reportMatches = parsedArgs.Contains(matchedOption);

            foreach (Tree current in treebank)
            {
                // Snapshot taken before processing so the pre-transformation form can be printed.
                Tree before = current.DeepCopy();
                Tree after  = ProcessPatternsOnTree(operations, current);
                // NOTE(review): matchedOnTree is declared outside this method; presumably
                // ProcessPatternsOnTree updates it -- confirm.
                if (reportMatches && matchedOnTree)
                {
                    output.Println("Operated on: ");
                    DisplayTree(before, treePrinter, output);
                    output.Println("Result: ");
                }
                DisplayTree(after, treePrinter, output);
            }
        }
 /// <summary>
 /// Builds the node by forwarding the fixed label "adjoinH" together with
 /// the given auxiliary tree and pattern to the base-class constructor.
 /// </summary>
 /// <param name="t">Auxiliary tree handed to the base constructor.</param>
 /// <param name="p">Tsurgeon pattern handed to the base constructor.</param>
 public AdjoinToHeadNode(AuxiliaryTree t, TsurgeonPattern p) : base("adjoinH", t, p)
 {
     // No state beyond what the base constructor sets up.
 }
 /// <summary>
 /// Four-argument convenience overload that delegates to the three-argument
 /// <c>OutputResults</c>.
 /// </summary>
 /// <param name="tregex">Match pattern passed through to the delegate.</param>
 /// <param name="tsurgeon">Operation passed through to the delegate.</param>
 /// <param name="input">Input tree text passed through to the delegate.</param>
 /// <param name="expected">Accepted but not used by this overload.</param>
 public static void OutputResults(
     TregexPattern tregex,
     TsurgeonPattern tsurgeon,
     string input,
     string expected)
 {
     // 'expected' is intentionally not forwarded; only the first three arguments are.
     OutputResults(tregex, tsurgeon, input);
 }