Beispiel #1
0
        /// <summary>
        /// Builds the AST for a push/pop/ret sequence and matches its first node against a
        /// three-statement pattern (assignment, expression statement, ret), then asserts that the
        /// match succeeds and that the shared capture group holds exactly one distinct variable.
        /// </summary>
        public void InstructionWithOneStackArgumentShouldResultInAssignmentAndExpressionStatementWithArgument()
        {
            var instructions = new[]
            {
                DummyInstruction.Push(0, 1),
                DummyInstruction.Pop(1, 1),
                DummyInstruction.Ret(2)
            };
            var graph = ConstructAst(instructions);

            // The assignment target and the pop argument are both recorded in this one group.
            var sharedVariable = new CaptureGroup("variable");

            // stack_slot = push 1()
            var pushAssignment = StatementPattern
                .Assignment<DummyInstruction>()
                .WithExpression(ExpressionPattern.Instruction(new DummyInstructionPattern(DummyOpCode.Push)))
                .CaptureVariables(sharedVariable);

            // pop(stack_slot)
            var popArgument = ExpressionPattern
                .Variable<DummyInstruction>()
                .CaptureVariable(sharedVariable);
            var popStatement = StatementPattern.Expression(
                ExpressionPattern
                    .Instruction(new DummyInstructionPattern(DummyOpCode.Pop))
                    .WithArguments(popArgument));

            // ret()
            var retStatement = StatementPattern.Instruction(new DummyInstructionPattern(DummyOpCode.Ret));

            var sequence = new SequencePattern<Statement<DummyInstruction>>(
                pushAssignment,
                popStatement,
                retStatement);

            var match = sequence.Match(graph.Nodes[0].Contents.Instructions);

            Assert.True(match.IsSuccess);
            Assert.Single(match.Captures[sharedVariable].Distinct());
        }
 /// <summary>
 /// Creates a matcher for <paramref name="pattern"/> over the value that
 /// <paramref name="annotation"/> returns for <c>CoreAnnotations.TokensAnnotation</c>.
 /// </summary>
 /// <param name="pattern">Sequence pattern forwarded to the base constructor.</param>
 /// <param name="annotation">
 /// Annotation whose tokens are matched; it is also stored on this instance.
 /// </param>
 public BasicCoreMapSequenceMatcher(SequencePattern<ICoreMap> pattern, ICoreMap annotation)
     : base(pattern, annotation.Get(typeof(CoreAnnotations.TokensAnnotation)))
 {
     nodesToStringConverter = CoremapListToStringConverter;
     this.annotation = annotation;
 }
Beispiel #3
0
        /// <summary>
        /// A sequence pattern constructed without elements must match an empty collection and
        /// must reject a collection that contains elements.
        /// </summary>
        public void EmptySequenceShouldOnlyMatchOnEmptyCollections()
        {
            var emptyPattern = new SequencePattern<int>();
            var noElements = ArraySegment<int>.Empty;
            var threeElements = new[] { 1, 2, 3 };

            bool matchesEmpty = emptyPattern.Matches(noElements);
            Assert.True(matchesEmpty);

            bool matchesNonEmpty = emptyPattern.Matches(threeElements);
            Assert.False(matchesNonEmpty);
        }
Beispiel #4
0
        /// <summary>
        /// A sequence pattern holding the single literal <c>1</c> must match only the list
        /// <c>{ 1 }</c>; a different element, an empty input and a longer input are all rejected.
        /// </summary>
        public void SingleElementShouldMatchOnSameSingleElementList()
        {
            var literalOne = Pattern.Literal(1);
            var singleElementPattern = new SequencePattern<int>(literalOne);

            // Exact match.
            Assert.True(singleElementPattern.Matches(new[] { 1 }));

            // Same length, different value.
            Assert.False(singleElementPattern.Matches(new[] { 2 }));

            // Too short.
            Assert.False(singleElementPattern.Matches(ArraySegment<int>.Empty));

            // Too long, even though the first element is the expected literal.
            Assert.False(singleElementPattern.Matches(new[] { 1, 2 }));
        }
Beispiel #5
0
        /// <summary>
        /// Looks up the value stored under <paramref name="name"/> in <c>variables</c> and converts it
        /// to a <see cref="NodePattern"/>.
        /// </summary>
        /// <remarks>
        /// Supported stored values, checked in this order:
        /// a <c>SequencePattern</c> whose pattern expression is a <c>NodePatternExpr</c>,
        /// a <c>NodePatternExpr</c>, a <c>NodePattern</c>, or a <c>string</c> that is parsed with
        /// <c>parser.ParseNode</c>.
        /// </remarks>
        /// <param name="name">Name of the variable to resolve.</param>
        /// <returns>The resolved node pattern, or <c>null</c> when the lookup yields <c>null</c>.</returns>
        /// <exception cref="Exception">
        /// The stored value has an unsupported type, a stored <c>SequencePattern</c> does not wrap a
        /// node pattern expression, or parsing a stored string fails (the original failure is kept
        /// as the inner exception).
        /// </exception>
        public virtual NodePattern GetNodePattern(string name)
        {
            object bound = variables[name];

            if (bound == null)
            {
                return null;
            }

            if (bound is SequencePattern)
            {
                SequencePattern sequence = (SequencePattern)bound;
                if (!(sequence.GetPatternExpr() is SequencePattern.NodePatternExpr))
                {
                    throw new Exception("Invalid node pattern class: " + sequence.GetPatternExpr().GetType() + " for variable " + name);
                }

                return ((SequencePattern.NodePatternExpr)sequence.GetPatternExpr()).nodePattern;
            }

            if (bound is SequencePattern.NodePatternExpr)
            {
                return ((SequencePattern.NodePatternExpr)bound).nodePattern;
            }

            if (bound is NodePattern)
            {
                return (NodePattern)bound;
            }

            if (!(bound is string))
            {
                throw new Exception("Invalid node pattern variable class: " + bound.GetType() + " for variable " + name);
            }

            // Only strings reach this point: parse the text into a node pattern expression.
            try
            {
                SequencePattern.NodePatternExpr parsed = (SequencePattern.NodePatternExpr)parser.ParseNode(this, (string)bound);
                return parsed.nodePattern;
            }
            catch (Exception pex)
            {
                throw new Exception("Error parsing " + bound + " to node pattern", pex);
            }
        }
Beispiel #6
0
 /// <summary>
 /// Creates a matcher that applies <paramref name="pattern"/> to <paramref name="tokens"/>.
 /// </summary>
 /// <remarks>
 /// Both arguments are forwarded to the base constructor; afterwards the node-to-string
 /// converter is set to <c>CoremapListToStringConverter</c>.
 /// </remarks>
 /// <param name="pattern">Sequence pattern to match.</param>
 /// <param name="tokens">Token list the pattern is matched against.</param>
 public TokenSequenceMatcher(SequencePattern<ICoreMap> pattern, IList<ICoreMap> tokens)
     : base(pattern, tokens)
 {
     nodesToStringConverter = CoremapListToStringConverter;
 }
Beispiel #7
0
        /// <summary>
        /// Builds the AST for one push of two values followed by two single-argument pops and a ret,
        /// matches it against a four-statement pattern, and asserts which captured variable each pop
        /// consumes: the first pop uses the second variable, the second pop uses the first.
        /// </summary>
        public void PushingTwoValuesOnStackWithDifferentConsumers()
        {
            var instructions = new[]
            {
                DummyInstruction.Push(0, 2),
                DummyInstruction.Pop(1, 1),
                DummyInstruction.Pop(2, 1),
                DummyInstruction.Ret(3)
            };
            var graph = ConstructAst(instructions);

            var pushedVariables = new CaptureGroup("variable");
            var firstPopArguments = new CaptureGroup("argument1");
            var secondPopArguments = new CaptureGroup("argument2");

            // stack_slot_1, stack_slot_2 = push 2()
            var pushAssignment = StatementPattern
                .Assignment<DummyInstruction>()
                .WithVariables(2)
                .CaptureVariables(pushedVariables);

            // pop(?)
            var firstPop = StatementPattern.Expression(
                ExpressionPattern
                    .Instruction<DummyInstruction>()
                    .WithArguments(1)
                    .CaptureArguments(firstPopArguments));

            // pop(?)
            var secondPop = StatementPattern.Expression(
                ExpressionPattern
                    .Instruction<DummyInstruction>()
                    .WithArguments(1)
                    .CaptureArguments(secondPopArguments));

            // ret()
            var retStatement = StatementPattern.Instruction(new DummyInstructionPattern(DummyOpCode.Ret));

            var sequence = new SequencePattern<Statement<DummyInstruction>>(
                pushAssignment,
                firstPop,
                secondPop,
                retStatement);

            var match = sequence.Match(graph.Nodes[0].Contents.Instructions);

            Assert.True(match.IsSuccess);

            var assigned = match.Captures[pushedVariables]
                .Cast<IVariable>()
                .ToArray();

            var firstConsumed = (VariableExpression<DummyInstruction>)match.Captures[firstPopArguments][0];
            var secondConsumed = (VariableExpression<DummyInstruction>)match.Captures[secondPopArguments][0];

            // The first pop statement is expected to use the second variable assigned by the push,
            // and the second pop the first one.
            Assert.Equal(assigned[1], firstConsumed.Variable);
            Assert.Equal(assigned[0], secondConsumed.Variable);
        }
 /// <summary>Configures every parameter used for converting a list of tokens into sentences.</summary>
 /// <remarks>
 /// This is the most general constructor: each setting is supplied explicitly and the regular
 /// expressions are compiled here.
 /// </remarks>
 /// <param name="boundaryTokenRegex">
 /// Tokens matching this regex end a sentence but are retained at the end of the sentence.
 /// A substantive value must be supplied.
 /// </param>
 /// <param name="boundaryFollowersRegex">
 /// Regex for tokens that are allowed to be tacked onto the end of a sentence after a sentence
 /// boundary token, for example ")". A substantive value must be supplied.
 /// </param>
 /// <param name="boundariesToDiscard">
 /// Normally used for newline tokens if they are included in the tokenization. They may end the
 /// sentence (depending on <paramref name="newlineIsSentenceBreak"/>), but at any rate are
 /// deleted from sentences in the output. A substantive value must be supplied.
 /// </param>
 /// <param name="xmlBreakElementsToDiscard">
 /// Element names like "p" or "sent", each wrapped into a case-insensitive regex for approximate
 /// XML tag matching. They are deleted in the output and always trigger a sentence boundary.
 /// May be null or empty; both mean discard none.
 /// </param>
 /// <param name="regionElementRegex">
 /// XML element name regex delimiting the regions that are processed; tokens outside one of
 /// these elements are discarded. May be null, which means no filtering by region.
 /// </param>
 /// <param name="newlineIsSentenceBreak">How to treat newlines. Must have a substantive value.</param>
 /// <param name="sentenceBoundaryMultiTokenPattern">
 /// A TokensRegex multi-token pattern for finding boundaries.
 /// May be null, which means there are no such patterns.
 /// </param>
 /// <param name="tokenRegexesToDiscard">
 /// Regexes for tokens to discard. May be null, which means no tokens are discarded this way.
 /// </param>
 /// <param name="isOneSentence">
 /// Whether to treat the whole input as one sentence regardless.
 /// Must have a substantive value. Overrides anything else.
 /// </param>
 /// <param name="allowEmptySentences">
 /// Whether empty sentences may be output. Must have a substantive value. Often suppressed, but
 /// that is unwanted in things like strict one-sentence-per-line mode.
 /// </param>
 public WordToSentenceProcessor(
     string boundaryTokenRegex,
     string boundaryFollowersRegex,
     ICollection<string> boundariesToDiscard,
     ICollection<string> xmlBreakElementsToDiscard,
     string regionElementRegex,
     WordToSentenceProcessor.NewlineIsSentenceBreak newlineIsSentenceBreak,
     SequencePattern<In> sentenceBoundaryMultiTokenPattern,
     ICollection<string> tokenRegexesToDiscard,
     bool isOneSentence,
     bool allowEmptySentences)
 {
     // Plain settings that need no processing.
     this.newlineIsSentenceBreak = newlineIsSentenceBreak;
     this.sentenceBoundaryMultiTokenPattern = sentenceBoundaryMultiTokenPattern;
     this.isOneSentence = isOneSentence;
     this.allowEmptySentences = allowEmptySentences;

     // Sentence boundary settings.
     sentenceBoundaryTokenPattern = Pattern.Compile(boundaryTokenRegex);
     sentenceBoundaryFollowersPattern = Pattern.Compile(boundaryFollowersRegex);
     sentenceBoundaryToDiscard = Java.Util.Collections.UnmodifiableSet(boundariesToDiscard);

     // XML break elements: one tag-matching regex per element name, or null when none are given.
     if (xmlBreakElementsToDiscard != null && !xmlBreakElementsToDiscard.IsEmpty())
     {
         var breakPatterns = new List<Pattern>(xmlBreakElementsToDiscard.Count);
         foreach (string elementName in xmlBreakElementsToDiscard)
         {
             string tagRegex = "<\\s*(?:/\\s*)?(?:" + elementName + ")(?:\\s+[^>]+?|\\s*(?:/\\s*)?)>";
             // todo: Historically case insensitive, but maybe better and more proper to make case sensitive?
             breakPatterns.Add(Pattern.Compile(tagRegex, Pattern.CaseInsensitive | Pattern.UnicodeCase));
         }
         this.xmlBreakElementsToDiscard = breakPatterns;
     }
     else
     {
         this.xmlBreakElementsToDiscard = null;
     }

     // Region delimiters: opening-tag and closing-tag regexes, or null when regions are not used.
     if (regionElementRegex == null)
     {
         sentenceRegionBeginPattern = null;
         sentenceRegionEndPattern = null;
     }
     else
     {
         sentenceRegionBeginPattern = Pattern.Compile("<\\s*(?:" + regionElementRegex + ")(?:\\s+[^>]+?)?>");
         sentenceRegionEndPattern = Pattern.Compile("<\\s*/\\s*(?:" + regionElementRegex + ")\\s*>");
     }

     // Token discard patterns: each regex compiled as given, or null when none are supplied.
     if (tokenRegexesToDiscard == null)
     {
         this.tokenPatternsToDiscard = null;
     }
     else
     {
         var discardPatterns = new List<Pattern>(tokenRegexesToDiscard.Count);
         foreach (string tokenRegex in tokenRegexesToDiscard)
         {
             discardPatterns.Add(Pattern.Compile(tokenRegex));
         }
         this.tokenPatternsToDiscard = discardPatterns;
     }
 }
Beispiel #9
0
        /// <summary>
        /// Looks up the value stored under <paramref name="name"/> in <c>variables</c> and converts it
        /// to a sequence pattern expression.
        /// </summary>
        /// <remarks>
        /// Supported stored values, checked in this order: a <c>SequencePattern</c> (its pattern
        /// expression is returned), a <c>PatternExpr</c>, a <c>NodePattern</c> (wrapped in a new
        /// <c>NodePatternExpr</c>), or a <c>string</c> that is parsed with <c>parser.ParseSequence</c>.
        /// </remarks>
        /// <param name="name">Name of the variable to resolve.</param>
        /// <param name="copy">
        /// When the stored value is itself a <c>PatternExpr</c>, return the result of its <c>Copy()</c>
        /// instead of the stored instance. The flag is not consulted for the other value kinds.
        /// </param>
        /// <returns>The resolved expression, or <c>null</c> when the lookup yields <c>null</c>.</returns>
        /// <exception cref="Exception">
        /// The stored value has an unsupported type, or parsing a stored string fails (the original
        /// failure is kept as the inner exception).
        /// </exception>
        public virtual SequencePattern.PatternExpr GetSequencePatternExpr(string name, bool copy)
        {
            object bound = variables[name];

            if (bound == null)
            {
                return null;
            }

            if (bound is SequencePattern)
            {
                return ((SequencePattern)bound).GetPatternExpr();
            }

            if (bound is SequencePattern.PatternExpr)
            {
                SequencePattern.PatternExpr expression = (SequencePattern.PatternExpr)bound;
                if (copy)
                {
                    return expression.Copy();
                }

                return expression;
            }

            if (bound is NodePattern)
            {
                return new SequencePattern.NodePatternExpr((NodePattern)bound);
            }

            if (!(bound is string))
            {
                throw new Exception("Invalid sequence pattern variable class: " + bound.GetType());
            }

            // Only strings reach this point: parse the text into a sequence pattern expression.
            try
            {
                return parser.ParseSequence(this, (string)bound);
            }
            catch (Exception pex)
            {
                throw new Exception("Error parsing " + bound + " to sequence pattern", pex);
            }
        }
Beispiel #10
0
        /// <summary>
        /// Builds the AST for one push of two values followed by a two-argument pop and a ret,
        /// matches it against a three-statement pattern, and asserts that the variables captured
        /// from the assignment equal, in order, the variables used as the pop arguments.
        /// </summary>
        public void PushingTwoValuesOnStackShouldResultInTwoVariablesAssigned()
        {
            var instructions = new[]
            {
                DummyInstruction.Push(0, 2),
                DummyInstruction.Pop(1, 2),
                DummyInstruction.Ret(2)
            };
            var graph = ConstructAst(instructions);

            var pushedVariables = new CaptureGroup("variable");
            var popArguments = new CaptureGroup("argument");

            // stack_slot_1, stack_slot_2 = push 2()
            var pushAssignment = StatementPattern
                .Assignment<DummyInstruction>()
                .WithVariables(2)
                .CaptureVariables(pushedVariables);

            // pop(?, ?)
            var popStatement = StatementPattern.Expression(
                ExpressionPattern
                    .Instruction<DummyInstruction>()
                    .WithArguments(2)
                    .CaptureArguments(popArguments));

            // ret()
            var retStatement = StatementPattern.Instruction(new DummyInstructionPattern(DummyOpCode.Ret));

            var sequence = new SequencePattern<Statement<DummyInstruction>>(
                pushAssignment,
                popStatement,
                retStatement);

            var match = sequence.Match(graph.Nodes[0].Contents.Instructions);

            Assert.True(match.IsSuccess);

            var assigned = match.Captures[pushedVariables]
                .Cast<IVariable>()
                .ToArray();

            var consumed = match.Captures[popArguments]
                .Cast<VariableExpression<DummyInstruction>>()
                .Select(expression => expression.Variable)
                .ToArray();

            Assert.Equal(assigned, consumed);
        }
Beispiel #11
0
 /// <summary>
 /// Binds <paramref name="name"/> to the pattern expression obtained from
 /// <paramref name="pattern"/> via <c>GetPatternExpr()</c>.
 /// </summary>
 /// <param name="name">Variable name to bind.</param>
 /// <param name="pattern">Sequence pattern whose pattern expression is bound.</param>
 public virtual void Bind(string name, SequencePattern pattern)
 {
     var expression = pattern.GetPatternExpr();
     Bind(name, expression);
 }
 /// <summary>Creates an instance that stores the given sequence pattern.</summary>
 /// <param name="pattern">Sequence pattern kept in the <c>pattern</c> field; stored as-is, without a null check.</param>
 public SequenceRegexPattern(SequencePattern <T> pattern)
 {
     this.pattern = pattern;
 }
 /// <summary>
 /// Flexibly sets the acceptable sentence boundary tokens, substituting defaults for any of the
 /// first three arguments that is not supplied.
 /// </summary>
 /// <remarks>
 /// Delegates to the ten-argument constructor with no region element regex (<c>null</c>),
 /// <c>isOneSentence = false</c> and <c>allowEmptySentences = false</c>.
 /// A null <paramref name="boundaryTokenRegex"/> is replaced by <c>DefaultBoundaryRegex</c>,
 /// a null <paramref name="boundaryFollowersRegex"/> by <c>DefaultBoundaryFollowersRegex</c>, and a
 /// null or empty <paramref name="boundaryToDiscard"/> by <c>DefaultSentenceBoundariesToDiscard</c>.
 /// According to the original documentation, the default allowed set of boundary followers is the
 /// regex "[\\p{Pe}\\p{Pf}'\"]|''|-R[CRS]B-", and the default set of discarded separator tokens
 /// includes the newline tokens used by WhitespaceLexer and PTBLexer.
 /// </remarks>
 /// <param name="boundaryTokenRegex">The regex of boundary tokens. If null, the default is used.</param>
 /// <param name="boundaryFollowersRegex">
 /// The regex of boundary following tokens. If null, the default is used.
 /// These are tokens which should normally be added on to the current sentence even after
 /// something normally sentence ending has been seen. For example, a close parenthesis or close
 /// quote typically goes with the current sentence, even after a period or question mark.
 /// </param>
 /// <param name="boundaryToDiscard">
 /// The set of regex for sentence boundary tokens that should be discarded.
 /// If null or empty, the default is used.
 /// </param>
 /// <param name="xmlBreakElementsToDiscard">
 /// XML element names like "p", which will be recognized, treated as sentence ends, and
 /// discarded. If null, an empty set is passed on, meaning none.
 /// </param>
 /// <param name="newlineIsSentenceBreak">
 /// Strategy for counting line ends (the tokens in <paramref name="boundaryToDiscard"/>) as sentence ends.
 /// </param>
 /// <param name="sentenceBoundaryMultiTokenPattern">
 /// Multi-token boundary pattern; passed through unchanged to the ten-argument constructor.
 /// </param>
 /// <param name="tokenRegexesToDiscard">
 /// Regexes for tokens to discard; passed through unchanged to the ten-argument constructor.
 /// </param>
 public WordToSentenceProcessor(
     string boundaryTokenRegex,
     string boundaryFollowersRegex,
     ICollection<string> boundaryToDiscard,
     ICollection<string> xmlBreakElementsToDiscard,
     WordToSentenceProcessor.NewlineIsSentenceBreak newlineIsSentenceBreak,
     SequencePattern<In> sentenceBoundaryMultiTokenPattern,
     ICollection<string> tokenRegexesToDiscard)
     : this(
         boundaryTokenRegex ?? DefaultBoundaryRegex,
         boundaryFollowersRegex ?? DefaultBoundaryFollowersRegex,
         boundaryToDiscard != null && !boundaryToDiscard.IsEmpty() ? boundaryToDiscard : DefaultSentenceBoundariesToDiscard,
         xmlBreakElementsToDiscard != null ? xmlBreakElementsToDiscard : Java.Util.Collections.EmptySet(),
         null,
         newlineIsSentenceBreak,
         sentenceBoundaryMultiTokenPattern,
         tokenRegexesToDiscard,
         false,
         false)
 {
 }
Beispiel #14
0
 /// <summary>Creates an action that stores the given sequence pattern.</summary>
 /// <param name="pattern">Sequence pattern kept in the <c>pattern</c> field; stored as-is, without a null check.</param>
 public StartMatchAction(SequencePattern <T> pattern)
 {
     this.pattern = pattern;
 }
 /// <summary>
 /// Creates a matcher by forwarding both arguments to the base constructor; no additional
 /// initialization is performed here.
 /// </summary>
 /// <param name="pattern">Sequence pattern to match.</param>
 /// <param name="tokens">List of elements the pattern is matched against.</param>
 public CoreMapSequenceMatcher(SequencePattern <T> pattern, IList <T> tokens)
     : base(pattern, tokens)
 {
 }