/// <summary>
 /// Renders this charset range entry as text
 /// </summary>
 /// <remarks>
 /// A range spanning two characters is written as those two characters, a range spanning
 /// three characters is written as all three, and any other range is written as "first-last".
 /// Every character is passed through <c>RegexExpression.EscapeRangeChar</c>.
 /// </remarks>
 /// <returns>The string representation of this charset entry</returns>
 public override string ToString()
 {
     var distance = Last - First;
     var head     = RegexExpression.EscapeRangeChar(First);

     // What goes between the two endpoints depends on how far apart they are
     string middle;
     if (1 == distance)
     {
         middle = string.Empty;
     }
     else if (2 == distance)
     {
         middle = RegexExpression.EscapeRangeChar((char)(First + 1));
     }
     else
     {
         middle = "-";
     }

     var tail = RegexExpression.EscapeRangeChar(Last);
     return string.Concat(head, middle, tail);
 }
Ejemplo n.º 2
0
 /// <summary>
 /// Creates a new expression with the specified left and right hand sides
 /// </summary>
 /// <remarks>
 /// When more than one right hand expression is supplied, the expressions are folded into a
 /// left-leaning chain: each expression except the last is paired with everything before it
 /// in a nested concatenation that becomes the new left hand side, and the last expression
 /// becomes the right hand side of this instance.
 /// </remarks>
 /// <param name="left">The left expression</param>
 /// <param name="right">The right expressions. A null array is treated the same as an empty one.</param>
 public RegexConcatExpression(RegexExpression left, params RegexExpression[] right)
 {
     Left = left;

     // Passing a bare null for a params argument binds it as a null array rather than a
     // one-element array, so guard here instead of failing on right.Length
     if (null == right)
     {
         return;
     }

     var lastIndex = right.Length - 1;
     for (int i = 0; i < right.Length; i++)
     {
         if (null == Right)
         {
             Right = right[i];
         }
         if (i != lastIndex)
         {
             // More expressions follow: push what has been gathered so far down into a
             // nested concatenation and free up Right for the next expression
             var nested = new RegexConcatExpression();
             nested.Left  = Left;
             nested.Right = Right;
             Right        = null;
             Left         = nested;
         }
     }
 }
Ejemplo n.º 3
0
 /// <summary>
 /// Renders this charset range entry as text
 /// </summary>
 /// <returns>The escaped first character, a dash, and the escaped last character</returns>
 public override string ToString()
 {
     var lower = RegexExpression.EscapeRangeChar(First);
     var upper = RegexExpression.EscapeRangeChar(Last);
     return lower + "-" + upper;
 }
Ejemplo n.º 4
0
 /// <summary>
 /// Renders this single-character charset entry as text
 /// </summary>
 /// <returns>The entry's value after being passed through <c>RegexExpression.EscapeRangeChar</c></returns>
 public override string ToString()
 {
     string escaped = RegexExpression.EscapeRangeChar(Value);
     return escaped;
 }
Ejemplo n.º 5
0
 /// <summary>
 /// Creates a repeat expression with the specified target expression, and minimum and maximum occurrences
 /// </summary>
 /// <param name="expression">The target expression</param>
 /// <param name="minOccurs">The minimum number of times the target expression can occur, or -1 (the default)</param>
 /// <param name="maxOccurs">The maximum number of times the target expression can occur, or -1 (the default)</param>
 public RegexRepeatExpression(RegexExpression expression, int minOccurs = -1, int maxOccurs = -1)
 {
     this.Expression = expression;
     this.MinOccurs  = minOccurs;
     this.MaxOccurs  = maxOccurs;
 }
 /// <summary>
 /// Creates an optional expression that wraps the given target expression
 /// </summary>
 /// <param name="expression">The target expression to make optional</param>
 public RegexOptionalExpression(RegexExpression expression)
 {
     this.Expression = expression;
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Wraps an expression in a repeat or optional expression if a modifier follows it
        /// </summary>
        /// <remarks>
        /// Recognizes '*', '+', '?' and the counted form '{min,max}' at the current input. When
        /// none of these is present the expression is returned untouched and no input is consumed.
        /// </remarks>
        /// <param name="expr">The expression the modifier applies to</param>
        /// <param name="pc">The parse context positioned just after <paramref name="expr"/></param>
        /// <returns>The wrapped expression, or <paramref name="expr"/> itself when no modifier is present</returns>
        static RegexExpression _ParseModifier(RegexExpression expr, ParseContext pc)
        {
            // The wrapper is located at the spot where the modifier begins
            var startLine     = pc.Line;
            var startColumn   = pc.Column;
            var startPosition = pc.Position;
            var modifier      = pc.Current;

            if ('*' == modifier)
            {
                expr = new RegexRepeatExpression(expr);
                expr.SetLocation(startLine, startColumn, startPosition);
                pc.Advance();
                return expr;
            }
            if ('+' == modifier)
            {
                expr = new RegexRepeatExpression(expr, 1);
                expr.SetLocation(startLine, startColumn, startPosition);
                pc.Advance();
                return expr;
            }
            if ('?' == modifier)
            {
                expr = new RegexOptionalExpression(expr);
                expr.SetLocation(startLine, startColumn, startPosition);
                pc.Advance();
                return expr;
            }
            if ('{' != modifier)
            {
                return expr;
            }

            // Counted repetition: "{min}", "{min,}", "{,max}" or "{min,max}"
            pc.Advance();
            pc.TrySkipWhiteSpace();
            pc.Expecting('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ',', '}');

            var minOccurs = -1;
            if (!(',' == pc.Current || '}' == pc.Current))
            {
                var mark = pc.CaptureBuffer.Length;
                pc.TryReadDigits();
                minOccurs = int.Parse(pc.GetCapture(mark));
                pc.TrySkipWhiteSpace();
            }

            // Without a comma the count is exact, so the maximum mirrors the minimum
            var maxOccurs = minOccurs;
            if (',' == pc.Current)
            {
                // With a comma the maximum stays at -1 unless digits follow
                maxOccurs = -1;
                pc.Advance();
                pc.TrySkipWhiteSpace();
                pc.Expecting('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '}');
                if ('}' != pc.Current)
                {
                    var mark = pc.CaptureBuffer.Length;
                    pc.TryReadDigits();
                    maxOccurs = int.Parse(pc.GetCapture(mark));
                    pc.TrySkipWhiteSpace();
                }
            }

            pc.Expecting('}');
            pc.Advance();
            expr = new RegexRepeatExpression(expr, minOccurs, maxOccurs);
            expr.SetLocation(startLine, startColumn, startPosition);
            return expr;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Parses a regular expression from the specified <see cref="ParseContext"/>
        /// </summary>
        /// <remarks>
        /// Elements are consumed one at a time and folded left to right into concatenations. A
        /// trailing modifier (*, +, ?, {m,n}) binds only to the element it directly follows.
        /// Parsing stops, without consuming it, at a ')' so that the recursive call made for a
        /// '(' group returns to its caller, and it also stops when the current input is -1.
        /// </remarks>
        /// <param name="pc">The parse context to use</param>
        /// <returns>A new abstract syntax tree representing the expression, or null if no element was read before parsing stopped</returns>
        public static RegexExpression Parse(ParseContext pc)
        {
            RegexExpression result = null, next = null;
            int             ich;

            pc.EnsureStarted();

            // Start location of the element about to be parsed; refreshed after every element
            var line     = pc.Line;
            var column   = pc.Column;
            var position = pc.Position;

            while (true)
            {
                switch (pc.Current)
                {
                case -1:
                    return(result);

                case '.':
                    // Any character: a set covering the entire char range
                    next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetRangeEntry(char.MinValue, char.MaxValue) }, false);
                    next.SetLocation(line, column, position);
                    pc.Advance();
                    // The modifier must bind to the dot alone, not to everything parsed so far,
                    // so apply it before concatenating (as every other branch does)
                    next = _ParseModifier(next, pc);
                    if (null == result)
                    {
                        result = next;
                    }
                    else
                    {
                        result = new RegexConcatExpression(result, next);
                        result.SetLocation(line, column, position);
                    }
                    line     = pc.Line;
                    column   = pc.Column;
                    position = pc.Position;
                    break;

                case '\\':
                    // Escape: either a named character class shorthand or an escaped literal
                    pc.Advance();
                    pc.Expecting();
                    switch (pc.Current)
                    {
                    case 'd':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("digit") });
                        pc.Advance();
                        break;

                    case 'D':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("digit") }, true);
                        pc.Advance();
                        break;

                    case 'h':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("blank") });
                        pc.Advance();
                        break;

                    case 'l':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("lower") });
                        pc.Advance();
                        break;

                    case 's':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("space") });
                        pc.Advance();
                        break;

                    case 'S':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("space") }, true);
                        pc.Advance();
                        break;

                    case 'u':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("upper") });
                        pc.Advance();
                        break;

                    case 'w':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("word") });
                        pc.Advance();
                        break;

                    case 'W':
                        next = new RegexCharsetExpression(new RegexCharsetEntry[] { new RegexCharsetClassEntry("word") }, true);
                        pc.Advance();
                        break;

                    default:
                        if (-1 != (ich = _ParseEscapePart(pc)))
                        {
                            next = new RegexLiteralExpression((char)ich);
                        }
                        else
                        {
                            pc.Expecting();                                          // throw an error
                            return(null);                                            // doesn't execute
                        }
                        break;
                    }
                    next.SetLocation(line, column, position);
                    next = _ParseModifier(next, pc);
                    if (null != result)
                    {
                        result = new RegexConcatExpression(result, next);
                        result.SetLocation(line, column, position);
                    }
                    else
                    {
                        result = next;
                    }
                    line     = pc.Line;
                    column   = pc.Column;
                    position = pc.Position;
                    break;

                case ')':
                    // Left unconsumed: the caller that opened the group checks for and skips it
                    return(result);

                case '(':
                    // Group: parse the inner expression recursively, then require the closing ')'
                    pc.Advance();
                    pc.Expecting();
                    next = Parse(pc);
                    pc.Expecting(')');
                    pc.Advance();
                    next = _ParseModifier(next, pc);
                    if (null == result)
                    {
                        result = next;
                    }
                    else
                    {
                        result = new RegexConcatExpression(result, next);
                        result.SetLocation(line, column, position);
                    }
                    line     = pc.Line;
                    column   = pc.Column;
                    position = pc.Position;
                    break;

                case '|':
                    // Alternation: everything parsed so far is the left side and the rest of
                    // the input (parsed recursively) is the right side
                    if (-1 != pc.Advance())
                    {
                        next   = Parse(pc);
                        result = new RegexOrExpression(result, next);
                        result.SetLocation(line, column, position);
                    }
                    else
                    {
                        result = new RegexOrExpression(result, null);
                        result.SetLocation(line, column, position);
                    }
                    line     = pc.Line;
                    column   = pc.Column;
                    position = pc.Position;
                    break;

                case '[':
                    // Character set, optionally negated with a leading '^'
                    pc.ClearCapture();
                    pc.Advance();
                    pc.Expecting();
                    bool not = false;

                    if ('^' == pc.Current)
                    {
                        not = true;
                        pc.Advance();
                        pc.Expecting();
                    }
                    var ranges = _ParseRanges(pc);
                    pc.Expecting(']');
                    pc.Advance();
                    next = new RegexCharsetExpression(ranges, not);
                    next.SetLocation(line, column, position);
                    next = _ParseModifier(next, pc);

                    if (null == result)
                    {
                        result = next;
                    }
                    else
                    {
                        result = new RegexConcatExpression(result, next);
                        // Use the saved start location, consistent with the other branches
                        result.SetLocation(line, column, position);
                    }
                    line     = pc.Line;
                    column   = pc.Column;
                    position = pc.Position;
                    break;

                default:
                    // Anything else is a literal character
                    ich  = pc.Current;
                    next = new RegexLiteralExpression((char)ich);
                    next.SetLocation(line, column, position);
                    pc.Advance();
                    next = _ParseModifier(next, pc);
                    if (null == result)
                    {
                        result = next;
                    }
                    else
                    {
                        result = new RegexConcatExpression(result, next);
                        result.SetLocation(line, column, position);
                    }
                    line     = pc.Line;
                    column   = pc.Column;
                    position = pc.Position;
                    break;
                }
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Builds an expression from the transitions leaving a state machine state, recursing into target states
        /// </summary>
        /// <typeparam name="TAccept">The accept symbol type of the state machine</typeparam>
        /// <param name="fa">The state to convert</param>
        /// <param name="visited">The states already converted; a state found here is not expanded again</param>
        /// <returns>The expression for this state, or null if the state was already visited or has no transitions to convert</returns>
        static RegexExpression _FromFA<TAccept>(CharFA<TAccept> fa, HashSet<CharFA<TAccept>> visited)
        {
            if (!visited.Add(fa))
            {
                return null;
            }

            var  transitions = fa.FillInputTransitionRangesGroupedByState();
            bool accepting   = fa.IsAccepting;
            RegexExpression alternatives = null;

            foreach (var transition in transitions)
            {
                var ranges = transition.Value;

                // A lone single-character range becomes a literal; anything else becomes a set
                RegexExpression branch;
                if (1 != ranges.Count || 1 != ranges[0].Length)
                {
                    var entries = new List<RegexCharsetEntry>();
                    foreach (var range in ranges)
                    {
                        if (range.First != range.Last)
                        {
                            entries.Add(new RegexCharsetRangeEntry(range.First, range.Last));
                        }
                        else
                        {
                            entries.Add(new RegexCharsetCharEntry(range.First));
                        }
                    }
                    branch = new RegexCharsetExpression(entries);
                }
                else
                {
                    branch = new RegexLiteralExpression(ranges[0][0]);
                }

                // Append whatever the target state contributes
                var tail = _FromFA(transition.Key, visited);
                if (null != tail)
                {
                    branch = new RegexConcatExpression(branch, tail);
                }

                // Each target state adds one alternative
                if (null != alternatives)
                {
                    alternatives = new RegexOrExpression(alternatives, branch);
                }
                else
                {
                    alternatives = branch;
                }
            }

            // Does this state appear among its own descendants?
            var loopsBack = false;
            foreach (var descendant in fa.Descendants)
            {
                if (descendant == fa)
                {
                    loopsBack = true;
                    break;
                }
            }

            if (!accepting || fa.IsFinal || loopsBack)
            {
                return alternatives;
            }

            // An accepting, non-final, non-looping state makes what follows it optional
            return new RegexOptionalExpression(alternatives);
        }