Пример #1
0
        /// <summary>
        /// Converts a negated character class such as [^a-zA-Z] into an equivalent
        /// positive <c>CharClass</c> made of the ranges that are NOT excluded.
        /// </summary>
        /// <param name="e">The negated character class whose parts are to be complemented.</param>
        /// <returns>
        /// A <c>CharClass</c> holding the gaps left in 0..0xffff after removing every
        /// excluded range, in ascending order.
        /// </returns>
        private CharClass canonicalized(NotCharClass e)
        {
            /*
             * We start with the 'any char' range, from 0 to 0xffff, and walk the excluded
             * ranges in ascending order, emitting the gap that precedes each of them.
             */

            // todo:
            // Since the code tries to simplify things by converting
            // all CharClassPart objects into ranges, we treat 'Any'
            // as a range from 0 to 0xffff (the maximum value for a char, i.e. 16-bit Unicode).
            // This probably wreaks havoc with internationalization, but we're using chars
            // and ignoring anything more than 16 bits for now anyway. If we later use a more
            // advanced library we need to deal with 'Any' and 'Not' for CharClasses in a more
            // general (and encoding independent) way.

            // The loop below relies on 'excluded' being sorted, which (per the original author's
            // note) is provided by the other canonicalized(..) overload.
            List <CharPartRange> excluded = canonicalized(e.Parts);

            List <CharClassPart> result = new List <CharClassPart>();

            // These must be SIGNED ints: From - 1 can become -1 and To + 1 can become 0x10000,
            // neither of which fits in a char without wrapping around.
            int start = 0, end = 0xffff;

            foreach (CharPartRange r in excluded)
            {
                // Gap between the end of what has been excluded so far and this range.
                int gapFrom = start, gapTo = r.From - 1;
                if (valid(gapFrom, gapTo))
                {
                    result.Add(new CharPartRange((char)gapFrom, (char)gapTo));
                }

                // Only ever move 'start' forward. Sorting alone does not rule out overlapping or
                // nested ranges (e.g. a-z followed by c-e); stepping back to 'e' + 1 would put
                // f..z, which are excluded, back into the result.
                int afterRange = r.To + 1;
                if (afterRange > start)
                {
                    start = afterRange;
                }
            }

            // Whatever remains after the last excluded range, up to the top of the char space.
            if (valid(start, end))
            {
                result.Add(new CharPartRange((char)start, (char)end));
            }

            return(new CharClass(result));
        }
Пример #2
0
        /// <summary>
        /// Recursively builds a non-deterministic finite automaton (with epsilon transitions)
        /// for the regular-expression tree rooted at <paramref name="_e"/>.
        /// </summary>
        /// <param name="_e">Root node of the regular expression to translate.</param>
        /// <returns>
        /// An automaton for the expression. The composite cases read the accepting state of
        /// each sub-automaton through <c>onlyAcceptingState</c>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="_e"/> is null.</exception>
        /// <exception cref="NotImplementedException">The node is a <c>ByName</c> reference.</exception>
        /// <exception cref="NotSupportedException">The node type is not handled by this method.</exception>
        public FA nfaFromRegEx(RExpr _e)
        {
            if (_e == null)
            {
                throw new ArgumentNullException("_e");
            }

            if (_e is ByName)
            {
                throw new System.NotImplementedException("nfaFromRegEx/ByName not implemented");
            }
            else if (_e is CharClass)
            {
                // A single transition labelled with the (canonicalized) set of characters.
                CharClass e     = canonicalized((CharClass)_e);
                int       start = newState();
                int       end   = newState();

                return(fa(start, end).trans(start, end, charRange(e)));
            }
            else if (_e is NotCharClass)
            {
                // The negated class is first turned into the equivalent positive class.
                CharClass e     = canonicalized((NotCharClass)_e);
                int       start = newState();
                int       end   = newState();

                return(fa(start, end).trans(start, end, charRange(e)));
            }
            else if (_e is Oring)
            {
                // Alternation: a fresh start state fans out to every option through epsilon
                // transitions, and every option's accepting state joins a fresh end state.
                Oring      e             = (Oring)_e;
                List <FA>  opts          = Utils.list(e.Exprs.Select(a => { return(nfaFromRegEx(a)); }));
                List <int> startEpsilons = Utils.list(opts.Select(a => { return(a.startState); }));
                List <int> endEpsilons   = Utils.list(opts.Select(a => { return(onlyAcceptingState(a)); }));

                int start = newState();
                int end   = newState();

                FA _fa = fa(start, end).merge(opts);

                for (int i = 0; i < opts.Count; ++i)
                {
                    _fa.trans(start, startEpsilons[i], epsilon());
                    _fa.trans(endEpsilons[i], end, epsilon());
                }
                return(_fa);
            }
            else if (_e is Plus)
            {
                // One or more: a back edge from the accepting state to the start state.
                // Doing this in place is sound, because reaching the accepting state always
                // means a complete match of the operand has just been read.
                Plus e     = (Plus)_e;
                FA   inner = nfaFromRegEx(e.Expr);
                inner.trans(onlyAcceptingState(inner), inner.startState, epsilon());
                return(inner);
            }
            else if (_e is RXSeq)
            {
                RXSeq e = (RXSeq)_e;
                if (e.Exprs.Count == 0)
                {
                    // An empty sequence (e.g. produced by an empty Str) matches only the empty
                    // string. Without this case opts[0] below would be out of range.
                    int start   = newState();
                    int end     = newState();
                    FA  emptyFa = fa(start, end);
                    emptyFa.trans(start, end, epsilon());
                    return(emptyFa);
                }
                if (e.Exprs.Count == 1)
                {
                    return(nfaFromRegEx(e.Exprs[0]));
                }

                // Concatenation: chain each part's accepting state to the next part's start.
                List <FA> opts = Utils.list(e.Exprs.Select(a => { return(nfaFromRegEx(a)); }));
                FA        _fa  = fa(opts[0].startState, onlyAcceptingState(Utils.last(opts))).merge(opts);
                for (int i = 0; i < opts.Count - 1; ++i)
                {
                    int prev = onlyAcceptingState(opts [i]);
                    int next = opts [i + 1].startState;
                    _fa.trans(prev, next, epsilon());
                }
                return(_fa);
            }
            else if (_e is Star)
            {
                // Zero or more: optional and repeatable.
                Star e = (Star)_e;
                return(wrapWithFreshStates(nfaFromRegEx(e.Expr), true));
            }
            else if (_e is Question)
            {
                // Zero or one: optional, not repeatable.
                Question e = (Question)_e;
                return(wrapWithFreshStates(nfaFromRegEx(e.Expr), false));
            }
            else if (_e is Str)
            {
                // A literal string is the sequence of its single-character classes.
                Str          e   = (Str)_e;
                List <RExpr> seq = new List <RExpr>();
                // todo: iterating over a string, this would
                // not work with surrogate pairs...etc
                for (int i = 0; i < e.Value.Length; ++i)
                {
                    char c = e.Value[i];
                    List <CharClassPart> v  = Utils.list1((CharClassPart) new CharPartRange(c, c));
                    CharClass            cc = new CharClass(v);
                    seq.Add(cc);
                }
                return(nfaFromRegEx(new RXSeq(seq)));
            }
            else
            {
                throw new NotSupportedException(
                          "nfaFromRegEx: unhandled regular expression node type " + _e.GetType().FullName);
            }
        }

        /// <summary>
        /// Wraps <paramref name="inner"/> between a freshly allocated start and end state so
        /// that it becomes optional and, when <paramref name="repeat"/> is true, repeatable.
        /// </summary>
        /// <remarks>
        /// The "skip" epsilon must not be added directly from the operand's own start state to
        /// its accepting state: if that start state has incoming transitions (as it does after
        /// a nested star), a partially matched operand could jump straight to acceptance.
        /// For example (a*b)? would wrongly accept "a".
        /// </remarks>
        /// <param name="inner">Automaton of the operand.</param>
        /// <param name="repeat">True for star (zero or more), false for question (zero or one).</param>
        /// <returns>The wrapping automaton, with the new start and end states.</returns>
        private FA wrapWithFreshStates(FA inner, bool repeat)
        {
            int innerStart = inner.startState;
            int innerEnd   = onlyAcceptingState(inner);

            int start = newState();
            int end   = newState();

            // Same construction as the alternation case, with a single option.
            List <FA> parts = new List <FA>();
            parts.Add(inner);
            FA wrapped = fa(start, end).merge(parts);

            wrapped.trans(start, innerStart, epsilon());
            wrapped.trans(innerEnd, end, epsilon());
            // Zero occurrences: bypass the operand entirely.
            wrapped.trans(start, end, epsilon());
            if (repeat)
            {
                // Further occurrences: loop back after a complete match of the operand.
                wrapped.trans(innerEnd, innerStart, epsilon());
            }
            return(wrapped);
        }