Пример #1
0
        /**
         * Checks whether every interval of <code>set</code> lies inside an
         * interval of this set.
         *
         * Both interval lists are walked in parallel. The walk assumes that each
         * list is sorted by start and that neighbouring intervals of this set are
         * neither overlapping nor adjacent (NOTE(review): this is what add(Interval)
         * is expected to guarantee -- confirm).
         *
         * Unlike the previous version this method
         *  - returns false instead of indexing past the end of this.intervalls
         *    when this set runs out of intervals, and
         *  - returns false instead of looping forever when an interval of
         *    <code>set</code> only partially overlaps an interval of this set.
         *
         * @param set  the set to test for inclusion (must not be null)
         * @return true iff each interval of <code>set</code> is contained in one
         *         interval of this set
         */
        public bool contains(IntCharSet set)
        {
            int i = 0; // index in this.intervalls
            int j = 0; // index in set.intervalls

            int size    = intervalls.Count;
            int setSize = set.intervalls.Count;

            while (j < setSize)
            {
                if (i >= size)
                {
                    // nothing left in this set that could cover set.intervalls[j]
                    return false;
                }

                Interval x = (Interval)intervalls[i];
                Interval y = (Interval)set.intervalls[j];

                if (x.end < y.start)
                {
                    // x lies completely before y, try the next interval of this set
                    i++;
                    continue;
                }

                if (x.contains(y))
                {
                    // y is covered, x may still cover the next interval of set
                    j++;
                    continue;
                }

                // x starts after y ends, or x covers y only partially:
                // in both cases some character of y is missing from this set
                return false;
            }

            return true;
        }
Пример #2
0
        /**
         * Collects the intervals of all char classes into one array.
         *
         * The array is meant to be ordered by char code, i.e.
         * <code>result[i+1].start = result[i].end+1</code>: after an interval
         * has been stored, the search continues with the character that
         * directly follows its end.
         *
         * Every CharClassInterval carries the number (code) of the char class
         * it was taken from.
         *
         * @return one CharClassInterval per interval stored in the classes
         */
        public CharClassInterval [] getIntervalls()
        {
            // total number of intervals over all classes
            int classCount = classes.Count;
            int total      = 0;

            for (int k = 0; k < classCount; k++)
            {
                IntCharSet cls = (IntCharSet)classes[k];
                total += cls.numIntervalls();
            }

            CharClassInterval [] result = new CharClassInterval[total];

            // next character that has not been assigned to an interval yet
            int next = 0;

            for (int filled = 0; filled < total; filled++)
            {
                int      code = getClassCode((char)next);
                Interval iv   = ((IntCharSet)classes[code]).getNext();

                result[filled] = new CharClassInterval(iv.start, iv.end, code);
                next           = iv.end + 1;
            }

            return result;
        }
Пример #3
0
        /**
         * Debugging aid: verifies the consistency of the stored classes.
         *
         * Reports every pair of classes that share at least one character on
         * the console, then asks for the class code of every character from 0
         * up to and including maxChar (printing a progress dot for every
         * character code divisible by 100).
         */
        public void check()
        {
            // pairwise disjointness
            for (int first = 0; first < classes.Count; first++)
            {
                for (int second = first + 1; second < classes.Count; second++)
                {
                    IntCharSet a = (IntCharSet)classes[first];
                    IntCharSet b = (IntCharSet)classes[second];

                    if (!a.and(b).containsElements())
                    {
                        continue;
                    }

                    Console.WriteLine("Error: non disjoint char classes {0} and {1}", first, second);
                    Console.WriteLine("class {0}: {1}", first, a);
                    Console.WriteLine("class {0}: {1}", second, b);
                }
            }

            // every character must have a class code,
            // i.e. getClassCode has to come back for each of them
            int code = 0;
            while (code < maxChar)
            {
                getClassCode((char)code);
                if (code % 100 == 0)
                {
                    Console.Write(".");
                }
                code++;
            }

            getClassCode(maxChar);
        }
Пример #4
0
 /**
  * Adds every interval of <code>set</code> to this set, one after the other,
  * via add(Interval).
  *
  * @param set  the set whose intervals are added
  * @return this set, to allow call chaining
  */
 public IntCharSet add(IntCharSet set)
 {
     int index = 0;

     while (index < set.intervalls.Count)
     {
         Interval next = (Interval)set.intervalls[index];
         add(next);
         index++;
     }

     return this;
 }
Пример #5
0
        /**
         * Intersection: builds a new set from the overlapping parts of the
         * intervals of this set and of <code>set</code>.
         *
         * Neither operand is modified. Both interval lists are scanned once,
         * in parallel.
         *
         * @param set  the set to intersect with
         * @return a new IntCharSet holding the overlaps
         */
        public IntCharSet and(IntCharSet set)
        {
            if (DEBUG)
            {
                Out.dump("intersection");
                Out.dump("this  : " + this);
                Out.dump("other : " + set);
            }

            IntCharSet result = new IntCharSet();

            int size    = intervalls.Count;
            int setSize = set.intervalls.Count;

            // i walks this.intervalls, j walks set.intervalls
            for (int i = 0, j = 0; i < size && j < setSize;)
            {
                Interval mine   = (Interval)this.intervalls[i];
                Interval theirs = (Interval)set.intervalls[j];

                if (mine.end < theirs.start)
                {
                    // mine ends before theirs begins
                    i++;
                }
                else if (theirs.end < mine.start)
                {
                    // theirs ends before mine begins
                    j++;
                }
                else
                {
                    // the two intervals overlap: keep the common part
                    Interval common = new Interval(
                        max(mine.start, theirs.start),
                        min(mine.end, theirs.end));
                    result.intervalls.Add(common);

                    // step past whichever interval ends first (both on a tie)
                    if (mine.end >= theirs.end)
                    {
                        j++;
                    }
                    if (theirs.end >= mine.end)
                    {
                        i++;
                    }
                }
            }

            if (DEBUG)
            {
                Out.dump("result: " + result);
            }

            return result;
        }
Пример #6
0
        /**
         * Refines the current partition so that the characters of
         * <code>set</code> end up separated from all other characters.
         *
         * Afterwards no character class contains both a character that is an
         * element of <code>set</code> and one that is not.
         *
         * Note that <code>set</code> is reduced in place while the classes are
         * scanned (when <code>caseless</code> is true the reduction happens on
         * the caseless copy instead).
         *
         * @param set       the set of characters to distinguish from the rest
         * @param caseless  if true upper/lower/title case are considered equivalent
         */
        public void makeClass(IntCharSet set, bool caseless)
        {
            if (caseless)
            {
                set = set.getCaseless();
            }

            if (DEBUG)
            {
                Out.dump("makeClass(" + set + ")");
                dump();
            }

            try
            {
                // classes appended during the scan are not visited again
                int initialCount = classes.Count;

                for (int k = 0; k < initialCount; k++)
                {
                    IntCharSet existing = (IntCharSet)classes[k];

                    if (existing.Equals(set))
                    {
                        // the requested class is already there
                        return;
                    }

                    IntCharSet shared = existing.and(set);

                    if (!shared.containsElements())
                    {
                        continue;
                    }

                    if (existing.Equals(shared))
                    {
                        // existing class lies completely inside set:
                        // keep it and go on with the rest of set
                        set.sub(shared);
                        continue;
                    }

                    if (set.Equals(shared))
                    {
                        // (rest of) set lies completely inside the existing
                        // class: split it off and stop
                        existing.sub(shared);
                        classes.Add(shared);
                        return;
                    }

                    // partial overlap: the common part becomes a class of its own
                    set.sub(shared);
                    existing.sub(shared);
                    classes.Add(shared);
                }
            }
            finally
            {
                if (DEBUG)
                {
                    Out.dump("makeClass(..) finished");
                    dump();
                }
            }
        }
Пример #7
0
        /**
         * Computes the "last" set of a regular expression, i.e. the
         * last-character projection of its language.
         *
         * @param re  the expression to analyse
         * @return the set of characters computed for <code>re</code>
         * @throws Exception if the type of <code>re</code> is not handled
         */
        private static IntCharSet last(RegExp re)
        {
            switch (re.type)
            {
            case sym.BAR:
            {
                // alternative: union of both branches
                RegExp2 alt  = (RegExp2)re;
                IntCharSet left = last(alt.r1);
                return left.add(last(alt.r2));
            }

            case sym.CONCAT:
            {
                RegExp2 seq = (RegExp2)re;
                if (!containsEpsilon(seq.r2))
                {
                    return last(seq.r2);
                }

                // the second part may be empty, so the first part contributes too
                IntCharSet head = last(seq.r1);
                return head.add(last(seq.r2));
            }

            case sym.STAR:
            case sym.PLUS:
            case sym.QUESTION:
            {
                RegExp inner = (RegExp)((RegExp1)re).content;
                return last(inner);
            }

            case sym.CCLASS:
            {
                ArrayList members = (ArrayList)((RegExp1)re).content;
                return new IntCharSet(members);
            }

            case sym.CCLASSNOT:
            {
                // everything from 0 to maxChar, minus the listed characters
                IntCharSet universe = new IntCharSet(new Interval((char)0, maxChar));
                IntCharSet excluded = new IntCharSet((ArrayList)((RegExp1)re).content);
                universe.sub(excluded);
                return universe;
            }

            case sym.CHAR:
            {
                char single = (char)((RegExp1)re).content;
                return new IntCharSet(single);
            }

            case sym.STRING:
            {
                String text = (String)((RegExp1)re).content;
                if (text.Length == 0)
                {
                    return new IntCharSet();
                }
                return new IntCharSet(text[text.Length - 1]);
            }

            case sym.MACROUSE:
            {
                String macroName = (String)((RegExp1)re).content;
                return last(macros.getDefinition(macroName));
            }
            }

            throw new Exception("Unkown expression type " + re.type + " in " + re);
        }
Пример #8
0
        /**
         * Creates a deep copy of this char set: each interval is duplicated
         * with Interval.copy() and appended to a fresh IntCharSet in the
         * same order.
         *
         * @return the copy
         */
        public IntCharSet copy()
        {
            IntCharSet duplicate = new IntCharSet();

            foreach (Interval original in intervalls)
            {
                duplicate.intervalls.Add(original.copy());
            }

            return duplicate;
        }
Пример #9
0
        /**
         * Looks up the character class that contains the given character.
         *
         * The classes are probed in order, starting at index 0. There is no
         * upper bound on the search: if no class contains <code>letter</code>
         * the lookup runs past the end of <code>classes</code> and fails with
         * whatever exception the indexer throws.
         *
         * @param letter  the character to classify
         * @return the index (code) of the first class containing <code>letter</code>
         */
        public int getClassCode(char letter)
        {
            for (int code = 0; ; code++)
            {
                IntCharSet candidate = (IntCharSet)classes[code];
                if (candidate.contains(letter))
                {
                    return code;
                }
            }
        }
Пример #10
0
        /**
         * Determines the codes of all character classes selected by
         * <code>set</code>.
         *
         * Without negation a class is selected when it shares at least one
         * character with <code>set</code>; with negation it is selected when
         * it shares none.
         *
         * @param set     the set of input characters
         * @param negate  if true, select the classes that do NOT intersect set
         * @return the selected class codes in ascending order
         */
        private int [] getClassCodes(IntCharSet set, bool negate)
        {
            if (DEBUG)
            {
                Out.dump("getting class codes for " + set);
                if (negate)
                {
                    Out.dump("[negated]");
                }
            }

            int classCount = classes.Count;

            // scratch buffer, large enough for the case that every class is selected
            int[] matches = new int [classCount];
            int   found   = 0;

            for (int code = 0; code < classCount; code++)
            {
                IntCharSet cls      = (IntCharSet)classes[code];
                bool       overlaps = set.and(cls).containsElements();

                // selected iff (overlaps and not negated) or (disjoint and negated)
                if (overlaps == negate)
                {
                    continue;
                }

                matches[found++] = code;
                if (DEBUG)
                {
                    Out.dump("code " + code);
                }
            }

            // trim the buffer to the number of codes actually found
            int[] result = new int [found];
            Array.Copy(matches, 0, result, 0, found);

            return result;
        }
Пример #11
0
        /**
         * Create a caseless version of this charset.
         * <p>
         * The caseless version contains all characters of this char set,
         * and additionally the lower and upper case variants of the
         * characters in this set (as reported by char.ToLower / char.ToUpper).
         * Title case variants are not added; that call was already disabled
         * in the previous version of this method.
         * <p>
         * This set itself is not modified.
         *
         * @return a caseless copy of this set
         */
        public IntCharSet getCaseless()
        {
            IntCharSet n = copy();

            int size = intervalls.Count;

            for (int i = 0; i < size; i++)
            {
                Interval elem = (Interval)intervalls[i];

                // Count with an int, not a char: a char counter wraps from
                // char.MaxValue back to 0, so "c <= elem.end" could never become
                // false for an interval that ends at char.MaxValue.
                for (int code = elem.start; code <= elem.end; code++)
                {
                    char c = (char)code;
                    n.add(char.ToLower(c));
                    n.add(char.ToUpper(c));
                }
            }

            return n;
        }
Пример #12
0
        /**
         * Compares this char set with another object.
         *
         * Two char sets are equal when they hold the same number of intervals
         * and the intervals at each position are equal (Interval.Equals).
         *
         * @param o  the object to compare with; may be null or of another type
         * @return true iff <code>o</code> is an IntCharSet with pairwise equal
         *         intervals; false for null and for objects of other types
         */
        public override bool Equals(Object o)
        {
            // "as" instead of a hard cast: Equals must answer false, not throw,
            // when handed null or an object of a different type
            IntCharSet set = o as IntCharSet;

            if (Object.ReferenceEquals(set, null))
            {
                return false;
            }

            int size = intervalls.Count;

            if (size != set.intervalls.Count)
            {
                return false;
            }

            for (int i = 0; i < size; i++)
            {
                if (!intervalls[i].Equals(set.intervalls[i]))
                {
                    return false;
                }
            }

            return true;
        }
Пример #13
0
        /**
         * Set difference: removes the intervals of <code>set</code> from this
         * set, modifying this set in place. <code>set</code> is only read.
         *
         * Precondition (stated by the original author): this.contains(set)
         * and set != null. The overlap handling below relies on it: each
         * branch only yields a well-formed interval when the interval taken
         * from <code>set</code> lies inside the interval taken from this set.
         *
         * @param set  the characters to remove from this set
         */
        public void sub(IntCharSet set)
        {
            if (DEBUG)
            {
                Out.dump("complement");
                Out.dump("this  : " + this);
                Out.dump("other : " + set);
            }

            int i = 0; // index in this.intervalls
            int j = 0; // index in set.intervalls

            int setSize = set.intervalls.Count;

            // intervalls.Count is re-read on every pass because the list
            // shrinks (RemoveAt) and grows (Insert) inside the loop
            while (i < intervalls.Count && j < setSize)
            {
                Interval x = (Interval)this.intervalls[i];
                Interval y = (Interval)set.intervalls[j];

                if (DEBUG)
                {
                    Out.dump("this      : " + this);
                    Out.dump("this  [" + i + "] : " + x);
                    Out.dump("other [" + j + "] : " + y);
                }

                // x ends before y starts: nothing to remove from x
                if (x.end < y.start)
                {
                    i++;
                    continue;
                }

                // y ends before x starts: y does not touch x
                if (y.end < x.start)
                {
                    j++;
                    continue;
                }

                // From here on x and y overlap.
                // NOTE(review): with the precondition this should mean that y
                // lies inside x (x.start <= y.start && y.end <= x.end); the
                // original comment stated the inequalities the other way
                // round -- confirm.

                // y covers x exactly: drop x. i is not advanced because the
                // following interval moves into position i.
                if (x.start == y.start && x.end == y.end)
                {
                    intervalls.RemoveAt(i);
                    j++;
                    continue;
                }

                // y cuts off the front of x: x keeps the part behind y and
                // stays current, so later intervals of set can still hit it
                if (x.start == y.start)
                {
                    x.start = (char)(y.end + 1);
                    j++;
                    continue;
                }

                // y cuts off the tail of x: x keeps the part in front of y
                if (x.end == y.end)
                {
                    x.end = (char)(y.start - 1);
                    i++;
                    j++;
                    continue;
                }

                // y is taken out of the middle of x: the part in front of y is
                // inserted as a new interval at position i, x (now at i + 1)
                // keeps the part behind y
                intervalls.Insert(i, new Interval(x.start, (char)(y.start - 1)));
                x.start = (char)(y.end + 1);

                // step over the inserted interval; i then points at the rest of x
                i++;
                j++;
            }

            if (DEBUG)
            {
                Out.dump("result: " + this);
            }
        }