/**
 * Tests whether every character of <code>set</code> is also an element
 * of this set.
 *
 * Both interval lists are walked in parallel with one cursor each.
 * NOTE(review): this presumes that the intervals of a set are sorted,
 * pairwise disjoint and never directly adjacent - confirm against
 * add(Interval). An interval of <code>set</code> that is only covered by
 * several intervals of this set together is reported as not contained.
 *
 * @param set the candidate subset; an empty set is always contained
 * @return true iff each interval of <code>set</code> lies inside one
 *         interval of this set
 */
public bool contains(IntCharSet set) {
  int i = 0; // cursor into this.intervalls
  int j = 0; // cursor into set.intervalls

  while (j < set.intervalls.Count) {
    // this set has no intervals left, but set still has uncovered ones
    // (previously this indexed past the end of intervalls)
    if (i >= intervalls.Count) {
      return false;
    }

    Interval x = (Interval)intervalls[i];
    Interval y = (Interval)set.intervalls[j];

    if (x.contains(y)) {
      j++;
      continue;
    }

    if (x.end < y.start) {
      // x lies completely before y, a later interval may still cover y
      i++;
      continue;
    }

    // y starts before x or sticks out of x: some character of y is
    // missing here (the partial overlap case used to loop forever)
    return false;
  }

  return true;
}
/**
 * Collects the intervals of all character classes of this collection
 * into a single array.
 *
 * The array is ordered by char code, i.e.
 * <code>result[i+1].start = result[i].end+1</code>
 *
 * Every CharClassInterval records the number of the char class it
 * belongs to.
 *
 * @return one CharClassInterval per interval stored in the classes
 */
public CharClassInterval [] getIntervalls() {
  // the result needs one slot per interval of every class
  int total = 0;
  foreach (IntCharSet charSet in classes) {
    total += charSet.numIntervalls();
  }

  CharClassInterval[] result = new CharClassInterval[total];

  // start at char code 0 and always continue right behind the interval
  // that was emitted last
  int nextCode = 0;
  for (int filled = 0; filled < total; filled++) {
    int code = getClassCode((char)nextCode);
    Interval iv = ((IntCharSet)classes[code]).getNext();

    result[filled] = new CharClassInterval(iv.start, iv.end, code);
    nextCode = iv.end + 1;
  }

  return result;
}
/**
 * Debugging aid that checks the stored classes for consistency.
 *
 * Every pair of classes that shares characters is reported on the
 * console. Afterwards the class code of each character from 0 up to
 * maxChar is requested, printing a progress dot for every 100th
 * character.
 */
public void check() {
  int count = classes.Count;

  for (int first = 0; first < count; first++) {
    for (int second = first + 1; second < count; second++) {
      IntCharSet a = (IntCharSet)classes[first];
      IntCharSet b = (IntCharSet)classes[second];

      if (!a.and(b).containsElements()) {
        continue; // disjoint, nothing to report
      }

      Console.WriteLine("Error: non disjoint char classes {0} and {1}", first, second);
      Console.WriteLine("class {0}: {1}", first, a);
      Console.WriteLine("class {0}: {1}", second, b);
    }
  }

  // each character must have a class code
  // (i.e. getClassCode has to terminate for it)
  char c = (char)0;
  while (c < maxChar) {
    getClassCode(c);
    if (c % 100 == 0) {
      Console.Write(".");
    }
    c++;
  }

  // maxChar itself is not covered by the loop above
  getClassCode(maxChar);
}
/**
 * Adds every interval of the given set to this set, one at a time.
 *
 * @param set the set whose intervals are added
 * @return this set, so that calls can be chained
 */
public IntCharSet add(IntCharSet set) {
  int k = 0;

  // the count is re-read on every pass, exactly as an index loop would
  while (k < set.intervalls.Count) {
    Interval iv = (Interval)set.intervalls[k];
    add(iv);
    k++;
  }

  return this;
}
/**
 * Computes the intersection of this set and <code>set</code>.
 *
 * Neither operand is modified; the common parts are collected in a
 * newly created set. Both interval lists are scanned in parallel.
 *
 * @param set the set to intersect with
 * @return a new set made of the overlapping parts of both sets
 */
public IntCharSet and(IntCharSet set) {
  if (DEBUG) {
    Out.dump("intersection");
    Out.dump("this : " + this);
    Out.dump("other : " + set);
  }

  IntCharSet result = new IntCharSet();

  int size = intervalls.Count;
  int setSize = set.intervalls.Count;
  int i = 0; // position in this.intervalls
  int j = 0; // position in set.intervalls

  while (i < size && j < setSize) {
    Interval x = (Interval)this.intervalls[i];
    Interval y = (Interval)set.intervalls[j];

    if (x.end < y.start) {
      // x ends before y begins
      i++;
    } else if (y.end < x.start) {
      // y ends before x begins
      j++;
    } else {
      // x and y overlap, keep the shared range
      result.intervalls.Add(
        new Interval(max(x.start, y.start), min(x.end, y.end)));

      // step past whichever interval ends first (both on a tie)
      bool stepOther = x.end >= y.end;
      bool stepThis = y.end >= x.end;
      if (stepOther) {
        j++;
      }
      if (stepThis) {
        i++;
      }
    }
  }

  if (DEBUG) {
    Out.dump("result: " + result);
  }

  return result;
}
/**
 * Refines the current partition so that the given set of characters is
 * told apart from all other characters.
 *
 * After the call, characters that are elements of <code>set</code> no
 * longer share an equivalence class with characters that are not.
 * Only the classes that exist on entry are examined; the parts split
 * off are appended as new classes. Note that <code>set</code> is
 * reduced in place while it is processed (for caseless mode this is the
 * caseless copy, not the argument itself).
 *
 * @param set      the set of characters to distinguish from the rest
 * @param caseless if true upper/lower case variants are considered
 *                 equivalent
 */
public void makeClass(IntCharSet set, bool caseless) {
  if (caseless) {
    set = set.getCaseless();
  }

  if (DEBUG) {
    Out.dump("makeClass(" + set + ")");
    dump();
  }

  try {
    int oldSize = classes.Count;

    for (int i = 0; i < oldSize; i++) {
      IntCharSet current = (IntCharSet)classes[i];

      // the requested class exists already
      if (current.Equals(set)) {
        return;
      }

      IntCharSet common = current.and(set);
      if (!common.containsElements()) {
        continue; // this class is not affected
      }

      if (current.Equals(common)) {
        // the whole class belongs to set: keep it, go on with the rest
        set.sub(common);
        continue;
      }

      if (set.Equals(common)) {
        // what is left of set sits inside this class: split it off, done
        current.sub(common);
        classes.Add(common);
        return;
      }

      // partial overlap: the shared part becomes a class of its own
      set.sub(common);
      current.sub(common);
      classes.Add(common);
    }
  } finally {
    if (DEBUG) {
      Out.dump("makeClass(..) finished");
      dump();
    }
  }
}
/**
 * Returns the last set of the expression
 *
 * (the last-character-projection of the language), i.e. the characters
 * a word matched by the expression can end with.
 *
 * @param re the regular expression to analyse
 * @return the set of possible last characters
 */
private static IntCharSet last(RegExp re) {
  switch (re.type) {
    case sym.BAR: {
      // either alternative may supply the last character
      RegExp2 alt = (RegExp2)re;
      return last(alt.r1).add(last(alt.r2));
    }

    case sym.CONCAT: {
      RegExp2 seq = (RegExp2)re;
      if (!containsEpsilon(seq.r2)) {
        return last(seq.r2);
      }
      // the second part may be empty, so the first part counts as well
      return last(seq.r1).add(last(seq.r2));
    }

    case sym.STAR:
    case sym.PLUS:
    case sym.QUESTION: {
      RegExp1 unary = (RegExp1)re;
      return last((RegExp)unary.content);
    }

    case sym.CCLASS:
      return new IntCharSet((ArrayList)((RegExp1)re).content);

    case sym.CCLASSNOT: {
      // everything from 0 to maxChar minus the listed characters
      IntCharSet all = new IntCharSet(new Interval((char)0, maxChar));
      IntCharSet listed = new IntCharSet((ArrayList)((RegExp1)re).content);
      all.sub(listed);
      return all;
    }

    case sym.CHAR:
      return new IntCharSet((char)((RegExp1)re).content);

    case sym.STRING: {
      String text = (String)((RegExp1)re).content;
      if (text.Length > 0) {
        return new IntCharSet(text[text.Length - 1]);
      }
      return new IntCharSet();
    }

    case sym.MACROUSE:
      return last(macros.getDefinition((String)((RegExp1)re).content));

    default:
      throw new Exception("Unkown expression type " + re.type + " in " + re); //$NON-NLS-1$ //$NON-NLS-2$
  }
}
/**
 * Creates a (deep) copy of this char set: each interval is copied
 * itself, so the new set shares no interval objects with this one.
 *
 * @return the copy
 */
public IntCharSet copy() {
  IntCharSet clone = new IntCharSet();

  foreach (Interval iv in intervalls) {
    clone.intervalls.Add(iv.copy());
  }

  return clone;
}
/**
 * Looks up the code of the character class the given character
 * belongs to.
 *
 * The classes are tried in order and the index of the first one that
 * contains the character is its code. There is no upper bound on the
 * search: if no class contains the character the lookup runs past the
 * end of the class list.
 *
 * @param letter the character to classify
 * @return the index of the first class containing <code>letter</code>
 */
public int getClassCode(char letter) {
  for (int code = 0; ; code++) {
    IntCharSet candidate = (IntCharSet)classes[code];
    if (candidate.contains(letter)) {
      return code;
    }
  }
}
/**
 * Determines the codes of all character classes that share at least
 * one character with the given set - or, if negated, of all classes
 * that share none.
 *
 * @param set    the set of input characters
 * @param negate if true, select the classes that do not intersect
 *               <code>set</code> instead of those that do
 * @return the selected class codes in ascending order
 */
private int [] getClassCodes(IntCharSet set, bool negate) {
  if (DEBUG) {
    Out.dump("getting class codes for " + set);
    if (negate) {
      Out.dump("[negated]");
    }
  }

  int size = classes.Count;

  // [fixme: optimize]
  // scratch buffer big enough for the case that every class is selected
  int[] codes = new int[size];
  int found = 0;

  for (int code = 0; code < size; code++) {
    IntCharSet charSet = (IntCharSet)classes[code];
    bool overlaps = set.and(charSet).containsElements();

    // plain mode keeps overlapping classes, negated mode the others
    if (overlaps == negate) {
      continue;
    }

    codes[found++] = code;
    if (DEBUG) {
      Out.dump("code " + code);
    }
  }

  // trim the buffer to the number of codes actually found
  int[] result = new int[found];
  Array.Copy(codes, 0, result, 0, found);

  return result;
}
/**
 * Create a caseless version of this charset.
 * <p>
 * The caseless version contains all characters of this char set,
 * and additionally the lower and upper case variants of the
 * characters in this set (title case variants are not added).
 * This set itself is left untouched.
 *
 * @return a caseless copy of this set
 */
public IntCharSet getCaseless() {
  IntCharSet n = copy();

  int size = intervalls.Count;
  for (int i = 0; i < size; i++) {
    Interval elem = (Interval)intervalls[i];

    // Count with an int: a char counter wraps from '\uffff' back to 0,
    // so "c <= elem.end" could never become false for an interval that
    // ends at the largest char value and the loop would not terminate.
    for (int code = elem.start; code <= elem.end; code++) {
      char c = (char)code;
      n.add(char.ToLower(c));
      n.add(char.ToUpper(c));
    }
  }

  return n;
}
/**
 * Compares this char set with another object.
 *
 * Two char sets are equal if they hold the same number of intervals
 * and the intervals at corresponding positions are equal.
 *
 * @param o the object to compare with; <code>null</code> and objects
 *          that are not an IntCharSet are never equal to this set
 * @return true iff <code>o</code> is an IntCharSet with equal intervals
 */
public override bool Equals(Object o) {
  // Equals must answer false for null or foreign types instead of
  // failing on the cast
  if (!(o is IntCharSet)) {
    return false;
  }

  IntCharSet set = (IntCharSet)o;

  if (intervalls.Count != set.intervalls.Count) {
    return false;
  }

  for (int i = 0; i < intervalls.Count; i++) {
    if (!intervalls[i].Equals(set.intervalls[i])) {
      return false;
    }
  }

  return true;
}
/**
 * Complement: removes all characters of <code>set</code> from this set.
 * This set is modified in place.
 *
 * Precondition: this.contains(set), set != null. The interval handling
 * below relies on every overlapping interval of <code>set</code> lying
 * inside an interval of this set.
 *
 * @param set the characters to remove
 */
public void sub(IntCharSet set) {
  if (DEBUG) {
    Out.dump("complement");
    Out.dump("this : " + this);
    Out.dump("other : " + set);
  }

  int setSize = set.intervalls.Count;
  int i = 0; // position in this.intervalls
  int j = 0; // position in set.intervalls

  // intervalls.Count is re-read on each pass because the list may shrink
  // or grow inside the loop
  while (i < intervalls.Count && j < setSize) {
    Interval x = (Interval)this.intervalls[i];
    Interval y = (Interval)set.intervalls[j];

    if (DEBUG) {
      Out.dump("this : " + this);
      Out.dump("this [" + i + "] : " + x);
      Out.dump("other [" + j + "] : " + y);
    }

    if (x.end < y.start) {
      // x lies before y: nothing to remove from x
      i++;
    } else if (y.end < x.start) {
      // y lies before x: y is done
      j++;
    } else if (x.start == y.start && x.end == y.end) {
      // from here on x and y overlap, so y lies inside x (precondition);
      // y covers x exactly: drop x, the next interval moves to slot i
      intervalls.RemoveAt(i);
      j++;
    } else if (x.start == y.start) {
      // y is a proper prefix of x: cut it off the front
      x.start = (char)(y.end + 1);
      j++;
    } else if (x.end == y.end) {
      // y is a proper suffix of x: cut it off the back
      x.end = (char)(y.start - 1);
      i++;
      j++;
    } else {
      // y lies strictly inside x: split x into the part before y (new
      // interval at slot i) and the part behind y (x itself, now at i+1)
      intervalls.Insert(i, new Interval(x.start, (char)(y.start - 1)));
      x.start = (char)(y.end + 1);
      i++;
      j++;
    }
  }

  if (DEBUG) {
    Out.dump("result: " + this);
  }
}