/// <summary>
/// Converts a list of character-class parts into a list of ranges sorted by
/// ascending <c>From</c>.
/// </summary>
/// <remarks>
/// A single character becomes the one-element range [Ch, Ch]. A range whose
/// bounds are reversed (From greater than To) is replaced by a new range with
/// the bounds swapped; a range that is already ordered is reused as-is.
/// The sort key is <c>From</c> only, so the returned ranges may still overlap.
/// </remarks>
/// <param name="e">The parts to convert; each must be a CharPartSingle or a CharPartRange.</param>
/// <returns>A new list holding one range per input part.</returns>
/// <exception cref="ArgumentException">A part is neither a CharPartSingle nor a CharPartRange.</exception>
private List <CharPartRange> canonicalized(List <CharClassPart> e) {
    List<CharPartRange> ranges = new List<CharPartRange>();

    foreach (CharClassPart part in e) {
        // Single characters are widened to a degenerate range.
        CharPartSingle single = part as CharPartSingle;
        if (single != null) {
            ranges.Add(new CharPartRange(single.Ch, single.Ch));
            continue;
        }

        CharPartRange range = part as CharPartRange;
        if (range == null) {
            throw new ArgumentException(part.ToString());
        }

        // Only allocate a replacement when the bounds are the wrong way round.
        bool reversed = range.From > range.To;
        ranges.Add(reversed ? new CharPartRange(range.To, range.From) : range);
    }

    ranges.Sort((left, right) => left.From.CompareTo(right.From));
    return ranges;
}
/// <summary>
/// Builds one <c>CharRange</c> per part of the given character class.
/// </summary>
/// <remarks>
/// The class must already be canonicalized, i.e. every part must be a
/// CharPartRange. Anything else is rejected explicitly rather than failing
/// later with a NullReferenceException.
/// </remarks>
/// <param name="cc">The character class whose parts are converted.</param>
/// <returns>A new list with one CharRange per part, in the order of <c>cc.Parts</c>.</returns>
/// <exception cref="ArgumentException">A part of <paramref name="cc"/> is not a CharPartRange.</exception>
private List <Trans> charRange(CharClass cc) {
    List<Trans> ret = new List<Trans>();

    foreach (CharClassPart part in cc.Parts) {
        CharPartRange cr = part as CharPartRange;
        if (cr == null) {
            // Either a null entry or a non-range part: the class was not canonicalized.
            throw new ArgumentException(
                "charRange expects a canonicalized CharClass (ranges only), got: "
                + (part == null ? "null" : part.ToString()));
        }

        ret.Add(new CharRange(cr.From, cr.To));
    }

    return ret;
}
/// <summary>
/// Converts a negated character class such as [^a-zA-Z] into an ordinary
/// CharClass made of the ranges that are NOT excluded.
/// </summary>
/// <remarks>
/// Starts from the "any char" range 0..0xffff and emits the gap in front of
/// each excluded range, followed by the gap after the last one.
///
/// TODO: 'Any' is treated as the range 0..0xffff (the maximum value of a
/// 16-bit char). This ignores everything beyond 16 bits and probably wreaks
/// havoc with internationalization. If a more advanced library is used later,
/// 'Any' and 'Not' need a more general, encoding-independent treatment.
/// </remarks>
/// <param name="e">The negated character class.</param>
/// <returns>A CharClass whose parts are the CharPartRange gaps between the excluded ranges.</returns>
private CharClass canonicalized(NotCharClass e) {
    // The sweep below relies on 'excluded' being sorted by From, which the
    // List-based canonicalized(..) overload provides.
    List<CharPartRange> excluded = canonicalized(e.Parts);
    List<CharClassPart> result = new List<CharClassPart>();

    // 'start' is the first code unit not yet known to be excluded. It is a
    // SIGNED int on purpose: From - 1 may be negative and To + 1 may exceed
    // 0xffff without wrapping around.
    int start = 0;
    const int end = 0xffff;

    for (int i = 0; i < excluded.Count; ++i) {
        CharPartRange r = excluded[i];

        // Gap between what has been covered so far and this excluded range.
        // valid(..) is defined elsewhere; presumably it rejects empty or
        // out-of-range intervals (confirm against its definition).
        int a = start, b = r.From - 1;
        if (valid(a, b)) {
            result.Add(new CharPartRange((char)a, (char)b));
        }

        // Only ever move 'start' forward. Sorting is by From alone, so a range
        // nested inside an earlier one (e.g. 'c' inside a-z) ends before the
        // current 'start'; assigning r.To + 1 unconditionally would move
        // 'start' backwards and re-include characters already excluded.
        int next = r.To + 1;
        if (next > start) {
            start = next;
        }
    }

    // Trailing gap after the last excluded range.
    if (valid(start, end)) {
        result.Add(new CharPartRange((char)start, (char)end));
    }

    return new CharClass(result);
}