/// <summary>
/// Maps <paramref name="src"/> with <c>Map</c>, then scans the mapped text for
/// prohibited code points and checks the BiDi constraints before returning it.
/// </summary>
/// <param name="src">The text to prepare.</param>
/// <param name="options">Options passed through to <c>Map</c>.</param>
/// <returns>A new <see cref="StringBuffer"/> wrapping the mapped text.</returns>
/// <exception cref="StringPrepParseException">
/// Thrown when a code point other than U+0020 is in the prohibited set, when the
/// text contains both left-to-right and right-to-left code points, or when it
/// contains a right-to-left code point but does not both start and end with one.
/// </exception>
public StringBuffer Prepare(String src, StringPrepOptions options)
{
    String mapped = Map(src, options);
    UCharacterIterator cursor = UCharacterIterator.GetInstance(mapped);

    // CharDirectionCount doubles as the "no code point seen yet" marker.
    UCharacterDirection lastDir = UCharacterDirectionExtensions.CharDirectionCount;
    UCharacterDirection firstDir = UCharacterDirectionExtensions.CharDirectionCount;
    int lastRtlIndex = -1;
    int lastLtrIndex = -1;
    bool sawRtl = false;
    bool sawLtr = false;

    int codePoint;
    while ((codePoint = cursor.NextCodePoint()) != UCharacterIterator.Done)
    {
        // U+0020 is let through even if the prohibited set contains it.
        if (transform.prohibitedSet.Contains(codePoint) && codePoint != 0x0020)
        {
            throw new StringPrepParseException(
                "A prohibited code point was found in the input",
                StringPrepErrorType.ProhibitedError,
                cursor.GetText(),
                cursor.Index);
        }

        lastDir = UChar.GetDirection(codePoint);
        if (firstDir == UCharacterDirectionExtensions.CharDirectionCount)
        {
            firstDir = lastDir;
        }

        bool isLtr = lastDir == UCharacterDirection.LeftToRight;
        bool isRtl = lastDir == UCharacterDirection.RightToLeft
            || lastDir == UCharacterDirection.RightToLeftArabic;

        // The iterator has already advanced, so step back one unit for the position.
        if (isLtr)
        {
            sawLtr = true;
            lastLtrIndex = cursor.Index - 1;
        }
        if (isRtl)
        {
            sawRtl = true;
            lastRtlIndex = cursor.Index - 1;
        }
    }

    // satisfy 2: left-to-right and right-to-left code points must not be mixed
    if (sawLtr && sawRtl)
    {
        throw new StringPrepParseException(
            "The input does not conform to the rules for BiDi code points.",
            StringPrepErrorType.CheckBiDiError,
            cursor.GetText(),
            (lastRtlIndex > lastLtrIndex) ? lastRtlIndex : lastLtrIndex);
    }

    // satisfy 3: right-to-left text must both begin and end with a right-to-left code point
    bool startsRtl = firstDir == UCharacterDirection.RightToLeft
        || firstDir == UCharacterDirection.RightToLeftArabic;
    bool endsRtl = lastDir == UCharacterDirection.RightToLeft
        || lastDir == UCharacterDirection.RightToLeftArabic;
    if (sawRtl && (!startsRtl || !endsRtl))
    {
        throw new StringPrepParseException(
            "The input does not conform to the rules for BiDi code points.",
            StringPrepErrorType.CheckBiDiError,
            cursor.GetText(),
            (lastRtlIndex > lastLtrIndex) ? lastRtlIndex : lastLtrIndex);
    }

    return new StringBuffer(mapped);
}
// TODO: optimize
/// <summary>
/// Converts the text of both iterators with <c>ConvertIDNToASCII</c> and compares
/// the two results with <c>CompareCaseInsensitiveASCII</c>.
/// </summary>
/// <param name="i1">Iterator over the first name; must not be null.</param>
/// <param name="i2">Iterator over the second name; must not be null.</param>
/// <param name="options">Options passed to both conversions.</param>
/// <returns>The value returned by <c>CompareCaseInsensitiveASCII</c>.</returns>
/// <exception cref="ArgumentException">Thrown when either iterator is null.</exception>
public static int Compare(UCharacterIterator i1, UCharacterIterator i2, IDNA2003Options options)
{
    bool bothPresent = i1 != null && i2 != null;
    if (!bothPresent)
    {
        throw new ArgumentException("One of the source buffers is null");
    }

    // The first source is converted before the second, so a failure in the first surfaces first.
    StringBuffer firstAscii = ConvertIDNToASCII(i1.GetText(), options);
    StringBuffer secondAscii = ConvertIDNToASCII(i2.GetText(), options);

    return CompareCaseInsensitiveASCII(firstAscii, secondAscii);
}
/// <summary>
/// Test helper: reads the iterator's text through <c>GetText(char[])</c> and reports
/// an error when it differs from <paramref name="result"/>.
/// </summary>
/// <param name="iterator">The iterator under test.</param>
/// <param name="result">The text the iterator is expected to hold.</param>
public void getText(UCharacterIterator iterator, String result)
{
    /* test getText */
    char[] buffer = new char[1];
    bool filled = false;
    while (!filled)
    {
        try
        {
            iterator.GetText(buffer);
            filled = true;
        }
        catch (IndexOutOfRangeException)
        {
            // GetText rejected the buffer; retry with one sized to the iterator's length.
            buffer = new char[iterator.Length];
        }
    }

    string actual = new string(buffer, 0, iterator.Length);
    if (result.CompareToOrdinal(actual) != 0)
    {
        Errln("getText failed for iterator");
    }
}
/// <summary>
/// Convenience overload: takes the iterator's full text and forwards it to the
/// string-based <c>ConvertIDNToUnicode</c>.
/// </summary>
/// <param name="iter">Iterator whose text is converted.</param>
/// <param name="options">Options passed through unchanged.</param>
/// <returns>The result of the string-based overload.</returns>
public static StringBuffer ConvertIDNToUnicode(UCharacterIterator iter, IDNA2003Options options)
{
    string text = iter.GetText();
    return ConvertIDNToUnicode(text, options);
}
/// <summary>
/// Applies the ToUnicode steps to a single label: prepare non-ASCII input, and if
/// the result carries the ACE prefix, Punycode-decode it and verify the decoding
/// by round-tripping through <c>ConvertToASCII</c>.
/// </summary>
/// <remarks>
/// The RFC states that
/// <quote>
/// ToUnicode never fails. If any step fails, then the original input
/// is returned immediately in that step.
/// </quote>
/// Failures of the prepare and decode steps are handled that way here. An exception
/// thrown by <c>ConvertToASCII</c> in step 6 is not caught by this method.
/// </remarks>
/// <param name="iter">Iterator over the label; scanned from its current index.</param>
/// <param name="options">Options passed to the prepare and ToASCII steps.</param>
/// <returns>
/// The decoded label when every step succeeds; otherwise a new buffer holding the
/// iterator's text.
/// </returns>
public static StringBuffer ConvertToUnicode(UCharacterIterator iter, IDNA2003Options options)
{
    int startIndex = iter.Index;

    // step 1: find out if all the codepoints in src are ASCII
    bool allAscii = true;
    int unit;
    while ((unit = iter.Next()) != UCharacterIterator.DONE)
    {
        if (unit > 0x7F)
        {
            allAscii = false;
            break;
        }
    }

    StringBuffer prepared;
    if (allAscii)
    {
        // just point to source
        prepared = new StringBuffer(iter.GetText());
    }
    else
    {
        // step 2: process the string, starting again from where the scan began
        iter.Index = (startIndex);
        try
        {
            prepared = transform.Prepare(iter, (StringPrepOptions)options);
        }
        catch (StringPrepParseException)
        {
            return new StringBuffer(iter.GetText());
        }
    }

    // step 3: verify ACE Prefix
    if (!StartsWithPrefix(prepared))
    {
        return new StringBuffer(iter.GetText());
    }

    // step 4: Remove the ACE Prefix
    String encoded = prepared.ToString(ACE_PREFIX_LENGTH, prepared.Length - ACE_PREFIX_LENGTH);

    // step 5: Decode using punycode
    StringBuffer decoded = null;
    try
    {
        decoded = PunycodeReference.Decode(new StringBuffer(encoded), null);
    }
    catch (StringPrepParseException)
    {
        return new StringBuffer(iter.GetText());
    }

    // step 6: Apply toASCII
    StringBuffer reencoded = ConvertToASCII(decoded, options);

    // step 7: verify
    if (CompareCaseInsensitiveASCII(prepared, reencoded) != 0)
    {
        return new StringBuffer(iter.GetText());
    }

    // step 8: return output of step 5
    return decoded;
}
/// <summary>
/// Applies the ToASCII steps to a single label: prepare non-ASCII input, optionally
/// enforce the STD3 ASCII rules, Punycode-encode a result that is still non-ASCII
/// and prepend the ACE prefix, then check the label length.
/// </summary>
/// <param name="srcIter">
/// Iterator over the label. It is scanned from its current index and then reset to
/// the start before its text is read.
/// </param>
/// <param name="options">
/// Conversion options. <c>USE_STD3_RULES</c> turns on the STD3 ASCII checks; the
/// value is also cast and passed to the prepare step.
/// </param>
/// <returns>
/// The prepared label itself when it is all ASCII, otherwise a new buffer holding
/// the ACE prefix followed by the lower-cased Punycode encoding.
/// </returns>
/// <exception cref="StringPrepParseException">
/// Thrown when the prepared label is empty, violates the STD3 ASCII rules (only when
/// enabled), is non-ASCII yet already starts with the ACE prefix, or when the result
/// is longer than <c>MAX_LABEL_LENGTH</c>. The prepare step is not wrapped in a
/// try block, so anything it throws propagates as well.
/// </exception>
public static StringBuffer ConvertToASCII(UCharacterIterator srcIter, IDNA2003Options options)
{
    const string std3Message = "The input does not conform to the STD 3 ASCII rules";

    char[] caseFlags = null;

    //get the options
    bool enforceStd3 = ((options & USE_STD3_RULES) != 0);

    // step 1: scan the whole remaining input; any unit above 0x7f makes it non-ASCII
    bool inputIsAscii = true;
    int unit;
    while ((unit = srcIter.Next()) != UCharacterIterator.DONE)
    {
        if (unit > 0x7f)
        {
            inputIsAscii = false;
        }
    }
    srcIter.SetToStart();

    // step 2 is performed only if the source contains non ASCII
    StringBuffer prepared = inputIsAscii
        ? new StringBuffer(srcIter.GetText())
        : transform.Prepare(srcIter, (StringPrepOptions)options);

    int preparedLength = prepared.Length;
    if (preparedLength == 0)
    {
        throw new StringPrepParseException(
            "Found zero length lable after NamePrep.",
            StringPrepErrorType.ZeroLengthLabel);
    }

    // step 3 & 4: re-check ASCII-ness on the prepared text and look for non-LDH characters
    bool outputIsAscii = true;
    bool onlyLdh = true;
    int lastNonLdhIndex = -1;
    for (int k = 0; k < preparedLength; k++)
    {
        unit = prepared[k];
        if (unit > 0x7F)
        {
            outputIsAscii = false;
        }
        else if (!IsLDHChar(unit))
        {
            // here we do not assemble surrogates
            // since we know that LDH code points
            // are in the ASCII range only
            onlyLdh = false;
            lastNonLdhIndex = k;
        }
    }

    if (enforceStd3)
    {
        // verify 3a and 3b; the three failures are reported in this fixed order
        if (!onlyLdh)
        {
            /* source contains some non-LDH characters */
            throw new StringPrepParseException(
                std3Message,
                StringPrepErrorType.STD3ASCIIRulesError,
                prepared.ToString(),
                (lastNonLdhIndex > 0) ? (lastNonLdhIndex - 1) : lastNonLdhIndex);
        }
        if (prepared[0] == HYPHEN)
        {
            throw new StringPrepParseException(
                std3Message,
                StringPrepErrorType.STD3ASCIIRulesError,
                prepared.ToString(),
                0);
        }
        if (prepared[prepared.Length - 1] == HYPHEN)
        {
            throw new StringPrepParseException(
                std3Message,
                StringPrepErrorType.STD3ASCIIRulesError,
                prepared.ToString(),
                (preparedLength > 0) ? preparedLength - 1 : preparedLength);
        }
    }

    StringBuffer dest;
    if (outputIsAscii)
    {
        dest = prepared;
    }
    else
    {
        // step 5 : verify the sequence does not begin with ACE prefix
        // NOTE(review): this branch runs when the prefix IS present, yet the message
        // says the input "does not start with" it; confirm the intended wording.
        if (StartsWithPrefix(prepared))
        {
            throw new StringPrepParseException(
                "The input does not start with the ACE Prefix.",
                StringPrepErrorType.AcePrefixError,
                prepared.ToString(),
                0);
        }

        //step 6: encode the sequence with punycode
        StringBuffer punycode = PunycodeReference.Encode(prepared, caseFlags);

        // convert all codepoints to lower case ASCII
        StringBuffer lowered = ToASCIILower(punycode);

        //Step 7: prepend the ACE prefix, then copy the encoded label after it
        dest = new StringBuffer();
        dest.Append(ACE_PREFIX, 0, ACE_PREFIX_LENGTH - 0); // ICU4N: Checked 3rd parameter
        dest.Append(lowered);
    }

    if (dest.Length > MAX_LABEL_LENGTH)
    {
        throw new StringPrepParseException(
            "The labels in the input are too long. Length > 64.",
            StringPrepErrorType.LabelTooLongError,
            dest.ToString(),
            0);
    }

    return dest;
}
/// <summary>
/// Applies the ToUnicode steps to a single label: prepare non-ASCII input, and if
/// the result carries the ACE prefix, Punycode-decode it and verify the decoding
/// by round-tripping through <c>ConvertToASCII</c>.
/// </summary>
/// <remarks>
/// TODO:
/// The RFC states that
/// <quote>
/// ToUnicode never fails. If any step fails, then the original input
/// is returned immediately in that step.
/// </quote>
/// Prepare and decode failures fall back to the source text. An exception thrown by
/// <c>ConvertToASCII</c> in step 6 is not caught here. No STD3 ASCII rule check is
/// performed by this method.
/// </remarks>
/// <param name="src">Iterator over the label; scanned from its current index.</param>
/// <param name="options">Options passed to the prepare and ToASCII steps.</param>
/// <returns>
/// The decoded label when decoding and verification succeed; otherwise a new buffer
/// holding the iterator's text.
/// </returns>
public static StringBuffer ConvertToUnicode(UCharacterIterator src, IDNA2003Options options)
{
    bool[] caseFlags = null;
    int startIndex = src.Index;

    // step 1: find out if all the codepoints in src are ASCII (the scan always runs to the end)
    bool allAscii = true;
    int unit;
    while ((unit = src.Next()) != UCharacterIterator.DONE)
    {
        if (unit > 0x7F)
        {
            allAscii = false;
        }
    }

    StringBuffer prepared;
    if (allAscii)
    {
        //just point to source
        prepared = new StringBuffer(src.GetText());
    }
    else
    {
        try
        {
            // step 2: process the string
            src.Index = startIndex;
            prepared = namePrep.Prepare(src, (StringPrepOptions)options);
        }
        catch (StringPrepParseException)
        {
            return new StringBuffer(src.GetText());
        }
    }

    //step 3: verify ACE Prefix
    if (StartsWithPrefix(prepared))
    {
        //step 4: Remove the ACE Prefix
        string encoded = prepared.ToString(ACE_PREFIX.Length, prepared.Length - ACE_PREFIX.Length);

        //step 5: Decode using punycode; a decode failure leaves nothing to verify
        StringBuffer decoded;
        try
        {
            decoded = new StringBuffer(Punycode.Decode(encoded, caseFlags).ToString());
        }
        catch (StringPrepParseException)
        {
            decoded = null;
        }

        if (decoded != null)
        {
            //step 6:Apply toASCII
            StringBuffer reencoded = ConvertToASCII(UCharacterIterator.GetInstance(decoded), options);

            //step 7: verify; step 8: return output of step 5 only when the round trip matches
            if (CompareCaseInsensitiveASCII(prepared, reencoded) == 0)
            {
                return decoded;
            }
        }
    }

    // Either no ACE prefix, or decoding/verification failed: hand back the source text.
    return new StringBuffer(src.GetText());
}
/// <summary>
/// Convenience overload: takes the iterator's full text and forwards it to the
/// string-based <c>Prepare</c>.
/// </summary>
/// <param name="src">Iterator whose text is prepared.</param>
/// <param name="options">Options passed through unchanged.</param>
/// <returns>The result of the string-based overload.</returns>
public StringBuffer Prepare(UCharacterIterator src, StringPrepOptions options)
{
    string text = src.GetText();
    return Prepare(text, options);
}
/// <summary>
/// Exercises code-point-wise navigation of a <c>UCharacterIterator</c> over a string
/// that mixes BMP characters with surrogate pairs: <c>GetText</c>, <c>Index</c>,
/// <c>CurrentCodePoint</c>, <c>MoveCodePointIndex</c>, <c>Next</c>,
/// <c>SetToStart</c>/<c>SetToLimit</c> and forward iteration with <c>NextCodePoint</c>.
/// Failures are reported through <c>Errln</c>; the reference value for each check
/// comes from <c>UTF16.CharAt</c> on the same string.
/// </summary>
public void TestIterationUChar32()
{
    String text = "\u0061\u0062\ud841\udc02\u20ac\ud7ff\ud842\udc06\ud801\udc00\u0061";
    int c;
    int i;

    UCharacterIterator iter = UCharacterIterator.GetInstance(text);

    String iterText = iter.GetText();
    if (!iterText.Equals(text))
    {
        Errln("iter.getText() failed");
    }

    iter.Index = (1);
    if (iter.CurrentCodePoint != UTF16.CharAt(text, 1))
    {
        Errln("Iterator didn't start out in the right place.");
    }

    // Relative moves by code point. In each message "expected" is the reference
    // value taken from the string and "got" is what the iterator produced.
    iter.SetToStart();
    i = iter.MoveCodePointIndex(1);
    c = iter.CurrentCodePoint;
    if (c != UTF16.CharAt(text, 1) || i != 1)
    {
        Errln("moveCodePointIndex(1) didn't work correctly expected "
            + Hex(UTF16.CharAt(text, 1)) + " got " + Hex(c) + " i= " + i);
    }

    i = iter.MoveCodePointIndex(2);
    c = iter.CurrentCodePoint;
    if (c != UTF16.CharAt(text, 4) || i != 4)
    {
        Errln("moveCodePointIndex(2) didn't work correctly expected "
            + Hex(UTF16.CharAt(text, 4)) + " got " + Hex(c) + " i= " + i);
    }

    i = iter.MoveCodePointIndex(-2);
    c = iter.CurrentCodePoint;
    if (c != UTF16.CharAt(text, 1) || i != 1)
    {
        Errln("moveCodePointIndex(-2) didn't work correctly expected "
            + Hex(UTF16.CharAt(text, 1)) + " got " + Hex(c) + " i= " + i);
    }

    // Two code points back from the limit lands three code units before the end,
    // because the second-to-last code point is a surrogate pair.
    iter.SetToLimit();
    i = iter.MoveCodePointIndex(-2);
    c = iter.CurrentCodePoint;
    if (c != UTF16.CharAt(text, (text.Length - 3)) || i != (text.Length - 3))
    {
        Errln("moveCodePointIndex(-2) didn't work correctly expected "
            + Hex(UTF16.CharAt(text, (text.Length - 3))) + " got " + Hex(c) + " i= " + i);
    }

    //testing first32PostInc, nextCodePointPostInc, setTostart
    i = 0;
    iter.SetToStart();
    c = iter.Next();
    if (c != UTF16.CharAt(text, i))
    {
        Errln("first32PostInc failed. Expected->" + Hex(UTF16.CharAt(text, i)) + " Got-> " + Hex(c));
    }
    if (iter.Index != UTF16.GetCharCount(c) + i)
    {
        Errln("getIndex() after first32PostInc() failed");
    }

    iter.SetToStart();
    i = 0;
    if (iter.Index != 0)
    {
        Errln("setToStart failed");
    }

    Logln("Testing forward iteration...");
    // c still holds the value read by Next() above, so the first pass advances the iterator.
    do
    {
        if (c != UCharacterIterator.DONE)
        {
            c = iter.NextCodePoint();
        }

        if (c != UTF16.CharAt(text, i))
        {
            Errln("Character mismatch at position " + i + ", iterator has " + Hex(c)
                + ", string has " + Hex(UTF16.CharAt(text, i)));
        }

        i += UTF16.GetCharCount(c);
        if (iter.Index != i)
        {
            Errln("getIndex() aftr nextCodePointPostInc() isn't working right");
        }

        c = iter.CurrentCodePoint;
        if (c != UCharacterIterator.DONE && c != UTF16.CharAt(text, i))
        {
            Errln("current() after nextCodePointPostInc() isn't working right");
        }
    } while (c != UCharacterIterator.DONE);

    // The iterator is now at the end of the text, so one more step must report DONE.
    c = iter.NextCodePoint();
    if (c != UCharacterIterator.DONE)
    {
        Errln("nextCodePointPostInc() didn't return DONE at the end");
    }
}