/// <summary>
/// Verifies that a clone of a <c>UCharacterIterator</c> produces the same sequence of
/// values from <c>Next()</c> as the iterator it was cloned from.
/// </summary>
public void TestClone()
{
    UCharacterIterator source = UCharacterIterator.GetInstance("testing");
    UCharacterIterator copy = (UCharacterIterator)source.Clone();

    // Step both iterators together. The loop ends once the source iterator has
    // reported DONE; that final DONE value is compared against the clone as well.
    for (int fromSource = 0; fromSource != UCharacterIterator.DONE;)
    {
        fromSource = source.Next();
        int fromCopy = copy.Next();
        if (fromSource != fromCopy)
        {
            Errln("Cloned operation failed");
        }
    }
}
/// <summary>
/// Applies the ToUnicode steps (numbered 1-8 in the comments below) to the label
/// exposed by <paramref name="iter"/>.
/// </summary>
/// <remarks>
/// The RFC states that ToUnicode never fails: if any step fails, the original input is
/// returned immediately in that step. Accordingly, a failure while preparing the text,
/// while decoding Punycode, or in the round-trip verification makes this method return
/// a copy of the iterator's text. An exception raised by <c>ConvertToASCII</c> in step 6
/// is not caught here.
/// </remarks>
/// <param name="iter">Iterator over the label; it is advanced while scanning and its
/// index is restored before the text is prepared.</param>
/// <param name="options">Options, forwarded to the prepare step and to <c>ConvertToASCII</c>.</param>
/// <returns>The decoded label when it carried the ACE prefix and passed verification;
/// otherwise a new buffer holding <c>iter.GetText()</c>.</returns>
public static StringBuffer ConvertToUnicode(UCharacterIterator iter, IDNA2003Options options)
{
    int startIndex = iter.Index;

    // step 1: find out if all the code units in the source are ASCII;
    // scanning stops at the first one that is not
    bool allAscii = true;
    for (int unit = iter.Next(); unit != UCharacterIterator.DONE; unit = iter.Next())
    {
        if (unit > 0x7F)
        {
            allAscii = false;
            break;
        }
    }

    StringBuffer prepared;
    if (allAscii)
    {
        // nothing to prepare: work on the source text directly
        prepared = new StringBuffer(iter.GetText());
    }
    else
    {
        // step 2: process the string from where the caller left the iterator
        iter.Index = (startIndex);
        try
        {
            prepared = transform.Prepare(iter, (StringPrepOptions)options);
        }
        catch (StringPrepParseException)
        {
            return (new StringBuffer(iter.GetText()));
        }
    }

    // step 3: verify ACE prefix; without it there is nothing to decode
    if (!StartsWithPrefix(prepared))
    {
        return (new StringBuffer(iter.GetText()));
    }

    // step 4: remove the ACE prefix
    string withoutPrefix = prepared.ToString(ACE_PREFIX_LENGTH, prepared.Length - ACE_PREFIX_LENGTH);

    // step 5: decode using punycode
    StringBuffer decoded;
    try
    {
        decoded = PunycodeReference.Decode(new StringBuffer(withoutPrefix), null);
    }
    catch (StringPrepParseException)
    {
        return (new StringBuffer(iter.GetText()));
    }

    // step 6: apply ToASCII to the decoded text
    StringBuffer reencoded = ConvertToASCII(decoded, options);

    // step 7: the re-encoded form must match the prepared input, ignoring ASCII case
    if (CompareCaseInsensitiveASCII(prepared, reencoded) != 0)
    {
        return (new StringBuffer(iter.GetText()));
    }

    // step 8: return output of step 5
    return (decoded);
}
/// <summary>
/// Applies the ToASCII steps (numbered in the comments below) to the label exposed by
/// <paramref name="srcIter"/>.
/// </summary>
/// <param name="srcIter">Iterator over the label. It is read to the end and then reset
/// with <c>SetToStart()</c> before the text is prepared.</param>
/// <param name="options">Options; the <c>USE_STD3_RULES</c> bit enables the STD 3 ASCII
/// rule check, and the value is also forwarded to the prepare step.</param>
/// <returns>The prepared text itself when it contains no code unit above 0x7F; otherwise
/// the ACE prefix followed by the lower-cased Punycode encoding of the prepared text.</returns>
/// <exception cref="StringPrepParseException">Thrown here when the prepared text is empty,
/// when STD 3 checking is enabled and the text contains a non-LDH ASCII character or
/// starts/ends with a hyphen, when non-ASCII prepared text already starts with the ACE
/// prefix, or when the result is longer than <c>MAX_LABEL_LENGTH</c>.</exception>
public static StringBuffer ConvertToASCII(UCharacterIterator srcIter, IDNA2003Options options)
{
    char[] caseFlags = null;

    // get the options
    bool enforceStd3 = ((options & USE_STD3_RULES) != 0);

    // step 1: does the source contain anything outside the ASCII range?
    bool sourceIsAscii = true;
    for (int unit = srcIter.Next(); unit != UCharacterIterator.DONE; unit = srcIter.Next())
    {
        if (unit > 0x7f)
        {
            sourceIsAscii = false;
        }
    }

    srcIter.SetToStart();

    // step 2 is performed only if the source contains non-ASCII
    StringBuffer prepared = sourceIsAscii
        ? new StringBuffer(srcIter.GetText())
        : transform.Prepare(srcIter, (StringPrepOptions)options);

    int preparedLength = prepared.Length;
    if (preparedLength == 0)
    {
        throw new StringPrepParseException("Found zero length lable after NamePrep.", StringPrepErrorType.ZeroLengthLabel);
    }

    // step 3 & 4: re-check the *prepared* text for ASCII-ness and look for ASCII
    // characters that are not letters/digits/hyphen
    bool preparedIsAscii = true;
    bool onlyLdh = true;
    int lastNonLdhIndex = -1;
    for (int pos = 0; pos < preparedLength; pos++)
    {
        int unit = prepared[pos];
        if (unit > 0x7F)
        {
            preparedIsAscii = false;
        }
        else if (!IsLDHChar(unit))
        {
            // surrogates are not assembled here since LDH code points
            // are in the ASCII range only
            onlyLdh = false;
            lastNonLdhIndex = pos;
        }
    }

    if (enforceStd3)
    {
        // verify 3a and 3b; the first failing condition decides the reported position
        if (!onlyLdh)
        {
            throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules", StringPrepErrorType.STD3ASCIIRulesError, prepared.ToString(), (lastNonLdhIndex > 0) ? (lastNonLdhIndex - 1) : lastNonLdhIndex);
        }
        if (prepared[0] == HYPHEN)
        {
            throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules", StringPrepErrorType.STD3ASCIIRulesError, prepared.ToString(), 0);
        }
        if (prepared[prepared.Length - 1] == HYPHEN)
        {
            // preparedLength is non-zero here (the empty case was rejected above)
            throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules", StringPrepErrorType.STD3ASCIIRulesError, prepared.ToString(), preparedLength - 1);
        }
    }

    StringBuffer dest;
    if (preparedIsAscii)
    {
        dest = prepared;
    }
    else
    {
        // step 5: verify the sequence does not begin with the ACE prefix
        if (StartsWithPrefix(prepared))
        {
            throw new StringPrepParseException("The input does not start with the ACE Prefix.", StringPrepErrorType.AcePrefixError, prepared.ToString(), 0);
        }

        // step 6: encode the sequence with punycode
        StringBuffer encoded = PunycodeReference.Encode(prepared, caseFlags);

        // convert all code points to lower case ASCII
        StringBuffer lowered = ToASCIILower(encoded);

        // step 7: prepend the ACE prefix, then copy the encoded text after it
        dest = new StringBuffer();
        dest.Append(ACE_PREFIX, 0, ACE_PREFIX_LENGTH - 0); // ICU4N: Checked 3rd parameter
        dest.Append(lowered);
    }

    if (dest.Length > MAX_LABEL_LENGTH)
    {
        throw new StringPrepParseException("The labels in the input are too long. Length > 64.", StringPrepErrorType.LabelTooLongError, dest.ToString(), 0);
    }

    return (dest);
}
/// <summary>
/// Applies the ToUnicode steps (numbered in the comments below) to the label exposed by
/// <paramref name="src"/>.
/// </summary>
/// <remarks>
/// The RFC states that ToUnicode never fails: if any step fails, the original input is
/// returned immediately in that step. This method returns a copy of the source text when
/// preparing the text fails, when the prepared text lacks the ACE prefix, when Punycode
/// decoding fails, or when the round-trip verification fails. An exception raised by
/// <c>ConvertToASCII</c> in step 6 is not caught here. No STD 3 ASCII rule check is
/// performed in this method.
/// </remarks>
/// <param name="src">Iterator over the label. It is read to the end while scanning; its
/// index is restored before the text is prepared.</param>
/// <param name="options">Options, forwarded to the prepare step and to <c>ConvertToASCII</c>.</param>
/// <returns>The decoded label when it carried the ACE prefix and passed verification;
/// otherwise a new buffer holding <c>src.GetText()</c>.</returns>
public static StringBuffer ConvertToUnicode(UCharacterIterator src, IDNA2003Options options)
{
    bool[] caseFlags = null;
    int startIndex = src.Index;

    // step 1: find out if all the code units in src are ASCII (the whole text is scanned)
    bool allAscii = true;
    for (int unit = src.Next(); unit != UCharacterIterator.DONE; unit = src.Next())
    {
        if (unit > 0x7F)
        {
            allAscii = false;
        }
    }

    StringBuffer prepared;
    if (allAscii)
    {
        // just point to source
        prepared = new StringBuffer(src.GetText());
    }
    else
    {
        try
        {
            // step 2: process the string
            src.Index = startIndex;
            prepared = namePrep.Prepare(src, (StringPrepOptions)options);
        }
        catch (StringPrepParseException)
        {
            return (new StringBuffer(src.GetText()));
        }
    }

    // step 3: verify ACE prefix; without it the source is returned unchanged
    if (!StartsWithPrefix(prepared))
    {
        return (new StringBuffer(src.GetText()));
    }

    // step 4: remove the ACE prefix
    string withoutPrefix = prepared.ToString(ACE_PREFIX.Length, prepared.Length - ACE_PREFIX.Length);

    // step 5: decode using punycode
    StringBuffer decoded;
    try
    {
        decoded = new StringBuffer(Punycode.Decode(withoutPrefix, caseFlags).ToString());
    }
    catch (StringPrepParseException)
    {
        return (new StringBuffer(src.GetText()));
    }

    // step 6: apply ToASCII to the decoded text
    StringBuffer reencoded = ConvertToASCII(UCharacterIterator.GetInstance(decoded), options);

    // step 7: verify; a mismatch is not reported as an error, the source is returned instead
    if (CompareCaseInsensitiveASCII(prepared, reencoded) != 0)
    {
        return (new StringBuffer(src.GetText()));
    }

    // step 8: return output of step 5
    return (decoded);
}
/// <summary>
/// Checks basic positioning of a <c>UCharacterIterator</c> over <c>ITERATION_STRING_</c> and
/// that code-unit iteration (<c>Next</c>/<c>Previous</c>) stays in step with code-point
/// iteration (<c>NextCodePoint</c>/<c>PreviousCodePoint</c>) in both directions.
/// </summary>
/// <remarks>
/// <c>ITERATION_SUPPLEMENTARY_INDEX</c> is treated as the index of the lead surrogate of the
/// supplementary character in the test string: the forward loop compares the code point
/// read at that index against two consecutive code units (lead, then trail).
/// </remarks>
public void TestIteration()
{
    // Number of UTF-16 code units occupied by the supplementary character.
    const int SurrogatePairLength = 2;

    UCharacterIterator iterator = UCharacterIterator.GetInstance(ITERATION_STRING_);
    UCharacterIterator iterator2 = UCharacterIterator.GetInstance(ITERATION_STRING_);

    iterator.SetToStart();
    if (iterator.Current != ITERATION_STRING_[0])
    {
        Errln("Iterator failed retrieving first character");
    }

    iterator.SetToLimit();
    if (iterator.Previous() != ITERATION_STRING_[ITERATION_STRING_.Length - 1])
    {
        Errln("Iterator failed retrieving last character");
    }

    if (iterator.Length != ITERATION_STRING_.Length)
    {
        Errln("Iterator failed determining begin and end index");
    }

    // Forward: iterator2 walks by code point, iterator by code unit.
    iterator2.Index = 0;
    iterator.Index = 0;
    int ch = 0;
    while (ch != UCharacterIterator.DONE)
    {
        int index = iterator2.Index;
        ch = iterator2.NextCodePoint();
        if (index != ITERATION_SUPPLEMENTARY_INDEX)
        {
            if (ch != iterator.Next() && ch != UCharacterIterator.DONE)
            {
                Errln("Error mismatch in next() and nextCodePoint()");
            }
        }
        else
        {
            if (UTF16.GetLeadSurrogate(ch) != iterator.Next()
                || UTF16.GetTrailSurrogate(ch) != iterator.Next())
            {
                Errln("Error mismatch in next and nextCodePoint for "
                      + "supplementary characters");
            }
        }
    }

    // Backward: same comparison, starting from the end of the text.
    iterator.Index = ITERATION_STRING_.Length;
    iterator2.Index = ITERATION_STRING_.Length;

    // The forward loop leaves ch == DONE; without re-arming it the loop below
    // would never execute and backward iteration would go untested.
    ch = 0;
    while (ch != UCharacterIterator.DONE)
    {
        int index = iterator2.Index;
        ch = iterator2.PreviousCodePoint();

        // Walking backwards, the supplementary character is reached when the index
        // *before* the step sits just past its trail surrogate.
        if (index != ITERATION_SUPPLEMENTARY_INDEX + SurrogatePairLength)
        {
            if (ch != iterator.Previous() && ch != UCharacterIterator.DONE)
            {
                Errln("Error mismatch in previous() and "
                      + "previousCodePoint()");
            }
        }
        else
        {
            // Code units come back in reverse order: trail surrogate first, then lead.
            if (UTF16.GetTrailSurrogate(ch) != iterator.Previous()
                || UTF16.GetLeadSurrogate(ch) != iterator.Previous())
            {
                Errln("Error mismatch in previous and "
                      + "previousCodePoint for supplementary characters");
            }
        }
    }
}
/// <summary>
/// Exercises code-point oriented access on a <c>UCharacterIterator</c> over a string that
/// mixes BMP characters and surrogate pairs: <c>GetText</c>, <c>Index</c>,
/// <c>CurrentCodePoint</c>, <c>MoveCodePointIndex</c>, <c>Next</c> and <c>NextCodePoint</c>.
/// Expected values are taken from <c>UTF16.CharAt</c> / <c>UTF16.GetCharCount</c> on the same string.
/// </summary>
public void TestIterationUChar32()
{
    // Code-unit layout: a(0) b(1) <pair>(2,3) U+20AC(4) U+D7FF(5) <pair>(6,7) <pair>(8,9) a(10)
    string text = "\u0061\u0062\ud841\udc02\u20ac\ud7ff\ud842\udc06\ud801\udc00\u0061";
    int c;
    int i;

    UCharacterIterator iter = UCharacterIterator.GetInstance(text);

    string iterText = iter.GetText();
    if (!iterText.Equals(text))
    {
        Errln("iter.getText() failed");
    }

    iter.Index = (1);
    if (iter.CurrentCodePoint != UTF16.CharAt(text, 1))
    {
        Errln("Iterator didn't start out in the right place.");
    }

    // MoveCodePointIndex: each call is checked against the resulting index and the
    // code point found there. Failure messages report the string's value as
    // "expected" and the iterator's value as "got".
    iter.SetToStart();
    c = iter.CurrentCodePoint;
    i = iter.MoveCodePointIndex(1);
    c = iter.CurrentCodePoint;
    if (c != UTF16.CharAt(text, 1) || i != 1)
    {
        Errln("moveCodePointIndex(1) didn't work correctly expected " + Hex(UTF16.CharAt(text, 1)) + " got " + Hex(c) + " i= " + i);
    }

    i = iter.MoveCodePointIndex(2);
    c = iter.CurrentCodePoint;
    if (c != UTF16.CharAt(text, 4) || i != 4)
    {
        Errln("moveCodePointIndex(2) didn't work correctly expected " + Hex(UTF16.CharAt(text, 4)) + " got " + Hex(c) + " i= " + i);
    }

    i = iter.MoveCodePointIndex(-2);
    c = iter.CurrentCodePoint;
    if (c != UTF16.CharAt(text, 1) || i != 1)
    {
        Errln("moveCodePointIndex(-2) didn't work correctly expected " + Hex(UTF16.CharAt(text, 1)) + " got " + Hex(c) + " i= " + i);
    }

    iter.SetToLimit();
    i = iter.MoveCodePointIndex(-2);
    c = iter.CurrentCodePoint;
    if (c != UTF16.CharAt(text, (text.Length - 3)) || i != (text.Length - 3))
    {
        Errln("moveCodePointIndex(-2) didn't work correctly expected " + Hex(UTF16.CharAt(text, (text.Length - 3))) + " got " + Hex(c) + " i= " + i);
    }

    iter.SetToStart();
    c = iter.CurrentCodePoint;

    // testing first32PostInc, nextCodePointPostInc, setTostart
    i = 0;
    iter.SetToStart();
    c = iter.Next();
    if (c != UTF16.CharAt(text, i))
    {
        Errln("first32PostInc failed. Expected->" + Hex(UTF16.CharAt(text, i)) + " Got-> " + Hex(c));
    }
    if (iter.Index != UTF16.GetCharCount(c) + i)
    {
        Errln("getIndex() after first32PostInc() failed");
    }

    iter.SetToStart();
    i = 0;
    if (iter.Index != 0)
    {
        Errln("setToStart failed");
    }

    Logln("Testing forward iteration...");
    do
    {
        if (c != UCharacterIterator.DONE)
        {
            c = iter.NextCodePoint();
        }

        if (c != UTF16.CharAt(text, i))
        {
            Errln("Character mismatch at position " + i + ", iterator has " + Hex(c) + ", string has " + Hex(UTF16.CharAt(text, i)));
        }

        // i tracks the code-unit index the iterator should now be at
        i += UTF16.GetCharCount(c);
        if (iter.Index != i)
        {
            Errln("getIndex() aftr nextCodePointPostInc() isn't working right");
        }

        // The DONE test comes first so the string is not indexed once the end is reached
        c = iter.CurrentCodePoint;
        if (c != UCharacterIterator.DONE && c != UTF16.CharAt(text, i))
        {
            Errln("current() after nextCodePointPostInc() isn't working right");
        }
    }
    while (c != UCharacterIterator.DONE);

    // The iterator is at the end of the text here, so one more step must yield DONE
    c = iter.NextCodePoint();
    if (c != UCharacterIterator.DONE)
    {
        Errln("nextCodePointPostInc() didn't return DONE at the end");
    }
}
/// <summary>
/// Advances the wrapped iterator by one position and returns the value at the new
/// position, converted to <see cref="char"/>.
/// </summary>
/// <returns>
/// The wrapped iterator's <c>Current</c> value after the advance, cast to <see cref="char"/>.
/// NOTE(review): when the new position is off the end of the text the wrapped iterator
/// presumably reports its end-of-text value, which is then narrowed by the cast — confirm
/// that callers compare against the narrowed value.
/// </returns>
public override char Next()
{
    // Pre-increment semantics: step first (the value returned by the step is
    // discarded), then read whatever is at the new position.
    iterator.Next();
    var atNewPosition = iterator.Current;
    return (char)atNewPosition;
}
/// <summary>
/// Advances the wrapped iterator by one position and returns the value at the new
/// position, converted to <see cref="char"/>.
/// </summary>
///
/// <returns>
/// The result of the wrapped iterator's <c>Current()</c> after the advance, cast to
/// <see cref="char"/>. NOTE(review): when the new position is off the end of the text the
/// wrapped iterator presumably reports DONE, which is then narrowed by the cast — confirm
/// that callers compare against the narrowed value.
/// </returns>
public virtual char Next()
{
    // Pre-increment semantics: step first (the value returned by the step is
    // discarded), then read whatever is at the new position.
    iterator.Next();
    var atNewPosition = iterator.Current();
    return (char)atNewPosition;
}