/// <summary>
/// Splits a domain name into labels, converts each label with
/// <c>ConvertToASCII</c> and joins the converted labels with <c>FULL_STOP</c>.
/// </summary>
/// <param name="src">Domain name to convert.</param>
/// <param name="options">Options forwarded to <c>ConvertToASCII</c> for every label.</param>
/// <returns>Buffer holding the converted labels, separated by <c>FULL_STOP</c>.</returns>
/// <exception cref="StringPrepParseException">
/// Thrown with <c>DomainNameTooLongError</c> when the assembled result is longer than
/// <c>MAX_DOMAIN_NAME_LENGTH</c>.
/// </exception>
public static StringBuffer ConvertIDNToASCII(string src, IDNA2003Options options)
{
    char[] chars = src.ToCharArray();
    int end = chars.Length;
    StringBuffer ascii = new StringBuffer();
    int labelStart = 0;
    int cursor = 0;

    while (true)
    {
        cursor = GetSeparatorIndex(chars, cursor, end);
        string label = new string(chars, labelStart, cursor - labelStart);
        bool atEnd = cursor == end;

        // An empty label at the very end of the input is the root label
        // (the name ended with a separator) and is not converted.
        if (label.Length != 0 || !atEnd)
        {
            UCharacterIterator labelIter = UCharacterIterator.GetInstance(label);
            ascii.Append(ConvertToASCII(labelIter, options));
        }

        if (atEnd)
        {
            break;
        }

        // Step over the separator and emit the normalized one.
        cursor++;
        labelStart = cursor;
        ascii.Append((char)FULL_STOP);
    }

    if (ascii.Length > MAX_DOMAIN_NAME_LENGTH)
    {
        throw new StringPrepParseException("The output exceed the max allowed length.", StringPrepErrorType.DomainNameTooLongError);
    }
    return ascii;
}
// public constructors --------------------------------------------------

// public methods -------------------------------------------------------

/// <summary>
/// <p>
/// Encodes the code points of a string as a sequence of bytes, preserving
/// lexical order.
/// </p>
/// <p>
/// The buffer size needed for the output can be preflighted with
/// <c>GetCompressionLength(String)</c>.
/// </p>
/// </summary>
///
/// <param name="source">Text to encode.</param>
/// <param name="buffer">Output buffer; written through <c>WriteDiff</c>.</param>
/// <param name="offset">Index in <paramref name="buffer"/> at which writing starts.</param>
/// <returns>The offset returned by the last <c>WriteDiff</c> call, i.e. where writing stopped.</returns>
/// <seealso cref="M:IBM.ICU.Impl.BOCU.GetCompressionLength(System.String)"/>
/// <remarks>
/// NOTE(review): the original documentation states that an out-of-bounds exception is thrown
/// when the buffer is too small; that happens inside <c>WriteDiff</c>, which is not visible here.
/// </remarks>
public static int Compress(String source, byte[] buffer, int offset)
{
    UCharacterIterator codePoints = IBM.ICU.Text.UCharacterIterator.GetInstance(source);
    int previous = 0;

    for (int current = codePoints.NextCodePoint();
         current != IBM.ICU.Text.UForwardCharacterIterator_Constants.DONE;
         current = codePoints.NextCodePoint())
    {
        // Derive the base value from the previous code point before taking the difference.
        bool previousInUnihan = previous >= 0x4e00 && previous < 0xa000;
        if (previousInUnihan)
        {
            // Unihan U+4e00..U+9fa5: double-bytes down from the upper end
            previous = 0x9fff - SLOPE_REACH_POS_2_;
        }
        else
        {
            previous = (previous & ~0x7f) - SLOPE_REACH_NEG_1_;
        }

        offset = WriteDiff(current - previous, buffer, offset);
        previous = current;
    }

    return offset;
}
/// <summary>
/// Decodes <paramref name="src"/> as UTF-8, prepares the text with the <c>nfsmxp</c> profile
/// (and, when an <c>AT_SIGN</c> delimiter is present, the part after it with the
/// <c>nfsmxs</c> profile), and returns the result encoded as UTF-8.
/// </summary>
/// <param name="src">UTF-8 encoded input.</param>
/// <returns>UTF-8 bytes of the prepared text.</returns>
/// <exception cref="StringPrepParseException">
/// Thrown with <c>InvalidCharFound</c> when the part before the delimiter is one of
/// <c>special_prefixes</c> and a non-empty suffix follows it.
/// </exception>
public static byte[] MixedPrepare(byte[] src)
{
    String text = Encoding.UTF8.GetString(src);
    int atPos = text.IndexOf(AT_SIGN);
    StringBuffer prepared = new StringBuffer();

    if (atPos <= -1)
    {
        // No delimiter: the whole input is treated as a prefix.
        UCharacterIterator whole = UCharacterIterator.GetInstance(text);
        prepared.Append(prep.nfsmxp.Prepare(whole, StringPrepOptions.Default));
        return Encoding.UTF8.GetBytes(prepared.ToString());
    }

    /* special prefixes must not be followed by suffixes! */
    String prefixString = text.Substring(0, atPos);
    int specialIndex = FindStringIndex(special_prefixes, prefixString);
    String suffixString = text.Substring(atPos + 1);
    if (specialIndex > -1 && suffixString.Length != 0)
    {
        throw new StringPrepParseException("Suffix following a special index", StringPrepErrorType.InvalidCharFound);
    }

    UCharacterIterator prefix = UCharacterIterator.GetInstance(prefixString);
    UCharacterIterator suffix = UCharacterIterator.GetInstance(suffixString);
    prepared.Append(prep.nfsmxp.Prepare(prefix, StringPrepOptions.Default));
    prepared.Append(AT_SIGN); // re-insert the delimiter between the two prepared parts
    prepared.Append(prep.nfsmxs.Prepare(suffix, StringPrepOptions.Default));

    return Encoding.UTF8.GetBytes(prepared.ToString());
}
/// <summary>
/// Splits a domain name into labels, converts each label with <c>ConvertToUnicode</c>
/// and re-assembles the name using the separators found in the input.
/// </summary>
/// <param name="src">Domain name to convert.</param>
/// <param name="options">Options forwarded to <c>ConvertToUnicode</c> for every label.</param>
/// <returns>Buffer holding the converted labels and the original separator characters.</returns>
/// <exception cref="StringPrepParseException">
/// Thrown with <c>ZeroLengthLabel</c> for an empty label that is not at the end of the input,
/// or with <c>DomainNameTooLongError</c> when the result exceeds <c>MAX_DOMAIN_NAME_LENGTH</c>.
/// </exception>
public static StringBuffer ConvertIDNToUnicode(String src, IDNA2003Options options)
{
    char[] chars = src.ToCharArray();
    int end = chars.Length;
    StringBuffer unicode = new StringBuffer();
    int labelStart = 0;
    int cursor = 0;
    bool finished = false;

    while (!finished)
    {
        cursor = GetSeparatorIndex(chars, cursor, end);
        string label = new string(chars, labelStart, cursor - labelStart);
        bool atEnd = cursor == end;

        if (!atEnd && label.Length == 0)
        {
            throw new StringPrepParseException("Found zero length lable after NamePrep.", StringPrepErrorType.ZeroLengthLabel);
        }

        UCharacterIterator labelIter = UCharacterIterator.GetInstance(label);
        unicode.Append(ConvertToUnicode(labelIter, options));

        if (atEnd)
        {
            finished = true;
        }
        else
        {
            // Unlike the ToASCII operation the label separators are not normalized:
            // the separator character from the input is copied as-is.
            unicode.Append(chars[cursor]);
            cursor++;
            labelStart = cursor;
        }
    }

    if (unicode.Length > MAX_DOMAIN_NAME_LENGTH)
    {
        throw new StringPrepParseException("The output exceed the max allowed length.", StringPrepErrorType.DomainNameTooLongError);
    }
    return unicode;
}
/// <summary>
/// Checks that code point iteration over text containing unpaired surrogates terminates
/// and yields the expected single code units at both ends of the text.
/// </summary>
public void TestJitterbug1952()
{
    char[] text = { '\uDC00', '\uD800', '\uDC01', '\uD802', '\uDC02', '\uDC03' };
    UCharacterIterator it = UCharacterIterator.GetInstance(text);

    // Backwards from index 1: only the unpaired U+DC00 at index 0 is expected.
    // These loops must never turn into infinite loops; if they do there is a problem.
    it.Index = 1;
    for (int cp = it.PreviousCodePoint(); cp != UCharacterIterator.DONE; cp = it.PreviousCodePoint())
    {
        if (cp != 0xDc00)
        {
            Errln("iter.PreviousCodePoint() failed");
        }
    }

    // Forwards from index 5: only the unpaired U+DC03 at the end is expected.
    it.Index = 5;
    for (int cp = it.NextCodePoint(); cp != UCharacterIterator.DONE; cp = it.NextCodePoint())
    {
        if (cp != 0xDC03)
        {
            Errln("iter.NextCodePoint() failed");
        }
    }
}
/// <summary>
/// Splits a domain name into labels, converts each label with <c>ConvertToASCII</c>
/// and joins the converted labels with <c>FULL_STOP</c>.
/// </summary>
/// <param name="src">Domain name to convert.</param>
/// <param name="options">Options forwarded to <c>ConvertToASCII</c> for every label.</param>
/// <returns>Buffer holding the converted labels, separated by <c>FULL_STOP</c>.</returns>
public static StringBuffer ConvertIDNToASCII(String src, IDNA2003Options options)
{
    char[] chars = src.ToCharArray();
    int end = chars.Length;
    StringBuffer ascii = new StringBuffer();
    int labelStart = 0;
    int cursor = 0;
    bool atEnd;

    do
    {
        cursor = GetSeparatorIndex(chars, cursor, end);
        String label = new String(chars, labelStart, cursor - labelStart);
        atEnd = cursor == end;

        // A trailing empty label is the root label separator case and is skipped.
        bool isRootLabel = label.Length == 0 && atEnd;
        if (!isRootLabel)
        {
            UCharacterIterator labelIter = UCharacterIterator.GetInstance(label);
            ascii.Append(ConvertToASCII(labelIter, options));
        }

        if (!atEnd)
        {
            // Skip past the separator and write the normalized separator.
            cursor++;
            labelStart = cursor;
            ascii.Append((char)FULL_STOP);
        }
    }
    while (!atEnd);

    return ascii;
}
/// <summary>
/// Splits a domain name into labels, converts each label with <c>ConvertToUnicode</c>
/// and joins the converted labels with <c>FULL_STOP</c>.
/// </summary>
/// <param name="src">Domain name to convert.</param>
/// <param name="options">Options forwarded to <c>ConvertToUnicode</c> for every label.</param>
/// <returns>Buffer holding the converted labels, separated by <c>FULL_STOP</c>.</returns>
/// <exception cref="StringPrepParseException">
/// Thrown with <c>ZeroLengthLabel</c> for an empty label that is not at the end of the input.
/// </exception>
public static StringBuffer ConvertIDNToUnicode(String src, IDNA2003Options options)
{
    char[] chars = src.ToCharArray();
    int end = chars.Length;
    StringBuffer unicode = new StringBuffer();
    int labelStart = 0;
    int cursor = 0;
    bool atEnd;

    do
    {
        cursor = GetSeparatorIndex(chars, cursor, end);
        String label = new String(chars, labelStart, cursor - labelStart);
        atEnd = cursor == end;

        if (!atEnd && label.Length == 0)
        {
            throw new StringPrepParseException("Found zero length lable after NamePrep.", StringPrepErrorType.ZeroLengthLabel);
        }

        UCharacterIterator labelIter = UCharacterIterator.GetInstance(label);
        unicode.Append(ConvertToUnicode(labelIter, options));

        if (!atEnd)
        {
            // Skip past the separator and write the normalized separator.
            cursor++;
            labelStart = cursor;
            unicode.Append((char)FULL_STOP);
        }
    }
    while (!atEnd);

    return unicode;
}
/// <summary>
/// Returns the number of bytes that <c>Compress</c> would write for <paramref name="source"/>.
/// </summary>
///
/// <param name="source">text source string</param>
/// <returns>the length of the BOCU result</returns>
/// <seealso cref="M:IBM.ICU.Impl.BOCU.Compress(System.String, System.Byte[], System.Int32)"/>
public static int GetCompressionLength(String source)
{
    int prev = 0;
    int result = 0;
    UCharacterIterator iterator = IBM.ICU.Text.UCharacterIterator.GetInstance(source);
    int codepoint = iterator.NextCodePoint();
    while (codepoint != IBM.ICU.Text.UForwardCharacterIterator_Constants.DONE)
    {
        if (prev < 0x4e00 || prev >= 0xa000)
        {
            prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_;
        }
        else
        {
            // Unihan U+4e00..U+9fa5:
            // double-bytes down from the upper end
            prev = 0x9fff - SLOPE_REACH_POS_2_;
        }

        // Measure the code point that was just tested against DONE, and only then advance.
        // This mirrors the statement order in Compress: advancing first would skip the
        // first code point and measure the DONE sentinel instead.
        result += LengthOfDiff(codepoint - prev);
        prev = codepoint;
        codepoint = iterator.NextCodePoint();
    }
    return result;
}
/// <summary>
/// Decodes <paramref name="src"/> as UTF-8, runs it through <paramref name="strprep"/>
/// with the default options, and returns the prepared text encoded as UTF-8.
/// </summary>
/// <param name="src">UTF-8 encoded input.</param>
/// <param name="strprep">Profile used to prepare the decoded text.</param>
/// <returns>UTF-8 bytes of the prepared text.</returns>
private static byte[] Prepare(byte[] src, StringPrep strprep)
{
    String decoded = Encoding.UTF8.GetString(src);
    UCharacterIterator decodedIter = UCharacterIterator.GetInstance(decoded);

    StringBuffer prepared = strprep.Prepare(decodedIter, StringPrepOptions.Default);
    String preparedText = prepared.ToString();

    return Encoding.UTF8.GetBytes(preparedText);
}
/// <summary>
/// Creates the iterator over <paramref name="ui"/>, starting in the
/// <c>State.IterCheckFwd</c> state.
/// </summary>
/// <param name="data">Collation data; forwarded to the base class and the source of <c>nfcImpl</c>.</param>
/// <param name="numeric">Forwarded to the base class.</param>
/// <param name="ui">Text iterator; forwarded to the base class.</param>
/// <param name="startIndex">Value stored as <c>start</c>.</param>
public FCDIterCollationIterator(CollationData data, bool numeric, UCharacterIterator ui, int startIndex)
    : base(data, numeric, ui)
{
    this.nfcImpl = data.nfcImpl;
    this.start = startIndex;
    this.state = State.IterCheckFwd;
}
/// <summary>
/// Maps <paramref name="src"/> with <c>Map</c>, then rejects prohibited code points and
/// text that violates the bidirectional rules checked below.
/// </summary>
/// <param name="src">Text to prepare.</param>
/// <param name="options">Options forwarded to <c>Map</c>.</param>
/// <returns>A new buffer holding the mapped text.</returns>
/// <exception cref="StringPrepParseException">
/// Thrown with <c>ProhibitedError</c> when a code point other than U+0020 is in the prohibited set,
/// or with <c>CheckBiDiError</c> when the text mixes left-to-right and right-to-left characters,
/// or contains right-to-left characters without both starting and ending with one.
/// </exception>
public StringBuffer Prepare(String src, StringPrepOptions options)
{
    String mapped = Map(src, options);
    UCharacterIterator iter = UCharacterIterator.GetInstance(mapped);

    // CharDirectionCount serves as the "nothing seen yet" marker.
    UCharacterDirection lastDir = UCharacterDirectionExtensions.CharDirectionCount;
    UCharacterDirection firstDir = UCharacterDirectionExtensions.CharDirectionCount;
    int lastRtlPos = -1;
    int lastLtrPos = -1;
    bool sawRtl = false;
    bool sawLtr = false;

    int cp;
    while ((cp = iter.NextCodePoint()) != UCharacterIterator.Done)
    {
        if (transform.prohibitedSet.Contains(cp) && cp != 0x0020)
        {
            throw new StringPrepParseException("A prohibited code point was found in the input",
                                     StringPrepErrorType.ProhibitedError,
                                     iter.GetText(), iter.Index);
        }

        lastDir = UChar.GetDirection(cp);
        if (firstDir == UCharacterDirectionExtensions.CharDirectionCount)
        {
            firstDir = lastDir;
        }

        if (lastDir == UCharacterDirection.LeftToRight)
        {
            sawLtr = true;
            lastLtrPos = iter.Index - 1;
        }
        else if (lastDir == UCharacterDirection.RightToLeft || lastDir == UCharacterDirection.RightToLeftArabic)
        {
            sawRtl = true;
            lastRtlPos = iter.Index - 1;
        }
    }

    // satisfy 2: left-to-right and right-to-left characters must not be mixed
    if (sawLtr && sawRtl)
    {
        throw new StringPrepParseException("The input does not conform to the rules for BiDi code points.",
                                 StringPrepErrorType.CheckBiDiError, iter.GetText(),
                                 (lastRtlPos > lastLtrPos) ? lastRtlPos : lastLtrPos);
    }

    // satisfy 3: with right-to-left content, the first and the last character must both be right-to-left
    bool startsRtl = firstDir == UCharacterDirection.RightToLeft || firstDir == UCharacterDirection.RightToLeftArabic;
    bool endsRtl = lastDir == UCharacterDirection.RightToLeft || lastDir == UCharacterDirection.RightToLeftArabic;
    if (sawRtl && !(startsRtl && endsRtl))
    {
        throw new StringPrepParseException("The input does not conform to the rules for BiDi code points.",
                                 StringPrepErrorType.CheckBiDiError, iter.GetText(),
                                 (lastRtlPos > lastLtrPos) ? lastRtlPos : lastLtrPos);
    }

    return new StringBuffer(mapped);
}
// TODO: optimize
/// <summary>
/// Compares two domain names by converting both with <c>ConvertIDNToASCII</c> and comparing
/// the results with <c>CompareCaseInsensitiveASCII</c>.
/// </summary>
/// <param name="i1">Iterator over the first name; its full text is used.</param>
/// <param name="i2">Iterator over the second name; its full text is used.</param>
/// <param name="options">Options forwarded to <c>ConvertIDNToASCII</c>.</param>
/// <returns>The value returned by <c>CompareCaseInsensitiveASCII</c>.</returns>
/// <exception cref="ArgumentException">Thrown when either iterator is null.</exception>
public static int Compare(UCharacterIterator i1, UCharacterIterator i2, IDNA2003Options options)
{
    bool missingInput = i1 == null || i2 == null;
    if (missingInput)
    {
        throw new ArgumentException("One of the source buffers is null");
    }

    StringBuffer firstAscii = ConvertIDNToASCII(i1.GetText(), options);
    StringBuffer secondAscii = ConvertIDNToASCII(i2.GetText(), options);
    int comparison = CompareCaseInsensitiveASCII(firstAscii, secondAscii);
    return comparison;
}
/// <summary>
/// Verifies that a cloned iterator returns the same sequence from <c>Next()</c>
/// as the iterator it was cloned from, up to and including <c>DONE</c>.
/// </summary>
public void TestClone()
{
    UCharacterIterator original = UCharacterIterator.GetInstance("testing");
    UCharacterIterator copy = (UCharacterIterator)original.Clone();

    for (int fromOriginal = 0; fromOriginal != UCharacterIterator.DONE;)
    {
        fromOriginal = original.Next();
        int fromCopy = copy.Next();
        if (fromOriginal != fromCopy)
        {
            Errln("Cloned operation failed");
        }
    }
}
/// <summary>
/// Transliterates <paramref name="src"/> with <c>mapTransform</c> and, unless
/// <c>ALLOW_UNASSIGNED</c> is set in <paramref name="options"/>, rejects any code point
/// contained in the unassigned set.
/// </summary>
/// <param name="src">Text to map.</param>
/// <param name="options">Option flags; only <c>ALLOW_UNASSIGNED</c> is examined here.</param>
/// <returns>The transliterated text.</returns>
/// <exception cref="StringPrepParseException">
/// Thrown with <c>UnassignedError</c> when an unassigned code point is found and not allowed.
/// </exception>
private String Map(String src, StringPrepOptions options)
{
    bool rejectUnassigned = !((options & ALLOW_UNASSIGNED) > 0);

    String mapped = mapTransform.Transliterate(src);
    UCharacterIterator mappedIter = UCharacterIterator.GetInstance(mapped);

    for (int cp = mappedIter.NextCodePoint(); cp != UCharacterIterator.Done; cp = mappedIter.NextCodePoint())
    {
        if (transform.unassignedSet.Contains(cp) && rejectUnassigned)
        {
            throw new StringPrepParseException("An unassigned code point was found in the input",
                                     StringPrepErrorType.UnassignedError);
        }
    }

    return mapped;
}
/// <summary>
/// Exercises <c>UCharacterIterator.GetText(char[])</c>: starts with a one-element buffer,
/// retries with a buffer of <c>iterator.Length</c> when the call reports an index error,
/// and compares the retrieved text with <paramref name="result"/>.
/// </summary>
/// <param name="iterator">Iterator whose text is read.</param>
/// <param name="result">Expected text.</param>
public void getText(UCharacterIterator iterator, String result)
{
    char[] buffer = new char[1];
    bool retrieved = false;

    while (!retrieved)
    {
        try
        {
            iterator.GetText(buffer);
            retrieved = true;
        }
        catch (IndexOutOfRangeException)
        {
            // Buffer was too small: allocate one sized to the iterator and try again.
            buffer = new char[iterator.Length];
        }
    }

    string actual = new string(buffer, 0, iterator.Length);
    if (result.CompareToOrdinal(actual) != 0)
    {
        Errln("getText failed for iterator");
    }
}
/// <summary>
/// Collects the code points of <paramref name="input"/> and passes them to the array-based
/// <c>Encode</c> overload, enlarging the output array for as long as that overload reports
/// <c>punycode_big_output</c>.
/// </summary>
/// <param name="input">Text to encode.</param>
/// <param name="case_flags">Forwarded unchanged to the array-based overload.</param>
/// <returns>
/// A buffer with the encoded characters on <c>punycode_success</c>; otherwise
/// <c>GetException</c> is called with the status code and, if that call returns, an empty buffer.
/// </returns>
public static StringBuffer Encode(StringBuffer input, char[] case_flags)
{
    int[] codePoints = new int[input.Length];
    int count = 0;
    StringBuffer result = new StringBuffer();

    UCharacterIterator iter = UCharacterIterator.GetInstance(input);
    for (int cp = iter.NextCodePoint(); cp != UCharacterIterator.Done; cp = iter.NextCodePoint())
    {
        codePoints[count++] = cp;
    }

    // Single-element array: the callee receives the capacity in it; on success its value is
    // used below as the number of characters to append.
    int[] outLen = { input.Length * 4 };
    char[] output = new char[outLen[0]];

    int rc = Encode(count, codePoints, case_flags, outLen, output);
    while (rc == punycode_big_output)
    {
        // Output did not fit: quadruple the capacity and convert again.
        outLen[0] = outLen[0] * 4;
        output = new char[outLen[0]];
        rc = Encode(count, codePoints, case_flags, outLen, output);
    }

    if (rc != punycode_success)
    {
        GetException(rc);
        return result;
    }
    return result.Append(output, 0, outLen[0]);
}
/// <summary>
/// Convenience overload: converts the full text of <paramref name="iter"/> via the
/// string-based <c>ConvertIDNToUnicode</c>.
/// </summary>
/// <param name="iter">Iterator whose text is converted.</param>
/// <param name="options">Options forwarded to the string-based overload.</param>
/// <returns>The result of the string-based overload.</returns>
public static StringBuffer ConvertIDNToUnicode(UCharacterIterator iter, IDNA2003Options options)
{
    string domainName = iter.GetText();
    StringBuffer converted = ConvertIDNToUnicode(domainName, options);
    return converted;
}
/// <summary>
/// ToUnicode operation for a single label. The RFC states that ToUnicode never fails: if a
/// step fails, the original input is returned. Here the original text is returned when
/// preparation or Punycode decoding throws <c>StringPrepParseException</c>, when the label has
/// no ACE prefix, or when the round-trip verification does not match.
/// </summary>
/// <param name="iter">Iterator over the label.</param>
/// <param name="options">Options forwarded to the preparation and ToASCII steps.</param>
/// <returns>The decoded label, or a buffer with the iterator's text when nothing was decoded.</returns>
/// <remarks>
/// NOTE(review): an exception thrown by the <c>ConvertToASCII</c> call in step 6 is not caught
/// here and propagates to the caller.
/// </remarks>
public static StringBuffer ConvertToUnicode(UCharacterIterator iter, IDNA2003Options options)
{
    int startIndex = iter.Index;

    // step 1: find out if all the code units in the source are ASCII
    bool allAscii = true;
    for (int unit = iter.Next(); unit != UCharacterIterator.DONE; unit = iter.Next())
    {
        if (unit > 0x7F)
        {
            allAscii = false;
            break;
        }
    }

    StringBuffer processed;
    if (allAscii)
    {
        // just point to source
        processed = new StringBuffer(iter.GetText());
    }
    else
    {
        // step 2: process the string
        iter.Index = startIndex;
        try
        {
            processed = transform.Prepare(iter, (StringPrepOptions)options);
        }
        catch (StringPrepParseException)
        {
            return new StringBuffer(iter.GetText());
        }
    }

    // step 3: verify ACE Prefix
    if (!StartsWithPrefix(processed))
    {
        return new StringBuffer(iter.GetText());
    }

    // step 4: Remove the ACE Prefix
    String withoutPrefix = processed.ToString(ACE_PREFIX_LENGTH, processed.Length - ACE_PREFIX_LENGTH);

    // step 5: Decode using punycode
    StringBuffer decoded;
    try
    {
        decoded = PunycodeReference.Decode(new StringBuffer(withoutPrefix), null);
    }
    catch (StringPrepParseException)
    {
        return new StringBuffer(iter.GetText());
    }

    // step 6: Apply toASCII
    StringBuffer roundTrip = ConvertToASCII(decoded, options);

    // step 7: verify
    if (CompareCaseInsensitiveASCII(processed, roundTrip) != 0)
    {
        return new StringBuffer(iter.GetText());
    }

    // step 8: return output of step 5
    return decoded;
}
/// <summary>
/// Convenience overload: wraps <paramref name="src"/> in a <c>UCharacterIterator</c> and
/// delegates to the iterator-based <c>ConvertToUnicode</c>.
/// </summary>
/// <param name="src">Label to convert.</param>
/// <param name="options">Options forwarded to the iterator-based overload.</param>
/// <returns>The result of the iterator-based overload.</returns>
public static StringBuffer ConvertToUnicode(StringBuffer src, IDNA2003Options options)
{
    UCharacterIterator labelIter = UCharacterIterator.GetInstance(src);
    StringBuffer converted = ConvertToUnicode(labelIter, options);
    return converted;
}
/// <summary>
/// Compares the <c>CharacterIterator</c> obtained from
/// <c>UCharacterIterator.GetCharacterIterator()</c> against a <c>StringCharacterIterator</c>
/// over the same text: both are driven through the same scripted sequence of moves, then
/// <c>First()</c>, <c>Last()</c>, <c>BeginIndex</c>, <c>EndIndex</c> and <c>Clone()</c> are checked.
/// </summary>
public void TestUCharacterIteratorWrapper()
{
    String source = "asdfasdfjoiuyoiuy2341235679886765";
    UCharacterIterator it = UCharacterIterator.GetInstance(source);
    CharacterIterator wrap_ci = it.GetCharacterIterator();
    CharacterIterator ci = new StringCharacterIterator(source);
    wrap_ci.SetIndex(10);
    ci.SetIndex(10);

    // movement vector: '-' for Previous(), '0' for Current, '+' for Next()
    String moves = "0+0+0--0-0-+++0--+++++++0--------++++0000----0-";
    int c1, c2;
    char m;
    int movesIndex = 0;

    while (movesIndex < moves.Length)
    {
        m = moves[movesIndex++];
        if (m == '-')
        {
            c1 = wrap_ci.Previous();
            c2 = ci.Previous();
        }
        else if (m == '0')
        {
            c1 = wrap_ci.Current;
            c2 = ci.Current;
        }
        else
        {
            // m == '+'
            c1 = wrap_ci.Next();
            c2 = ci.Next();
        }

        // compare results
        if (c1 != c2)
        {
            // report the moves made so far, including the current one
            String history = moves.Substring(0, movesIndex);
            Errln("error: mismatch in Normalizer iteration at " + history + ": "
                  + "got c1= " + Hex(c1) + " != expected c2= " + Hex(c2));
            break;
        }

        // compare indexes
        if (wrap_ci.Index != ci.Index)
        {
            // report the moves made so far, including the current one
            String history = moves.Substring(0, movesIndex);
            Errln("error: index mismatch in Normalizer iteration at " + history + " : "
                  + "Normalizer index " + wrap_ci.Index + " expected " + ci.Index);
            break;
        }
    }

    if (ci.First() != wrap_ci.First())
    {
        // The message is kept on one line: a regular string literal cannot contain a line break.
        Errln("CharacterIteratorWrapper.First() failed. expected: " + ci.First() + " got: " + wrap_ci.First());
    }
    if (ci.Last() != wrap_ci.Last())
    {
        Errln("CharacterIteratorWrapper.Last() failed expected: " + ci.Last() + " got: " + wrap_ci.Last());
    }
    if (ci.BeginIndex != wrap_ci.BeginIndex)
    {
        Errln("CharacterIteratorWrapper.BeginIndex failed expected: " + ci.BeginIndex + " got: " + wrap_ci.BeginIndex);
    }
    if (ci.EndIndex != wrap_ci.EndIndex)
    {
        Errln("CharacterIteratorWrapper.EndIndex failed expected: " + ci.EndIndex + " got: " + wrap_ci.EndIndex);
    }

    try
    {
        CharacterIterator cloneWCI = (CharacterIterator)wrap_ci.Clone();
        if (wrap_ci.Index != cloneWCI.Index)
        {
            Errln("CharacterIteratorWrapper.Clone() failed expected: " + wrap_ci.Index + " got: " + cloneWCI.Index);
        }
    }
    catch (Exception)
    {
        // Any failure while cloning is reported as a test error rather than aborting the test.
        Errln("CharacterIterator.Clone() failed");
    }
}
/// <summary>
/// ToASCII operation for a single label: prepares the label when it contains non-ASCII
/// code units, optionally enforces the STD 3 ASCII rules, and Punycode-encodes the label
/// with the ACE prefix when the prepared text is not pure ASCII.
/// </summary>
/// <param name="srcIter">
/// Iterator over the label. It is consumed from its current position and then reset with
/// <c>SetToStart()</c>.
/// </param>
/// <param name="options">
/// Option flags; <c>USE_STD3_RULES</c> enables the STD 3 checks, and the value is also passed
/// to the preparation step.
/// </param>
/// <returns>The ASCII form of the label.</returns>
/// <exception cref="StringPrepParseException">
/// Thrown with <c>ZeroLengthLabel</c> when the prepared label is empty, with
/// <c>STD3ASCIIRulesError</c> when the STD 3 checks fail, with <c>AcePrefixError</c> when a
/// non-ASCII label already starts with the ACE prefix, and with <c>LabelTooLongError</c> when
/// the result is longer than <c>MAX_LABEL_LENGTH</c>.
/// </exception>
public static StringBuffer ConvertToASCII(UCharacterIterator srcIter, IDNA2003Options options)
{
    char[] caseFlags = null;

    // the source contains all ascii codepoints
    bool srcIsASCII = true;
    // assume the source contains all LDH codepoints
    bool srcIsLDH = true;

    // get the options
    bool useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);

    // step 1: look for code units outside the ASCII range
    int ch;
    while ((ch = srcIter.Next()) != UCharacterIterator.DONE)
    {
        if (ch > 0x7f)
        {
            srcIsASCII = false;
        }
    }
    int failPos = -1;
    srcIter.SetToStart();

    // step 2 is performed only if the source contains non ASCII
    StringBuffer processOut;
    if (!srcIsASCII)
    {
        processOut = transform.Prepare(srcIter, (StringPrepOptions)options);
    }
    else
    {
        processOut = new StringBuffer(srcIter.GetText());
    }

    int poLen = processOut.Length;
    if (poLen == 0)
    {
        throw new StringPrepParseException("Found zero length lable after NamePrep.", StringPrepErrorType.ZeroLengthLabel);
    }
    StringBuffer dest = new StringBuffer();

    // reset the variable to verify if output of prepare is ASCII or not
    srcIsASCII = true;

    // step 3 & 4
    for (int j = 0; j < poLen; j++)
    {
        ch = processOut[j];
        if (ch > 0x7F)
        {
            srcIsASCII = false;
        }
        else if (!IsLDHChar(ch))
        {
            // here we do not assemble surrogates
            // since we know that LDH code points
            // are in the ASCII range only
            srcIsLDH = false;
            failPos = j;
        }
    }

    if (useSTD3ASCIIRules)
    {
        // verify 3a and 3b
        if (!srcIsLDH)
        {
            /* source contains some non-LDH characters */
            throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
                                     StringPrepErrorType.STD3ASCIIRulesError,
                                     processOut.ToString(),
                                     (failPos > 0) ? (failPos - 1) : failPos);
        }
        if (processOut[0] == HYPHEN)
        {
            throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
                                     StringPrepErrorType.STD3ASCIIRulesError,
                                     processOut.ToString(), 0);
        }
        if (processOut[processOut.Length - 1] == HYPHEN)
        {
            // poLen is known to be > 0 here (the empty label was rejected above).
            throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
                                     StringPrepErrorType.STD3ASCIIRulesError,
                                     processOut.ToString(),
                                     poLen - 1);
        }
    }

    if (srcIsASCII)
    {
        dest = processOut;
    }
    else
    {
        // step 5 : verify the sequence does not begin with ACE prefix
        if (StartsWithPrefix(processOut))
        {
            // The label must NOT already carry the ACE prefix at this point; the message
            // states the actual violation.
            throw new StringPrepParseException("The input starts with the ACE Prefix.",
                                     StringPrepErrorType.AcePrefixError, processOut.ToString(), 0);
        }

        // step 6: encode the sequence with punycode
        StringBuffer punyout = PunycodeReference.Encode(processOut, caseFlags);

        // convert all codepoints to lower case ASCII
        StringBuffer lowerOut = ToASCIILower(punyout);

        // step 7: prepend the ACE prefix
        dest.Append(ACE_PREFIX, 0, ACE_PREFIX_LENGTH);
        // then copy the encoded, lower-cased label after the prefix
        dest.Append(lowerOut);
    }

    if (dest.Length > MAX_LABEL_LENGTH)
    {
        throw new StringPrepParseException("The labels in the input are too long. Length > 64.",
                                 StringPrepErrorType.LabelTooLongError, dest.ToString(), 0);
    }
    return dest;
}
/// <summary>
/// Creates a collation iterator that reads its text from <paramref name="ui"/>.
/// </summary>
/// <param name="d">Collation data; forwarded to the base class.</param>
/// <param name="numeric">Forwarded to the base class.</param>
/// <param name="ui">Text iterator stored in <c>iter</c>.</param>
public IterCollationIterator(CollationData d, bool numeric, UCharacterIterator ui)
    : base(d, numeric)
{
    this.iter = ui;
}
//
// toUnicode operation; should only apply to a single label.
//
// Never reports failure for the steps wrapped in try/catch below: when nameprep, decoding
// or the round-trip verification fails, the input label is returned unchanged.
//
private static String ToUnicodeInternal(String label, int flag)
{
    StringBuffer prepared;

    // step 1: find out if all the codepoints in the input are ASCII
    if (IsAllASCII(label))
    {
        prepared = new StringBuffer(label);
    }
    else
    {
        // step 2: perform the nameprep operation; flag ALLOW_UNASSIGNED is used here
        try
        {
            UCharacterIterator iter = UCharacterIterator.getInstance(label);
            prepared = NamePrep.prepare(iter, flag);
        }
        catch (Exception)
        {
            // toUnicode never fails; if any step fails, return the input string
            return(label);
        }
    }

    // step 3: verify ACE Prefix
    if (!StartsWithACEPrefix(prepared))
    {
        // just return the input
        return(label);
    }

    // step 4: remove the ACE Prefix
    String encoded = prepared.Substring(ACE_PREFIX_LENGTH, prepared.Length() - ACE_PREFIX_LENGTH);

    try
    {
        // step 5: decode using punycode
        StringBuffer decoded = Punycode.decode(new StringBuffer(encoded), null);

        // step 6: apply toASCII
        String roundTrip = ToASCII(decoded.ToString(), flag);

        // step 7: verify; step 8: return output of step 5
        if (roundTrip.EqualsIgnoreCase(prepared.ToString()))
        {
            return(decoded.ToString());
        }
    }
    catch (Exception)
    {
        // deliberately ignored: fall through and return the input
    }

    // just return the input
    return(label);
}
//
// toASCII operation; should only apply to a single label.
//
// Throws IllegalArgumentException when the label is empty after preparation, violates the
// STD3 ASCII rules (when requested through USE_STD3_ASCII_RULES), already starts with the
// ACE prefix, is longer than MAX_LABEL_LENGTH, or when nameprep/punycode report a parse error.
//
private static String ToASCIIInternal(String label, int flag)
{
    // step 1: check whether the label consists of ASCII code points only
    bool isASCII = IsAllASCII(label);

    // step 2: perform the nameprep operation; flag ALLOW_UNASSIGNED is used here
    StringBuffer dest;
    if (isASCII)
    {
        dest = new StringBuffer(label);
    }
    else
    {
        UCharacterIterator iter = UCharacterIterator.getInstance(label);
        try
        {
            dest = NamePrep.prepare(iter, flag);
        }
        catch (java.text.ParseException e)
        {
            throw new IllegalArgumentException(e);
        }
    }

    // step 8, moved forward: the lower bound of the 1..63 length range is checked here
    if (dest.Length() == 0)
    {
        throw new IllegalArgumentException("Empty label is not a legal name");
    }

    // step 3: verify the absence of non-LDH ASCII code points
    //   0..0x2c, 0x2e..0x2f, 0x3a..0x40, 0x5b..0x60, 0x7b..0x7f
    // and the absence of a leading or trailing hyphen
    bool useSTD3ASCIIRules = ((flag & USE_STD3_ASCII_RULES) != 0);
    if (useSTD3ASCIIRules)
    {
        int length = dest.Length();
        for (int pos = 0; pos < length; pos++)
        {
            int c = dest.CharAt(pos);
            if (IsNonLDHAsciiCodePoint(c))
            {
                throw new IllegalArgumentException("Contains non-LDH ASCII characters");
            }
        }

        if (dest.CharAt(0) == '-' || dest.CharAt(dest.Length() - 1) == '-')
        {
            throw new IllegalArgumentException("Has leading or trailing hyphen");
        }
    }

    // step 4: if all code points are inside 0..0x7f, skip to step 8
    if (!isASCII && !IsAllASCII(dest.ToString()))
    {
        // step 5: verify the sequence does not begin with the ACE prefix
        if (StartsWithACEPrefix(dest))
        {
            throw new IllegalArgumentException("The input starts with the ACE Prefix");
        }

        // step 6: encode the sequence with punycode
        try
        {
            dest = Punycode.encode(dest, null);
        }
        catch (java.text.ParseException e)
        {
            throw new IllegalArgumentException(e);
        }
        dest = ToASCIILower(dest);

        // step 7: prepend the ACE prefix
        dest.Insert(0, ACE_PREFIX);
    }

    // step 8: the length must be inside 1..63
    if (dest.Length() > MAX_LABEL_LENGTH)
    {
        throw new IllegalArgumentException("The label in the input is too long");
    }

    return(dest.ToString());
}
/// <summary>
/// Checks basic positioning on <c>ITERATION_STRING_</c> and verifies that code-point iteration
/// (<c>NextCodePoint</c>/<c>PreviousCodePoint</c>) stays consistent with code-unit iteration
/// (<c>Next</c>/<c>Previous</c>) in both directions, including the supplementary character
/// whose lead surrogate is at <c>ITERATION_SUPPLEMENTARY_INDEX</c>.
/// </summary>
public void TestIteration()
{
    UCharacterIterator iterator = UCharacterIterator.GetInstance(ITERATION_STRING_);
    UCharacterIterator iterator2 = UCharacterIterator.GetInstance(ITERATION_STRING_);

    iterator.SetToStart();
    if (iterator.Current != ITERATION_STRING_[0])
    {
        Errln("Iterator failed retrieving first character");
    }
    iterator.SetToLimit();
    if (iterator.Previous() != ITERATION_STRING_[ITERATION_STRING_.Length - 1])
    {
        Errln("Iterator failed retrieving last character");
    }
    if (iterator.Length != ITERATION_STRING_.Length)
    {
        Errln("Iterator failed determining begin and end index");
    }

    // Forward: iterator2 walks code points, iterator walks code units.
    iterator2.Index = 0;
    iterator.Index = 0;
    int ch = 0;
    while (ch != UCharacterIterator.DONE)
    {
        int index = iterator2.Index;
        ch = iterator2.NextCodePoint();
        if (index != ITERATION_SUPPLEMENTARY_INDEX)
        {
            if (ch != iterator.Next() && ch != UCharacterIterator.DONE)
            {
                Errln("Error mismatch in next() and nextCodePoint()");
            }
        }
        else
        {
            // Read both code units up front so the two iterators stay aligned even on a mismatch.
            int lead = iterator.Next();
            int trail = iterator.Next();
            if (UTF16.GetLeadSurrogate(ch) != lead || UTF16.GetTrailSurrogate(ch) != trail)
            {
                Errln("Error mismatch in next and nextCodePoint for "
                      + "supplementary characters");
            }
        }
    }

    // Backward: same comparison from the end of the text.
    iterator.Index = ITERATION_STRING_.Length;
    iterator2.Index = ITERATION_STRING_.Length;
    // ch still holds DONE from the forward pass; without this reset the loop below never runs.
    ch = 0;
    while (ch != UCharacterIterator.DONE)
    {
        ch = iterator2.PreviousCodePoint();
        // After stepping back, the index is the start of the code point just read.
        int index = iterator2.Index;
        if (ch == UCharacterIterator.DONE || index != ITERATION_SUPPLEMENTARY_INDEX)
        {
            if (ch != iterator.Previous() && ch != UCharacterIterator.DONE)
            {
                Errln("Error mismatch in previous() and "
                      + "previousCodePoint()");
            }
        }
        else
        {
            // Moving backward, the trail surrogate is returned before the lead surrogate.
            int trail = iterator.Previous();
            int lead = iterator.Previous();
            if (UTF16.GetTrailSurrogate(ch) != trail || UTF16.GetLeadSurrogate(ch) != lead)
            {
                Errln("Error mismatch in previous and "
                      + "previousCodePoint for supplementary characters");
            }
        }
    }
}
/// <summary>
/// Drives <paramref name="iter"/> (by code point) and a <c>UCharIterator</c> over the expected
/// code points through the same scripted sequence of moves, comparing the returned values
/// and the mapped indexes after every move. Stops at the first mismatch.
/// </summary>
/// <param name="iter">Iterator under test; repositioned to index 4 before the moves start.</param>
public void previousNext(UCharacterIterator iter)
{
    int[] expect = { 0x2f999, 0x1d15f, 0xc4, 0x1ed0 };

    // maps a source index to the corresponding index into expect
    // (the last entry is needed for the position past the end)
    int[] expectIndex = { 0, 0, 1, 1, 2, 3, 4 };

    // initial indexes into the source and the expected code points
    const int srcMiddle = 4;
    const int expectMiddle = 2;

    // movement vector: '-' for previous, '0' for current, '+' for next
    String moves = "0+0+0--0-0-+++0--+++++++0--------";

    UCharIterator iter32 = new UCharIterator(expect, expect.Length, expectMiddle);

    // start both iterators in the middle
    iter.Index = srcMiddle;

    // 'consumed' counts the moves performed so far, including the current one
    for (int consumed = 1; consumed <= moves.Length; consumed++)
    {
        char move = moves[consumed - 1];
        int c1, c2;
        switch (move)
        {
            case '-':
                c1 = iter.PreviousCodePoint();
                c2 = iter32.Previous();
                break;
            case '0':
                c1 = iter.CurrentCodePoint;
                c2 = iter32.Current;
                break;
            default: // '+'
                c1 = iter.NextCodePoint();
                c2 = iter32.Next();
                break;
        }

        // compare results
        if (c1 != c2)
        {
            String history = moves.Substring(0, consumed);
            Errln("error: mismatch in Normalizer iteration at " + history + ": "
                  + "got c1= " + Hex(c1) + " != expected c2= " + Hex(c2));
            break;
        }

        // compare indexes
        if (expectIndex[iter.Index] != iter32.Index)
        {
            String history = moves.Substring(0, consumed);
            Errln("error: index mismatch in Normalizer iteration at " + history + " : "
                  + "Normalizer index " + iter.Index + " expected " + expectIndex[iter32.Index]);
            break;
        }
    }
}
/// <summary>
/// Exercises the three <c>CollationElementIterator.SetText</c> overloads used here (string,
/// <c>CharacterIterator</c>, <c>UCharacterIterator</c>): after each call the re-targeted
/// iterator is compared with an iterator created directly over the same text.
/// </summary>
public void TestSetText(/* char* par */)
{
    RuleBasedCollator en_us = (RuleBasedCollator)Collator.GetInstance(new CultureInfo("en-US"));
    CollationElementIterator reference = en_us.GetCollationElementIterator(test1);
    CollationElementIterator retargeted = en_us.GetCollationElementIterator(test2);

    // Run through the second iterator just to exercise it (at most ten elements).
    int order = retargeted.Next();
    for (int step = 1; step < 10 && order != CollationElementIterator.NULLORDER; step++)
    {
        try
        {
            order = retargeted.Next();
        }
        catch (Exception)
        {
            Errln("iter2.Next() returned an error.");
            break;
        }
    }

    // Point it at the same string as the first iterator.
    try
    {
        retargeted.SetText(test1);
    }
    catch (Exception)
    {
        Errln("call to iter2->setText(test1) failed.");
        return;
    }
    assertEqual(reference, retargeted);
    reference.Reset();

    // Same check through the CharacterIterator overload.
    CharacterIterator chariter = new StringCharacterIterator(test1);
    try
    {
        retargeted.SetText(chariter);
    }
    catch (Exception)
    {
        Errln("call to iter2->setText(chariter(test1)) failed.");
        return;
    }
    assertEqual(reference, retargeted);
    reference.Reset();

    // Same check through the UCharacterIterator overload.
    UCharacterIterator uchariter = UCharacterIterator.GetInstance(test1);
    try
    {
        retargeted.SetText(uchariter);
    }
    catch (Exception)
    {
        Errln("call to iter2->setText(uchariter(test1)) failed.");
        return;
    }
    assertEqual(reference, retargeted);
}
/// <summary>
/// Convenience overload: prepares the full text of <paramref name="src"/> via the
/// string-based <c>Prepare</c>.
/// </summary>
/// <param name="src">Iterator whose text is prepared.</param>
/// <param name="options">Options forwarded to the string-based overload.</param>
/// <returns>The result of the string-based overload.</returns>
public StringBuffer Prepare(UCharacterIterator src, StringPrepOptions options)
{
    string text = src.GetText();
    StringBuffer prepared = Prepare(text, options);
    return prepared;
}
/// <summary>
/// Resets the filter from the delegate: <c>text</c> is replaced by a new
/// <c>UCharacterIterator</c> over a clone of the delegate's text.
/// </summary>
private void ResetState()
{
    CharacterIterator delegateTextCopy = (CharacterIterator)@delegate.Text.Clone();
    text = UCharacterIterator.GetInstance(delegateTextCopy);
}
/// <summary>
/// ToUnicode operation for a single label. Returns the Punycode-decoded label when the
/// (prepared) input carries the ACE prefix, decodes successfully and round-trips through
/// <c>ConvertToASCII</c>; otherwise returns the iterator's text.
/// </summary>
/// <param name="src">Iterator over the label.</param>
/// <param name="options">Options forwarded to the preparation and ToASCII steps.</param>
/// <returns>The decoded label, or a buffer with the iterator's text when nothing was decoded.</returns>
/// <remarks>
/// TODO: The RFC states that ToUnicode never fails; if any step fails, the original input is
/// returned immediately in that step. NOTE(review): an exception thrown by the
/// <c>ConvertToASCII</c> call in step 6 is not caught here. STD3 ASCII rule verification is
/// not performed by this method.
/// </remarks>
public static StringBuffer ConvertToUnicode(UCharacterIterator src, IDNA2003Options options)
{
    bool[] caseFlags = null;
    int saveIndex = src.Index;

    // step 1: find out if all the code units in src are ASCII (the whole text is scanned)
    bool srcIsASCII = true;
    for (int unit = src.Next(); unit != UCharacterIterator.DONE; unit = src.Next())
    {
        if (unit > 0x7F)
        {
            srcIsASCII = false;
        }
    }

    StringBuffer processOut;
    if (srcIsASCII)
    {
        // just point to source
        processOut = new StringBuffer(src.GetText());
    }
    else
    {
        try
        {
            // step 2: process the string
            src.Index = saveIndex;
            processOut = namePrep.Prepare(src, (StringPrepOptions)options);
        }
        catch (StringPrepParseException)
        {
            return new StringBuffer(src.GetText());
        }
    }

    // step 3: verify ACE Prefix
    if (StartsWithPrefix(processOut))
    {
        // step 4: Remove the ACE Prefix
        string temp = processOut.ToString(ACE_PREFIX.Length, processOut.Length - ACE_PREFIX.Length);

        // step 5: Decode using punycode; a decoding failure means "nothing decoded"
        StringBuffer decodeOut;
        try
        {
            decodeOut = new StringBuffer(Punycode.Decode(temp, caseFlags).ToString());
        }
        catch (StringPrepParseException)
        {
            decodeOut = null;
        }

        if (decodeOut != null)
        {
            // step 6: Apply toASCII
            StringBuffer toASCIIOut = ConvertToASCII(UCharacterIterator.GetInstance(decodeOut), options);

            // step 7: verify; step 8: return output of step 5
            if (CompareCaseInsensitiveASCII(processOut, toASCIIOut) == 0)
            {
                return decodeOut;
            }
        }
    }

    // nothing was decoded: just return the source
    return new StringBuffer(src.GetText());
}