UCharacterIterator C# (CSharp) Code Examples

Example #1

0

Show file

        /// <summary>
        /// Converts a domain name to ASCII one label at a time: every label is
        /// passed through <c>ConvertToASCII</c> and the results are joined
        /// with <c>FULL_STOP</c>.
        /// </summary>
        /// <param name="src">Domain name to convert.</param>
        /// <param name="options">Options forwarded to the per-label conversion.</param>
        /// <returns>Buffer holding the converted labels.</returns>
        /// <exception cref="StringPrepParseException">
        /// Thrown when the result is longer than <c>MAX_DOMAIN_NAME_LENGTH</c>.
        /// </exception>
        public static StringBuffer ConvertIDNToASCII(string src, IDNA2003Options options)
        {
            char[]       chars      = src.ToCharArray();
            StringBuffer converted  = new StringBuffer();
            int          labelStart = 0;

            while (true)
            {
                int    separator = GetSeparatorIndex(chars, labelStart, chars.Length);
                bool   atEnd     = separator == chars.Length;
                string label     = new string(chars, labelStart, separator - labelStart);

                // An empty label at the very end is the root label separator: nothing to convert.
                if (label.Length != 0 || !atEnd)
                {
                    converted.Append(ConvertToASCII(UCharacterIterator.GetInstance(label), options));
                }
                if (atEnd)
                {
                    break;
                }

                // Emit the separator and continue with the character after it.
                converted.Append((char)FULL_STOP);
                labelStart = separator + 1;
            }

            if (converted.Length > MAX_DOMAIN_NAME_LENGTH)
            {
                throw new StringPrepParseException("The output exceed the max allowed length.", StringPrepErrorType.DomainNameTooLongError);
            }
            return converted;
        }

Example #2

0

Show file

        // public constructors --------------------------------------------------

        // public methods -------------------------------------------------------

        /// <summary>
        /// <p>
        /// Writes the code points of <paramref name="source"/> into
        /// <paramref name="buffer"/> as a byte sequence that preserves the
        /// lexical order of the text.
        /// </p>
        /// <p>
        /// Use getCompressionLength(String) beforehand to find out how many
        /// bytes the output needs.
        /// </p>
        /// </summary>
        ///
        /// <param name="source">text source</param>
        /// <param name="buffer">output buffer</param>
        /// <param name="offset">index in <paramref name="buffer"/> at which writing starts</param>
        /// <returns>end offset where the writing stopped</returns>
        /// <seealso cref="M:IBM.ICU.Impl.BOCU.GetCompressionLength(System.String)"/>
        /// <exception cref="ArrayIndexOutOfBoundsException">thrown if size of buffer is too small for the output.</exception>
        public static int Compress(String source, byte[] buffer, int offset)
        {
            UCharacterIterator iterator = IBM.ICU.Text.UCharacterIterator.GetInstance(source);
            int previous = 0;

            for (int codepoint = iterator.NextCodePoint();
                 codepoint != IBM.ICU.Text.UForwardCharacterIterator_Constants.DONE;
                 codepoint = iterator.NextCodePoint())
            {
                // Unihan U+4e00..U+9fa5 uses a fixed base (double-bytes down from
                // the upper end); everything else is based on the previous code point.
                bool previousInUnihan = previous >= 0x4e00 && previous < 0xa000;
                int  basePoint        = previousInUnihan
                    ? 0x9fff - SLOPE_REACH_POS_2_
                    : (previous & ~0x7f) - SLOPE_REACH_NEG_1_;

                offset   = WriteDiff(codepoint - basePoint, buffer, offset);
                previous = codepoint;
            }
            return offset;
        }

Example #3

0

Show file

File: NFS4StringPrep.cs Project: NightOwl888/ICU4N

        /// <summary>
        /// Decodes <paramref name="src"/> as UTF-8 and prepares it. When the
        /// text contains <c>AT_SIGN</c>, the part before it is prepared with
        /// <c>prep.nfsmxp</c> and the part after it with <c>prep.nfsmxs</c>;
        /// otherwise the whole text is prepared with <c>prep.nfsmxp</c>.
        /// </summary>
        /// <param name="src">UTF-8 encoded input.</param>
        /// <returns>The prepared text, encoded as UTF-8.</returns>
        /// <exception cref="StringPrepParseException">
        /// Thrown when a prefix found in <c>special_prefixes</c> is followed
        /// by a non-empty suffix.
        /// </exception>
        public static byte[] MixedPrepare(byte[] src)
        {
            String       text     = Encoding.UTF8.GetString(src);
            int          at       = text.IndexOf(AT_SIGN);
            StringBuffer prepared = new StringBuffer();

            if (at < 0)
            {
                // No delimiter: the whole input is handled as a prefix.
                UCharacterIterator whole = UCharacterIterator.GetInstance(text);
                prepared.Append(prep.nfsmxp.Prepare(whole, StringPrepOptions.Default));
                return Encoding.UTF8.GetBytes(prepared.ToString());
            }

            String head         = text.Substring(0, at);
            int    specialIndex = FindStringIndex(special_prefixes, head);
            String tail         = text.Substring(at + 1);

            /* special prefixes must not be followed by suffixes! */
            if (specialIndex > -1 && tail.Length != 0)
            {
                throw new StringPrepParseException("Suffix following a special index", StringPrepErrorType.InvalidCharFound);
            }

            UCharacterIterator headIter = UCharacterIterator.GetInstance(head);
            UCharacterIterator tailIter = UCharacterIterator.GetInstance(tail);
            prepared.Append(prep.nfsmxp.Prepare(headIter, StringPrepOptions.Default));
            prepared.Append(AT_SIGN); // add the delimiter
            prepared.Append(prep.nfsmxs.Prepare(tailIter, StringPrepOptions.Default));

            return Encoding.UTF8.GetBytes(prepared.ToString());
        }

Example #4

0

Show file

        /// <summary>
        /// Converts a domain name to Unicode one label at a time using
        /// <c>ConvertToUnicode</c>. The separators found in the input are
        /// copied to the output unchanged.
        /// </summary>
        /// <param name="src">Domain name to convert.</param>
        /// <param name="options">Options forwarded to the per-label conversion.</param>
        /// <returns>Buffer holding the converted labels and the original separators.</returns>
        /// <exception cref="StringPrepParseException">
        /// Thrown for an empty label that is followed by a separator, or when
        /// the result is longer than <c>MAX_DOMAIN_NAME_LENGTH</c>.
        /// </exception>
        public static StringBuffer ConvertIDNToUnicode(String src, IDNA2003Options options)
        {
            char[]       chars      = src.ToCharArray();
            StringBuffer converted  = new StringBuffer();
            int          labelStart = 0;

            while (true)
            {
                int    separator = GetSeparatorIndex(chars, labelStart, chars.Length);
                bool   atEnd     = separator == chars.Length;
                string label     = new string(chars, labelStart, separator - labelStart);

                if (!atEnd && label.Length == 0)
                {
                    throw new StringPrepParseException("Found zero length lable after NamePrep.", StringPrepErrorType.ZeroLengthLabel);
                }

                converted.Append(ConvertToUnicode(UCharacterIterator.GetInstance(label), options));
                if (atEnd)
                {
                    break;
                }

                // Unlike the ToASCII operation we don't normalize the label separators
                converted.Append(chars[separator]);
                labelStart = separator + 1;
            }

            if (converted.Length > MAX_DOMAIN_NAME_LENGTH)
            {
                throw new StringPrepParseException("The output exceed the max allowed length.", StringPrepErrorType.DomainNameTooLongError);
            }
            return converted;
        }

Example #5

0

Show file

        /// <summary>
        /// Checks that code point iteration over text that starts and ends
        /// with an unpaired low surrogate terminates and yields that
        /// surrogate, both backwards from index 1 and forwards from index 5.
        /// </summary>
        public void TestJitterbug1952()
        {
            char[]             text = { '\uDC00', '\uD800', '\uDC01', '\uD802', '\uDC02', '\uDC03' };
            UCharacterIterator it   = UCharacterIterator.GetInstance(text);
            int cp;

            // Backwards from index 1: only the leading lone surrogate is left.
            // If this loops forever, the iterator is broken.
            it.Index = 1;
            for (cp = it.PreviousCodePoint(); cp != UCharacterIterator.DONE; cp = it.PreviousCodePoint())
            {
                if (cp != 0xDc00)
                {
                    Errln("iter.PreviousCodePoint() failed");
                }
            }

            // Forwards from index 5: only the trailing lone surrogate is left.
            it.Index = 5;
            for (cp = it.NextCodePoint(); cp != UCharacterIterator.DONE; cp = it.NextCodePoint())
            {
                if (cp != 0xDC03)
                {
                    Errln("iter.NextCodePoint() failed");
                }
            }
        }

Example #6

0

Show file

        /// <summary>
        /// Splits <paramref name="src"/> into labels, converts each one with
        /// <c>ConvertToASCII</c> and joins the converted labels with
        /// <c>FULL_STOP</c>.
        /// </summary>
        /// <param name="src">Domain name to convert.</param>
        /// <param name="options">Options forwarded to the per-label conversion.</param>
        /// <returns>Buffer holding the converted labels.</returns>
        public static StringBuffer ConvertIDNToASCII(String src, IDNA2003Options options)
        {
            char[]       chars  = src.ToCharArray();
            StringBuffer output = new StringBuffer();
            int          begin  = 0;

            while (true)
            {
                int    sep    = GetSeparatorIndex(chars, begin, chars.Length);
                bool   isLast = sep == chars.Length;
                String label  = new String(chars, begin, sep - begin);

                // Skip the empty label produced by a trailing (root) separator.
                if (label.Length != 0 || !isLast)
                {
                    output.Append(ConvertToASCII(UCharacterIterator.GetInstance(label), options));
                }
                if (isLast)
                {
                    return output;
                }

                // Move past the separator and write the normalized one.
                begin = sep + 1;
                output.Append((char)FULL_STOP);
            }
        }

Example #7

0

Show file

        /// <summary>
        /// Splits <paramref name="src"/> into labels, converts each one with
        /// <c>ConvertToUnicode</c> and joins the converted labels with
        /// <c>FULL_STOP</c>.
        /// </summary>
        /// <param name="src">Domain name to convert.</param>
        /// <param name="options">Options forwarded to the per-label conversion.</param>
        /// <returns>Buffer holding the converted labels.</returns>
        /// <exception cref="StringPrepParseException">
        /// Thrown for an empty label that is followed by a separator.
        /// </exception>
        public static StringBuffer ConvertIDNToUnicode(String src, IDNA2003Options options)
        {
            char[]       chars  = src.ToCharArray();
            StringBuffer output = new StringBuffer();
            int          begin  = 0;

            while (true)
            {
                int    sep    = GetSeparatorIndex(chars, begin, chars.Length);
                bool   isLast = sep == chars.Length;
                String label  = new String(chars, begin, sep - begin);

                if (!isLast && label.Length == 0)
                {
                    throw new StringPrepParseException("Found zero length lable after NamePrep.", StringPrepErrorType.ZeroLengthLabel);
                }

                output.Append(ConvertToUnicode(UCharacterIterator.GetInstance(label), options));
                if (isLast)
                {
                    return output;
                }

                // Move past the separator and write FULL_STOP in its place.
                begin = sep + 1;
                output.Append((char)FULL_STOP);
            }
        }

Example #8

0

Show file

        /// <summary>
        /// Return the number of bytes that Compress() would write for
        /// <paramref name="source"/>.
        /// </summary>
        ///
        /// <param name="source">text source string</param>
        /// <returns>the length of the BOCU result</returns>
        /// <seealso cref="M:IBM.ICU.Impl.BOCU.Compress(System.String,System.Byte[],System.Int32)"/>
        public static int GetCompressionLength(String source)
        {
            int prev   = 0;
            int result = 0;
            UCharacterIterator iterator = IBM.ICU.Text.UCharacterIterator.GetInstance(source);
            int codepoint = iterator.NextCodePoint();

            while (codepoint != IBM.ICU.Text.UForwardCharacterIterator_Constants.DONE)
            {
                if (prev < 0x4e00 || prev >= 0xa000)
                {
                    prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_;
                }
                else
                {
                    // Unihan U+4e00..U+9fa5:
                    // double-bytes down from the upper end
                    prev = 0x9fff - SLOPE_REACH_POS_2_;
                }

                // Measure the CURRENT code point before advancing. Advancing first
                // would skip the first code point and measure the DONE sentinel
                // instead, so the total would not match what Compress() writes.
                result   += LengthOfDiff(codepoint - prev);
                prev      = codepoint;
                codepoint = iterator.NextCodePoint();
            }
            return result;
        }

Example #9

0

Show file

File: NFS4StringPrep.cs Project: NightOwl888/ICU4N

        /// <summary>
        /// Decodes <paramref name="src"/> as UTF-8, runs it through
        /// <paramref name="strprep"/> with the default options and returns
        /// the result encoded as UTF-8.
        /// </summary>
        /// <param name="src">UTF-8 encoded input.</param>
        /// <param name="strprep">Profile used to prepare the text.</param>
        /// <returns>The prepared text, encoded as UTF-8.</returns>
        private static byte[] Prepare(byte[] src, StringPrep strprep)
        {
            UCharacterIterator decoded  = UCharacterIterator.GetInstance(Encoding.UTF8.GetString(src));
            StringBuffer       prepared = strprep.Prepare(decoded, StringPrepOptions.Default);
            String             text     = prepared.ToString();

            return Encoding.UTF8.GetBytes(text);
        }

Example #10

0

Show file

File: FCDIterCollationIterator.cs Project: SilentCC/ICU4N

 /// <summary>
 /// Creates the iterator in the <c>State.IterCheckFwd</c> state, remembering
 /// <paramref name="startIndex"/> and the <c>nfcImpl</c> of <paramref name="data"/>.
 /// </summary>
 /// <param name="data">Collation data; passed to the base constructor.</param>
 /// <param name="numeric">Passed to the base constructor.</param>
 /// <param name="ui">Text iterator; passed to the base constructor.</param>
 /// <param name="startIndex">Value stored in <c>start</c>.</param>
 public FCDIterCollationIterator(CollationData data, bool numeric, UCharacterIterator ui, int startIndex)
     : base(data, numeric, ui)
 {
     this.nfcImpl = data.nfcImpl;
     this.start   = startIndex;
     this.state   = State.IterCheckFwd;
 }

Example #11

0

Show file

        /// <summary>
        /// Maps <paramref name="src"/> with <c>Map</c>, then rejects the
        /// mapped text if it contains a prohibited code point or violates the
        /// BiDi rules checked below.
        /// </summary>
        /// <param name="src">Text to prepare.</param>
        /// <param name="options">Options forwarded to <c>Map</c>.</param>
        /// <returns>A new buffer holding the mapped text.</returns>
        /// <exception cref="StringPrepParseException">
        /// Thrown for a prohibited code point (other than U+0020) or for a
        /// BiDi rule violation.
        /// </exception>
        public StringBuffer Prepare(String src, StringPrepOptions options)
        {
            String             mapped = Map(src, options);
            UCharacterIterator iter   = UCharacterIterator.GetInstance(mapped);

            UCharacterDirection firstDir = UCharacterDirectionExtensions.CharDirectionCount;
            UCharacterDirection lastDir  = UCharacterDirectionExtensions.CharDirectionCount;
            bool sawRtl = false;
            bool sawLtr = false;
            int  rtlPos = -1;
            int  ltrPos = -1;

            for (int ch = iter.NextCodePoint(); ch != UCharacterIterator.Done; ch = iter.NextCodePoint())
            {
                if (transform.prohibitedSet.Contains(ch) && ch != 0x0020)
                {
                    throw new StringPrepParseException("A prohibited code point was found in the input",
                                                       StringPrepErrorType.ProhibitedError,
                                                       iter.GetText(), iter.Index);
                }

                lastDir = UChar.GetDirection(ch);
                if (firstDir == UCharacterDirectionExtensions.CharDirectionCount)
                {
                    // Still at the sentinel value: this is the first code point seen.
                    firstDir = lastDir;
                }

                if (lastDir == UCharacterDirection.LeftToRight)
                {
                    sawLtr = true;
                    ltrPos = iter.Index - 1;
                }
                if (lastDir == UCharacterDirection.RightToLeft || lastDir == UCharacterDirection.RightToLeftArabic)
                {
                    sawRtl = true;
                    rtlPos = iter.Index - 1;
                }
            }

            // satisfy 2: left-to-right and right-to-left code points must not be mixed
            if (sawLtr && sawRtl)
            {
                throw new StringPrepParseException("The input does not conform to the rules for BiDi code points.",
                                                   StringPrepErrorType.CheckBiDiError, iter.GetText(), (rtlPos > ltrPos) ? rtlPos : ltrPos);
            }

            // satisfy 3: right-to-left text must both start and end with a right-to-left code point
            bool startsRtl = firstDir == UCharacterDirection.RightToLeft || firstDir == UCharacterDirection.RightToLeftArabic;
            bool endsRtl   = lastDir == UCharacterDirection.RightToLeft || lastDir == UCharacterDirection.RightToLeftArabic;
            if (sawRtl && (!startsRtl || !endsRtl))
            {
                throw new StringPrepParseException("The input does not conform to the rules for BiDi code points.",
                                                   StringPrepErrorType.CheckBiDiError, iter.GetText(), (rtlPos > ltrPos) ? rtlPos : ltrPos);
            }

            return new StringBuffer(mapped);
        }

Example #12

0

Show file

        //  TODO: optimize
        /// <summary>
        /// Compares two domain names by converting the text of both iterators
        /// with <c>ConvertIDNToASCII</c> and comparing the results with
        /// <c>CompareCaseInsensitiveASCII</c>.
        /// </summary>
        /// <param name="i1">Iterator over the first name.</param>
        /// <param name="i2">Iterator over the second name.</param>
        /// <param name="options">Options forwarded to the conversion.</param>
        /// <returns>The result of <c>CompareCaseInsensitiveASCII</c>.</returns>
        /// <exception cref="ArgumentException">Thrown when either iterator is null.</exception>
        public static int Compare(UCharacterIterator i1, UCharacterIterator i2, IDNA2003Options options)
        {
            bool missingInput = i1 == null || i2 == null;
            if (missingInput)
            {
                throw new ArgumentException("One of the source buffers is null");
            }

            StringBuffer firstAscii  = ConvertIDNToASCII(i1.GetText(), options);
            StringBuffer secondAscii = ConvertIDNToASCII(i2.GetText(), options);
            int          comparison  = CompareCaseInsensitiveASCII(firstAscii, secondAscii);

            return comparison;
        }

Example #13

0

Show file

        /// <summary>
        /// Checks that a cloned iterator returns the same value from
        /// <c>Next()</c> as the original at every step, including the final DONE.
        /// </summary>
        public void TestClone()
        {
            UCharacterIterator original = UCharacterIterator.GetInstance("testing");
            UCharacterIterator copy     = (UCharacterIterator)original.Clone();

            for (int fromOriginal = 0; fromOriginal != UCharacterIterator.DONE;)
            {
                fromOriginal = original.Next();
                int fromCopy = copy.Next();
                if (fromOriginal != fromCopy)
                {
                    Errln("Cloned operation failed");
                }
            }
        }

Example #14

0

Show file

        /// <summary>
        /// Runs <paramref name="src"/> through <c>mapTransform</c> and, unless
        /// <c>ALLOW_UNASSIGNED</c> is set in <paramref name="options"/>,
        /// rejects the result if it contains a code point found in
        /// <c>transform.unassignedSet</c>.
        /// </summary>
        /// <param name="src">Text to map.</param>
        /// <param name="options">Option flags.</param>
        /// <returns>The transliterated text.</returns>
        /// <exception cref="StringPrepParseException">
        /// Thrown when an unassigned code point is found and not allowed.
        /// </exception>
        private String Map(String src, StringPrepOptions options)
        {
            bool   allowUnassigned = ((options & ALLOW_UNASSIGNED) > 0);
            String mapped          = mapTransform.Transliterate(src);

            UCharacterIterator iter = UCharacterIterator.GetInstance(mapped);
            for (int ch = iter.NextCodePoint(); ch != UCharacterIterator.Done; ch = iter.NextCodePoint())
            {
                if (transform.unassignedSet.Contains(ch) && !allowUnassigned)
                {
                    throw new StringPrepParseException("An unassigned code point was found in the input",
                                                       StringPrepErrorType.UnassignedError);
                }
            }
            return mapped;
        }

Example #15

0

Show file

 /// <summary>
 /// Test helper: reads the iterator's text through <c>GetText(char[])</c>,
 /// growing the buffer to <c>iterator.Length</c> whenever the call throws
 /// <see cref="IndexOutOfRangeException"/>, and reports an error if the
 /// text differs from <paramref name="result"/>.
 /// </summary>
 /// <param name="iterator">Iterator under test.</param>
 /// <param name="result">Expected text.</param>
 public void getText(UCharacterIterator iterator, String result)
 {
     /* test getText */
     char[] buffer = new char[1];
     bool   filled = false;

     while (!filled)
     {
         try
         {
             iterator.GetText(buffer);
             filled = true;
         }
         catch (IndexOutOfRangeException)
         {
             // Retry with a buffer sized to the iterator's length.
             buffer = new char[iterator.Length];
         }
     }

     string actual = new string(buffer, 0, iterator.Length);
     if (result.CompareToOrdinal(actual) != 0)
     {
         Errln("getText failed for iterator");
     }
 }

Example #16

0

Show file

File: PunycodeReference.cs Project: NightOwl888/ICU4N

        /// <summary>
        /// Collects the code points of <paramref name="input"/> and passes
        /// them to the array-based <c>Encode</c> overload, retrying with a
        /// four times larger output array for as long as that overload
        /// reports <c>punycode_big_output</c>.
        /// </summary>
        /// <param name="input">Text to encode.</param>
        /// <param name="case_flags">Passed through to the array-based overload.</param>
        /// <returns>
        /// A buffer holding the first <c>outLen[0]</c> chars of the output on
        /// <c>punycode_success</c>; otherwise <c>GetException</c> is called with
        /// the status and an empty buffer is returned
        /// (NOTE(review): <c>GetException</c> presumably throws; confirm).
        /// </returns>
        public static StringBuffer Encode(StringBuffer input, char[] case_flags)
        {
            int[] codePoints = new int[input.Length];
            int   count      = 0;

            UCharacterIterator iter = UCharacterIterator.GetInstance(input);
            for (int cp = iter.NextCodePoint(); cp != UCharacterIterator.Done; cp = iter.NextCodePoint())
            {
                codePoints[count++] = cp;
            }

            // Single-element array so the callee can report a length back.
            int[]  outLen = { input.Length * 4 };
            char[] output = new char[outLen[0]];
            int    status;

            while ((status = Encode(count, codePoints, case_flags, outLen, output)) == punycode_big_output)
            {
                // Output array too small: grow it and convert again.
                outLen[0] *= 4;
                output     = new char[outLen[0]];
            }

            StringBuffer result = new StringBuffer();
            if (status != punycode_success)
            {
                GetException(status);
                return result;
            }
            return result.Append(output, 0, outLen[0]);
        }

Example #17

0

Show file

 /// <summary>
 /// Convenience overload: converts the text of <paramref name="iter"/>
 /// with the string-based <c>ConvertIDNToUnicode</c>.
 /// </summary>
 /// <param name="iter">Iterator whose text is converted.</param>
 /// <param name="options">Options forwarded to the string-based overload.</param>
 /// <returns>The result of the string-based overload.</returns>
 public static StringBuffer ConvertIDNToUnicode(UCharacterIterator iter, IDNA2003Options options)
 {
     string text = iter.GetText();

     return ConvertIDNToUnicode(text, options);
 }

Example #18

0

Show file

        /// <summary>
        /// ToUnicode for a single label. If preparing the label, decoding it
        /// or the final verification fails, or the label has no ACE prefix,
        /// the original text of <paramref name="iter"/> is returned.
        /// </summary>
        /// <remarks>
        /// The RFC states that ToUnicode never fails: if any step fails, the
        /// original input is returned immediately in that step.
        /// NOTE(review): exceptions from the ToASCII call in step 6 are not
        /// caught here.
        /// </remarks>
        /// <param name="iter">Iterator over the label.</param>
        /// <param name="options">Options used for preparation and for step 6.</param>
        /// <returns>The decoded label, or a buffer holding the original text.</returns>
        public static StringBuffer ConvertToUnicode(UCharacterIterator iter, IDNA2003Options options)
        {
            int startIndex = iter.Index;

            // step 1: find out if all the codepoints in src are ASCII
            bool allAscii = true;
            for (int ch = iter.Next(); ch != UCharacterIterator.DONE; ch = iter.Next())
            {
                if (ch > 0x7F)
                {
                    allAscii = false;
                    break;
                }
            }

            StringBuffer processOut;
            if (allAscii)
            {
                // just point to source
                processOut = new StringBuffer(iter.GetText());
            }
            else
            {
                // step 2: process the string
                iter.Index = startIndex;
                try
                {
                    processOut = transform.Prepare(iter, (StringPrepOptions)options);
                }
                catch (StringPrepParseException)
                {
                    return new StringBuffer(iter.GetText());
                }
            }

            // step 3: verify ACE Prefix
            if (!StartsWithPrefix(processOut))
            {
                return new StringBuffer(iter.GetText());
            }

            // step 4: Remove the ACE Prefix
            String encoded = processOut.ToString(ACE_PREFIX_LENGTH, processOut.Length - ACE_PREFIX_LENGTH);

            // step 5: Decode using punycode
            StringBuffer decodeOut;
            try
            {
                decodeOut = PunycodeReference.Decode(new StringBuffer(encoded), null);
            }
            catch (StringPrepParseException)
            {
                return new StringBuffer(iter.GetText());
            }

            // step 6: Apply toASCII
            StringBuffer roundTrip = ConvertToASCII(decodeOut, options);

            // step 7: verify
            if (CompareCaseInsensitiveASCII(processOut, roundTrip) != 0)
            {
                return new StringBuffer(iter.GetText());
            }

            // step 8: return output of step 5
            return decodeOut;
        }

Example #19

0

Show file

        /// <summary>
        /// Convenience overload: wraps <paramref name="src"/> in a
        /// <c>UCharacterIterator</c> and delegates to the iterator-based
        /// <c>ConvertToUnicode</c>.
        /// </summary>
        /// <param name="src">Label to convert.</param>
        /// <param name="options">Options forwarded to the iterator-based overload.</param>
        /// <returns>The result of the iterator-based overload.</returns>
        public static StringBuffer ConvertToUnicode(StringBuffer src, IDNA2003Options options)
        {
            UCharacterIterator wrapped = UCharacterIterator.GetInstance(src);
            StringBuffer converted = ConvertToUnicode(wrapped, options);

            return converted;
        }

Example #20

0

Show file

        /// <summary>
        /// Drives the <c>CharacterIterator</c> obtained from a
        /// <c>UCharacterIterator</c> and a <c>StringCharacterIterator</c> over
        /// the same text through the same sequence of moves and checks that
        /// they return the same characters and indexes. It then compares
        /// First/Last/BeginIndex/EndIndex and checks that Clone() keeps the index.
        /// </summary>
        public void TestUCharacterIteratorWrapper()
        {
            String             source    = "asdfasdfjoiuyoiuy2341235679886765";
            UCharacterIterator it        = UCharacterIterator.GetInstance(source);
            CharacterIterator  wrapped   = it.GetCharacterIterator();
            CharacterIterator  reference = new StringCharacterIterator(source);

            wrapped.SetIndex(10);
            reference.SetIndex(10);

            // '-' calls Previous(), '0' reads Current, anything else ('+') calls Next().
            String moves = "0+0+0--0-0-+++0--+++++++0--------++++0000----0-";

            // "performed" counts the moves executed so far, including the current one.
            for (int performed = 1; performed <= moves.Length; performed++)
            {
                int got;
                int expected;

                switch (moves[performed - 1])
                {
                    case '-':
                        got      = wrapped.Previous();
                        expected = reference.Previous();
                        break;

                    case '0':
                        got      = wrapped.Current;
                        expected = reference.Current;
                        break;

                    default: // '+'
                        got      = wrapped.Next();
                        expected = reference.Next();
                        break;
                }

                // compare results
                if (got != expected)
                {
                    // report the moves up to and including the current one, and terminate
                    String history = moves.Substring(0, performed);
                    Errln("error: mismatch in Normalizer iteration at " + history
                          + ": got c1= " + Hex(got) + " != expected c2= " + Hex(expected));
                    break;
                }

                // compare indexes
                if (wrapped.Index != reference.Index)
                {
                    // report the moves up to and including the current one, and terminate
                    String history = moves.Substring(0, performed);
                    Errln("error: index mismatch in Normalizer iteration at " + history
                          + " : Normalizer index " + wrapped.Index
                          + " expected " + reference.Index);
                    break;
                }
            }

            if (reference.First() != wrapped.First())
            {
                Errln("CharacterIteratorWrapper.First() failed. expected: " + reference.First() + " got: " + wrapped.First());
            }
            if (reference.Last() != wrapped.Last())
            {
                Errln("CharacterIteratorWrapper.Last() failed expected: " + reference.Last() + " got: " + wrapped.Last());
            }
            if (reference.BeginIndex != wrapped.BeginIndex)
            {
                Errln("CharacterIteratorWrapper.BeginIndex failed expected: " + reference.BeginIndex + " got: " + wrapped.BeginIndex);
            }
            if (reference.EndIndex != wrapped.EndIndex)
            {
                Errln("CharacterIteratorWrapper.EndIndex failed expected: " + reference.EndIndex + " got: " + wrapped.EndIndex);
            }

            try
            {
                CharacterIterator clone = (CharacterIterator)wrapped.Clone();
                if (wrapped.Index != clone.Index)
                {
                    Errln("CharacterIteratorWrapper.Clone() failed expected: " + wrapped.Index + " got: " + clone.Index);
                }
            }
            catch (Exception)
            {
                Errln("CharacterIterator.Clone() failed");
            }
        }

Example #21

0

Show file

        /// <summary>
        /// ToASCII for a single label: prepares the label if it contains
        /// non-ASCII code points, optionally enforces the STD 3 ASCII rules,
        /// and Punycode-encodes the label with the ACE prefix if the prepared
        /// text is still not pure ASCII.
        /// </summary>
        /// <param name="srcIter">Iterator over the label; it is read to the end and then reset to the start.</param>
        /// <param name="options">Option flags; <c>USE_STD3_RULES</c> enables the STD 3 checks.</param>
        /// <returns>The ASCII form of the label.</returns>
        /// <exception cref="StringPrepParseException">
        /// Thrown when the prepared label is empty, violates the STD 3 ASCII
        /// rules (if enabled), already starts with the ACE prefix while
        /// containing non-ASCII, or the result is longer than
        /// <c>MAX_LABEL_LENGTH</c>.
        /// </exception>
        public static StringBuffer ConvertToASCII(UCharacterIterator srcIter, IDNA2003Options options)
        {
            char[] caseFlags = null;

            //get the options
            bool enforceStd3 = ((options & USE_STD3_RULES) != 0);

            // step 1: scan the whole source for non-ASCII code units
            bool inputIsAscii = true;
            for (int unit = srcIter.Next(); unit != UCharacterIterator.DONE; unit = srcIter.Next())
            {
                if (unit > 0x7f)
                {
                    inputIsAscii = false;
                }
            }
            srcIter.SetToStart();

            // step 2 is performed only if the source contains non ASCII
            StringBuffer processOut;
            if (inputIsAscii)
            {
                processOut = new StringBuffer(srcIter.GetText());
            }
            else
            {
                processOut = transform.Prepare(srcIter, (StringPrepOptions)options);
            }

            int poLen = processOut.Length;
            if (poLen == 0)
            {
                throw new StringPrepParseException("Found zero length lable after NamePrep.", StringPrepErrorType.ZeroLengthLabel);
            }

            // step 3 & 4: classify the prepared text
            bool outputIsAscii = true;
            bool outputIsLdh   = true;
            int  failPos       = -1;
            for (int j = 0; j < poLen; j++)
            {
                int unit = processOut[j];
                if (unit > 0x7F)
                {
                    outputIsAscii = false;
                }
                else if (!IsLDHChar(unit))
                {
                    // surrogates are not assembled here because LDH code
                    // points are in the ASCII range only
                    outputIsLdh = false;
                    failPos     = j;
                }
            }

            if (enforceStd3)
            {
                // verify 3a and 3b; the first failing rule decides the reported position
                if (!outputIsLdh)
                {
                    /* source contains some non-LDH characters */
                    throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
                                                       StringPrepErrorType.STD3ASCIIRulesError,
                                                       processOut.ToString(),
                                                       (failPos > 0) ? (failPos - 1) : failPos);
                }
                if (processOut[0] == HYPHEN)
                {
                    throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
                                                       StringPrepErrorType.STD3ASCIIRulesError, processOut.ToString(), 0);
                }
                if (processOut[processOut.Length - 1] == HYPHEN)
                {
                    throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
                                                       StringPrepErrorType.STD3ASCIIRulesError,
                                                       processOut.ToString(),
                                                       (poLen > 0) ? poLen - 1 : poLen);
                }
            }

            StringBuffer dest;
            if (outputIsAscii)
            {
                dest = processOut;
            }
            else
            {
                // step 5 : verify the sequence does not begin with ACE prefix
                if (StartsWithPrefix(processOut))
                {
                    throw new StringPrepParseException("The input does not start with the ACE Prefix.",
                                                       StringPrepErrorType.AcePrefixError, processOut.ToString(), 0);
                }

                //step 6: encode the sequence with punycode
                StringBuffer punyout = PunycodeReference.Encode(processOut, caseFlags);

                // convert all codepoints to lower case ASCII
                StringBuffer lowerOut = ToASCIILower(punyout);

                //Step 7: prepend the ACE prefix, then copy the encoded label
                dest = new StringBuffer();
                dest.Append(ACE_PREFIX, 0, ACE_PREFIX_LENGTH);
                dest.Append(lowerOut);
            }

            if (dest.Length > MAX_LABEL_LENGTH)
            {
                throw new StringPrepParseException("The labels in the input are too long. Length > 64.",
                                                   StringPrepErrorType.LabelTooLongError, dest.ToString(), 0);
            }
            return dest;
        }

Example #22

0

Show file

File: IterCollationIterator.cs Project: SilentCC/ICU4N

 /// <summary>
 /// Creates the iterator and stores <paramref name="ui"/> in <c>iter</c>.
 /// </summary>
 /// <param name="d">Collation data; passed to the base constructor.</param>
 /// <param name="numeric">Passed to the base constructor.</param>
 /// <param name="ui">Text iterator kept by this instance.</param>
 public IterCollationIterator(CollationData d, bool numeric, UCharacterIterator ui)
     : base(d, numeric)
 {
     this.iter = ui;
 }

Example #23

0

Show file

        //
        // toUnicode operation; should only apply to a single label.
        // Never fails: whenever a step fails, or the label has no ACE
        // prefix, the input label is returned unchanged.
        //
        private static String ToUnicodeInternal(String label, int flag)
        {
            StringBuffer dest;

            // step 1
            // find out if all the codepoints in input are ASCII
            if (IsAllASCII(label))
            {
                dest = new StringBuffer(label);
            }
            else
            {
                // step 2
                // perform the nameprep operation; flag ALLOW_UNASSIGNED is used here
                try
                {
                    UCharacterIterator iter = UCharacterIterator.getInstance(label);
                    dest = NamePrep.prepare(iter, flag);
                }
                catch (Exception)
                {
                    // toUnicode never fails; if any step fails, return the input string
                    return label;
                }
            }

            // step 3
            // verify ACE Prefix
            if (!StartsWithACEPrefix(dest))
            {
                // just return the input
                return label;
            }

            // step 4
            // Remove the ACE Prefix
            String encoded = dest.Substring(ACE_PREFIX_LENGTH, dest.Length() - ACE_PREFIX_LENGTH);

            try
            {
                // step 5
                // Decode using punycode
                StringBuffer decoded = Punycode.decode(new StringBuffer(encoded), null);

                // step 6
                // Apply toASCII
                String roundTrip = ToASCII(decoded.ToString(), flag);

                // step 7
                // verify
                if (roundTrip.EqualsIgnoreCase(dest.ToString()))
                {
                    // step 8
                    // return output of step 5
                    return decoded.ToString();
                }
            }
            catch (Exception)
            {
                // deliberate no-op: fall through and return the input
            }

            // just return the input
            return label;
        }

Example #24

0

Show file

        /// <summary>
        /// toASCII operation; should only be applied to a single label.
        /// </summary>
        /// <param name="label">The label to convert.</param>
        /// <param name="flag">
        /// Option bits. Forwarded to <c>NamePrep.prepare</c>; when
        /// <c>USE_STD3_ASCII_RULES</c> is set, the LDH and hyphen checks are applied.
        /// </param>
        /// <returns>The converted label.</returns>
        /// <exception cref="IllegalArgumentException">
        /// Thrown when nameprep or Punycode encoding reports a parse error, the label is
        /// empty, an enabled STD3 check fails, the prepared label already starts with
        /// the ACE prefix, or the result is longer than <c>MAX_LABEL_LENGTH</c>.
        /// </exception>
        private static String ToASCIIInternal(String label, int flag)
        {
            // step 1
            // Determine whether every code point of the input is ASCII (0..0x7f).
            bool inputIsAscii = IsAllASCII(label);

            // step 2
            // Non-ASCII input goes through nameprep (flag ALLOW_UNASSIGNED is used here);
            // ASCII input is copied as is.
            StringBuffer dest;
            if (inputIsAscii)
            {
                dest = new StringBuffer(label);
            }
            else
            {
                UCharacterIterator iter = UCharacterIterator.getInstance(label);
                try
                {
                    dest = NamePrep.prepare(iter, flag);
                }
                catch (java.text.ParseException e)
                {
                    throw new IllegalArgumentException(e);
                }
            }

            // step 8 (lower bound), done early: the length must be inside 1..63
            if (dest.Length() == 0)
            {
                throw new IllegalArgumentException("Empty label is not a legal name");
            }

            // step 3
            // With STD3 rules enabled, reject non-LDH ASCII code points
            //   0..0x2c, 0x2e..0x2f, 0x3a..0x40, 0x5b..0x60, 0x7b..0x7f
            // and a leading or trailing hyphen.
            if ((flag & USE_STD3_ASCII_RULES) != 0)
            {
                for (int pos = 0; pos < dest.Length(); pos++)
                {
                    int c = dest.CharAt(pos);
                    if (IsNonLDHAsciiCodePoint(c))
                    {
                        throw new IllegalArgumentException("Contains non-LDH ASCII characters");
                    }
                }

                if (dest.CharAt(0) == '-' || dest.CharAt(dest.Length() - 1) == '-')
                {
                    throw new IllegalArgumentException("Has leading or trailing hyphen");
                }
            }

            // step 4
            // Only a label that was non-ASCII on input and is still non-ASCII after
            // nameprep needs encoding; everything else skips to step 8.
            if (!inputIsAscii && !IsAllASCII(dest.ToString()))
            {
                // step 5
                // the sequence must not already begin with the ACE prefix
                if (StartsWithACEPrefix(dest))
                {
                    throw new IllegalArgumentException("The input starts with the ACE Prefix");
                }

                // step 6
                // encode the sequence with punycode
                try
                {
                    dest = Punycode.encode(dest, null);
                }
                catch (java.text.ParseException e)
                {
                    throw new IllegalArgumentException(e);
                }

                dest = ToASCIILower(dest);

                // step 7
                // prepend the ACE prefix
                dest.Insert(0, ACE_PREFIX);
            }

            // step 8 (upper bound)
            // the length must be inside 1..63
            if (dest.Length() > MAX_LABEL_LENGTH)
            {
                throw new IllegalArgumentException("The label in the input is too long");
            }

            return dest.ToString();
        }

Example #25

0

Show file

        /// <summary>
        /// Verifies that code-unit iteration (<c>Next</c>/<c>Previous</c>) and code-point
        /// iteration (<c>NextCodePoint</c>/<c>PreviousCodePoint</c>) over
        /// <c>ITERATION_STRING_</c> agree with each other, first forwards and then backwards.
        /// </summary>
        public void TestIteration()
        {
            UCharacterIterator iterator = UCharacterIterator.GetInstance(
                ITERATION_STRING_);
            UCharacterIterator iterator2 = UCharacterIterator.GetInstance(
                ITERATION_STRING_);

            iterator.SetToStart();
            if (iterator.Current != ITERATION_STRING_[0])
            {
                Errln("Iterator failed retrieving first character");
            }
            iterator.SetToLimit();
            if (iterator.Previous() != ITERATION_STRING_[
                    ITERATION_STRING_.Length - 1])
            {
                Errln("Iterator failed retrieving last character");
            }
            if (iterator.Length != ITERATION_STRING_.Length)
            {
                Errln("Iterator failed determining begin and end index");
            }

            // Forward pass: iterator walks code units, iterator2 walks code points.
            iterator2.Index = 0;
            iterator.Index  = 0;
            int ch = 0;

            while (ch != UCharacterIterator.DONE)
            {
                int index = iterator2.Index;
                ch = iterator2.NextCodePoint();
                if (index != ITERATION_SUPPLEMENTARY_INDEX)
                {
                    if (ch != iterator.Next() &&
                        ch != UCharacterIterator.DONE)
                    {
                        Errln("Error mismatch in next() and nextCodePoint()");
                    }
                }
                else
                {
                    // The code point starting at ITERATION_SUPPLEMENTARY_INDEX is
                    // compared against two code units: lead, then trail.
                    if (UTF16.GetLeadSurrogate(ch) != iterator.Next() ||
                        UTF16.GetTrailSurrogate(ch) != iterator.Next())
                    {
                        Errln("Error mismatch in next and nextCodePoint for " +
                              "supplementary characters");
                    }
                }
            }

            // Backward pass.
            iterator.Index  = ITERATION_STRING_.Length;
            iterator2.Index = ITERATION_STRING_.Length;

            // ch is DONE after the forward pass; without this reset the loop below
            // would never execute and backward iteration would go untested.
            ch = 0;
            while (ch != UCharacterIterator.DONE)
            {
                ch = iterator2.PreviousCodePoint();

                // Going backwards, the supplementary code point has just been read
                // when iterator2 lands on ITERATION_SUPPLEMENTARY_INDEX (the forward
                // pass treats that index as the start of the surrogate pair).
                bool readSupplementary =
                    ch != UCharacterIterator.DONE &&
                    iterator2.Index == ITERATION_SUPPLEMENTARY_INDEX;

                if (!readSupplementary)
                {
                    if (ch != iterator.Previous() &&
                        ch != UCharacterIterator.DONE)
                    {
                        Errln("Error mismatch in previous() and " +
                              "previousCodePoint()");
                    }
                }
                else
                {
                    // Stepping backwards by code unit yields the trail surrogate
                    // first and the lead surrogate second.
                    if (UTF16.GetTrailSurrogate(ch) != iterator.Previous() ||
                        UTF16.GetLeadSurrogate(ch) != iterator.Previous())
                    {
                        Errln("Error mismatch in previous and " +
                              "previousCodePoint for supplementary characters");
                    }
                }
            }
        }

Example #26

0

Show file

        /// <summary>
        /// Moves <paramref name="iter"/> back and forth according to a fixed movement
        /// script and checks, after every move, that the code point it returns and the
        /// position it reports match a reference iterator over the expected code points.
        /// Reports the first mismatch through <c>Errln</c> and stops.
        /// </summary>
        /// <param name="iter">
        /// Iterator under test. Its index is set to 4 before the script runs, and its
        /// index is used to look up <c>expectIndex</c>, which has 7 entries.
        /// </param>
        public void previousNext(UCharacterIterator iter)
        {
            int[] expect =
            {
                0x2f999,
                0x1d15f,
                0xc4,
                0x1ed0
            };

            // Indexed by the source iterator's index (iter.Index); each entry is the
            // index the reference iterator (iter32) is expected to be at.
            int[] expectIndex =
            {
                0, 0,
                1, 1,
                2,
                3,
                4 //needed
            };

            // initial indexes into the src and expect strings

            int SRC_MIDDLE    = 4;
            int EXPECT_MIDDLE = 2;


            // movement vector
            // - for previous(), 0 for current(), + for next()
            String moves = "0+0+0--0-0-+++0--+++++++0--------";


            UCharIterator iter32 = new UCharIterator(expect, expect.Length,
                                                     EXPECT_MIDDLE);

            int  c1, c2;
            char m;

            // initially set the indexes into the middle of the strings
            iter.Index = (SRC_MIDDLE);

            // move around and compare the iteration code points with
            // the expected ones
            int movesIndex = 0;

            while (movesIndex < moves.Length)
            {
                m = moves[movesIndex++];
                if (m == '-')
                {
                    c1 = iter.PreviousCodePoint();
                    c2 = iter32.Previous();
                }
                else if (m == '0')
                {
                    c1 = iter.CurrentCodePoint;
                    c2 = iter32.Current;
                }
                else
                {// m=='+'
                    c1 = iter.NextCodePoint();
                    c2 = iter32.Next();
                }

                // compare results
                if (c1 != c2)
                {
                    // copy the moves up to and including the current (m) move
                    String history = moves.Substring(0, movesIndex); // ICU4N: Checked 2nd parameter
                    Errln("error: mismatch in Normalizer iteration at " + history + ": "
                          + "got c1= " + Hex(c1) + " != expected c2= " + Hex(c2));
                    break;
                }

                // compare indexes
                int mappedIndex = expectIndex[iter.Index];
                if (mappedIndex != iter32.Index)
                {
                    // copy the moves up to and including the current (m) move
                    String history = moves.Substring(0, movesIndex); // ICU4N: Checked 2nd parameter

                    // Report the two values that were actually compared. The table is
                    // keyed by iter.Index, so it must not be indexed with iter32.Index.
                    Errln("error: index mismatch in Normalizer iteration at "
                          + history + " : " + "iterator index " + iter.Index
                          + " maps to expected index " + mappedIndex
                          + " but the expected iterator is at " + iter32.Index);
                    break;
                }
            }
        }

Example #27

0

Show file

        /// <summary>
        /// Exercises the <c>SetText</c> overloads of <c>CollationElementIterator</c>:
        /// an iterator created over <c>test2</c> is partly consumed, then re-pointed at
        /// <c>test1</c> via a string, a <c>CharacterIterator</c> and a
        /// <c>UCharacterIterator</c>, and each time compared with an iterator created
        /// directly over <c>test1</c>.
        /// </summary>
        public void TestSetText(/* char* par */)
        {
            CultureInfo usEnglish = new CultureInfo("en-US");
            RuleBasedCollator en_us = (RuleBasedCollator)Collator.GetInstance(usEnglish);
            CollationElementIterator iter1 = en_us.GetCollationElementIterator(test1);
            CollationElementIterator iter2 = en_us.GetCollationElementIterator(test2);

            // Run through the second iterator just to exercise it: at most nine more
            // elements after the first, stopping early at NULLORDER.
            int order = iter2.Next();
            for (int step = 1; step < 10 && order != CollationElementIterator.NULLORDER; step++)
            {
                try
                {
                    order = iter2.Next();
                }
                catch (Exception)
                {
                    Errln("iter2.Next() returned an error.");
                    break;
                }
            }

            // Now set it to point to the same string as the first iterator
            try
            {
                iter2.SetText(test1);
            }
            catch (Exception)
            {
                Errln("call to iter2->setText(test1) failed.");
                return;
            }
            assertEqual(iter1, iter2);

            // Same check through the CharacterIterator overload of SetText
            iter1.Reset();
            CharacterIterator chariter = new StringCharacterIterator(test1);
            try
            {
                iter2.SetText(chariter);
            }
            catch (Exception)
            {
                Errln("call to iter2->setText(chariter(test1)) failed.");
                return;
            }
            assertEqual(iter1, iter2);

            // Same check through the UCharacterIterator overload of SetText
            iter1.Reset();
            UCharacterIterator uchariter = UCharacterIterator.GetInstance(test1);
            try
            {
                iter2.SetText(uchariter);
            }
            catch (Exception)
            {
                Errln("call to iter2->setText(uchariter(test1)) failed.");
                return;
            }
            assertEqual(iter1, iter2);
        }

Example #28

0

Show file

 /// <summary>
 /// Convenience overload: takes the text of <paramref name="src"/> and delegates
 /// to the <c>Prepare</c> overload that accepts that text.
 /// </summary>
 /// <param name="src">Iterator whose <c>GetText()</c> result is prepared.</param>
 /// <param name="options">Options passed through unchanged.</param>
 /// <returns>Whatever the delegated <c>Prepare</c> call returns.</returns>
 public StringBuffer Prepare(UCharacterIterator src, StringPrepOptions options)
 {
     var sourceText = src.GetText();
     return Prepare(sourceText, options);
 }

Example #29

0

Show file

File: SimpleFilteredSentenceBreakIterator.cs Project: SilentCC/ICU4N

 /// <summary>
 /// Reset the filter from the delegate: <c>text</c> is replaced by a new
 /// <see cref="UCharacterIterator"/> built over a clone of the delegate's text.
 /// </summary>
 private void ResetState()
 {
     // Clone so that the new iterator does not share the delegate's own iterator instance.
     CharacterIterator delegateTextCopy = (CharacterIterator)@delegate.Text.Clone();
     text = UCharacterIterator.GetInstance(delegateTextCopy);
 }

Example #30

0

Show file

        /// <summary>
        /// ToUnicode conversion of the text behind <paramref name="src"/>. If the
        /// (prepared) text starts with the ACE prefix, the remainder is Punycode-decoded
        /// and the decoded form is returned, provided converting it back with
        /// <c>ConvertToASCII</c> matches the prepared text case-insensitively. In every
        /// other case handled here, a new buffer holding the source text is returned.
        /// </summary>
        /// <param name="src">
        /// Source iterator. It is consumed from its current index to the end while
        /// scanning for non-ASCII code units; its index is restored only on the
        /// non-ASCII path, before nameprep is applied.
        /// </param>
        /// <param name="options">Options, cast to <c>StringPrepOptions</c> for nameprep and passed to <c>ConvertToASCII</c>.</param>
        /// <returns>The decoded label, or a copy of the source text.</returns>
        public static StringBuffer ConvertToUnicode(UCharacterIterator src, IDNA2003Options options)
        {
            bool[] caseFlags = null;
            int startIndex = src.Index;

            // step 1: find out if all the codepoints in src are ASCII
            // (the scan always runs to the end of the iterator)
            bool allAscii = true;
            int ch;
            while ((ch = src.Next()) != UCharacterIterator.DONE)
            {
                if (ch > 0x7F)
                {
                    allAscii = false;
                }
            }

            StringBuffer processOut;
            if (allAscii)
            {
                //just point to source
                processOut = new StringBuffer(src.GetText());
            }
            else
            {
                try
                {
                    // step 2: process the string
                    src.Index  = startIndex;
                    processOut = namePrep.Prepare(src, (StringPrepOptions)options);
                }
                catch (StringPrepParseException)
                {
                    // nameprep failed: hand back the source text
                    return new StringBuffer(src.GetText());
                }
            }

            // TODO:
            // The RFC states that
            // <quote>
            // ToUnicode never fails. If any step fails, then the original input
            // is returned immediately in that step.
            // </quote>

            //step 3: verify ACE Prefix
            if (!StartsWithPrefix(processOut))
            {
                // NOTE: no STD3 ASCII rule verification is performed on this path;
                // the source text is returned as is.
                return new StringBuffer(src.GetText());
            }

            //step 4: Remove the ACE Prefix
            string temp = processOut.ToString(ACE_PREFIX.Length, processOut.Length - ACE_PREFIX.Length);

            //step 5: Decode using punycode
            StringBuffer decodeOut;
            try
            {
                decodeOut = new StringBuffer(Punycode.Decode(temp, caseFlags).ToString());
            }
            catch (StringPrepParseException)
            {
                // not decodable: fall back to the source text
                return new StringBuffer(src.GetText());
            }

            //step 6: Apply toASCII
            StringBuffer toASCIIOut = ConvertToASCII(UCharacterIterator.GetInstance(decodeOut), options);

            //step 7: verify; a mismatch is not an error here, it only means the
            //        decoded form is discarded in favour of the source text
            if (CompareCaseInsensitiveASCII(processOut, toASCIIOut) != 0)
            {
                return new StringBuffer(src.GetText());
            }

            //step 8: return output of step 5
            return decodeOut;
        }

C# (CSharp) UCharacterIterator Examples