UCharacterIterator.GetText C# (CSharp) Code Examples

Example #1

0

Show file

        /// <summary>
        /// Maps <paramref name="src"/> with <c>Map</c>, then walks the mapped text one code point at a
        /// time, rejecting prohibited code points and text that fails the two BiDi checks below.
        /// </summary>
        /// <param name="src">The text to prepare.</param>
        /// <param name="options">Options forwarded unchanged to <c>Map</c>.</param>
        /// <returns>A new <c>StringBuffer</c> wrapping the mapped text.</returns>
        /// <exception cref="StringPrepParseException">
        /// Thrown when a prohibited code point other than U+0020 is found, or when a BiDi check fails.
        /// </exception>
        public StringBuffer Prepare(String src, StringPrepOptions options)
        {
            String mapped = Map(src, options);
            UCharacterIterator cursor = UCharacterIterator.GetInstance(mapped);

            // CharDirectionCount doubles as the "nothing seen yet" marker.
            UCharacterDirection lastDir = UCharacterDirectionExtensions.CharDirectionCount;
            UCharacterDirection firstDir = UCharacterDirectionExtensions.CharDirectionCount;
            int lastRtlIndex = -1;
            int lastLtrIndex = -1;
            bool sawRtl = false;
            bool sawLtr = false;

            for (int cp = cursor.NextCodePoint(); cp != UCharacterIterator.Done; cp = cursor.NextCodePoint())
            {
                // U+0020 is let through even when the prohibited set contains it.
                if (transform.prohibitedSet.Contains(cp) == true && cp != 0x0020)
                {
                    throw new StringPrepParseException("A prohibited code point was found in the input",
                                                       StringPrepErrorType.ProhibitedError,
                                                       cursor.GetText(), cursor.Index);
                }

                lastDir = UChar.GetDirection(cp);
                if (firstDir == UCharacterDirectionExtensions.CharDirectionCount)
                {
                    firstDir = lastDir;
                }

                if (lastDir == UCharacterDirection.LeftToRight)
                {
                    sawLtr = true;
                    lastLtrIndex = cursor.Index - 1;
                }
                else if (lastDir == UCharacterDirection.RightToLeft || lastDir == UCharacterDirection.RightToLeftArabic)
                {
                    sawRtl = true;
                    lastRtlIndex = cursor.Index - 1;
                }
            }

            bool firstIsRtl = firstDir == UCharacterDirection.RightToLeft
                              || firstDir == UCharacterDirection.RightToLeftArabic;
            bool lastIsRtl = lastDir == UCharacterDirection.RightToLeft
                             || lastDir == UCharacterDirection.RightToLeftArabic;

            // satisfy 2: left-to-right and right-to-left code points must not be mixed.
            bool mixedDirections = sawLtr && sawRtl;
            // satisfy 3: text containing right-to-left code points must both start and end with one.
            bool rtlNotAtBothEnds = sawRtl && !(firstIsRtl && lastIsRtl);

            if (mixedDirections || rtlNotAtBothEnds)
            {
                throw new StringPrepParseException("The input does not conform to the rules for BiDi code points.",
                                                   StringPrepErrorType.CheckBiDiError, cursor.GetText(),
                                                   Math.Max(lastRtlIndex, lastLtrIndex));
            }

            return new StringBuffer(mapped);
        }

Example #2

0

Show file

        //  TODO: optimize
        /// <summary>
        /// Converts the full text of both iterators with <c>ConvertIDNToASCII</c> and compares the
        /// results with <c>CompareCaseInsensitiveASCII</c>.
        /// </summary>
        /// <param name="i1">Iterator over the first name.</param>
        /// <param name="i2">Iterator over the second name.</param>
        /// <param name="options">Options forwarded to <c>ConvertIDNToASCII</c>.</param>
        /// <returns>The value returned by <c>CompareCaseInsensitiveASCII</c>.</returns>
        /// <exception cref="ArgumentException">Thrown when either iterator is null.</exception>
        public static int Compare(UCharacterIterator i1, UCharacterIterator i2, IDNA2003Options options)
        {
            bool missingInput = i1 == null || i2 == null;
            if (missingInput)
            {
                throw new ArgumentException("One of the source buffers is null");
            }

            StringBuffer firstAscii = ConvertIDNToASCII(i1.GetText(), options);
            StringBuffer secondAscii = ConvertIDNToASCII(i2.GetText(), options);

            return CompareCaseInsensitiveASCII(firstAscii, secondAscii);
        }

Example #3

0

Show file

 /// <summary>
 /// Test helper: reads the iterator's text through <c>GetText(char[])</c> and reports a failure
 /// when it does not match <paramref name="result"/>.
 /// </summary>
 /// <param name="iterator">The iterator under test.</param>
 /// <param name="result">The text the iterator is expected to hold.</param>
 public void getText(UCharacterIterator iterator, String result)
 {
     // Start with a one-element buffer; each IndexOutOfRangeException triggers a retry
     // with a buffer of iterator.Length elements.
     char[] scratch = new char[1];
     bool filled = false;
     while (!filled)
     {
         try
         {
             iterator.GetText(scratch);
             filled = true;
         }
         catch (IndexOutOfRangeException)
         {
             scratch = new char[iterator.Length];
         }
     }

     string extracted = new string(scratch, 0, iterator.Length);
     if (result.CompareToOrdinal(extracted) != 0)
     {
         Errln("getText failed for iterator");
     }
 }

Example #4

0

Show file

 /// <summary>
 /// Iterator overload: extracts the iterator's text and delegates to the overload of
 /// <c>ConvertIDNToUnicode</c> that accepts that text.
 /// </summary>
 /// <param name="iter">Iterator over the name to convert.</param>
 /// <param name="options">Options forwarded unchanged.</param>
 /// <returns>Whatever the delegated overload returns.</returns>
 public static StringBuffer ConvertIDNToUnicode(UCharacterIterator iter, IDNA2003Options options)
 {
     var domainText = iter.GetText();
     return ConvertIDNToUnicode(domainText, options);
 }

Example #5

0

Show file

        /// <summary>
        /// Applies the ToUnicode steps to the label behind <paramref name="iter"/>.
        /// </summary>
        /// <remarks>
        /// The RFC states that ToUnicode never fails: if any step fails, the original input is
        /// returned immediately in that step. Each such fallback below returns a fresh
        /// <c>StringBuffer</c> over the iterator's text.
        /// </remarks>
        /// <param name="iter">Iterator over the label to convert.</param>
        /// <param name="options">Options forwarded to <c>Prepare</c> and <c>ConvertToASCII</c>.</param>
        /// <returns>The punycode-decoded label, or the original text when a step does not apply or fails.</returns>
        public static StringBuffer ConvertToUnicode(UCharacterIterator iter, IDNA2003Options options)
        {
            int startIndex = iter.Index;

            // step 1: find out if all the codepoints in src are ASCII
            bool allAscii = true;
            for (int unit = iter.Next(); unit != UCharacterIterator.DONE; unit = iter.Next())
            {
                if (unit > 0x7F)
                {
                    allAscii = false;
                    break;
                }
            }

            StringBuffer prepared;
            if (allAscii)
            {
                // nothing to prepare: work directly on the source text
                prepared = new StringBuffer(iter.GetText());
            }
            else
            {
                // step 2: process the string, starting again from where the caller left the iterator
                iter.Index = startIndex;
                try
                {
                    prepared = transform.Prepare(iter, (StringPrepOptions)options);
                }
                catch (StringPrepParseException)
                {
                    return new StringBuffer(iter.GetText());
                }
            }

            // step 3: verify ACE Prefix
            if (!StartsWithPrefix(prepared))
            {
                return new StringBuffer(iter.GetText());
            }

            // step 4: Remove the ACE Prefix
            String encoded = prepared.ToString(ACE_PREFIX_LENGTH, prepared.Length - ACE_PREFIX_LENGTH);

            // step 5: Decode using punycode
            StringBuffer decoded;
            try
            {
                decoded = PunycodeReference.Decode(new StringBuffer(encoded), null);
            }
            catch (StringPrepParseException)
            {
                return new StringBuffer(iter.GetText());
            }

            // step 6: Apply toASCII
            StringBuffer roundTrip = ConvertToASCII(decoded, options);

            // step 7: verify
            if (CompareCaseInsensitiveASCII(prepared, roundTrip) != 0)
            {
                return new StringBuffer(iter.GetText());
            }

            // step 8: return output of step 5
            return decoded;
        }

Example #6

0

Show file

        /// <summary>
        /// Applies the ToASCII steps to the single label behind <paramref name="srcIter"/>.
        /// </summary>
        /// <param name="srcIter">
        /// Iterator over the label. It is read from its current position for the ASCII scan and then
        /// reset with <c>SetToStart</c>.
        /// </param>
        /// <param name="options">
        /// Conversion options; the <c>USE_STD3_RULES</c> bit enables the STD 3 checks. The value is also
        /// forwarded to <c>Prepare</c>.
        /// </param>
        /// <returns>
        /// The prepared label itself when it is all ASCII; otherwise the ACE prefix followed by the
        /// lower-cased punycode encoding of the prepared label.
        /// </returns>
        /// <exception cref="StringPrepParseException">
        /// Thrown when the prepared label is empty, violates the STD 3 ASCII rules (if enabled), is
        /// non-ASCII yet already starts with the ACE prefix, or the result exceeds <c>MAX_LABEL_LENGTH</c>.
        /// </exception>
        public static StringBuffer ConvertToASCII(UCharacterIterator srcIter, IDNA2003Options options)
        {
            char[] caseFlags = null;

            //get the options
            bool useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);

            // step 1: does the source contain only ASCII code points?
            bool srcIsASCII = true;
            int ch;
            while ((ch = srcIter.Next()) != UCharacterIterator.DONE)
            {
                if (ch > 0x7f)
                {
                    srcIsASCII = false;
                    // One non-ASCII unit settles the question; the iterator is rewound just below,
                    // so there is no need to consume the rest.
                    break;
                }
            }
            srcIter.SetToStart();

            // step 2 is performed only if the source contains non ASCII
            StringBuffer processOut;
            if (!srcIsASCII)
            {
                processOut = transform.Prepare(srcIter, (StringPrepOptions)options);
            }
            else
            {
                processOut = new StringBuffer(srcIter.GetText());
            }

            int poLen = processOut.Length;
            if (poLen == 0)
            {
                throw new StringPrepParseException("Found zero length lable after NamePrep.", StringPrepErrorType.ZeroLengthLabel);
            }

            // From here on srcIsASCII describes the OUTPUT of prepare, not the original source.
            srcIsASCII = true;
            // assume the prepared label contains only LDH code points until proven otherwise
            bool srcIsLDH = true;
            int failPos = -1;

            // step 3 & 4
            for (int j = 0; j < poLen; j++)
            {
                ch = processOut[j];
                if (ch > 0x7F)
                {
                    srcIsASCII = false;
                }
                else if (IsLDHChar(ch) == false)
                {
                    // here we do not assemble surrogates
                    // since we know that LDH code points
                    // are in the ASCII range only
                    srcIsLDH = false;
                    failPos  = j;
                }
            }

            if (useSTD3ASCIIRules)
            {
                // verify 3a: only LDH code points among the ASCII ones
                if (!srcIsLDH)
                {
                    throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
                                                       StringPrepErrorType.STD3ASCIIRulesError,
                                                       processOut.ToString(),
                                                       (failPos > 0) ? (failPos - 1) : failPos);
                }

                // verify 3b: no leading hyphen ...
                if (processOut[0] == HYPHEN)
                {
                    throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
                                                       StringPrepErrorType.STD3ASCIIRulesError, processOut.ToString(), 0);
                }

                // ... and no trailing hyphen. poLen is known to be > 0 here, so poLen - 1 is a valid index.
                if (processOut[poLen - 1] == HYPHEN)
                {
                    throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
                                                       StringPrepErrorType.STD3ASCIIRulesError,
                                                       processOut.ToString(),
                                                       poLen - 1);
                }
            }

            StringBuffer dest;
            if (srcIsASCII)
            {
                dest = processOut;
            }
            else
            {
                // step 5 : verify the sequence does not begin with ACE prefix
                if (StartsWithPrefix(processOut))
                {
                    // The message used to claim the input does NOT start with the prefix, which is the
                    // opposite of the condition that triggers this error.
                    throw new StringPrepParseException("The input must not start with the ACE Prefix.",
                                                       StringPrepErrorType.AcePrefixError, processOut.ToString(), 0);
                }

                //step 6: encode the sequence with punycode
                StringBuffer punyout = PunycodeReference.Encode(processOut, caseFlags);

                // convert all codepoints to lower case ASCII
                StringBuffer lowerOut = ToASCIILower(punyout);

                //Step 7: prepend the ACE prefix, then append the encoded label
                dest = new StringBuffer();
                dest.Append(ACE_PREFIX, 0, ACE_PREFIX_LENGTH - 0); // ICU4N: Checked 3rd parameter
                dest.Append(lowerOut);
            }

            if (dest.Length > MAX_LABEL_LENGTH)
            {
                throw new StringPrepParseException("The labels in the input are too long. Length > 64.",
                                                   StringPrepErrorType.LabelTooLongError, dest.ToString(), 0);
            }
            return dest;
        }

Example #7

0

Show file

        /// <summary>
        /// Applies the ToUnicode steps to the label behind <paramref name="src"/>.
        /// </summary>
        /// <remarks>
        /// The RFC states that ToUnicode never fails: if any step fails, the original input is
        /// returned immediately in that step.
        /// NOTE(review): the <c>ConvertToASCII</c> call in step 6 is not wrapped in a try/catch here,
        /// so an exception from it would propagate to the caller — confirm whether that is intended.
        /// </remarks>
        /// <param name="src">Iterator over the label to convert.</param>
        /// <param name="options">Options forwarded to <c>Prepare</c> and <c>ConvertToASCII</c>.</param>
        /// <returns>The punycode-decoded label, or a new buffer over the original text otherwise.</returns>
        public static StringBuffer ConvertToUnicode(UCharacterIterator src, IDNA2003Options options)
        {
            bool[] caseFlags = null;
            int startIndex = src.Index;

            // step 1: find out if all the codepoints in src are ASCII (the whole text is scanned)
            bool allAscii = true;
            for (int unit = src.Next(); unit != UCharacterIterator.DONE; unit = src.Next())
            {
                if (unit > 0x7F)
                {
                    allAscii = false;
                }
            }

            StringBuffer prepared;
            if (allAscii)
            {
                //just point to source
                prepared = new StringBuffer(src.GetText());
            }
            else
            {
                try
                {
                    // step 2: process the string
                    src.Index = startIndex;
                    prepared  = namePrep.Prepare(src, (StringPrepOptions)options);
                }
                catch (StringPrepParseException)
                {
                    return new StringBuffer(src.GetText());
                }
            }

            //step 3: verify ACE Prefix
            if (!StartsWithPrefix(prepared))
            {
                return new StringBuffer(src.GetText());
            }

            //step 4: Remove the ACE Prefix
            string encoded = prepared.ToString(ACE_PREFIX.Length, prepared.Length - ACE_PREFIX.Length);

            //step 5: Decode using punycode
            StringBuffer decoded;
            try
            {
                decoded = new StringBuffer(Punycode.Decode(encoded, caseFlags).ToString());
            }
            catch (StringPrepParseException)
            {
                return new StringBuffer(src.GetText());
            }

            //step 6: Apply toASCII
            StringBuffer roundTrip = ConvertToASCII(UCharacterIterator.GetInstance(decoded), options);

            //step 7: verify; a mismatch is not reported as an error, the source text is returned instead
            if (CompareCaseInsensitiveASCII(prepared, roundTrip) != 0)
            {
                return new StringBuffer(src.GetText());
            }

            //step 8: return output of step 5
            return decoded;
        }

Example #8

0

Show file

 /// <summary>
 /// Iterator overload: extracts the iterator's text and delegates to the overload of
 /// <c>Prepare</c> that accepts that text.
 /// </summary>
 /// <param name="src">Iterator over the text to prepare.</param>
 /// <param name="options">Options forwarded unchanged.</param>
 /// <returns>Whatever the delegated overload returns.</returns>
 public StringBuffer Prepare(UCharacterIterator src,
                             StringPrepOptions options)
 {
     var fullText = src.GetText();
     return Prepare(fullText, options);
 }

Example #9

0

Show file

        /// <summary>
        /// Exercises code-point-wise navigation of <c>UCharacterIterator</c> over a string that mixes
        /// BMP characters with surrogate pairs: <c>GetText</c>, <c>Index</c>, <c>MoveCodePointIndex</c>,
        /// <c>SetToStart</c>/<c>SetToLimit</c>, <c>Next</c>, <c>NextCodePoint</c> and <c>CurrentCodePoint</c>.
        /// Failures are reported through <c>Errln</c>.
        /// </summary>
        public void TestIterationUChar32()
        {
            String text = "\u0061\u0062\ud841\udc02\u20ac\ud7ff\ud842\udc06\ud801\udc00\u0061";
            int    c;
            int    i;
            {
                UCharacterIterator iter = UCharacterIterator.GetInstance(text);

                String iterText = iter.GetText();
                if (!iterText.Equals(text))
                {
                    Errln("iter.getText() failed");
                }

                iter.Index = (1);
                if (iter.CurrentCodePoint != UTF16.CharAt(text, 1))
                {
                    Errln("Iterator didn't start out in the right place.");
                }

                // Forward by one code point from the start: lands on UTF-16 index 1.
                iter.SetToStart();
                c = iter.CurrentCodePoint;
                i = iter.MoveCodePointIndex(1);
                c = iter.CurrentCodePoint;
                if (c != UTF16.CharAt(text, 1) || i != 1)
                {
                    Errln("moveCodePointIndex(1) didn't work correctly expected " + Hex(UTF16.CharAt(text, 1)) + " got " + Hex(c) + " i= " + i);
                }

                // Two more code points ('b' and one surrogate pair): lands on UTF-16 index 4.
                i = iter.MoveCodePointIndex(2);
                c = iter.CurrentCodePoint;
                if (c != UTF16.CharAt(text, 4) || i != 4)
                {
                    Errln("moveCodePointIndex(2) didn't work correctly expected " + Hex(UTF16.CharAt(text, 4)) + " got " + Hex(c) + " i= " + i);
                }

                // And back again by the same two code points.
                i = iter.MoveCodePointIndex(-2);
                c = iter.CurrentCodePoint;
                if (c != UTF16.CharAt(text, 1) || i != 1)
                {
                    Errln("moveCodePointIndex(-2) didn't work correctly expected " + Hex(UTF16.CharAt(text, 1)) + " got " + Hex(c) + " i= " + i);
                }

                // Backwards from the limit: the last two code points occupy the final three UTF-16 units.
                iter.SetToLimit();
                i = iter.MoveCodePointIndex(-2);
                c = iter.CurrentCodePoint;
                if (c != UTF16.CharAt(text, (text.Length - 3)) || i != (text.Length - 3))
                {
                    Errln("moveCodePointIndex(-2) didn't work correctly expected " + Hex(UTF16.CharAt(text, (text.Length - 3))) + " got " + Hex(c) + " i= " + i);
                }

                iter.SetToStart();
                c = iter.CurrentCodePoint;

                //testing first32PostInc, nextCodePointPostInc, setTostart
                i = 0;
                iter.SetToStart();
                c = iter.Next();
                if (c != UTF16.CharAt(text, i))
                {
                    Errln("first32PostInc failed.  Expected->" + Hex(UTF16.CharAt(text, i)) + " Got-> " + Hex(c));
                }
                if (iter.Index != UTF16.GetCharCount(c) + i)
                {
                    Errln("getIndex() after first32PostInc() failed");
                }

                iter.SetToStart();
                i = 0;
                if (iter.Index != 0)
                {
                    Errln("setToStart failed");
                }

                Logln("Testing forward iteration...");
                do
                {
                    if (c != UCharacterIterator.DONE)
                    {
                        c = iter.NextCodePoint();
                    }

                    if (c != UTF16.CharAt(text, i))
                    {
                        Errln("Character mismatch at position " + i + ", iterator has " + Hex(c) + ", string has " + Hex(UTF16.CharAt(text, i)));
                    }

                    // i tracks the UTF-16 index the iterator should have reached after this code point.
                    i += UTF16.GetCharCount(c);
                    if (iter.Index != i)
                    {
                        Errln("getIndex() aftr nextCodePointPostInc() isn't working right");
                    }
                    c = iter.CurrentCodePoint;
                    if (c != UCharacterIterator.DONE && c != UTF16.CharAt(text, i))
                    {
                        Errln("current() after nextCodePointPostInc() isn't working right");
                    }
                } while (c != UCharacterIterator.DONE);

                // The iterator is now at the end of the text, so another step must report DONE.
                c = iter.NextCodePoint();
                if (c != UCharacterIterator.DONE)
                {
                    Errln("nextCodePointPostInc() didn't return DONE at the end");
                }
            }
        }

C# (CSharp) UCharacterIterator.GetText Examples