Ejemplo n.º 1
0
        /// <summary>
        /// Steps an iterator and its clone forward side by side and reports an
        /// error each time the two return different values from <c>Next()</c>.
        /// </summary>
        public void TestClone()
        {
            UCharacterIterator original = UCharacterIterator.GetInstance("testing");
            UCharacterIterator copy = (UCharacterIterator)original.Clone();

            int fromOriginal = 0;
            while (fromOriginal != UCharacterIterator.DONE)
            {
                // The original is advanced first, then the clone; both must agree,
                // including on the terminating DONE value.
                fromOriginal = original.Next();
                int fromCopy = copy.Next();

                if (fromOriginal != fromCopy)
                {
                    Errln("Cloned operation failed");
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reference ToUnicode conversion of a single label.
        /// </summary>
        /// <remarks>
        /// The RFC states that ToUnicode never fails: if any step fails, the
        /// original input is returned immediately in that step. Here a failure
        /// while preparing the text, a failure while decoding Punycode, a missing
        /// ACE prefix, or a failed round-trip verification all yield a copy of the
        /// iterator's text. Exceptions raised by the ToASCII call in step 6 are
        /// not caught in this method.
        /// </remarks>
        /// <param name="iter">Iterator over the label to convert; its index is read and may be reset.</param>
        /// <param name="options">Options forwarded to the prepare step and to ToASCII.</param>
        /// <returns>The decoded label, or a copy of the iterator's text when conversion does not apply.</returns>
        public static StringBuffer ConvertToUnicode(UCharacterIterator iter, IDNA2003Options options)
        {
            int startIndex = iter.Index;

            // step 1: scan until the first non-ASCII unit (or the end of the text)
            bool allAscii = true;
            int unit;
            while ((unit = iter.Next()) != UCharacterIterator.DONE)
            {
                if (unit > 0x7F)
                {
                    allAscii = false;
                    break;
                }
            }

            StringBuffer prepared;
            if (allAscii)
            {
                // nothing to prepare: work directly on the source text
                prepared = new StringBuffer(iter.GetText());
            }
            else
            {
                // step 2: process the string, starting from where the caller left the iterator
                iter.Index = startIndex;
                try
                {
                    prepared = transform.Prepare(iter, (StringPrepOptions)options);
                }
                catch (StringPrepParseException)
                {
                    return new StringBuffer(iter.GetText());
                }
            }

            // step 3: verify ACE Prefix
            if (!StartsWithPrefix(prepared))
            {
                return new StringBuffer(iter.GetText());
            }

            // step 4: remove the ACE Prefix
            String withoutPrefix = prepared.ToString(ACE_PREFIX_LENGTH, prepared.Length - ACE_PREFIX_LENGTH);

            // step 5: decode using punycode
            StringBuffer decoded;
            try
            {
                decoded = PunycodeReference.Decode(new StringBuffer(withoutPrefix), null);
            }
            catch (StringPrepParseException)
            {
                return new StringBuffer(iter.GetText());
            }

            // step 6: apply toASCII to the decoded text
            StringBuffer reencoded = ConvertToASCII(decoded, options);

            // step 7: the re-encoded form must match the prepared input (ASCII case-insensitively)
            if (CompareCaseInsensitiveASCII(prepared, reencoded) != 0)
            {
                return new StringBuffer(iter.GetText());
            }

            // step 8: return output of step 5
            return decoded;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Reference ToASCII conversion of a single label.
        /// </summary>
        /// <remarks>
        /// The label is prepared only when it contains non-ASCII code units. The
        /// prepared text is optionally checked against the STD 3 ASCII rules and,
        /// if it still contains non-ASCII code units, Punycode-encoded, lower-cased
        /// and prefixed with the ACE prefix.
        /// </remarks>
        /// <param name="srcIter">Iterator over the label; it is consumed and then reset to its start.</param>
        /// <param name="options">Conversion options; <c>USE_STD3_RULES</c> enables the STD 3 checks.</param>
        /// <returns>The ASCII form of the label.</returns>
        /// <exception cref="StringPrepParseException">
        /// Thrown when the prepared label is empty, violates the STD 3 ASCII rules
        /// (when enabled), is non-ASCII but already starts with the ACE prefix, or
        /// the result is longer than <c>MAX_LABEL_LENGTH</c>.
        /// </exception>
        public static StringBuffer ConvertToASCII(UCharacterIterator srcIter, IDNA2003Options options)
        {
            char[] caseFlags = null;

            // the source contains all ascii codepoints
            bool srcIsASCII = true;
            // assume the source contains all LDH codepoints
            bool srcIsLDH = true;

            //get the options
            bool useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);

            int ch;

            // step 1
            while ((ch = srcIter.Next()) != UCharacterIterator.DONE)
            {
                if (ch > 0x7f)
                {
                    srcIsASCII = false;
                }
            }
            int failPos = -1;

            srcIter.SetToStart();
            StringBuffer processOut;

            // step 2 is performed only if the source contains non ASCII
            if (!srcIsASCII)
            {
                // step 2
                processOut = transform.Prepare(srcIter, (StringPrepOptions)options);
            }
            else
            {
                processOut = new StringBuffer(srcIter.GetText());
            }
            int poLen = processOut.Length;

            if (poLen == 0)
            {
                throw new StringPrepParseException("Found zero length lable after NamePrep.", StringPrepErrorType.ZeroLengthLabel);
            }

            // reset the variable to verify if output of prepare is ASCII or not
            srcIsASCII = true;

            // step 3 & 4
            for (int j = 0; j < poLen; j++)
            {
                ch = processOut[j];
                if (ch > 0x7F)
                {
                    srcIsASCII = false;
                }
                else if (IsLDHChar(ch) == false)
                {
                    // here we do not assemble surrogates
                    // since we know that LDH code points
                    // are in the ASCII range only
                    srcIsLDH = false;
                    failPos  = j;
                }
            }

            if (useSTD3ASCIIRules == true)
            {
                // verify 3a and 3b
                if (srcIsLDH == false || /* source contains some non-LDH characters */
                    processOut[0] == HYPHEN ||
                    processOut[processOut.Length - 1] == HYPHEN)
                {
                    /* populate the parseError struct */
                    if (srcIsLDH == false)
                    {
                        throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
                                                           StringPrepErrorType.STD3ASCIIRulesError,
                                                           processOut.ToString(),
                                                           (failPos > 0) ? (failPos - 1) : failPos);
                    }
                    else if (processOut[0] == HYPHEN)
                    {
                        throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
                                                           StringPrepErrorType.STD3ASCIIRulesError, processOut.ToString(), 0);
                    }
                    else
                    {
                        throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
                                                           StringPrepErrorType.STD3ASCIIRulesError,
                                                           processOut.ToString(),
                                                           (poLen > 0) ? poLen - 1 : poLen);
                    }
                }
            }

            StringBuffer dest;
            if (srcIsASCII)
            {
                // the prepared text is already ASCII: it is the result as-is
                dest = processOut;
            }
            else
            {
                // step 5 : verify the sequence does not begin with ACE prefix
                if (StartsWithPrefix(processOut))
                {
                    // A non-ASCII label that already carries the ACE prefix is rejected.
                    throw new StringPrepParseException("The input starts with the ACE Prefix.",
                                                       StringPrepErrorType.AcePrefixError, processOut.ToString(), 0);
                }

                //step 6: encode the sequence with punycode
                StringBuffer punyout = PunycodeReference.Encode(processOut, caseFlags);

                // convert all codepoints to lower case ASCII
                StringBuffer lowerOut = ToASCIILower(punyout);

                //step 7: prepend the ACE prefix, then append the encoded label
                dest = new StringBuffer();
                dest.Append(ACE_PREFIX, 0, ACE_PREFIX_LENGTH - 0); // ICU4N: Checked 3rd parameter
                dest.Append(lowerOut);
            }

            // step 8: enforce the maximum label length
            if (dest.Length > MAX_LABEL_LENGTH)
            {
                throw new StringPrepParseException("The labels in the input are too long. Length > 64.",
                                                   StringPrepErrorType.LabelTooLongError, dest.ToString(), 0);
            }
            return dest;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// ToUnicode conversion of a single label.
        /// </summary>
        /// <remarks>
        /// The RFC states that ToUnicode never fails: if any step fails, the
        /// original input is returned immediately in that step. Here a failure
        /// while preparing the text, a missing ACE prefix, a Punycode decoding
        /// failure, or a failed round-trip verification all yield a copy of the
        /// source text. Exceptions raised by the ToASCII call in step 6 are not
        /// caught in this method. STD 3 ASCII rule verification is not performed
        /// here.
        /// </remarks>
        /// <param name="src">Iterator over the label to convert; its index is read and may be reset.</param>
        /// <param name="options">Options forwarded to the prepare step and to ToASCII.</param>
        /// <returns>The decoded label, or a copy of the source text when conversion does not apply.</returns>
        public static StringBuffer ConvertToUnicode(UCharacterIterator src, IDNA2003Options options)
        {
            bool[] noCaseFlags = null;
            int startIndex = src.Index;

            // step 1: find out if all the code units in src are ASCII
            // (the whole text is scanned; there is no early exit)
            bool allAscii = true;
            int unit;
            while ((unit = src.Next()) != UCharacterIterator.DONE)
            {
                if (unit > 0x7F)
                {
                    allAscii = false;
                }
            }

            StringBuffer prepared;
            if (allAscii)
            {
                // just point to source
                prepared = new StringBuffer(src.GetText());
            }
            else
            {
                try
                {
                    // step 2: process the string
                    src.Index = startIndex;
                    prepared = namePrep.Prepare(src, (StringPrepOptions)options);
                }
                catch (StringPrepParseException)
                {
                    return new StringBuffer(src.GetText());
                }
            }

            // step 3: verify ACE Prefix
            if (!StartsWithPrefix(prepared))
            {
                return new StringBuffer(src.GetText());
            }

            // step 4: remove the ACE Prefix
            string withoutPrefix = prepared.ToString(ACE_PREFIX.Length, prepared.Length - ACE_PREFIX.Length);

            // step 5: decode using punycode
            StringBuffer decoded;
            try
            {
                decoded = new StringBuffer(Punycode.Decode(withoutPrefix, noCaseFlags).ToString());
            }
            catch (StringPrepParseException)
            {
                return new StringBuffer(src.GetText());
            }

            // step 6: apply toASCII to the decoded text
            StringBuffer reencoded = ConvertToASCII(UCharacterIterator.GetInstance(decoded), options);

            // step 7: verify; a mismatch falls back to the source text instead of throwing
            if (CompareCaseInsensitiveASCII(prepared, reencoded) != 0)
            {
                return new StringBuffer(src.GetText());
            }

            // step 8: return output of step 5
            return decoded;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Verifies basic positioning (start, limit, length) and then checks that
        /// code-unit iteration (<c>Next</c>/<c>Previous</c>) stays consistent with
        /// code-point iteration (<c>NextCodePoint</c>/<c>PreviousCodePoint</c>) in
        /// both directions over <c>ITERATION_STRING_</c>.
        /// </summary>
        /// <remarks>
        /// Assumes <c>ITERATION_SUPPLEMENTARY_INDEX</c> is the index of the lead
        /// surrogate of the only supplementary code point in the test string, as
        /// the forward pass already relies on.
        /// </remarks>
        public void TestIteration()
        {
            UCharacterIterator iterator = UCharacterIterator.GetInstance(
                ITERATION_STRING_);
            UCharacterIterator iterator2 = UCharacterIterator.GetInstance(
                ITERATION_STRING_);

            iterator.SetToStart();
            if (iterator.Current != ITERATION_STRING_[0])
            {
                Errln("Iterator failed retrieving first character");
            }
            iterator.SetToLimit();
            if (iterator.Previous() != ITERATION_STRING_[
                    ITERATION_STRING_.Length - 1])
            {
                Errln("Iterator failed retrieving last character");
            }
            if (iterator.Length != ITERATION_STRING_.Length)
            {
                Errln("Iterator failed determining begin and end index");
            }

            // Forward pass: iterator walks code units, iterator2 walks code points.
            iterator2.Index = 0;
            iterator.Index  = 0;
            int ch = 0;

            while (ch != UCharacterIterator.DONE)
            {
                int index = iterator2.Index;
                ch = iterator2.NextCodePoint();
                if (index != ITERATION_SUPPLEMENTARY_INDEX)
                {
                    if (ch != iterator.Next() &&
                        ch != UCharacterIterator.DONE)
                    {
                        Errln("Error mismatch in next() and nextCodePoint()");
                    }
                }
                else
                {
                    // One code point corresponds to two code units: lead, then trail.
                    if (UTF16.GetLeadSurrogate(ch) != iterator.Next() ||
                        UTF16.GetTrailSurrogate(ch) != iterator.Next())
                    {
                        Errln("Error mismatch in next and nextCodePoint for " +
                              "supplementary characters");
                    }
                }
            }

            // Backward pass. The forward pass leaves ch == DONE, so it must be
            // reset here; otherwise the loop below is never entered.
            iterator.Index  = ITERATION_STRING_.Length;
            iterator2.Index = ITERATION_STRING_.Length;
            ch = 0;
            while (ch != UCharacterIterator.DONE)
            {
                ch = iterator2.PreviousCodePoint();

                // Moving backwards, the supplementary code point has just been read
                // when iterator2 has landed on its starting index.
                bool readSupplementary = ch != UCharacterIterator.DONE &&
                                         iterator2.Index == ITERATION_SUPPLEMENTARY_INDEX;
                if (!readSupplementary)
                {
                    if (ch != iterator.Previous() &&
                        ch != UCharacterIterator.DONE)
                    {
                        Errln("Error mismatch in previous() and " +
                              "previousCodePoint()");
                    }
                }
                else
                {
                    // Walking backwards over the pair yields the trail unit first,
                    // then the lead unit.
                    if (UTF16.GetTrailSurrogate(ch) != iterator.Previous() ||
                        UTF16.GetLeadSurrogate(ch) != iterator.Previous())
                    {
                        Errln("Error mismatch in previous and " +
                              "previousCodePoint for supplementary characters");
                    }
                }
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Exercises code-point oriented access on a string that mixes BMP
        /// characters with surrogate pairs: <c>GetText</c>, <c>Index</c>,
        /// <c>CurrentCodePoint</c>, <c>MoveCodePointIndex</c>, <c>Next</c> and a
        /// full forward walk with <c>NextCodePoint</c>.
        /// </summary>
        public void TestIterationUChar32()
        {
            String text = "\u0061\u0062\ud841\udc02\u20ac\ud7ff\ud842\udc06\ud801\udc00\u0061";
            int    c;
            int    i;

            UCharacterIterator iter = UCharacterIterator.GetInstance(text);

            String iterText = iter.GetText();
            if (!iterText.Equals(text))
            {
                Errln("iter.getText() failed");
            }

            iter.Index = (1);
            if (iter.CurrentCodePoint != UTF16.CharAt(text, 1))
            {
                Errln("Iterator didn't start out in the right place.");
            }

            // In the failure messages below, "expected" is the value taken from
            // the string and "got" is the value reported by the iterator.
            iter.SetToStart();
            c = iter.CurrentCodePoint;
            i = iter.MoveCodePointIndex(1);
            c = iter.CurrentCodePoint;
            if (c != UTF16.CharAt(text, 1) || i != 1)
            {
                Errln("moveCodePointIndex(1) didn't work correctly expected " + Hex(UTF16.CharAt(text, 1)) + " got " + Hex(c) + " i= " + i);
            }

            i = iter.MoveCodePointIndex(2);
            c = iter.CurrentCodePoint;
            if (c != UTF16.CharAt(text, 4) || i != 4)
            {
                Errln("moveCodePointIndex(2) didn't work correctly expected " + Hex(UTF16.CharAt(text, 4)) + " got " + Hex(c) + " i= " + i);
            }

            i = iter.MoveCodePointIndex(-2);
            c = iter.CurrentCodePoint;
            if (c != UTF16.CharAt(text, 1) || i != 1)
            {
                Errln("moveCodePointIndex(-2) didn't work correctly expected " + Hex(UTF16.CharAt(text, 1)) + " got " + Hex(c) + " i= " + i);
            }

            iter.SetToLimit();
            i = iter.MoveCodePointIndex(-2);
            c = iter.CurrentCodePoint;
            if (c != UTF16.CharAt(text, (text.Length - 3)) || i != (text.Length - 3))
            {
                Errln("moveCodePointIndex(-2) didn't work correctly expected " + Hex(UTF16.CharAt(text, (text.Length - 3))) + " got " + Hex(c) + " i= " + i);
            }

            iter.SetToStart();
            c = iter.CurrentCodePoint;

            //testing first32PostInc, nextCodePointPostInc, setTostart
            i = 0;
            iter.SetToStart();
            c = iter.Next();
            if (c != UTF16.CharAt(text, i))
            {
                Errln("first32PostInc failed.  Expected->" + Hex(UTF16.CharAt(text, i)) + " Got-> " + Hex(c));
            }
            if (iter.Index != UTF16.GetCharCount(c) + i)
            {
                Errln("getIndex() after first32PostInc() failed");
            }

            iter.SetToStart();
            i = 0;
            if (iter.Index != 0)
            {
                Errln("setToStart failed");
            }

            Logln("Testing forward iteration...");
            do
            {
                if (c != UCharacterIterator.DONE)
                {
                    c = iter.NextCodePoint();
                }

                if (c != UTF16.CharAt(text, i))
                {
                    Errln("Character mismatch at position " + i + ", iterator has " + Hex(c) + ", string has " + Hex(UTF16.CharAt(text, i)));
                }

                // i tracks the expected code-unit index after consuming the code point
                i += UTF16.GetCharCount(c);
                if (iter.Index != i)
                {
                    Errln("getIndex() aftr nextCodePointPostInc() isn't working right");
                }
                c = iter.CurrentCodePoint;
                // the DONE check short-circuits before the string is indexed at i
                if (c != UCharacterIterator.DONE && c != UTF16.CharAt(text, i))
                {
                    Errln("current() after nextCodePointPostInc() isn't working right");
                }
            } while (c != UCharacterIterator.DONE);

            // The loop above stopped once CurrentCodePoint reported DONE, so a
            // further NextCodePoint must report DONE as well.
            c = iter.NextCodePoint();
            if (c != UCharacterIterator.DONE)
            {
                Errln("nextCodePointPostInc() didn't return DONE at the end");
            }
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Moves the wrapped iterator forward by one position and returns the
 /// character found at the new position. If the new index would be greater
 /// than or equal to <see cref="EndIndex"/>, the index is left at
 /// <see cref="EndIndex"/> and <see cref="UCharacterIterator.Done"/> is returned.
 /// </summary>
 /// <returns>The character at the new position, or <see cref="UCharacterIterator.Done"/>
 /// when the new position is past the end of the text range.</returns>
 public override char Next()
 {
     // Advance first; the result is read from the position reached.
     iterator.Next();

     var atNewPosition = iterator.Current;
     return (char)atNewPosition;
 }
 /// <summary>
 /// Moves the wrapped iterator forward by one position and returns the
 /// character found at the new position. If the new index would be greater
 /// than or equal to the end index, the index is left at the end index and
 /// DONE is returned.
 /// </summary>
 ///
 /// <returns>The character at the new position, or DONE when the new
 /// position is past the end of the text range.</returns>
 public virtual char Next()
 {
     // Advance first; the result is read from the position reached.
     iterator.Next();

     var atNewPosition = iterator.Current();
     return (char)atNewPosition;
 }