Beispiel #1
0
        /// <summary>
        /// Applies the IDNA2003 ToUnicode steps to a single label: if the (name-prepped)
        /// label starts with the ACE prefix and its Punycode payload decodes and
        /// round-trips through <see cref="ConvertToASCII"/>, the decoded text is returned;
        /// otherwise the original text of <paramref name="src"/> is returned.
        /// </summary>
        /// <param name="src">Iterator over the label. It is consumed from its current index to the end.</param>
        /// <param name="options">Options forwarded to the name-prep step and to <see cref="ConvertToASCII"/>.</param>
        /// <returns>The decoded label, or a new buffer holding <c>src.GetText()</c> when any handled step fails.</returns>
        /// <remarks>
        /// Failures of name-prep and of Punycode decoding are swallowed and fall back to the
        /// original text. An exception thrown by the <see cref="ConvertToASCII"/> round-trip
        /// is NOT caught here and propagates to the caller.
        /// STD3 ASCII rule checking is not performed by this method.
        /// </remarks>
        public static StringBuffer ConvertToUnicode(UCharacterIterator src, IDNA2003Options options)
        {
            // Typed null so that the Punycode.Decode call below binds exactly as before.
            bool[] noCaseFlags = null;
            int startIndex = src.Index;

            // Step 1: scan the whole input to find out whether it is pure ASCII.
            bool hasNonAscii = false;
            for (int cp = src.Next(); cp != UCharacterIterator.DONE; cp = src.Next())
            {
                if (cp > 0x7F)
                {
                    hasNonAscii = true;
                }
            }

            // Step 2: only non-ASCII input needs name-prep; ASCII input is used verbatim.
            StringBuffer prepared;
            if (hasNonAscii)
            {
                try
                {
                    src.Index = startIndex;
                    prepared = namePrep.Prepare(src, (StringPrepOptions)options);
                }
                catch (StringPrepParseException)
                {
                    // Name-prep failed: hand back the input unchanged.
                    return new StringBuffer(src.GetText());
                }
            }
            else
            {
                prepared = new StringBuffer(src.GetText());
            }

            // Step 3: without the ACE prefix there is nothing to decode.
            if (!StartsWithPrefix(prepared))
            {
                return new StringBuffer(src.GetText());
            }

            // Step 4: strip the ACE prefix.
            int prefixLength = ACE_PREFIX.Length;
            string encoded = prepared.ToString(prefixLength, prepared.Length - prefixLength);

            // Step 5: Punycode-decode the remainder.
            StringBuffer decoded;
            try
            {
                decoded = new StringBuffer(Punycode.Decode(encoded, noCaseFlags).ToString());
            }
            catch (StringPrepParseException)
            {
                // Not valid Punycode: hand back the input unchanged.
                return new StringBuffer(src.GetText());
            }

            // Step 6: re-encode the decoded text with ToASCII.
            StringBuffer roundTripped = ConvertToASCII(UCharacterIterator.GetInstance(decoded), options);

            // Steps 7 and 8: accept the decoded text only if re-encoding reproduces the
            // prepared input (ASCII case-insensitively); a mismatch is not an error here.
            if (CompareCaseInsensitiveASCII(prepared, roundTripped) == 0)
            {
                return decoded;
            }

            return new StringBuffer(src.GetText());
        }
Beispiel #2
0
        // ICU4N specific - ReplaceLabel(StringBuilder dest, int destLabelStart, int destLabelLength,
        //    ICharSequence label, int labelLength) moved to UTS46Extension.tt

        /// <summary>
        /// Validates a single label stored in <paramref name="dest"/>, records any problems
        /// in <paramref name="info"/>, and returns the new label length.
        /// </summary>
        /// <remarks>
        /// A label that starts with "xn--" is Punycode-decoded first and the checks run on
        /// the decoded text. When <paramref name="toASCII"/> is set, a non-Punycode label
        /// that contains non-ASCII characters is Punycode-encoded and written back with an
        /// "xn--" prefix. Characters that fail a check are overwritten with U+FFFD.
        /// </remarks>
        /// <param name="dest">Buffer that contains the label; it may be modified.</param>
        /// <param name="labelStart">Index in <paramref name="dest"/> of the label's first char.</param>
        /// <param name="labelLength">Number of chars in the label.</param>
        /// <param name="toASCII">Whether the label is being converted toward its ASCII form.</param>
        /// <param name="info">Receives the label errors reported through <c>AddLabelError</c>.</param>
        /// <returns>
        /// The new label length: the value returned by <c>ReplaceLabel</c> or
        /// <c>MarkBadACELabel</c>, or the unchanged length of a Punycode label that is left as is.
        /// </returns>
        /// <exception cref="ICUException">Punycode encoding of the label failed unexpectedly.</exception>
        private int ProcessLabel(StringBuilder dest,
                                 int labelStart, int labelLength,
                                 bool toASCII,
                                 IDNAInfo info)
        {
            StringBuilder fromPunycode;
            StringBuilder labelString;
            // Remember the label's span in dest; labelStart/labelLength are re-pointed
            // at the decoded text below when the label was Punycode.
            int           destLabelStart  = labelStart;
            int           destLabelLength = labelLength;
            bool          wasPunycode;

            if (labelLength >= 4 &&
                dest[labelStart] == 'x' && dest[labelStart + 1] == 'n' &&
                dest[labelStart + 2] == '-' && dest[labelStart + 3] == '-'
                )
            {
                // Label starts with "xn--", try to un-Punycode it.
                wasPunycode = true;
                try
                {
                    fromPunycode = Punycode.Decode(dest.SubSequence(labelStart + 4, labelStart + labelLength), null);
                }
                catch (StringPrepParseException e)
                {
                    // Undecodable payload: record the error and let MarkBadACELabel decide
                    // what the label looks like in dest.
#pragma warning disable 612, 618
                    AddLabelError(info, IDNAError.Punycode);
#pragma warning restore 612, 618
                    return(MarkBadACELabel(dest, labelStart, labelLength, toASCII, info));
                }
                // Check for NFC, and for characters that are not
                // valid or deviation characters according to the normalizer.
                // If there is something wrong, then the string will change.
                // Note that the normalizer passes through non-LDH ASCII and deviation characters.
                // Deviation characters are ok in Punycode even in transitional processing.
                // In the code further below, if we find non-LDH ASCII and we have UIDNA_USE_STD3_RULES
                // then we will set UIDNA_ERROR_INVALID_ACE_LABEL there too.
                bool isValid = uts46Norm2.IsNormalized(fromPunycode);
                if (!isValid)
                {
#pragma warning disable 612, 618
                    AddLabelError(info, IDNAError.InvalidAceLabel);
#pragma warning restore 612, 618
                    return(MarkBadACELabel(dest, labelStart, labelLength, toASCII, info));
                }
                // From here on the checks run on the decoded text, so labelStart and
                // labelLength now index into fromPunycode rather than dest.
                labelString = fromPunycode;
                labelStart  = 0;
                labelLength = fromPunycode.Length;
            }
            else
            {
                wasPunycode = false;
                labelString = dest;
            }
            // Validity check
            if (labelLength == 0)
            {
                // Note: this warning suppression stays in effect (lexically) until the
                // matching restore after the hyphen checks below.
#pragma warning disable 612, 618
                AddLabelError(info, IDNAError.EmptyLabel);
                return(ReplaceLabel(dest, destLabelStart, destLabelLength, labelString, labelLength));
            }
            // labelLength>0
            if (labelLength >= 4 && labelString[labelStart + 2] == '-' && labelString[labelStart + 3] == '-')
            {
                // label starts with "??--"
                AddLabelError(info, IDNAError.Hyphen_3_4);
            }
            if (labelString[labelStart] == '-')
            {
                // label starts with "-"
                AddLabelError(info, IDNAError.LeadingHyphen);
            }
            if (labelString[labelStart + labelLength - 1] == '-')
            {
                // label ends with "-"
                AddLabelError(info, IDNAError.TrailingHyphen);
            }
#pragma warning restore 612, 618
            // If the label was not a Punycode label, then it was the result of
            // mapping, normalization and label segmentation.
            // If the label was in Punycode, then we checked above that the decoded
            // text is already normalized.
            // Now we handle the STD3 restriction to LDH characters (if set)
            // and we look for U+FFFD which indicates disallowed characters
            // in a non-Punycode label or U+FFFD itself in a Punycode label.
            // We also check for dots which can come from the input to a single-label function.
            // The loop below may overwrite characters of labelString in place with U+FFFD.
            int  i         = labelStart;
            int  limit     = labelStart + labelLength;
            // Bitwise OR of every non-ASCII char seen; used further down as a cheap
            // pre-filter before the more expensive contextual checks.
            char oredChars = (char)0;
            // If we enforce STD3 rules, then ASCII characters other than LDH and dot are disallowed.
            bool disallowNonLDHDot = (options & UTS46Options.UseSTD3Rules) != 0;
            // do/while is safe here because labelLength > 0 at this point.
            do
            {
                char c = labelString[i];
                if (c <= 0x7f)
                {
                    if (c == '.')
                    {
#pragma warning disable 612, 618
                        AddLabelError(info, IDNAError.LabelHasDot);
#pragma warning restore 612, 618
                        labelString[i] = '\ufffd';
                    }
                    else if (disallowNonLDHDot && asciiData[c] < 0)
                    {
                        // A negative asciiData entry is treated as "not allowed under STD3 rules".
#pragma warning disable 612, 618
                        AddLabelError(info, IDNAError.Disallowed);
#pragma warning restore 612, 618
                        labelString[i] = '\ufffd';
                    }
                }
                else
                {
                    oredChars |= c;
                    if (disallowNonLDHDot && IsNonASCIIDisallowedSTD3Valid(c))
                    {
#pragma warning disable 612, 618
                        AddLabelError(info, IDNAError.Disallowed);
#pragma warning restore 612, 618
                        labelString[i] = '\ufffd';
                    }
                    else if (c == 0xfffd)
                    {
                        // U+FFFD already present in the label: report it, nothing to overwrite.
#pragma warning disable 612, 618
                        AddLabelError(info, IDNAError.Disallowed);
#pragma warning restore 612, 618
                    }
                }
                ++i;
            } while (i < limit);
            // Check for a leading combining mark after other validity checks
            // so that we don't report IDNA.Error.DISALLOWED for the U+FFFD from here.
            int c2;
            // "Unsafe" is ok because unpaired surrogates were mapped to U+FFFD.
            c2 = labelString.CodePointAt(labelStart);
            if ((U_GET_GC_MASK(c2) & U_GC_M_MASK) != 0)
            {
#pragma warning disable 612, 618
                AddLabelError(info, IDNAError.LeadingCombiningMark);
#pragma warning restore 612, 618
                labelString[labelStart] = '\ufffd';
                if (c2 > 0xffff)
                {
                    // Remove c's trail surrogate.
                    labelString.Remove(labelStart + 1, 1);
                    --labelLength;
                    // Reference comparison: when the label is being edited directly in dest,
                    // the recorded span in dest shrinks as well.
                    if (labelString == dest)
                    {
                        --destLabelLength;
                    }
                }
            }
#pragma warning disable 612, 618
            if (!HasCertainLabelErrors(info, severeErrors))
#pragma warning restore 612, 618
            {
                // Do contextual checks only if we do not have U+FFFD from a severe error
                // because U+FFFD can make these checks fail.
                if ((options & UTS46Options.CheckBiDi) != 0 &&
#pragma warning disable 612, 618
                    (!IsBiDi(info) || IsOkBiDi(info)))
#pragma warning restore 612, 618
                {
                    CheckLabelBiDi(labelString, labelStart, labelLength, info);
                }
                // (oredChars & 0x200c) == 0x200c is a necessary, not sufficient, condition
                // for the label to contain U+200C or U+200D; it skips the ContextJ scan cheaply.
                if ((options & UTS46Options.CheckContextJ) != 0 && (oredChars & 0x200c) == 0x200c &&
                    !IsLabelOkContextJ(labelString, labelStart, labelLength)
                    )
                {
#pragma warning disable 612, 618
                    AddLabelError(info, IDNAError.ContextJ);
#pragma warning restore 612, 618
                }
                // If the OR of all non-ASCII chars is below U+00B7, no single char can be
                // U+00B7 or above, so the ContextO scan is skipped.
                if ((options & UTS46Options.CheckContextO) != 0 && oredChars >= 0xb7)
                {
                    CheckLabelContextO(labelString, labelStart, labelLength, info);
                }
                if (toASCII)
                {
                    if (wasPunycode)
                    {
                        // Leave a Punycode label unchanged if it has no severe errors.
                        if (destLabelLength > 63)
                        {
#pragma warning disable 612, 618
                            AddLabelError(info, IDNAError.LabelTooLong);
#pragma warning restore 612, 618
                        }
                        return(destLabelLength);
                    }
                    else if (oredChars >= 0x80)
                    {
                        // Contains non-ASCII characters.
                        StringBuilder punycode;
                        try
                        {
                            punycode = Punycode.Encode(labelString.SubSequence(labelStart, labelStart + labelLength), null);
                        }
                        catch (StringPrepParseException e)
                        {
                            throw new ICUException(e);  // unexpected
                        }
                        punycode.Insert(0, "xn--");
                        // The 63-char limit is checked on the encoded form including the "xn--" prefix.
                        if (punycode.Length > 63)
                        {
#pragma warning disable 612, 618
                            AddLabelError(info, IDNAError.LabelTooLong);
#pragma warning restore 612, 618
                        }
                        return(ReplaceLabel(dest, destLabelStart, destLabelLength,
                                            punycode, punycode.Length));
                    }
                    else
                    {
                        // all-ASCII label
                        if (labelLength > 63)
                        {
#pragma warning disable 612, 618
                            AddLabelError(info, IDNAError.LabelTooLong);
#pragma warning restore 612, 618
                        }
                    }
                }
            }
            else
            {
                // If a Punycode label has severe errors,
                // then leave it but make sure it does not look valid.
                if (wasPunycode)
                {
#pragma warning disable 612, 618
                    AddLabelError(info, IDNAError.InvalidAceLabel);
#pragma warning restore 612, 618
                    return(MarkBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info));
                }
            }
            // Reached when converting to Unicode, for an all-ASCII label when converting
            // to ASCII, or for a non-Punycode label that has severe errors.
            return(ReplaceLabel(dest, destLabelStart, destLabelLength, labelString, labelLength));
        }
Beispiel #3
0
        /// <summary>
        /// Applies the IDNA2003 ToASCII steps to a single label: name-prep (for non-ASCII
        /// input), optional STD3 ASCII rule verification, Punycode encoding with the ACE
        /// prefix (for non-ASCII results), and the maximum label length check.
        /// </summary>
        /// <param name="src">
        /// Iterator over the label. It is scanned from its current position and then reset
        /// to the start before the label text is processed.
        /// </param>
        /// <param name="options">
        /// Conversion options; <see cref="IDNA2003Options.UseSTD3Rules"/> enables the STD3
        /// checks. The value is also forwarded to the name-prep step.
        /// </param>
        /// <returns>
        /// The prepared label itself when it is all ASCII, otherwise the ACE prefix followed
        /// by the lower-cased Punycode encoding of the prepared label.
        /// </returns>
        /// <exception cref="StringPrepParseException">
        /// The prepared label is empty, violates the STD3 ASCII rules (when enabled),
        /// contains non-ASCII characters but already starts with the ACE prefix, or the
        /// result is longer than <c>MAX_LABEL_LENGTH</c>. Exceptions from the name-prep
        /// and Punycode steps are not caught and propagate as well.
        /// </exception>
        public static StringBuffer ConvertToASCII(UCharacterIterator src, IDNA2003Options options)
        {
            bool useSTD3ASCIIRules = (options & IDNA2003Options.UseSTD3Rules) != 0;
            int ch;

            // Step 1: find out whether the source is pure ASCII.
            bool srcIsASCII = true;
            while ((ch = src.Next()) != UCharacterIterator.DONE)
            {
                if (ch > 0x7f)
                {
                    srcIsASCII = false;
                }
            }

            src.SetToStart();

            // Step 2: name-prep is performed only if the source contains non-ASCII.
            StringBuffer processOut;
            if (!srcIsASCII)
            {
                processOut = namePrep.Prepare(src, (StringPrepOptions)options);
            }
            else
            {
                processOut = new StringBuffer(src.GetText());
            }

            int poLen = processOut.Length;
            if (poLen == 0)
            {
                throw new StringPrepParseException("Found zero length label after NamePrep.", StringPrepErrorType.ZeroLengthLabel);
            }

            // Steps 3 & 4: re-evaluate ASCII-ness on the prepared text (name-prep may have
            // changed it) and look for ASCII characters that are not letter/digit/hyphen.
            srcIsASCII = true;
            bool srcIsLDH = true;
            int failPos = -1;
            for (int j = 0; j < poLen; j++)
            {
                ch = processOut[j];
                if (ch > 0x7F)
                {
                    srcIsASCII = false;
                }
                else if (!IsLDHChar(ch))
                {
                    // Surrogates need not be assembled here because LDH code points
                    // are in the ASCII range only.
                    srcIsLDH = false;
                    failPos = j;
                }
            }

            if (useSTD3ASCIIRules)
            {
                // Verify 3a (only LDH among the ASCII characters) and 3b (no leading or
                // trailing hyphen). poLen > 0 is guaranteed by the check above.
                if (!srcIsLDH)
                {
                    // NOTE(review): the reported position is failPos - 1 (for failPos > 0),
                    // not failPos itself; kept unchanged -- confirm this is intended.
                    throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
                                                       StringPrepErrorType.STD3ASCIIRulesError,
                                                       processOut.ToString(),
                                                       (failPos > 0) ? (failPos - 1) : failPos);
                }
                if (processOut[0] == HYPHEN)
                {
                    throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
                                                       StringPrepErrorType.STD3ASCIIRulesError,
                                                       processOut.ToString(),
                                                       0);
                }
                if (processOut[poLen - 1] == HYPHEN)
                {
                    throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
                                                       StringPrepErrorType.STD3ASCIIRulesError,
                                                       processOut.ToString(),
                                                       poLen - 1);
                }
            }

            StringBuffer dest;
            if (srcIsASCII)
            {
                // Nothing to encode: the prepared label is the result.
                dest = processOut;
            }
            else
            {
                // Step 5: a label that still needs encoding must not already carry the ACE prefix.
                if (StartsWithPrefix(processOut))
                {
                    throw new StringPrepParseException("The input starts with the ACE Prefix.",
                                                       StringPrepErrorType.AcePrefixError, processOut.ToString(), 0);
                }

                // Step 6: encode the sequence with Punycode.
                bool[] caseFlags = new bool[poLen];
                StringBuilder punyout = Punycode.Encode(processOut, caseFlags);

                // Convert all code points to lower-case ASCII.
                StringBuffer lowerOut = ToASCIILower(punyout);

                // Step 7: prepend the ACE prefix, then append the encoded label.
                dest = new StringBuffer();
                dest.Append(ACE_PREFIX, 0, ACE_PREFIX.Length);
                dest.Append(lowerOut);
            }

            // Step 8: enforce the maximum label length on the final result.
            if (dest.Length > MAX_LABEL_LENGTH)
            {
                throw new StringPrepParseException("The labels in the input are too long. Length > 63.",
                                                   StringPrepErrorType.LabelTooLongError, dest.ToString(), 0);
            }
            return dest;
        }