Example #1
0
        // ICU4N specific - ReplaceLabel(StringBuilder dest, int destLabelStart, int destLabelLength,
        //    ICharSequence label, int labelLength) moved to UTS46Extension.tt

        /// <summary>
        /// Validates a single label stored in <paramref name="dest"/>, records every problem
        /// found on <paramref name="info"/>, and returns the new label length.
        /// </summary>
        /// <remarks>
        /// A label that starts with "xn--" is Punycode-decoded first and must already be
        /// normalized according to <c>uts46Norm2</c>; all later checks then run on the decoded
        /// text. The checks are, in order: empty label, hyphen placement, per-character
        /// dot / STD3 / U+FFFD scanning, leading combining mark, and (only when no severe
        /// error was recorded) the BiDi, CONTEXTJ and CONTEXTO checks selected by
        /// <c>options</c>.
        /// </remarks>
        /// <param name="dest">Buffer holding the label; offending characters may be overwritten with U+FFFD.</param>
        /// <param name="labelStart">Index in <paramref name="dest"/> of the label's first char.</param>
        /// <param name="labelLength">Number of chars in the label.</param>
        /// <param name="toASCII">
        /// When <c>true</c>, a non-Punycode label containing non-ASCII characters is
        /// Punycode-encoded with an "xn--" prefix, and labels longer than 63 chars are
        /// reported as <c>IDNAError.LabelTooLong</c>.
        /// </param>
        /// <param name="info">Receives the label errors.</param>
        /// <returns>The new label length.</returns>
        private int ProcessLabel(StringBuilder dest,
                                 int labelStart, int labelLength,
                                 bool toASCII,
                                 IDNAInfo info)
        {
            // The label-error helpers called throughout this method (AddLabelError,
            // HasCertainLabelErrors, IsBiDi, IsOkBiDi) need warnings 612/618 suppressed,
            // so the suppression covers the whole body rather than each call site.
#pragma warning disable 612, 618
            // Where the label lives inside dest. labelStart/labelLength are re-pointed at
            // the decoded text below when the label turns out to be Punycode.
            int originalStart = labelStart;
            int originalLength = labelLength;

            bool hasAcePrefix = labelLength >= 4
                && dest[labelStart] == 'x'
                && dest[labelStart + 1] == 'n'
                && dest[labelStart + 2] == '-'
                && dest[labelStart + 3] == '-';

            StringBuilder text;
            if (hasAcePrefix)
            {
                // Label starts with "xn--", try to un-Punycode it.
                StringBuilder decoded;
                try
                {
                    decoded = Punycode.Decode(dest.SubSequence(labelStart + 4, labelStart + labelLength), null);
                }
                catch (StringPrepParseException)
                {
                    AddLabelError(info, IDNAError.Punycode);
                    return MarkBadACELabel(dest, labelStart, labelLength, toASCII, info);
                }

                // Check for NFC, and for characters that are not valid or deviation
                // characters according to the normalizer; if anything is wrong, the string
                // would change under normalization. The normalizer passes through non-LDH
                // ASCII and deviation characters. Deviation characters are ok in Punycode
                // even in transitional processing. Non-LDH ASCII is handled by the
                // per-character scan further below when STD3 rules are enabled.
                if (!uts46Norm2.IsNormalized(decoded))
                {
                    AddLabelError(info, IDNAError.InvalidAceLabel);
                    return MarkBadACELabel(dest, labelStart, labelLength, toASCII, info);
                }

                text = decoded;
                labelStart = 0;
                labelLength = decoded.Length;
            }
            else
            {
                text = dest;
            }

            // Validity check
            if (labelLength == 0)
            {
                AddLabelError(info, IDNAError.EmptyLabel);
                return ReplaceLabel(dest, originalStart, originalLength, text, labelLength);
            }

            // From here on labelLength is non-zero.
            int limit = labelStart + labelLength;

            if (labelLength >= 4 && text[labelStart + 2] == '-' && text[labelStart + 3] == '-')
            {
                // label starts with "??--"
                AddLabelError(info, IDNAError.Hyphen_3_4);
            }
            if (text[labelStart] == '-')
            {
                // label starts with "-"
                AddLabelError(info, IDNAError.LeadingHyphen);
            }
            if (text[limit - 1] == '-')
            {
                // label ends with "-"
                AddLabelError(info, IDNAError.TrailingHyphen);
            }

            // If the label was not a Punycode label, then it was the result of mapping,
            // normalization and label segmentation. If it was in Punycode, its validity
            // was checked above. Now handle the STD3 restriction to LDH characters (if
            // set), look for U+FFFD (disallowed characters in a non-Punycode label, or
            // U+FFFD itself in a Punycode label), and look for dots, which can come from
            // the input to a single-label function.

            // If we enforce STD3 rules, then ASCII characters other than LDH and dot are disallowed.
            bool enforceStd3 = (options & UTS46Options.UseSTD3Rules) != 0;
            // Bitwise OR of every non-ASCII char seen; used as a cheap pre-filter for the
            // CONTEXTJ / CONTEXTO checks and for the "contains non-ASCII" test.
            char nonAsciiBits = '\0';
            int pos = labelStart;
            do
            {
                char ch = text[pos];
                if (ch > 0x7f)
                {
                    nonAsciiBits |= ch;
                    if (enforceStd3 && IsNonASCIIDisallowedSTD3Valid(ch))
                    {
                        AddLabelError(info, IDNAError.Disallowed);
                        text[pos] = '\ufffd';
                    }
                    else if (ch == 0xfffd)
                    {
                        AddLabelError(info, IDNAError.Disallowed);
                    }
                }
                else if (ch == '.')
                {
                    AddLabelError(info, IDNAError.LabelHasDot);
                    text[pos] = '\ufffd';
                }
                else if (enforceStd3 && asciiData[ch] < 0)
                {
                    AddLabelError(info, IDNAError.Disallowed);
                    text[pos] = '\ufffd';
                }
                ++pos;
            } while (pos < limit);

            // Check for a leading combining mark after other validity checks
            // so that we don't report IDNAError.Disallowed for the U+FFFD written here.
            // "Unsafe" code point access is ok because unpaired surrogates were mapped to U+FFFD.
            int firstCodePoint = text.CodePointAt(labelStart);
            if ((U_GET_GC_MASK(firstCodePoint) & U_GC_M_MASK) != 0)
            {
                AddLabelError(info, IDNAError.LeadingCombiningMark);
                text[labelStart] = '\ufffd';
                if (firstCodePoint > 0xffff)
                {
                    // The mark was a surrogate pair: drop its trail surrogate.
                    text.Remove(labelStart + 1, 1);
                    --labelLength;
                    if (text == dest)
                    {
                        // We edited dest itself, so its label shrank as well.
                        --originalLength;
                    }
                }
            }

            if (HasCertainLabelErrors(info, severeErrors))
            {
                // If a Punycode label has severe errors,
                // then leave it but make sure it does not look valid.
                if (hasAcePrefix)
                {
                    AddLabelError(info, IDNAError.InvalidAceLabel);
                    return MarkBadACELabel(dest, originalStart, originalLength, toASCII, info);
                }
                return ReplaceLabel(dest, originalStart, originalLength, text, labelLength);
            }

            // Do contextual checks only if we do not have U+FFFD from a severe error
            // because U+FFFD can make these checks fail.
            if ((options & UTS46Options.CheckBiDi) != 0 && (!IsBiDi(info) || IsOkBiDi(info)))
            {
                CheckLabelBiDi(text, labelStart, labelLength, info);
            }
            if ((options & UTS46Options.CheckContextJ) != 0
                && (nonAsciiBits & 0x200c) == 0x200c
                && !IsLabelOkContextJ(text, labelStart, labelLength))
            {
                AddLabelError(info, IDNAError.ContextJ);
            }
            if ((options & UTS46Options.CheckContextO) != 0 && nonAsciiBits >= 0xb7)
            {
                CheckLabelContextO(text, labelStart, labelLength, info);
            }

            if (!toASCII)
            {
                return ReplaceLabel(dest, originalStart, originalLength, text, labelLength);
            }

            if (hasAcePrefix)
            {
                // Leave a Punycode label unchanged if it has no severe errors.
                if (originalLength > 63)
                {
                    AddLabelError(info, IDNAError.LabelTooLong);
                }
                return originalLength;
            }

            if (nonAsciiBits < 0x80)
            {
                // all-ASCII label
                if (labelLength > 63)
                {
                    AddLabelError(info, IDNAError.LabelTooLong);
                }
                return ReplaceLabel(dest, originalStart, originalLength, text, labelLength);
            }

            // Contains non-ASCII characters: encode and prepend the ACE prefix.
            StringBuilder punycode;
            try
            {
                punycode = Punycode.Encode(text.SubSequence(labelStart, labelStart + labelLength), null);
            }
            catch (StringPrepParseException e)
            {
                throw new ICUException(e);  // unexpected
            }
            punycode.Insert(0, "xn--");
            if (punycode.Length > 63)
            {
                AddLabelError(info, IDNAError.LabelTooLong);
            }
            return ReplaceLabel(dest, originalStart, originalLength, punycode, punycode.Length);
#pragma warning restore 612, 618
        }
Example #2
0
        /// <summary>
        /// Converts a single label to its Unicode form following the numbered ToUnicode
        /// steps below, falling back to the original text of <paramref name="src"/>
        /// whenever preparation, Punycode decoding or the round-trip verification fails.
        /// </summary>
        /// <remarks>
        /// STD3 ASCII rule verification is not performed by this method.
        /// NOTE(review): the <c>ConvertToASCII</c> call in step 6 is not guarded, so any
        /// exception it throws reaches the caller — confirm this is intended given the
        /// RFC statement quoted in the TODO below.
        /// </remarks>
        /// <param name="src">
        /// Iterator over the label. It is read to the end, and its index is reset to the
        /// starting position before preparation when non-ASCII input is found.
        /// </param>
        /// <param name="options">
        /// Options cast to <c>StringPrepOptions</c> for the preparation step and passed
        /// unchanged to <c>ConvertToASCII</c> for the verification step.
        /// </param>
        /// <returns>
        /// The decoded label when it carries the ACE prefix, decodes, and round-trips
        /// through <c>ConvertToASCII</c>; otherwise a new buffer holding <c>src.GetText()</c>.
        /// </returns>
        public static StringBuffer ConvertToUnicode(UCharacterIterator src, IDNA2003Options options)
        {
            bool[] caseFlags = null;
            int startIndex = src.Index;

            // Step 1: find out whether every code point in src is ASCII.
            // The whole iterator is consumed even after a non-ASCII code point is seen.
            bool allAscii = true;
            int codePoint;
            while ((codePoint = src.Next()) != UCharacterIterator.DONE)
            {
                if (codePoint > 0x7F)
                {
                    allAscii = false;
                }
            }

            StringBuffer prepared;
            if (allAscii)
            {
                // Nothing to prepare: work on a copy of the source text.
                prepared = new StringBuffer(src.GetText());
            }
            else
            {
                try
                {
                    // Step 2: process the string from its original position.
                    src.Index = startIndex;
                    prepared = namePrep.Prepare(src, (StringPrepOptions)options);
                }
                catch (StringPrepParseException)
                {
                    return new StringBuffer(src.GetText());
                }
            }

            // TODO:
            // The RFC states that
            // <quote>
            // ToUnicode never fails. If any step fails, then the original input
            // is returned immediately in that step.
            // </quote>

            // Step 3: verify the ACE prefix; without it the source is returned as-is.
            if (!StartsWithPrefix(prepared))
            {
                return new StringBuffer(src.GetText());
            }

            // Step 4: remove the ACE prefix.
            string encoded = prepared.ToString(ACE_PREFIX.Length, prepared.Length - ACE_PREFIX.Length);

            // Step 5: decode using Punycode.
            StringBuffer decoded;
            try
            {
                decoded = new StringBuffer(Punycode.Decode(encoded, caseFlags).ToString());
            }
            catch (StringPrepParseException)
            {
                return new StringBuffer(src.GetText());
            }

            // Step 6: apply ToASCII to the decoded text.
            StringBuffer roundTripped = ConvertToASCII(UCharacterIterator.GetInstance(decoded), options);

            // Step 7: verify that the round trip reproduces the prepared input
            // (ASCII case-insensitively); a mismatch falls back to the source text.
            if (CompareCaseInsensitiveASCII(prepared, roundTripped) != 0)
            {
                return new StringBuffer(src.GetText());
            }

            // Step 8: return the output of step 5.
            return decoded;
        }