/// <summary>
/// Makes a bad ACE ("xn--") label in <paramref name="dest"/> look invalid.
/// The characters after the "xn--" prefix are scanned: a dot is reported as
/// <c>IDNAError.LabelHasDot</c> and replaced with U+FFFD, and, when STD3 rules are in
/// effect, every other ASCII character with a negative <c>asciiData</c> entry (non-LDH)
/// is replaced with U+FFFD as well. If nothing but LDH characters was found, a U+FFFD is
/// inserted at the end of the label so that it cannot be mistaken for a valid one.
/// </summary>
/// <param name="dest">Buffer holding the label; may be modified in place.</param>
/// <param name="labelStart">Index in <paramref name="dest"/> of the label's first character (the 'x' of "xn--").</param>
/// <param name="labelLength">Length of the label, including the "xn--" prefix.</param>
/// <param name="toASCII">When true, a remaining all-ASCII label longer than 63 is reported as <c>IDNAError.LabelTooLong</c>.</param>
/// <param name="info">Receives the label errors.</param>
/// <returns>The label length, which is one larger than <paramref name="labelLength"/> if a U+FFFD was inserted.</returns>
private int MarkBadACELabel(StringBuilder dest, int labelStart, int labelLength, bool toASCII, IDNAInfo info)
{
    bool std3Rules = (options & UTS46Options.UseSTD3Rules) != 0;
    bool stillASCII = true;
    bool stillLDH = true;
    int end = labelStart + labelLength;
    int pos = labelStart + 4; // Skip the leading "xn--".

    do
    {
        char ch = dest[pos];
        if (ch > 0x7f)
        {
            // Non-ASCII: neither ASCII nor LDH, but left in place.
            stillASCII = false;
            stillLDH = false;
        }
        else if (ch == '.')
        {
#pragma warning disable 612, 618
            AddLabelError(info, IDNAError.LabelHasDot);
#pragma warning restore 612, 618
            dest[pos] = '\ufffd';
            stillASCII = false;
            stillLDH = false;
        }
        else if (asciiData[ch] < 0)
        {
            // Non-LDH ASCII is only replaced when STD3 rules are enforced.
            stillLDH = false;
            if (std3Rules)
            {
                dest[pos] = '\ufffd';
                stillASCII = false;
            }
        }
        pos++;
    }
    while (pos < end);

    if (stillLDH)
    {
        // The label still looks like a well-formed ACE label: append U+FFFD to break it.
        dest.Insert(end, '\ufffd');
        return labelLength + 1;
    }

    if (toASCII && stillASCII && labelLength > 63)
    {
#pragma warning disable 612, 618
        AddLabelError(info, IDNAError.LabelTooLong);
#pragma warning restore 612, 618
    }
    return labelLength;
}
/// <summary>
/// Converts a single domain name label into its Unicode form for human-readable display.
/// If any processing step fails, then <c>info.HasErrors</c> will be true.
/// The label might be modified according to the types of errors.
/// </summary>
/// <param name="label">Input domain name label, supplied as a <see cref="StringBuilder"/>.</param>
/// <param name="dest">Destination string object.</param>
/// <param name="info">Output container of IDNA processing details.</param>
/// <returns><paramref name="dest"/></returns>
/// <stable>ICU 4.6</stable>
public abstract StringBuilder LabelToUnicode(StringBuilder label, StringBuilder dest, IDNAInfo info);
// ICU4N specific - ReplaceLabel(StringBuilder dest, int destLabelStart, int destLabelLength,
//      ICharSequence label, int labelLength) moved to UTS46Extension.tt

/// <summary>
/// Validates and converts the single label that occupies
/// <paramref name="labelLength"/> characters of <paramref name="dest"/> starting at
/// <paramref name="labelStart"/>.
/// A label starting with "xn--" is Punycode-decoded and validated in its decoded form;
/// any other label is validated in place. All problems found are recorded on
/// <paramref name="info"/>, and offending characters may be replaced with U+FFFD.
/// </summary>
/// <param name="dest">Buffer holding the label; may be modified in place.</param>
/// <param name="labelStart">Index in <paramref name="dest"/> of the label's first character.</param>
/// <param name="labelLength">Length of the label in <paramref name="dest"/>.</param>
/// <param name="toASCII">
/// When true and there are no severe errors, a label containing non-ASCII characters is
/// Punycode-encoded with an "xn--" prefix, and labels longer than 63 are reported as
/// <c>IDNAError.LabelTooLong</c>.
/// </param>
/// <param name="info">Receives the label errors.</param>
/// <returns>The new label length.</returns>
private int ProcessLabel(StringBuilder dest, int labelStart, int labelLength, bool toASCII, IDNAInfo info)
{
    // Where the label lives inside dest. labelStart/labelLength are re-pointed at the
    // decoded text below if the label turns out to be Punycode.
    int destStart = labelStart;
    int destLength = labelLength;
    StringBuilder text;

    bool isAce = labelLength >= 4
        && dest[labelStart] == 'x'
        && dest[labelStart + 1] == 'n'
        && dest[labelStart + 2] == '-'
        && dest[labelStart + 3] == '-';

    if (isAce)
    {
        // "xn--" prefix: try to undo the Punycode encoding.
        StringBuilder decoded;
        try
        {
            decoded = Punycode.Decode(dest.SubSequence(labelStart + 4, labelStart + labelLength), null);
        }
        catch (StringPrepParseException)
        {
#pragma warning disable 612, 618
            AddLabelError(info, IDNAError.Punycode);
#pragma warning restore 612, 618
            return MarkBadACELabel(dest, labelStart, labelLength, toASCII, info);
        }

        // The decoded text must already be in the normalizer's output form (NFC, only
        // valid or deviation characters); otherwise normalizing would change it.
        // The normalizer passes through non-LDH ASCII and deviation characters, and
        // deviation characters are fine in Punycode even in transitional processing.
        // Non-LDH ASCII under STD3 rules is caught by the character scan further down,
        // which also leads to InvalidAceLabel.
        if (!uts46Norm2.IsNormalized(decoded))
        {
#pragma warning disable 612, 618
            AddLabelError(info, IDNAError.InvalidAceLabel);
#pragma warning restore 612, 618
            return MarkBadACELabel(dest, labelStart, labelLength, toASCII, info);
        }

        text = decoded;
        labelStart = 0;
        labelLength = decoded.Length;
    }
    else
    {
        text = dest;
    }

    // --- Validity checks ---
    if (labelLength == 0)
    {
#pragma warning disable 612, 618
        AddLabelError(info, IDNAError.EmptyLabel);
#pragma warning restore 612, 618
        return ReplaceLabel(dest, destStart, destLength, text, labelLength);
    }

    // From here on labelLength is not zero.
#pragma warning disable 612, 618
    if (labelLength >= 4 && text[labelStart + 2] == '-' && text[labelStart + 3] == '-')
    {
        // "??--" at the start of the label.
        AddLabelError(info, IDNAError.Hyphen_3_4);
    }
    if (text[labelStart] == '-')
    {
        AddLabelError(info, IDNAError.LeadingHyphen);
    }
    if (text[labelStart + labelLength - 1] == '-')
    {
        AddLabelError(info, IDNAError.TrailingHyphen);
    }
#pragma warning restore 612, 618

    // A non-Punycode label is the result of mapping, normalization and label
    // segmentation; a Punycode label was validated against the normalizer above.
    // What remains is the STD3 restriction to LDH characters (if enabled), U+FFFD
    // (marking disallowed characters, or a literal U+FFFD in a Punycode label), and
    // dots, which can arrive via the single-label entry points.
    bool std3Rules = (options & UTS46Options.UseSTD3Rules) != 0;
    char seenBits = (char)0; // Bitwise OR of every non-ASCII char seen in the label.
    int end = labelStart + labelLength;
    int pos = labelStart;
    do
    {
        char ch = text[pos];
        if (ch > 0x7f)
        {
            seenBits = (char)(seenBits | ch);
            if (std3Rules && IsNonASCIIDisallowedSTD3Valid(ch))
            {
#pragma warning disable 612, 618
                AddLabelError(info, IDNAError.Disallowed);
#pragma warning restore 612, 618
                text[pos] = '\ufffd';
            }
            else if (ch == 0xfffd)
            {
#pragma warning disable 612, 618
                AddLabelError(info, IDNAError.Disallowed);
#pragma warning restore 612, 618
            }
        }
        else if (ch == '.')
        {
#pragma warning disable 612, 618
            AddLabelError(info, IDNAError.LabelHasDot);
#pragma warning restore 612, 618
            text[pos] = '\ufffd';
        }
        else if (std3Rules && asciiData[ch] < 0)
        {
            // Under STD3 rules, ASCII other than LDH (and dot, handled above) is disallowed.
#pragma warning disable 612, 618
            AddLabelError(info, IDNAError.Disallowed);
#pragma warning restore 612, 618
            text[pos] = '\ufffd';
        }
        pos++;
    }
    while (pos < end);

    // The leading-combining-mark check comes after the scan above so that the U+FFFD
    // written here is not additionally reported as Disallowed.
    // Reading a full code point is fine: unpaired surrogates were mapped to U+FFFD.
    int firstCodePoint = text.CodePointAt(labelStart);
    if ((U_GET_GC_MASK(firstCodePoint) & U_GC_M_MASK) != 0)
    {
#pragma warning disable 612, 618
        AddLabelError(info, IDNAError.LeadingCombiningMark);
#pragma warning restore 612, 618
        text[labelStart] = '\ufffd';
        if (firstCodePoint > 0xffff)
        {
            // Supplementary code point: drop its trail surrogate as well.
            text.Remove(labelStart + 1, 1);
            --labelLength;
            if (object.ReferenceEquals(text, dest))
            {
                --destLength;
            }
        }
    }

#pragma warning disable 612, 618
    bool hasSevereErrors = HasCertainLabelErrors(info, severeErrors);
#pragma warning restore 612, 618
    if (hasSevereErrors)
    {
        if (!isAce)
        {
            return ReplaceLabel(dest, destStart, destLength, text, labelLength);
        }
        // A Punycode label with severe errors is left as is, but must not look valid.
#pragma warning disable 612, 618
        AddLabelError(info, IDNAError.InvalidAceLabel);
#pragma warning restore 612, 618
        return MarkBadACELabel(dest, destStart, destLength, toASCII, info);
    }

    // Contextual checks run only without severe errors, because a U+FFFD left behind
    // by a severe error could make them fail.
#pragma warning disable 612, 618
    if ((options & UTS46Options.CheckBiDi) != 0 && (!IsBiDi(info) || IsOkBiDi(info)))
#pragma warning restore 612, 618
    {
        CheckLabelBiDi(text, labelStart, labelLength, info);
    }

    if ((options & UTS46Options.CheckContextJ) != 0
        && (seenBits & 0x200c) == 0x200c
        && !IsLabelOkContextJ(text, labelStart, labelLength))
    {
#pragma warning disable 612, 618
        AddLabelError(info, IDNAError.ContextJ);
#pragma warning restore 612, 618
    }

    if ((options & UTS46Options.CheckContextO) != 0 && seenBits >= 0xb7)
    {
        CheckLabelContextO(text, labelStart, labelLength, info);
    }

    if (toASCII)
    {
        if (isAce)
        {
            // A Punycode label without severe errors stays unchanged in dest.
            if (destLength > 63)
            {
#pragma warning disable 612, 618
                AddLabelError(info, IDNAError.LabelTooLong);
#pragma warning restore 612, 618
            }
            return destLength;
        }

        if (seenBits >= 0x80)
        {
            // The label contains non-ASCII characters: encode it as "xn--" + Punycode.
            StringBuilder encoded;
            try
            {
                encoded = Punycode.Encode(text.SubSequence(labelStart, labelStart + labelLength), null);
            }
            catch (StringPrepParseException e)
            {
                throw new ICUException(e); // unexpected
            }
            encoded.Insert(0, "xn--");
            if (encoded.Length > 63)
            {
#pragma warning disable 612, 618
                AddLabelError(info, IDNAError.LabelTooLong);
#pragma warning restore 612, 618
            }
            return ReplaceLabel(dest, destStart, destLength, encoded, encoded.Length);
        }

        // All-ASCII label.
        if (labelLength > 63)
        {
#pragma warning disable 612, 618
            AddLabelError(info, IDNAError.LabelTooLong);
#pragma warning restore 612, 618
        }
    }

    return ReplaceLabel(dest, destStart, destLength, text, labelLength);
}
/// <summary>
/// Converts a single domain name label into its ASCII form for DNS lookup.
/// If any processing step fails, then <c>info.HasErrors</c> will be true and
/// the result might not be an ASCII string.
/// The label might be modified according to the types of errors.
/// Labels with severe errors will be left in (or turned into) their Unicode form.
/// </summary>
/// <param name="label">Input domain name label, supplied as a character array.</param>
/// <param name="dest">Destination string object.</param>
/// <param name="info">Output container of IDNA processing details.</param>
/// <returns><paramref name="dest"/></returns>
/// <stable>ICU 4.6</stable>
public abstract StringBuilder LabelToASCII(char[] label, StringBuilder dest, IDNAInfo info);
/// <summary>
/// Converts a single domain name label into its ASCII form for DNS lookup.
/// If any processing step fails, then <c>info.HasErrors</c> will be true and
/// the result might not be an ASCII string.
/// The label might be modified according to the types of errors.
/// Labels with severe errors will be left in (or turned into) their Unicode form.
/// </summary>
/// <param name="label">Input domain name label, supplied as an <c>ICharSequence</c>.</param>
/// <param name="dest">Destination string object.</param>
/// <param name="info">Output container of IDNA processing details.</param>
/// <returns><paramref name="dest"/></returns>
/// <stable>ICU 4.6</stable>
public abstract StringBuilder LabelToASCII(ICharSequence label, StringBuilder dest, IDNAInfo info);
/// <summary>
/// Converts a whole domain name into its Unicode form for human-readable display.
/// If any processing step fails, then <c>info.HasErrors</c> will be true.
/// The domain name might be modified according to the types of errors.
/// </summary>
/// <param name="name">Input domain name, supplied as an <c>ICharSequence</c>.</param>
/// <param name="dest">Destination string object.</param>
/// <param name="info">Output container of IDNA processing details.</param>
/// <returns><paramref name="dest"/></returns>
/// <stable>ICU 4.6</stable>
public abstract StringBuilder NameToUnicode(ICharSequence name, StringBuilder dest, IDNAInfo info);
/// <summary>
/// Converts a whole domain name into its Unicode form for human-readable display.
/// If any processing step fails, then <c>info.HasErrors</c> will be true.
/// The domain name might be modified according to the types of errors.
/// </summary>
/// <param name="name">Input domain name, supplied as a character array.</param>
/// <param name="dest">Destination string object.</param>
/// <param name="info">Output container of IDNA processing details.</param>
/// <returns><paramref name="dest"/></returns>
/// <stable>ICU 4.6</stable>
public abstract StringBuilder NameToUnicode(char[] name, StringBuilder dest, IDNAInfo info);
/// <summary>
/// Converts a whole domain name into its ASCII form for DNS lookup.
/// If any processing step fails, then <c>info.HasErrors</c> will be true and
/// the result might not be an ASCII string.
/// The domain name might be modified according to the types of errors.
/// Labels with severe errors will be left in (or turned into) their Unicode form.
/// </summary>
/// <param name="name">Input domain name, supplied as a character array.</param>
/// <param name="dest">Destination string object.</param>
/// <param name="info">Output container of IDNA processing details.</param>
/// <returns><paramref name="dest"/></returns>
/// <stable>ICU 4.6</stable>
public abstract StringBuilder NameToASCII(char[] name, StringBuilder dest, IDNAInfo info);
/// <summary>
/// Converts a whole domain name into its ASCII form for DNS lookup.
/// If any processing step fails, then <c>info.HasErrors</c> will be true and
/// the result might not be an ASCII string.
/// The domain name might be modified according to the types of errors.
/// Labels with severe errors will be left in (or turned into) their Unicode form.
/// </summary>
/// <remarks>This <c>ICharSequence</c> overload is declared <c>internal</c>.</remarks>
/// <param name="name">Input domain name, supplied as an <c>ICharSequence</c>.</param>
/// <param name="dest">Destination string object.</param>
/// <param name="info">Output container of IDNA processing details.</param>
/// <returns><paramref name="dest"/></returns>
/// <stable>ICU 4.6</stable>
internal abstract StringBuilder NameToASCII(ICharSequence name, StringBuilder dest, IDNAInfo info);
/// <summary>
/// Converts a single domain name label into its Unicode form for human-readable display.
/// If any processing step fails, then <c>info.HasErrors</c> will be true.
/// The label might be modified according to the types of errors.
/// </summary>
/// <remarks>This <c>ICharSequence</c> overload is declared <c>internal</c>.</remarks>
/// <param name="label">Input domain name label, supplied as an <c>ICharSequence</c>.</param>
/// <param name="dest">Destination string object.</param>
/// <param name="info">Output container of IDNA processing details.</param>
/// <returns><paramref name="dest"/></returns>
/// <stable>ICU 4.6</stable>
internal abstract StringBuilder LabelToUnicode(ICharSequence label, StringBuilder dest, IDNAInfo info);