/// <summary>
/// Performs the IDNA2003 ToUnicode operation on a single label.
/// </summary>
/// <remarks>
/// The decoded label is returned only when the (NamePrep-processed) input starts with
/// the ACE prefix, its remainder decodes as Punycode, and re-encoding the decoded text
/// with <see cref="ConvertToASCII"/> yields the processed input again (ASCII
/// case-insensitive comparison). In every other handled case a fresh copy of the
/// original text of <paramref name="src"/> is returned.
/// <para/>
/// STD3 ASCII rule verification is not performed by this method; the upstream check
/// is disabled.
/// </remarks>
/// <param name="src">Iterator over the label. It is read to the end; if a non-ASCII
/// code point was seen, its index is restored to the value it had on entry before
/// NamePrep runs.</param>
/// <param name="options">IDNA2003 option flags. They are passed to NamePrep (cast to
/// <c>StringPrepOptions</c>) and to <see cref="ConvertToASCII"/>.</param>
/// <returns>The decoded label, or a copy of the original text.</returns>
public static StringBuffer ConvertToUnicode(UCharacterIterator src, IDNA2003Options options)
{
    // No case flags are requested from the Punycode decoder.
    bool[] caseFlags = null;

    // Remember the entry position so the iterator can be rewound for NamePrep.
    int entryIndex = src.Index;

    // Step 1: find out whether every code point in src is ASCII.
    // The scan deliberately runs to the end of the iterator.
    bool allAscii = true;
    int codePoint;
    while ((codePoint = src.Next()) != UCharacterIterator.DONE)
    {
        if (codePoint > 0x7F)
        {
            allAscii = false;
        }
    }

    StringBuffer prepared;
    if (allAscii)
    {
        // Nothing to map: work on a copy of the source text.
        prepared = new StringBuffer(src.GetText());
    }
    else
    {
        try
        {
            // Step 2: run NamePrep over the input.
            src.Index = entryIndex;
            prepared = namePrep.Prepare(src, (StringPrepOptions)options);
        }
        catch (StringPrepParseException)
        {
            return new StringBuffer(src.GetText());
        }
    }

    // TODO:
    // The RFC states that
    // <quote>
    //     ToUnicode never fails. If any step fails, then the original input
    //     is returned immediately in that step.
    // </quote>
    // NOTE: exceptions thrown by ConvertToASCII in step 6 are not caught here.

    // Step 3: verify the ACE prefix.
    if (!StartsWithPrefix(prepared))
    {
        return new StringBuffer(src.GetText());
    }

    // Step 4: remove the ACE prefix.
    string encodedPart = prepared.ToString(ACE_PREFIX.Length, prepared.Length - ACE_PREFIX.Length);

    // Step 5: decode using Punycode; a decoding failure yields the original text.
    StringBuffer decoded;
    try
    {
        decoded = new StringBuffer(Punycode.Decode(encodedPart, caseFlags).ToString());
    }
    catch (StringPrepParseException)
    {
        return new StringBuffer(src.GetText());
    }

    // Step 6: apply ToASCII to the decoded text.
    StringBuffer roundTripped = ConvertToASCII(UCharacterIterator.GetInstance(decoded), options);

    // Step 7: verify that the round trip reproduces the processed input.
    if (CompareCaseInsensitiveASCII(prepared, roundTripped) != 0)
    {
        return new StringBuffer(src.GetText());
    }

    // Step 8: return the output of step 5.
    return decoded;
}
// ICU4N specific - ReplaceLabel(StringBuilder dest, int destLabelStart, int destLabelLength,
//     ICharSequence label, int labelLength) moved to UTS46Extension.tt

/// <summary>
/// Validates one label located in <paramref name="dest"/> and, depending on the outcome,
/// leaves it in place, replaces it, or marks it as a bad ACE label.
/// </summary>
/// <remarks>
/// A label starting with "xn--" is Punycode-decoded first and the checks run on the
/// decoded text. Errors found along the way are recorded on <paramref name="info"/>.
/// </remarks>
/// <param name="dest">Buffer that contains the label.</param>
/// <param name="labelStart">Index of the first char of the label in <paramref name="dest"/>.</param>
/// <param name="labelLength">Number of chars in the label.</param>
/// <param name="toASCII">When true, non-ASCII labels are Punycode-encoded and the
/// 63-char length limit is checked.</param>
/// <param name="info">Receives the label errors.</param>
/// <returns>The new label length.</returns>
private int ProcessLabel(StringBuilder dest, int labelStart, int labelLength, bool toASCII, IDNAInfo info)
{
    // AddLabelError, HasCertainLabelErrors, IsBiDi and IsOkBiDi are the calls that need
    // the obsolete-member warnings (CS0612/CS0618) suppressed; one region covers the body.
#pragma warning disable 612, 618
    // Location of the label inside dest. 'label', 'start' and 'length' describe the text
    // that is actually checked, which is the decoded string for a Punycode label.
    int destStart = labelStart;
    int destLength = labelLength;

    StringBuilder label;
    int start;
    int length;

    bool wasPunycode = labelLength >= 4
        && dest[labelStart] == 'x'
        && dest[labelStart + 1] == 'n'
        && dest[labelStart + 2] == '-'
        && dest[labelStart + 3] == '-';

    if (wasPunycode)
    {
        // Label starts with "xn--", try to un-Punycode it.
        StringBuilder decoded;
        try
        {
            decoded = Punycode.Decode(dest.SubSequence(labelStart + 4, labelStart + labelLength), null);
        }
        catch (StringPrepParseException)
        {
            AddLabelError(info, IDNAError.Punycode);
            return MarkBadACELabel(dest, labelStart, labelLength, toASCII, info);
        }

        // Check for NFC, and for characters that are not
        // valid or deviation characters according to the normalizer.
        // If there is something wrong, then the string will change.
        // Note that the normalizer passes through non-LDH ASCII and deviation characters.
        // Deviation characters are ok in Punycode even in transitional processing.
        // In the code further below, if we find non-LDH ASCII and we have UIDNA_USE_STD3_RULES
        // then we will set UIDNA_ERROR_INVALID_ACE_LABEL there too.
        if (!uts46Norm2.IsNormalized(decoded))
        {
            AddLabelError(info, IDNAError.InvalidAceLabel);
            return MarkBadACELabel(dest, labelStart, labelLength, toASCII, info);
        }

        label = decoded;
        start = 0;
        length = decoded.Length;
    }
    else
    {
        label = dest;
        start = labelStart;
        length = labelLength;
    }

    // Validity check
    if (length == 0)
    {
        AddLabelError(info, IDNAError.EmptyLabel);
        return ReplaceLabel(dest, destStart, destLength, label, length);
    }

    // length > 0 from here on
    if (length >= 4 && label[start + 2] == '-' && label[start + 3] == '-')
    {
        // label starts with "??--"
        AddLabelError(info, IDNAError.Hyphen_3_4);
    }
    if (label[start] == '-')
    {
        // label starts with "-"
        AddLabelError(info, IDNAError.LeadingHyphen);
    }
    if (label[start + length - 1] == '-')
    {
        // label ends with "-"
        AddLabelError(info, IDNAError.TrailingHyphen);
    }

    // If the label was not a Punycode label, then it was the result of
    // mapping, normalization and label segmentation.
    // If the label was in Punycode, then we mapped it again above
    // and checked its validity.
    // Now we handle the STD3 restriction to LDH characters (if set)
    // and we look for U+FFFD which indicates disallowed characters
    // in a non-Punycode label or U+FFFD itself in a Punycode label.
    // We also check for dots which can come from the input to a single-label function.

    // If we enforce STD3 rules, then ASCII characters other than LDH and dot are disallowed.
    bool disallowNonLDHDot = (options & UTS46Options.UseSTD3Rules) != 0;

    // Bitwise OR of every non-ASCII char seen; used as a cheap filter for the
    // contextual checks further down.
    char oredChars = (char)0;

    int index = start;
    int limit = start + length;
    do
    {
        char c = label[index];
        if (c <= 0x7f)
        {
            if (c == '.')
            {
                AddLabelError(info, IDNAError.LabelHasDot);
                label[index] = '\ufffd';
            }
            else if (disallowNonLDHDot && asciiData[c] < 0)
            {
                AddLabelError(info, IDNAError.Disallowed);
                label[index] = '\ufffd';
            }
        }
        else
        {
            oredChars |= c;
            if (disallowNonLDHDot && IsNonASCIIDisallowedSTD3Valid(c))
            {
                AddLabelError(info, IDNAError.Disallowed);
                label[index] = '\ufffd';
            }
            else if (c == 0xfffd)
            {
                AddLabelError(info, IDNAError.Disallowed);
            }
        }
    } while (++index < limit);

    // Check for a leading combining mark after other validity checks
    // so that we don't report IDNA.Error.DISALLOWED for the U+FFFD from here.
    // "Unsafe" is ok because unpaired surrogates were mapped to U+FFFD.
    int firstCodePoint = label.CodePointAt(start);
    if ((U_GET_GC_MASK(firstCodePoint) & U_GC_M_MASK) != 0)
    {
        AddLabelError(info, IDNAError.LeadingCombiningMark);
        label[start] = '\ufffd';
        if (firstCodePoint > 0xffff)
        {
            // Remove the trail surrogate of the supplementary code point.
            label.Remove(start + 1, 1);
            --length;
            if (ReferenceEquals(label, dest))
            {
                // The label was edited in place, so its extent in dest shrank too.
                --destLength;
            }
        }
    }

    if (HasCertainLabelErrors(info, severeErrors))
    {
        // If a Punycode label has severe errors,
        // then leave it but make sure it does not look valid.
        if (wasPunycode)
        {
            AddLabelError(info, IDNAError.InvalidAceLabel);
            return MarkBadACELabel(dest, destStart, destLength, toASCII, info);
        }
        return ReplaceLabel(dest, destStart, destLength, label, length);
    }

    // Do contextual checks only if we do not have U+FFFD from a severe error
    // because U+FFFD can make these checks fail.
    if ((options & UTS46Options.CheckBiDi) != 0 && (!IsBiDi(info) || IsOkBiDi(info)))
    {
        CheckLabelBiDi(label, start, length, info);
    }
    if ((options & UTS46Options.CheckContextJ) != 0
        && (oredChars & 0x200c) == 0x200c
        && !IsLabelOkContextJ(label, start, length))
    {
        AddLabelError(info, IDNAError.ContextJ);
    }
    if ((options & UTS46Options.CheckContextO) != 0 && oredChars >= 0xb7)
    {
        CheckLabelContextO(label, start, length, info);
    }

    if (toASCII)
    {
        if (wasPunycode)
        {
            // Leave a Punycode label unchanged if it has no severe errors.
            if (destLength > 63)
            {
                AddLabelError(info, IDNAError.LabelTooLong);
            }
            return destLength;
        }

        if (oredChars >= 0x80)
        {
            // Contains non-ASCII characters.
            StringBuilder punycode;
            try
            {
                punycode = Punycode.Encode(label.SubSequence(start, start + length), null);
            }
            catch (StringPrepParseException e)
            {
                throw new ICUException(e); // unexpected
            }
            punycode.Insert(0, "xn--");
            if (punycode.Length > 63)
            {
                AddLabelError(info, IDNAError.LabelTooLong);
            }
            return ReplaceLabel(dest, destStart, destLength, punycode, punycode.Length);
        }

        // all-ASCII label
        if (length > 63)
        {
            AddLabelError(info, IDNAError.LabelTooLong);
        }
    }

    return ReplaceLabel(dest, destStart, destLength, label, length);
#pragma warning restore 612, 618
}
/// <summary>
/// Performs the IDNA2003 ToASCII operation on a single label.
/// </summary>
/// <remarks>
/// If the label contains non-ASCII code points it is first processed with NamePrep.
/// An all-ASCII result is returned as is; otherwise it is Punycode-encoded, lower-cased
/// and prefixed with the ACE prefix.
/// </remarks>
/// <param name="src">Iterator over the label. It is read to the end and then reset to
/// its start.</param>
/// <param name="options">IDNA2003 option flags. <c>UseSTD3Rules</c> turns on the
/// LDH / leading-hyphen / trailing-hyphen verification; the value is also passed to
/// NamePrep (cast to <c>StringPrepOptions</c>).</param>
/// <returns>The ASCII form of the label.</returns>
/// <exception cref="StringPrepParseException">
/// Thrown when the processed label is empty, violates the STD3 ASCII rules (only when
/// they are enabled), is non-ASCII but already starts with the ACE prefix, or when the
/// result is longer than <c>MAX_LABEL_LENGTH</c>. Exceptions from NamePrep are not
/// caught here.
/// </exception>
public static StringBuffer ConvertToASCII(UCharacterIterator src, IDNA2003Options options)
{
    bool useSTD3ASCIIRules = (options & IDNA2003Options.UseSTD3Rules) != 0;

    // Step 1: find out whether the source contains only ASCII code points.
    bool srcIsASCII = true;
    int ch;
    while ((ch = src.Next()) != UCharacterIterator.DONE)
    {
        if (ch > 0x7f)
        {
            srcIsASCII = false;
        }
    }
    src.SetToStart();

    // Step 2 is performed only if the source contains non-ASCII code points.
    StringBuffer processOut = srcIsASCII
        ? new StringBuffer(src.GetText())
        : namePrep.Prepare(src, (StringPrepOptions)options);

    int poLen = processOut.Length;
    if (poLen == 0)
    {
        throw new StringPrepParseException("Found zero length label after NamePrep.",
            StringPrepErrorType.ZeroLengthLabel);
    }

    // Steps 3 & 4: re-evaluate on the processed text whether it is all ASCII and
    // whether every ASCII char is a letter, digit or hyphen (LDH).
    srcIsASCII = true;
    bool srcIsLDH = true;
    int failPos = -1; // index of the last non-LDH ASCII char seen, if any
    for (int j = 0; j < poLen; j++)
    {
        ch = processOut[j];
        if (ch > 0x7F)
        {
            srcIsASCII = false;
        }
        else if (!IsLDHChar(ch))
        {
            // Surrogates are not assembled here because LDH code points
            // are in the ASCII range only.
            srcIsLDH = false;
            failPos = j;
        }
    }

    if (useSTD3ASCIIRules)
    {
        // Verify 3a and 3b; the last argument is the error position reported to the caller.
        if (!srcIsLDH)
        {
            throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
                StringPrepErrorType.STD3ASCIIRulesError,
                processOut.ToString(),
                (failPos > 0) ? (failPos - 1) : failPos);
        }
        if (processOut[0] == HYPHEN)
        {
            throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
                StringPrepErrorType.STD3ASCIIRulesError,
                processOut.ToString(),
                0);
        }
        if (processOut[poLen - 1] == HYPHEN)
        {
            // poLen > 0 is guaranteed by the zero-length check above.
            throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
                StringPrepErrorType.STD3ASCIIRulesError,
                processOut.ToString(),
                poLen - 1);
        }
    }

    StringBuffer dest;
    if (srcIsASCII)
    {
        dest = processOut;
    }
    else
    {
        // Step 5: verify that the sequence does NOT begin with the ACE prefix.
        if (StartsWithPrefix(processOut))
        {
            throw new StringPrepParseException("The input starts with the ACE Prefix.",
                StringPrepErrorType.AcePrefixError,
                processOut.ToString(),
                0);
        }

        // Step 6: encode the sequence with Punycode.
        bool[] caseFlags = new bool[poLen];
        StringBuilder punyout = Punycode.Encode(processOut, caseFlags);

        // Convert all code points to lower case ASCII.
        StringBuffer lowerOut = ToASCIILower(punyout);

        // Step 7: prepend the ACE prefix, then append the encoded text.
        dest = new StringBuffer();
        dest.Append(ACE_PREFIX, 0, ACE_PREFIX.Length); // ICU4N: Checked 3rd parameter
        dest.Append(lowerOut);
    }

    // Step 8: enforce the maximum label length.
    if (dest.Length > MAX_LABEL_LENGTH)
    {
        throw new StringPrepParseException("The labels in the input are too long. Length > 63.",
            StringPrepErrorType.LabelTooLongError,
            dest.ToString(),
            0);
    }
    return dest;
}