//---------------------------------------------------------------------
// Public methods
//---------------------------------------------------------------------

// Runs the preparation stages over pSource in a fixed order:
// Map, Normalize, Prohibit, Bidirection. Each stage receives the
// result of the previous one together with the same pOption.
//
// pSource : text to prepare; null yields a new, empty UTF32String and
//           no stage is run.
// pOption : option set handed unchanged to every stage.
// Returns : the output of the final (Bidirection) stage.
public UTF32String Prepare(UTF32String pSource, IEncodingOption pOption)
{
    if (null == pSource)
    {
        return new UTF32String();
    }

    // Stage order is based on RFC3454.
    UTF32String afterMap = this.Map(pSource, pOption);                // Step 1: map
    UTF32String afterNormalize = this.Normalize(afterMap, pOption);   // Step 2: normalize
    UTF32String afterProhibit = this.Prohibit(afterNormalize, pOption); // Step 3: prohibit

    // Step 4: bidi; its result is the prepared string.
    return this.Bidirection(afterProhibit, pOption);
}
//---------------------------------------------------------------------
// Public methods
//---------------------------------------------------------------------

// Tells whether pSource starts with this converter's Prefix.
// A null or zero-length pSource is reported as false without
// consulting Prefix at all.
public bool IsBeginWithPrefix(UTF32String pSource)
{
    if (null == pSource)
    {
        return false;
    }

    if (0 == pSource.Length)
    {
        return false;
    }

    bool startsWithPrefix = pSource.StartsWith(this.Prefix);
    return startsWithPrefix;
}
//---------------------------------------------------------------------
// Public methods
//---------------------------------------------------------------------

// Encodes a whole name: the input is normalized, split into labels on
// Converter.SEPERATORS, each label is normalized and encoded on its
// own, and the encoded labels are joined with "." again.
//
// Throws ACEException when no converter is configured, when the
// normalized input is null, or when any label normalizes to null.
public string Encode(string pSource)
{
    // Nothing can be encoded without an ACE converter.
    if (null == m_converter)
    {
        throw new ACEException("No ace converter defined");
    }

    pSource = StringUtil.Normalize(pSource);
    if (null == pSource)
    {
        throw new ACEException("Encoding error, invalid input(null, Encode)");
    }

    StringBuilder joined = new StringBuilder();
    UTF32String wholeName = new UTF32String(pSource);
    EncodingOption option = new EncodingOption();
    UTF32String[] labels = wholeName.Split(Converter.SEPERATORS);

    int lastLabel = labels.Length - 1;
    for (int position = 0; position <= lastLabel; position++)
    {
        UTF32String label = StringUtil.Normalize(labels[position]);
        if (null == label)
        {
            throw new ACEException(string.Format("Encoding error, empty label: {0}", pSource));
        }

        joined.Append(this.Encode(label, option));

        // Based on RFC3492, FULL_STOP is the only separator allowed in
        // the encoded string; none is written after the last label.
        if (position < lastLabel)
        {
            joined.Append(".");
        }
    }

    return joined.ToString();
}
// Returns true when every code point of pSource passes
// IsCharDnsCompatible; scanning stops at the first one that does not.
// An empty pSource therefore yields true.
public static bool IsAllDnsCompatible(UTF32String pSource)
{
    int position = 0;
    while (position < pSource.Length)
    {
        bool compatible = IsCharDnsCompatible(pSource[position]);
        if (!compatible)
        {
            return false;
        }
        position++;
    }

    return true;
}
// Returns true when no code point of pSource is greater than 0x7F;
// scanning stops at the first code point above that limit.
// An empty pSource therefore yields true.
public static bool IsAllAscii(UTF32String pSource)
{
    int position = 0;
    while (position < pSource.Length)
    {
        if (pSource[position] > 0x7F)
        {
            return false;
        }
        position++;
    }

    return true;
}
// Decodes a whole name: the input is split on '.', each label is
// decoded and normalized on its own, and the decoded labels are joined
// with "." again.
//
// Returns null when no converter is configured or pSource is null.
// Throws ACEException when a decoded label normalizes to null.
public string Decode(string pSource)
{
    // Without a converter or without input there is nothing to decode.
    if ((null == m_converter) || (null == pSource))
    {
        return null;
    }

    StringBuilder joined = new StringBuilder();
    EncodingOption option = new EncodingOption();

    // The encoded form SHOULD ONLY contain FULL_STOP as separator (RFC3492).
    string[] labels = pSource.Split('.');

    int lastLabel = labels.Length - 1;
    for (int position = 0; position <= lastLabel; position++)
    {
        UTF32String decoded = StringUtil.Normalize(this.Decode(labels[position], option));
        if (null == decoded)
        {
            throw new ACEException(string.Format("Decoding error, empty label: {0}", pSource));
        }

        joined.Append(decoded.ToUTF16());

        // Based on RFC3492, FULL_STOP is the only separator written
        // after decoding; none follows the last label.
        if (position < lastLabel)
        {
            joined.Append(".");
        }
    }

    return joined.ToString();
}
// Bidi stage of Prepare (RFC3454 section 6). Only runs when
// EncodingOption.CHECK_BIDI is set; otherwise pSource is returned as is.
//
// Rules enforced:
//   * a string must not contain both right-to-left and left-to-right
//     code points;
//   * a string containing any right-to-left code point must begin and
//     end with a right-to-left code point.
//
// pSource : text to check; null or empty input is returned unchanged.
// pOption : option set; null is treated as "no option set", as Map does.
// Returns : pSource itself when the checks pass.
// Throws  : BidiCodePointException when either rule is violated.
private UTF32String Bidirection(UTF32String pSource, IEncodingOption pOption)
{
    // NOTE(review): this method has always tested DIRECTION_RIGHT for
    // "left to right" and DIRECTION_LEFT for "right to left"; that pairing
    // is kept here - confirm against the Direction declaration.
    Direction ltrDirection = Direction.DIRECTION_RIGHT;
    Direction rtlDirection = Direction.DIRECTION_LEFT;

    // check if we need to check bidi
    if ((null == pOption) || !pOption.IsOptionSet(EncodingOption.CHECK_BIDI))
    {
        return pSource;
    }

    // Nothing to inspect; also keeps pSource[0] below from failing.
    if ((null == pSource) || (0 == pSource.Length))
    {
        return pSource;
    }

    int cPoint = pSource[0];
    Direction firstCharDirection = m_bidirectionMapping.GetDirection(cPoint);
    Direction lastCharDirection = firstCharDirection;
    bool bLeftToRight = false;
    bool bRightToLeft = false;

    // Scan every code point, the first one included, so that "contains
    // any" really covers the whole string. After the loop
    // lastCharDirection holds the direction of the final code point.
    for (int index = 0; index < pSource.Length; index++)
    {
        cPoint = pSource[index];
        lastCharDirection = m_bidirectionMapping.GetDirection(cPoint);

        if (lastCharDirection == ltrDirection)
        {
            bLeftToRight = true;
        }
        else if (lastCharDirection == rtlDirection)
        {
            bRightToLeft = true;
        }
    }

    // Based on RFC3454 section 6, requirement 2: both directions present.
    if (bLeftToRight && bRightToLeft)
    {
        throw new BidiCodePointException(string.Format("Invalid bidi code point found[Can't have both 'RightToLeft' and 'LeftToRight' code point]: {0:X8} in {1}\r\n", cPoint, pSource.ToString()));
    }

    // Based on RFC3454 section 6, requirement 3: with any right-to-left
    // code point present, first and last must both be right-to-left.
    if (bRightToLeft && ((firstCharDirection != rtlDirection) || (lastCharDirection != rtlDirection)))
    {
        throw new BidiCodePointException(string.Format("Invalid bidi code point found[first char and last char of string MUST be both 'RightToLeft']: {0:X8} in {1}\r\n", cPoint, pSource.ToString()));
    }

    // Done.
    return pSource;
}
//---------------------------------------------------------------------
// Public methods
//---------------------------------------------------------------------

// Encodes pSource in three steps: convert it to UTF-16, pass that
// through this.Compress, then Base32-encode the compressed bytes.
// pCaseFlag is not used by this converter.
public override string Encode(UTF32String pSource, bool[] pCaseFlag)
{
    // UTF-16 text -> compressed bytes
    byte[] packed = this.Compress(pSource.ToUTF16());

    // compressed bytes -> Base32 text
    string encoded = Base32.Encode(packed);
    return encoded;
}
// Decodes a single label.
//
// Steps (numbering as in the original comments):
//   #1-2  non-ASCII input is run through m_preparer, when one is set;
//   #3-5  m_converter, when set, decodes the label;
//   #6-7  with DECODE_DOUBLE_CHECK the result is re-encoded and must
//         match pSource, ignoring case;
//   #8    the decoded label is returned.
//
// Any exception raised on the way is swallowed and the input is returned
// unchanged, unless ALLOW_DECODE_FAIL is set, in which case the
// exception is propagated to the caller.
//
// Returns : the decoded label; the untouched input wrapped in a
//           UTF32String when decoding failed; null when no converter is
//           set and no double check is requested.
private UTF32String Decode(string pSource, IEncodingOption pOption)
{
    UTF32String decoded = null;

    try
    {
        UTF32String source = new UTF32String(pSource);

        // Step #1-2
        if (!Converter.IsAllAscii(source) && (null != m_preparer))
        {
            source = m_preparer.Prepare(source, pOption);
        }

        // Step #3-5
        if (null != m_converter)
        {
            decoded = m_converter.Decode(source.ToUTF16(), new bool[source.Length]);
        }

        // Step #6-7
        if (pOption.IsOptionSet(EncodingOption.DECODE_DOUBLE_CHECK))
        {
            string check = this.Encode(decoded, pOption);

            // Encoded labels are compared as identifiers, not as
            // linguistic text: an ordinal, case-insensitive comparison
            // keeps the result independent of the current culture.
            if (0 != string.Compare(check, pSource, StringComparison.OrdinalIgnoreCase))
            {
                throw new ACEException("Decoding round trip check failed");
            }
        }
    }
    catch (Exception)
    {
        // Based on RFC3492, decode never fails.
        // check if we need to allow decode fail
        if (pOption.IsOptionSet(EncodingOption.ALLOW_DECODE_FAIL))
        {
            // Bare rethrow keeps the original stack trace.
            throw;
        }

        decoded = new UTF32String(pSource);
    }

    // Step #8
    return decoded;
}
// Trims pValue and collapses "nothing there" to null: the result is
// null when pValue is null, when Trim() returns null, or when the
// trimmed value has length zero. Otherwise the trimmed value is returned.
public static UTF32String Normalize(UTF32String pValue)
{
    if (null == pValue)
    {
        return null;
    }

    UTF32String trimmed = pValue.Trim();
    if (null == trimmed)
    {
        return null;
    }

    // An empty result is reported as null as well.
    return (0 == trimmed.Length) ? null : trimmed;
}
// Prohibition stage of Prepare: rejects the input as soon as one code
// point is reported as prohibited by m_prohibitionMapping.
//
// pSource : text to check; null is returned unchanged.
// pOption : not consulted by this stage.
// Returns : pSource itself when no prohibited code point is found.
// Throws  : ProhibitedCodePointException naming the offending code
//           point (as 8 hex digits) and the text it was found in.
private UTF32String Prohibit(UTF32String pSource, IEncodingOption pOption)
{
    // valid?
    if (null == pSource)
    {
        return pSource;
    }

    // for each char
    for (int index = 0; index < pSource.Length; index++)
    {
        int cPoint = pSource[index];

        // check if there is any prohibited code point
        if (m_prohibitionMapping.IsProhibited(cPoint))
        {
            // The hex specifier belongs to the code point; it has no
            // effect on the string argument it used to be attached to.
            throw new ProhibitedCodePointException(string.Format("Prohibited code point found: {0:X8} in {1}\r\n", cPoint, pSource.ToString()));
        }
    }

    return pSource;
}
// This converter applies no validation of its own: every input is
// accepted, and neither pSource nor pOption is inspected.
public override bool Validate(UTF32String pSource, IEncodingOption pOption)
{
    return true;
}
// Punycode-encodes pSource (structure follows the RFC3492 encoder).
//
// pSource   : code points to encode; must be non-null and non-empty.
// pCaseFlag : optional; when given, pCaseFlag[i] is passed to
//             EncodeBasic for basic code point i and to EncodeDigit for
//             the final digit written for non-basic code point i.
// Returns   : the encoded string (without any ACE prefix).
// Throws    : PunycodeException on null/empty input, on arithmetic
//             overflow of the delta, or when the output reaches
//             Punycode.PUNYCODE_MAX_LENGTH.
public override string Encode(UTF32String pSource, bool[] pCaseFlag)
{
    // valid?
    if ((null == pSource) || (0 == pSource.Length))
    {
        throw new PunycodeException("Invalid input(null)");
    }

    // Initializes
    StringBuilder buffer = new StringBuilder();
    int bias = (int)Punycode.INITIAL_BIAS;
    int currentLargestCP = (int)Punycode.INITIAL_N;
    int delta = 0;

    // add all the basic code points to the output string
    for (int i = 0; i < pSource.Length; i++)
    {
        int inputChar = pSource[i];
        if (Converter.IsAscii(inputChar))
        {
            if (pCaseFlag != null)
            {
                inputChar = PunyConverter.EncodeBasic(inputChar, pCaseFlag[i]);
            }

            buffer.Append((char)inputChar);
        }
    }

    int basicCPsCount = buffer.Length;
    int cpsHandled = basicCPsCount;

    // The delimiter is only written when basic code points precede it.
    if (basicCPsCount > 0)
    {
        buffer.Append((char)Punycode.DELIMITER);
    }

    // Main encoding loop
    while (cpsHandled < pSource.Length)
    {
        // All non-basic code points < currentLargestCP have been handled
        // already. Find the next larger one.
        int nextLargerCP = int.MaxValue;
        for (int index = 0; index < pSource.Length; ++index)
        {
            int inputChar = pSource[index];
            if (inputChar >= currentLargestCP && inputChar < nextLargerCP)
            {
                nextLargerCP = inputChar;
            }
        }

        // Increase delta enough to advance the decoder's
        // <currentLargestCP, i> state to <nextLargerCP, 0>,
        // but guard against overflow.
        if ((nextLargerCP - currentLargestCP) > ((int.MaxValue - delta) / (cpsHandled + 1)))
        {
            throw new PunycodeException("Punycode Encoding Overflow");
        }

        delta += (nextLargerCP - currentLargestCP) * (cpsHandled + 1);
        currentLargestCP = nextLargerCP;

        for (int index = 0; index < pSource.Length; ++index)
        {
            int inputChar = pSource[index];

            if (inputChar < currentLargestCP)
            {
                // A signed int never wraps to 0 from the positive side,
                // so the overflow has to be detected before incrementing.
                if (delta == int.MaxValue)
                {
                    throw new PunycodeException("Output too large");
                }

                ++delta;
            }

            if (inputChar == currentLargestCP)
            {
                // Write delta as a generalized variable-length integer.
                int currentDelta = delta;
                for (int currentBase = (int)Punycode.BASE; ; currentBase += (int)Punycode.BASE)
                {
                    if (buffer.Length >= (int)Punycode.PUNYCODE_MAX_LENGTH)
                    {
                        throw new PunycodeException("Output too long");
                    }

                    // calculate the threshold
                    int threshold;
                    if (currentBase <= bias)
                    {
                        threshold = (int)Punycode.T_MIN;
                    }
                    else if (currentBase >= (bias + (int)Punycode.T_MAX))
                    {
                        threshold = (int)Punycode.T_MAX;
                    }
                    else
                    {
                        threshold = currentBase - bias;
                    }

                    if (currentDelta < threshold)
                    {
                        // Final digit: it carries the uppercase flag of
                        // the code point being encoded.
                        bool ucFlag = (pCaseFlag != null) && pCaseFlag[index];
                        buffer.Append((char)PunyConverter.EncodeDigit(currentDelta, ucFlag));
                        break;
                    }

                    // Intermediate digit, then carry the rest forward.
                    int outputDelta = threshold + (currentDelta - threshold) % ((int)Punycode.BASE - threshold);
                    buffer.Append((char)PunyConverter.EncodeDigit(outputDelta, false));
                    currentDelta = (currentDelta - threshold) / ((int)Punycode.BASE - threshold);
                }

                // Adapt the bias
                bias = PunyConverter.Adapt(delta, cpsHandled + 1, cpsHandled == basicCPsCount);
                delta = 0;
                ++cpsHandled;
            }
        }

        ++delta;
        ++currentLargestCP;
    }

    if (buffer.Length > (int)Punycode.PUNYCODE_MAX_LENGTH)
    {
        throw new PunycodeException(string.Format("Output too long: {0}", buffer.Length));
    }

    // Done.
    return buffer.ToString();
}
//---------------------------------------------------------------------
// Private members
//---------------------------------------------------------------------

// Mapping stage of Prepare: builds a new string from pSource, one code
// point at a time.
//
//   * Unless ALLOW_UNASSIGNED is set, a code point reported as
//     unassigned by m_unassignedMapping aborts the mapping.
//   * Code points reported by m_nothingMapping are dropped.
//   * Every other code point is replaced by its case mapping, taken from
//     m_normalizedCaseMapping when USE_NORMALIZE is set and from
//     m_unnormalizedCaseMapping otherwise; a code point without a
//     mapping is copied as is.
//
// pSource : text to map; null yields an empty result.
// pOption : option set; null is treated as "no option set".
// Returns : the newly built, mapped string.
// Throws  : UnassignedCodePointException naming the offending code
//           point (as 8 hex digits) and the text it was found in.
private UTF32String Map(UTF32String pSource, IEncodingOption pOption)
{
    // Initializes
    UTF32String mapped = new UTF32String();
    bool bAllowUnassigned = (pOption != null) && pOption.IsOptionSet(EncodingOption.ALLOW_UNASSIGNED);
    bool bNormalize = (pOption != null) && pOption.IsOptionSet(EncodingOption.USE_NORMALIZE);

    // valid?
    if (null == pSource)
    {
        return mapped;
    }

    // for each char
    for (int index = 0; index < pSource.Length; index++)
    {
        int cPoint = pSource[index];

        // Unassigned code points are an error only when the caller has
        // NOT asked for them to be allowed.
        if (!bAllowUnassigned && m_unassignedMapping.IsUnassigned(cPoint))
        {
            throw new UnassignedCodePointException(string.Format("Unassigned code point found: {0:X8} in {1}\r\n", cPoint, pSource.ToString()));
        }

        // check if there is any map nothing
        if (m_nothingMapping.IsMapNothing(cPoint))
        {
            continue;
        }

        // check the map
        int[] mPoint;
        if (bNormalize)
        {
            mPoint = m_normalizedCaseMapping.Mapping(cPoint);
        }
        else
        {
            mPoint = m_unnormalizedCaseMapping.Mapping(cPoint);
        }

        // No mapping available: the code point maps to itself.
        if ((null == mPoint) || (0 == mPoint.Length))
        {
            mPoint = new int[1] { cPoint };
        }

        // add the mapping to the output
        for (int mIndex = 0; mIndex < mPoint.Length; mIndex++)
        {
            mapped.Append(mPoint[mIndex]);
        }
    }

    // Done
    return mapped;
}
// Counterpart of Prepare. RFC3454 defines no such operation, so the
// input is handed back untouched and pOption is not consulted.
// Reserved for future extension.
public UTF32String Unprepare(UTF32String pSource, IEncodingOption pOption)
{
    UTF32String untouched = pSource;
    return untouched;
}
//---------------------------------------------------------------------
// Abstract methods
//---------------------------------------------------------------------

// Encodes pSource into its string form. No implementation is given
// here; each concrete converter supplies its own.
//
// pSource   : the code points to encode.
// pCaseFlag : NOTE(review): presumably one flag per input code point
//             marking uppercase - the Punycode override visible in this
//             file indexes it by input position and tolerates null;
//             confirm for other implementations.
// Returns   : the encoded text.
public abstract string Encode(UTF32String pSource, bool[] pCaseFlag);
// Reports whether pSource is acceptable to the concrete converter under
// the given options. No implementation is given here; the only override
// visible in this file accepts everything (always returns true).
public abstract bool Validate(UTF32String pSource, IEncodingOption pOption);
// Normalization stage of Prepare. Currently a pass-through: pSource is
// returned exactly as received and pOption is not consulted.
private UTF32String Normalize(UTF32String pSource, IEncodingOption pOption)
{
    UTF32String unchanged = pSource;
    return unchanged;
}
//---------------------------------------------------------------------
// Private methods
//---------------------------------------------------------------------

// Encodes a single label.
//
// Steps (numbering as in the original comments):
//   #1    determine whether the label is pure ASCII;
//   #2    a non-ASCII label is run through m_preparer, when one is set;
//   #3    with USE_STD3_RULES the label must be DNS compatible and must
//         not begin or end with a hyphen;
//   #4-7  a non-ASCII label must not already begin with the ACE prefix;
//         it is encoded by m_converter and the prefix is put in front;
//         an ASCII label is emitted as is;
//   #8    the result must not exceed Converter.LABEL_MAX_LENGTH.
//
// Throws Std3RuleCodePointException for STD 3 violations and
// ACEException for a pre-existing prefix or an over-long result.
private string Encode(UTF32String pSource, IEncodingOption pOption)
{
    // Step #1: set the flag, all ascii?
    bool bAllAscii = Converter.IsAllAscii(pSource);

    // Step #2: start from the raw label so that the STD 3 checks below
    // also have something to inspect when the label is pure ASCII and
    // preparation is skipped.
    UTF32String prepared = pSource;
    if (!bAllAscii && (null != m_preparer))
    {
        prepared = m_preparer.Prepare(pSource, pOption);
    }

    // Step #3: check if we need to apply the rules
    if (pOption.IsOptionSet(EncodingOption.USE_STD3_RULES))
    {
        // failed on Dns compatible?
        if (!Converter.IsAllDnsCompatible(prepared))
        {
            throw new Std3RuleCodePointException(string.Format("The input does not conform to the STD 3 ASCII rules(DNS Compatible): {0}", prepared.ToString()));
        }

        if (0 < prepared.Length)
        {
            // first char is hyphen?
            if (prepared[0] == Converter.CHAR_HYPHEN)
            {
                throw new Std3RuleCodePointException(string.Format("The input does not conform to the STD 3 ASCII rules(Hyphen at the beginning): {0}", prepared.ToString()));
            }

            // last char is hyphen?
            if (prepared[prepared.Length - 1] == Converter.CHAR_HYPHEN)
            {
                throw new Std3RuleCodePointException(string.Format("The input does not conform to the STD 3 ASCII rules(Hyphen at the end): {0}", prepared.ToString()));
            }
        }
    }

    string encoded;

    // Step #4: check if it's all ascii already
    if (!bAllAscii)
    {
        // Step #5: check if it begin with the 'prefix'
        if (m_converter.IsBeginWithPrefix(prepared))
        {
            throw new ACEException(string.Format("The input can't begin with an ACE prefix: {0}", pSource.ToString()));
        }

        // Step #6
        encoded = m_converter.Encode(prepared, new bool[prepared.Length]);

        // Step #7: insert the prefix
        encoded = encoded.Insert(0, m_converter.Prefix);
    }
    else
    {
        encoded = pSource.ToUTF16();
    }

    // Step #8
    if (encoded.Length > Converter.LABEL_MAX_LENGTH)
    {
        throw new ACEException(string.Format("Encoded name too long: {0}", encoded.Length));
    }

    // Done
    return encoded;
}
// Punycode-decodes pSource (structure follows the RFC3492 decoder).
//
// pSource   : encoded text; a leading Prefix, matched ordinally and
//             ignoring case, is stripped first. Must not be null/empty
//             after normalization and must not end with the delimiter.
// pCaseFlag : optional; when given, receives one flag per decoded code
//             point. Flags of code points that move because of an
//             insertion are shifted along with them whenever the array
//             has room for the shift.
// Returns   : the decoded code points.
// Throws    : PunycodeException on malformed input, on arithmetic
//             overflow, on over-long input/output, on a code point above
//             Punycode.MAX_UNICODE, or when a decoded code point is a
//             separator.
public override UTF32String Decode(string pSource, bool[] pCaseFlag)
{
    pSource = StringUtil.Normalize(pSource);
    if (null == pSource)
    {
        throw new PunycodeException("Invalid input parameter(null)");
    }

    // Strip the prefix. The comparison is ordinal so that it does not
    // depend on the current culture, and case-insensitive so that an
    // upper-case prefix is recognized as well.
    if (pSource.StartsWith(this.Prefix, System.StringComparison.OrdinalIgnoreCase))
    {
        pSource = StringUtil.Normalize(pSource.Substring(this.Prefix.Length));
    }

    // valid?
    if (null == pSource)
    {
        throw new PunycodeException("Invalid input parameter(null)");
    }

    // check if it's delimiter
    if (pSource[pSource.Length - 1] == (char)Punycode.DELIMITER)
    {
        throw new PunycodeException("Invalid input format: string ends with delimiter");
    }

    // Initializes
    int opIndex = 0;
    int bias = (int)Punycode.INITIAL_BIAS;
    int decodedVal = (int)Punycode.INITIAL_N;
    UTF32String decoded = new UTF32String();
    int index;

    // Handle the basic code points: let basicCPsCount be the number of
    // input code points before the last delimiter, or 0 if there is
    // none, then copy that many code points to the output.
    int basicCPsCount = 0;
    for (index = 0; index < pSource.Length; index++)
    {
        if (PunyConverter.IsDelimiter(pSource[index]))
        {
            basicCPsCount = index;
        }
    }

    // check string length
    if (basicCPsCount > (int)Punycode.PUNYCODE_MAX_LENGTH)
    {
        throw new PunycodeException(string.Format("Input string too long: {0}", basicCPsCount));
    }

    for (index = 0; index < basicCPsCount; index++)
    {
        char inputChar = pSource[index];

        if (pCaseFlag != null)
        {
            pCaseFlag[index] = PunyConverter.IsFlagged(inputChar);
        }

        // check if it's ascii
        if (!PunyConverter.IsAscii(inputChar))
        {
            throw new PunycodeException(string.Format("Decoding error, bad char in input string: {0}", inputChar));
        }

        decoded.Append((int)inputChar);
    }

    // Main decoding loop: start just after the last delimiter if any
    // basic code points were copied; start at the beginning otherwise.
    index = (basicCPsCount > 0) ? basicCPsCount + 1 : 0;

    while (index < pSource.Length)
    {
        // index is the position of the next character to be consumed.
        // Decode a generalized variable-length integer into delta, which
        // gets added to opIndex. The overflow checking is easier if
        // opIndex is increased as we go and its starting value is
        // subtracted at the end to obtain delta.
        int oldIndex = opIndex;
        int weight = 1;
        int currentBase = (int)Punycode.BASE;

        for ( ; ; )
        {
            if (index >= pSource.Length)
            {
                throw new PunycodeException("Bad input string for decoding");
            }

            int digit = PunyConverter.DecodeDigit(pSource[index++]);

            // valid?
            if (digit >= (int)Punycode.BASE)
            {
                throw new PunycodeException("Invalid input string for decoding");
            }

            if (digit > (int.MaxValue - opIndex) / weight)
            {
                throw new PunycodeException("Punycode decoding overflow");
            }

            opIndex += digit * weight;

            // calculate the threshold
            int threshold;
            if (currentBase <= bias)
            {
                threshold = (int)Punycode.T_MIN;
            }
            else
            {
                threshold = ((currentBase - bias) >= (int)Punycode.T_MAX) ? (int)Punycode.T_MAX : (currentBase - bias);
            }

            // Finished?
            if (digit < threshold)
            {
                break;
            }

            // check if weight is valid
            if (weight > (int.MaxValue / ((int)Punycode.BASE - threshold)))
            {
                throw new PunycodeException(string.Format("Decoding overflow, Invalid weight: {0}", weight));
            }

            // new weight
            weight *= ((int)Punycode.BASE - threshold);
            currentBase += (int)Punycode.BASE;
        }

        // Adapt the bias
        int delta = (oldIndex == 0) ? opIndex / (int)Punycode.DAMP : (opIndex - oldIndex) >> 1;
        delta += delta / (decoded.Length + 1);
        for (bias = 0; delta > (int)Punycode.CUTOFF; bias += (int)Punycode.BASE)
        {
            delta /= (int)Punycode.LOBASE;
        }

        // new bias
        bias += ((int)Punycode.LOBASE + 1) * delta / (delta + (int)Punycode.SKEW);

        // opIndex was supposed to wrap around from decoded.Length + 1 to
        // 0, incrementing decodedVal each time, so we fix that now.
        if (opIndex / (decoded.Length + 1) > (int.MaxValue - decodedVal))
        {
            throw new PunycodeException(string.Format("Decooding overflow, invalid op index:{0}", opIndex));
        }

        // calculate new index and value
        decodedVal += opIndex / (decoded.Length + 1);
        opIndex %= (decoded.Length + 1);

        // valid?
        if (decoded.Length >= (int)Punycode.PUNYCODE_MAX_LENGTH)
        {
            throw new PunycodeException(string.Format("Decoding overflow, decoded string too long:{0}", decoded.Length));
        }

        // Case of last character determines uppercase flag.
        if (pCaseFlag != null)
        {
            // The new code point is inserted at opIndex, so the flags of
            // everything from opIndex onwards move one slot to the right,
            // exactly like the code points themselves. The shift is
            // skipped when the caller's array has no room for it.
            int tail = decoded.Length - opIndex;
            if ((tail > 0) && (decoded.Length < pCaseFlag.Length))
            {
                System.Array.Copy(pCaseFlag, opIndex, pCaseFlag, opIndex + 1, tail);
            }

            pCaseFlag[opIndex] = PunyConverter.IsFlagged(pSource[index - 1]);
        }

        // check if the number corresponds to a valid unicode character
        if (decodedVal > (int)Punycode.MAX_UNICODE)
        {
            throw new PunycodeException(string.Format("Decoding overflow, decoded code point out of range: U{0:X8}", decodedVal));
        }

        // A decoded separator would change the label structure.
        if (Converter.IsSeperator(decodedVal))
        {
            throw new PunycodeException(string.Format("Decoding error, delimiter found: U{0:X8}", decodedVal));
        }

        // Insert decodedVal at position opIndex of the output.
        decoded.Insert(opIndex++, new int[1] { decodedVal });
    }

    // return the decoded
    return decoded;
}