예제 #1
0
        //---------------------------------------------------------------------
        // Private members
        //---------------------------------------------------------------------
        /// <summary>
        /// Applies the mapping step to <paramref name="pSource"/>: code points
        /// that the map-to-nothing table reports are dropped, and every other
        /// code point is replaced by the entry of the selected case-mapping
        /// table (or kept unchanged when that table has no entry for it).
        /// </summary>
        /// <param name="pSource">
        /// Code points to map. <c>null</c> yields an empty result.
        /// </param>
        /// <param name="pOption">
        /// Optional flags. <c>ALLOW_UNASSIGNED</c> disables the unassigned
        /// code point check; <c>USE_NORMALIZE</c> selects the normalized
        /// case-mapping table instead of the unnormalized one. <c>null</c> is
        /// treated as "no option set".
        /// </param>
        /// <returns>A new <see cref="UTF32String"/> holding the mapped code points.</returns>
        /// <exception cref="UnassignedCodePointException">
        /// An unassigned code point was found and <c>ALLOW_UNASSIGNED</c> is not set.
        /// </exception>
        private UTF32String Map(UTF32String pSource, IEncodingOption pOption)
        {
            UTF32String mapped = new UTF32String();

            bool bAllowUnassigned = (pOption != null) && pOption.IsOptionSet(EncodingOption.ALLOW_UNASSIGNED);
            bool bNormalize       = (pOption != null) && pOption.IsOptionSet(EncodingOption.USE_NORMALIZE);

            // Nothing to map.
            if (null == pSource)
            {
                return mapped;
            }

            for (int index = 0; index < pSource.Length; index++)
            {
                int cPoint = pSource[index];

                // Unassigned code points are an error unless the caller
                // explicitly allowed them. (The original test was inverted and
                // only threw when ALLOW_UNASSIGNED was set.)
                if (!bAllowUnassigned && m_unassignedMapping.IsUnassigned(cPoint))
                {
                    // Code point in hex, followed by the offending source text.
                    throw new UnassignedCodePointException(string.Format("Unassigned code point found: {0:X8} in {1}\r\n", cPoint, pSource.ToString()));
                }

                // Code points that map to nothing are removed from the output.
                if (m_nothingMapping.IsMapNothing(cPoint))
                {
                    continue;
                }

                // Look the code point up in the table selected by USE_NORMALIZE.
                int[] mPoint = bNormalize
                    ? m_normalizedCaseMapping.Mapping(cPoint)
                    : m_unnormalizedCaseMapping.Mapping(cPoint);

                // No mapping entry: the code point maps to itself.
                if ((null == mPoint) || (0 == mPoint.Length))
                {
                    mPoint = new int[] { cPoint };
                }

                // A single code point may map to several; append all of them.
                for (int mIndex = 0; mIndex < mPoint.Length; mIndex++)
                {
                    mapped.Append(mPoint[mIndex]);
                }
            }

            return mapped;
        }
예제 #2
0
        /// <summary>
        /// Decodes a Punycode string into its code points.
        /// </summary>
        /// <param name="pSource">
        /// Text to decode. It is passed through <c>StringUtil.Normalize</c>, and
        /// a leading <c>Prefix</c> is removed when present.
        /// </param>
        /// <param name="pCaseFlag">
        /// Optional array that receives one flag per decoded code point: the
        /// result of <c>PunyConverter.IsFlagged</c> for the input character that
        /// produced it. Pass <c>null</c> to skip. The caller must size it to
        /// hold at least as many entries as code points are decoded.
        /// </param>
        /// <returns>The decoded code points.</returns>
        /// <exception cref="PunycodeException">
        /// The input is null after normalization, ends with the delimiter,
        /// is too long, contains a non-ASCII basic code point or an invalid
        /// digit, overflows during decoding, or decodes to a code point that
        /// is out of range or is a separator.
        /// </exception>
        public override UTF32String Decode(string pSource, bool[] pCaseFlag)
        {
            int         decodedVal    = 0;
            int         opIndex       = 0;
            int         bias          = 0;
            int         basicCPsCount = 0;
            int         index         = 0;
            int         oldIndex      = 0;
            int         weight        = 0;
            int         currentBase   = 0;
            int         delta         = 0;
            int         digit         = 0;
            int         threshold     = 0;
            UTF32String decoded       = null;

            pSource = StringUtil.Normalize(pSource);
            if (null == pSource)
            {
                throw new PunycodeException("Invalid input parameter(null)");
            }

            // Strip the prefix. Ordinal comparison keeps the match exact so
            // that Substring(Prefix.Length) removes precisely the prefix; a
            // culture-sensitive match may skip ignorable characters.
            if (pSource.StartsWith(this.Prefix, System.StringComparison.Ordinal))
            {
                pSource = StringUtil.Normalize(pSource.Substring(this.Prefix.Length));
            }

            // valid?
            if (null == pSource)
            {
                throw new PunycodeException("Invalid input parameter(null)");
            }

            // A trailing delimiter is malformed. The length guard keeps an
            // empty string from indexing position -1.
            if ((pSource.Length > 0) && (pSource[pSource.Length - 1] == (char)Punycode.DELIMITER))
            {
                throw new PunycodeException("Invalid input format: string ends with delimiter");
            }

            // Initializes
            opIndex    = 0;
            bias       = (int)Punycode.INITIAL_BIAS;
            decodedVal = (int)Punycode.INITIAL_N;
            decoded    = new UTF32String();

            // Handle the basic code points: basicCPsCount becomes the number
            // of input characters before the last delimiter, or 0 if there is
            // none; those characters are then copied to the output.
            for (basicCPsCount = 0, index = 0; index < pSource.Length; index++)
            {
                if (PunyConverter.IsDelimiter(pSource[index]))
                {
                    basicCPsCount = index;
                }
            }

            // check string length
            if (basicCPsCount > (int)Punycode.PUNYCODE_MAX_LENGTH)
            {
                throw new PunycodeException(string.Format("Input string too long: {0}", basicCPsCount));
            }

            // Copy the basic code points verbatim.
            for (index = 0; index < basicCPsCount; index++)
            {
                char inputChar = pSource[index];

                if (pCaseFlag != null)
                {
                    pCaseFlag[index] = PunyConverter.IsFlagged(inputChar);
                }

                // Basic code points must be ASCII.
                if (!PunyConverter.IsAscii(inputChar))
                {
                    throw new PunycodeException(string.Format("Decoding error, bad char in input string: {0}", inputChar));
                }

                decoded.Append((int)inputChar);
            }

            // Main decoding loop: start just after the last delimiter if any
            // basic code points were copied; start at the beginning otherwise.
            index = (basicCPsCount > 0) ? basicCPsCount + 1 : 0;
            while (index < pSource.Length)
            {
                // index is the position of the next input character to
                // consume. A generalized variable-length integer is decoded
                // and accumulated directly into opIndex; the amount added is
                // recovered afterwards as (opIndex - oldIndex), which makes
                // the overflow checks simpler.
                oldIndex    = opIndex;
                weight      = 1;
                currentBase = (int)Punycode.BASE;

                for ( ; ; )
                {
                    if (index >= pSource.Length)
                    {
                        throw new PunycodeException("Bad input string for decoding");
                    }

                    digit = PunyConverter.DecodeDigit(pSource[index++]);

                    // valid?
                    if (digit >= (int)Punycode.BASE)
                    {
                        throw new PunycodeException("Invalid input string for decoding");
                    }

                    // Guard opIndex += digit * weight against overflow.
                    if (digit > (int.MaxValue - opIndex) / weight)
                    {
                        throw new PunycodeException("Punycode decoding overflow");
                    }

                    opIndex += digit * weight;

                    // Threshold: (currentBase - bias) clamped to [T_MIN, T_MAX].
                    if (currentBase <= bias)
                    {
                        threshold = (int)Punycode.T_MIN;
                    }
                    else
                    {
                        threshold = ((currentBase - bias) >= (int)Punycode.T_MAX) ? (int)Punycode.T_MAX : (currentBase - bias);
                    }

                    // A digit below the threshold terminates the integer.
                    if (digit < threshold)
                    {
                        break;
                    }

                    // Guard weight *= (BASE - threshold) against overflow.
                    if (weight > (int.MaxValue / ((int)Punycode.BASE - threshold)))
                    {
                        throw new PunycodeException(string.Format("Decoding overflow, Invalid weight: {0}", weight));
                    }

                    // new weight
                    weight      *= ((int)Punycode.BASE - threshold);
                    currentBase += (int)Punycode.BASE;
                }

                // Adapt the bias. When oldIndex is 0 the decoded amount equals
                // opIndex, so both branches scale (opIndex - oldIndex).
                delta  = (oldIndex == 0) ? opIndex / (int)Punycode.DAMP : (opIndex - oldIndex) >> 1;
                delta += delta / (decoded.Length + 1);

                for (bias = 0; delta > (int)Punycode.CUTOFF; bias += (int)Punycode.BASE)
                {
                    delta /= (int)Punycode.LOBASE;
                }

                // new bias
                bias += ((int)Punycode.LOBASE + 1) * delta / (delta + (int)Punycode.SKEW);

                // opIndex was supposed to wrap around from decoded.Length + 1
                // to 0, incrementing decodedVal each time, so fix that now.
                if (opIndex / (decoded.Length + 1) > (int.MaxValue - decodedVal))
                {
                    throw new PunycodeException(string.Format("Decooding overflow, invalid op index:{0}", opIndex));
                }

                // calculate new index and value
                decodedVal += opIndex / (decoded.Length + 1);
                opIndex    %= (decoded.Length + 1);

                // valid?
                if (decoded.Length >= (int)Punycode.PUNYCODE_MAX_LENGTH)
                {
                    throw new PunycodeException(string.Format("Decoding overflow, decoded string too long:{0}", decoded.Length));
                }

                // Case of the last consumed character determines the flag.
                // The new code point is inserted at opIndex, so the flags of
                // everything at or after that position must move up by one
                // first; otherwise they no longer line up with the output.
                if (pCaseFlag != null)
                {
                    int tailCount = decoded.Length - opIndex;
                    if (tailCount > 0)
                    {
                        // Array.Copy handles the overlapping ranges correctly.
                        System.Array.Copy(pCaseFlag, opIndex, pCaseFlag, opIndex + 1, tailCount);
                    }

                    pCaseFlag[opIndex] = PunyConverter.IsFlagged(pSource[index - 1]);
                }

                // check if the number corresponds to a valid unicode character
                if (decodedVal > (int)Punycode.MAX_UNICODE)
                {
                    throw new PunycodeException(string.Format("Decoding overflow, decoded code point out of range: U{0:X8}", decodedVal));
                }

                // A decoded separator is rejected.
                if (Converter.IsSeperator(decodedVal))
                {
                    throw new PunycodeException(string.Format("Decoding error, delimiter found: U{0:X8}", decodedVal));
                }

                // Insert decodedVal at position opIndex of the output and
                // advance past it.
                decoded.Insert(opIndex++, new int[1] {
                    decodedVal
                });
            }

            // return the decoded
            return(decoded);
        }