Beispiel #1
0
        /// <summary>
        /// Maps <paramref name="src"/> with <c>Map</c>, then scans the mapped text for prohibited
        /// code points and verifies the BiDi requirements before handing the text back.
        /// </summary>
        /// <param name="src">The text to prepare.</param>
        /// <param name="options">Options passed through unchanged to <c>Map</c>.</param>
        /// <returns>A new <see cref="StringBuffer"/> wrapping the mapped text.</returns>
        /// <exception cref="StringPrepParseException">
        /// Thrown with <see cref="StringPrepErrorType.ProhibitedError"/> when the mapped text contains a
        /// code point of the prohibited set (U+0020 is exempt), or with
        /// <see cref="StringPrepErrorType.CheckBiDiError"/> when the BiDi requirements are violated.
        /// </exception>
        public StringBuffer Prepare(String src, StringPrepOptions options)
        {
            String mapped = Map(src, options);
            UCharacterIterator scanner = UCharacterIterator.GetInstance(mapped);

            // CharDirectionCount doubles as the "no code point seen yet" marker.
            UCharacterDirection lastDir = UCharacterDirectionExtensions.CharDirectionCount;
            UCharacterDirection firstDir = UCharacterDirectionExtensions.CharDirectionCount;
            int lastRtlIndex = -1;
            int lastLtrIndex = -1;
            bool sawRtl = false;
            bool sawLtr = false;

            for (int cp = scanner.NextCodePoint(); cp != UCharacterIterator.Done; cp = scanner.NextCodePoint())
            {
                // The space character is tolerated even when it is listed as prohibited.
                if (transform.prohibitedSet.Contains(cp) && cp != 0x0020)
                {
                    throw new StringPrepParseException("A prohibited code point was found in the input",
                                                       StringPrepErrorType.ProhibitedError,
                                                       scanner.GetText(), scanner.Index);
                }

                lastDir = UChar.GetDirection(cp);
                if (firstDir == UCharacterDirectionExtensions.CharDirectionCount)
                {
                    firstDir = lastDir;
                }

                // Record the most recent position of each strong direction; the iterator
                // has already advanced, hence the "- 1".
                switch (lastDir)
                {
                    case UCharacterDirection.LeftToRight:
                        sawLtr = true;
                        lastLtrIndex = scanner.Index - 1;
                        break;

                    case UCharacterDirection.RightToLeft:
                    case UCharacterDirection.RightToLeftArabic:
                        sawRtl = true;
                        lastRtlIndex = scanner.Index - 1;
                        break;
                }
            }

            if (sawRtl)
            {
                bool startsRightToLeft = firstDir == UCharacterDirection.RightToLeft
                                         || firstDir == UCharacterDirection.RightToLeftArabic;
                bool endsRightToLeft = lastDir == UCharacterDirection.RightToLeft
                                       || lastDir == UCharacterDirection.RightToLeftArabic;

                // Requirement 2: right-to-left and left-to-right code points must not be mixed.
                // Requirement 3: with any right-to-left code point present, both the first and
                // the last code point must be right-to-left as well.
                if (sawLtr || !startsRightToLeft || !endsRightToLeft)
                {
                    int errorIndex = lastRtlIndex > lastLtrIndex ? lastRtlIndex : lastLtrIndex;
                    throw new StringPrepParseException("The input does not conform to the rules for BiDi code points.",
                                                       StringPrepErrorType.CheckBiDiError, scanner.GetText(), errorIndex);
                }
            }

            return new StringBuffer(mapped);
        }
Beispiel #2
0
        /// <summary>
        /// Verifies that <c>AsString()</c> produces the expected display name for every
        /// <see cref="UCharacterDirection"/> value from <c>LeftToRight</c> up to and including
        /// <c>CharDirectionCount</c>.
        /// </summary>
        public void TestToString()
        {
            String[] expectedNames =
            {
                "Left-to-Right",
                "Right-to-Left",
                "European Number",
                "European Number Separator",
                "European Number Terminator",
                "Arabic Number",
                "Common Number Separator",
                "Paragraph Separator",
                "Segment Separator",
                "Whitespace",
                "Other Neutrals",
                "Left-to-Right Embedding",
                "Left-to-Right Override",
                "Right-to-Left Arabic",
                "Right-to-Left Embedding",
                "Right-to-Left Override",
                "Pop Directional Format",
                "Non-Spacing Mark",
                "Boundary Neutral",
                "First Strong Isolate",
                "Left-to-Right Isolate",
                "Right-to-Left Isolate",
                "Pop Directional Isolate",
                "Unassigned"
            };

            int firstOrdinal = (int)UCharacterDirection.LeftToRight;
            // The upper bound is inclusive on purpose: CharDirectionCount itself lies outside the
            // named directions and is expected to be reported as 'Unassigned'.
            int lastOrdinal = (int)UCharacterDirection.CharDirectionCount;

            for (int ordinal = firstOrdinal; ordinal <= lastOrdinal; ordinal++)
            {
                UCharacterDirection dir = (UCharacterDirection)ordinal;
                String actual = dir.AsString();
                String expected = expectedNames[ordinal];
                if (actual.Equals(expected))
                {
                    continue;
                }

                Errln("Error toString for direction " + dir + " expected " + expected);
            }
        }
Beispiel #3
0
 /// <summary>
 /// Returns the numeric value underlying a <see cref="UCharacterDirection"/>.
 /// Equivalent to the cast <c>(int)<paramref name="characterDirection"/></c>.
 /// </summary>
 /// <param name="characterDirection">The direction to convert.</param>
 /// <returns>The <see cref="int"/> value of <paramref name="characterDirection"/>.</returns>
 public static int ToInt32(this UCharacterDirection characterDirection)
     => (int)characterDirection;
Beispiel #4
0
        /// <summary>
        /// Returns the display name of a bidirectional character direction.
        /// </summary>
        /// <param name="dir">The direction whose name is requested.</param>
        /// <returns>
        /// The name of <paramref name="dir"/>, or <c>"Unassigned"</c> for any value that is not
        /// one of the directions listed in the switch below.
        /// </returns>
        /// <stable>ICU 2.1</stable>
        public static string AsString(this UCharacterDirection dir)
        {
            string name;

            switch (dir)
            {
                case UCharacterDirection.LeftToRight:
                    name = "Left-to-Right";
                    break;
                case UCharacterDirection.RightToLeft:
                    name = "Right-to-Left";
                    break;
                case UCharacterDirection.EuropeanNumber:
                    name = "European Number";
                    break;
                case UCharacterDirection.EuropeanNumberSeparator:
                    name = "European Number Separator";
                    break;
                case UCharacterDirection.EuropeanNumberTerminator:
                    name = "European Number Terminator";
                    break;
                case UCharacterDirection.ArabicNumber:
                    name = "Arabic Number";
                    break;
                case UCharacterDirection.CommonNumberSeparator:
                    name = "Common Number Separator";
                    break;
                case UCharacterDirection.BlockSeparator:
                    name = "Paragraph Separator";
                    break;
                case UCharacterDirection.SegmentSeparator:
                    name = "Segment Separator";
                    break;
                case UCharacterDirection.WhiteSpaceNeutral:
                    name = "Whitespace";
                    break;
                case UCharacterDirection.OtherNeutral:
                    name = "Other Neutrals";
                    break;
                case UCharacterDirection.LeftToRightEmbedding:
                    name = "Left-to-Right Embedding";
                    break;
                case UCharacterDirection.LeftToRightOverride:
                    name = "Left-to-Right Override";
                    break;
                case UCharacterDirection.RightToLeftArabic:
                    name = "Right-to-Left Arabic";
                    break;
                case UCharacterDirection.RightToLeftEmbedding:
                    name = "Right-to-Left Embedding";
                    break;
                case UCharacterDirection.RightToLeftOverride:
                    name = "Right-to-Left Override";
                    break;
                case UCharacterDirection.PopDirectionalFormat:
                    name = "Pop Directional Format";
                    break;
                case UCharacterDirection.DirNonSpacingMark:
                    name = "Non-Spacing Mark";
                    break;
                case UCharacterDirection.BoundaryNeutral:
                    name = "Boundary Neutral";
                    break;
                case UCharacterDirection.FirstStrongIsolate:
                    name = "First Strong Isolate";
                    break;
                case UCharacterDirection.LeftToRightIsolate:
                    name = "Left-to-Right Isolate";
                    break;
                case UCharacterDirection.RightToLeftIsolate:
                    name = "Right-to-Left Isolate";
                    break;
                case UCharacterDirection.PopDirectionalIsolate:
                    name = "Pop Directional Isolate";
                    break;
                default:
                    // Anything outside the known directions is reported as unassigned.
                    name = "Unassigned";
                    break;
            }

            return name;
        }
Beispiel #5
0
        /*
         * boolean isLabelSeparator(int ch){
         *  int result = getCodePointValue(ch);
         *  if( (result & 0x07)  == LABEL_SEPARATOR){
         *      return true;
         *  }
         *  return false;
         * }
         */
        /*
         * 1) Map -- For each character in the input, check if it has a mapping
         *   and, if so, replace it with its mapping.
         *
         * 2) Normalize -- Possibly normalize the result of step 1 using Unicode
         *   normalization.
         *
         * 3) Prohibit -- Check for any characters that are not allowed in the
         *   output.  If any are found, return an error.
         *
         * 4) Check bidi -- Possibly check for right-to-left characters, and if
         *   any are found, make sure that the whole string satisfies the
         *   requirements for bidirectional strings.  If the string does not
         *   satisfy the requirements for bidirectional strings, return an
         *   error.
         *   [Unicode3.2] defines several bidirectional categories; each character
         *    has one bidirectional category assigned to it.  For the purposes of
         *    the requirements below, an "RandALCat character" is a character that
         *    has Unicode bidirectional categories "R" or "AL"; an "LCat character"
         *    is a character that has Unicode bidirectional category "L".  Note
         *    that there are many characters which fall in neither of the above
         *    definitions; Latin digits (<U+0030> through <U+0039>) are examples of
         *    this because they have bidirectional category "EN".
         *
         *    In any profile that specifies bidirectional character handling, all
         *    three of the following requirements MUST be met:
         *
         *    1) The characters in section 5.8 MUST be prohibited.
         *
         *    2) If a string contains any RandALCat character, the string MUST NOT
         *       contain any LCat character.
         *
         *    3) If a string contains any RandALCat character, a RandALCat
         *       character MUST be the first character of the string, and a
         *       RandALCat character MUST be the last character of the string.
         */

        /// <summary>
        /// Prepares the input for use with this profile: the text is mapped, normalized (NFKC) when the
        /// profile enables it, checked for prohibited code points and, when the profile enables it,
        /// checked against the BiDi requirements, following the step order of RFC 3454.
        /// </summary>
        /// <param name="src">A <see cref="UCharacterIterator"/> over the source text.</param>
        /// <param name="options">A bit set of options:
        /// <list type="bullet">
        ///     <item><term><see cref="StringPrepOptions.Default"/></term><description>Prohibit processing of unassigned code points in the input.</description></item>
        ///     <item><term><see cref="StringPrepOptions.AllowUnassigned"/></term><description>Treat unassigned code points in the input as normal Unicode code points.</description></item>
        /// </list>
        /// </param>
        /// <returns>A <see cref="StringBuffer"/> holding the prepared text.</returns>
        /// <exception cref="StringPrepParseException">
        /// Thrown with <see cref="StringPrepErrorType.ProhibitedError"/> when a prohibited code point is found,
        /// or with <see cref="StringPrepErrorType.CheckBiDiError"/> when the BiDi requirements are violated.
        /// </exception>
        /// <stable>ICU 2.8</stable>
        public StringBuffer Prepare(UCharacterIterator src, StringPrepOptions options)
        {
            // Map first; normalization is applied on top only when the profile asks for NFKC.
            StringBuffer mapped = Map(src, options);
            StringBuffer prepared = doNFKC ? Normalize(mapped) : mapped;

            UCharacterIterator scanner = UCharacterIterator.GetInstance(prepared);
            Values lookup = new Values();

            // CharDirectionCount doubles as the "no code point seen yet" marker.
#pragma warning disable 612, 618
            UCharacterDirection noDirection = UCharacterDirection.CharDirectionCount;
#pragma warning restore 612, 618
            UCharacterDirection lastDir = noDirection;
            UCharacterDirection firstDir = noDirection;
            int lastRtlIndex = -1;
            int lastLtrIndex = -1;
            bool sawRtl = false;
            bool sawLtr = false;

            for (int cp = scanner.NextCodePoint(); cp != UCharacterIterator.DONE; cp = scanner.NextCodePoint())
            {
                GetValues(GetCodePointValue(cp), lookup);

                if (lookup.type == PROHIBITED)
                {
                    throw new StringPrepParseException("A prohibited code point was found in the input",
                                                       StringPrepErrorType.ProhibitedError, scanner.GetText(), lookup.value);
                }

                if (!checkBiDi)
                {
                    continue;
                }

                lastDir = (UCharacterDirection)bdp.GetClass(cp);
                if (firstDir == noDirection)
                {
                    firstDir = lastDir;
                }

                // Record the most recent position of each strong direction; the iterator
                // has already advanced, hence the "- 1".
                switch (lastDir)
                {
                    case UCharacterDirection.LeftToRight:
                        sawLtr = true;
                        lastLtrIndex = scanner.Index - 1;
                        break;

                    case UCharacterDirection.RightToLeft:
                    case UCharacterDirection.RightToLeftArabic:
                        sawRtl = true;
                        lastRtlIndex = scanner.Index - 1;
                        break;
                }
            }

            if (checkBiDi && sawRtl)
            {
                bool startsRightToLeft = firstDir == UCharacterDirection.RightToLeft
                                         || firstDir == UCharacterDirection.RightToLeftArabic;
                bool endsRightToLeft = lastDir == UCharacterDirection.RightToLeft
                                       || lastDir == UCharacterDirection.RightToLeftArabic;

                // Requirement 2: right-to-left and left-to-right code points must not be mixed.
                // Requirement 3: with any right-to-left code point present, both the first and
                // the last code point must be right-to-left as well.
                if (sawLtr || !startsRightToLeft || !endsRightToLeft)
                {
                    int errorIndex = lastRtlIndex > lastLtrIndex ? lastRtlIndex : lastLtrIndex;
                    throw new StringPrepParseException("The input does not conform to the rules for BiDi code points.",
                                                       StringPrepErrorType.CheckBiDiError, scanner.GetText(),
                                                       errorIndex);
                }
            }

            return prepared;
        }