/// <summary>
/// Maps <paramref name="src"/> and validates the mapped text: code points in the
/// transform's prohibited set are rejected (U+0020 is exempt), and the
/// bidirectional rules are enforced.
/// </summary>
/// <param name="src">The text to prepare.</param>
/// <param name="options">Options forwarded to <c>Map</c>.</param>
/// <returns>A new <see cref="StringBuffer"/> holding the mapped text.</returns>
/// <exception cref="StringPrepParseException">
/// A prohibited code point was found, the text contains both left-to-right and
/// right-to-left characters, or it contains right-to-left characters without
/// both starting and ending with one.
/// </exception>
public StringBuffer Prepare(String src, StringPrepOptions options)
{
    String mapped = Map(src, options);
    UCharacterIterator iter = UCharacterIterator.GetInstance(mapped);

    // CharDirectionCount doubles as the "no character seen yet" marker.
    UCharacterDirection firstCharDir = UCharacterDirectionExtensions.CharDirectionCount;
    UCharacterDirection lastCharDir = UCharacterDirectionExtensions.CharDirectionCount;
    int lastRtlPos = -1;
    int lastLtrPos = -1;
    bool sawRtl = false;
    bool sawLtr = false;

    for (int cp = iter.NextCodePoint(); cp != UCharacterIterator.Done; cp = iter.NextCodePoint())
    {
        if (transform.prohibitedSet.Contains(cp) && cp != 0x0020)
        {
            throw new StringPrepParseException(
                "A prohibited code point was found in the input",
                StringPrepErrorType.ProhibitedError,
                iter.GetText(),
                iter.Index);
        }

        // After the loop this holds the direction of the final code point.
        lastCharDir = UChar.GetDirection(cp);
        if (firstCharDir == UCharacterDirectionExtensions.CharDirectionCount)
        {
            firstCharDir = lastCharDir;
        }

        switch (lastCharDir)
        {
            case UCharacterDirection.LeftToRight:
                sawLtr = true;
                lastLtrPos = iter.Index - 1;
                break;

            case UCharacterDirection.RightToLeft:
            case UCharacterDirection.RightToLeftArabic:
                sawRtl = true;
                lastRtlPos = iter.Index - 1;
                break;
        }
    }

    bool startsRtl = firstCharDir == UCharacterDirection.RightToLeft
        || firstCharDir == UCharacterDirection.RightToLeftArabic;
    bool endsRtl = lastCharDir == UCharacterDirection.RightToLeft
        || lastCharDir == UCharacterDirection.RightToLeftArabic;

    // Requirement 2: right-to-left and left-to-right characters must not be mixed.
    bool mixesDirections = sawLtr && sawRtl;
    // Requirement 3: right-to-left text must both begin and end with a right-to-left character.
    bool rtlNotAtBothEnds = sawRtl && !(startsRtl && endsRtl);

    if (mixesDirections || rtlNotAtBothEnds)
    {
        int errorPos = lastRtlPos > lastLtrPos ? lastRtlPos : lastLtrPos;
        throw new StringPrepParseException(
            "The input does not conform to the rules for BiDi code points.",
            StringPrepErrorType.CheckBiDiError,
            iter.GetText(),
            errorPos);
    }

    return new StringBuffer(mapped);
}
public void TestToString()
{
    // Expected AsString() result for each direction, indexed by the numeric
    // value of the direction. The final entry is the fallback name.
    string[] expectedNames =
    {
        "Left-to-Right",
        "Right-to-Left",
        "European Number",
        "European Number Separator",
        "European Number Terminator",
        "Arabic Number",
        "Common Number Separator",
        "Paragraph Separator",
        "Segment Separator",
        "Whitespace",
        "Other Neutrals",
        "Left-to-Right Embedding",
        "Left-to-Right Override",
        "Right-to-Left Arabic",
        "Right-to-Left Embedding",
        "Right-to-Left Override",
        "Pop Directional Format",
        "Non-Spacing Mark",
        "Boundary Neutral",
        "First Strong Isolate",
        "Left-to-Right Isolate",
        "Right-to-Left Isolate",
        "Pop Directional Isolate",
        "Unassigned"
    };

    // The bound is inclusive on purpose: CharDirectionCount itself is checked
    // so that the "Unassigned" fallback name is covered as well.
    UCharacterDirection dir = UCharacterDirection.LeftToRight;
    while (dir <= UCharacterDirection.CharDirectionCount)
    {
        string expected = expectedNames[(int)dir];
        if (!dir.AsString().Equals(expected))
        {
            Errln("Error toString for direction " + dir + " expected " + expected);
        }
        dir++;
    }
}
/// <summary>
/// Returns the numeric value of a <see cref="UCharacterDirection"/>.
/// Equivalent to the cast <c>(int)<paramref name="characterDirection"/></c>.
/// </summary>
/// <param name="characterDirection">The <see cref="UCharacterDirection"/> to convert.</param>
/// <returns>The value of <paramref name="characterDirection"/> as an <see cref="int"/>.</returns>
public static int ToInt32(this UCharacterDirection characterDirection)
{
    int numericValue = (int)characterDirection;
    return numericValue;
}
/// <summary>
/// Returns the display name of a bidirectional direction.
/// </summary>
/// <param name="dir">The direction whose name is requested.</param>
/// <returns>
/// The display name for <paramref name="dir"/>, or <c>"Unassigned"</c> for any
/// value that has no dedicated name.
/// </returns>
/// <stable>ICU 2.1</stable>
public static string AsString(this UCharacterDirection dir)
{
    string name;

    switch (dir)
    {
        case UCharacterDirection.LeftToRight:
            name = "Left-to-Right";
            break;
        case UCharacterDirection.RightToLeft:
            name = "Right-to-Left";
            break;
        case UCharacterDirection.EuropeanNumber:
            name = "European Number";
            break;
        case UCharacterDirection.EuropeanNumberSeparator:
            name = "European Number Separator";
            break;
        case UCharacterDirection.EuropeanNumberTerminator:
            name = "European Number Terminator";
            break;
        case UCharacterDirection.ArabicNumber:
            name = "Arabic Number";
            break;
        case UCharacterDirection.CommonNumberSeparator:
            name = "Common Number Separator";
            break;
        case UCharacterDirection.BlockSeparator:
            name = "Paragraph Separator";
            break;
        case UCharacterDirection.SegmentSeparator:
            name = "Segment Separator";
            break;
        case UCharacterDirection.WhiteSpaceNeutral:
            name = "Whitespace";
            break;
        case UCharacterDirection.OtherNeutral:
            name = "Other Neutrals";
            break;
        case UCharacterDirection.LeftToRightEmbedding:
            name = "Left-to-Right Embedding";
            break;
        case UCharacterDirection.LeftToRightOverride:
            name = "Left-to-Right Override";
            break;
        case UCharacterDirection.RightToLeftArabic:
            name = "Right-to-Left Arabic";
            break;
        case UCharacterDirection.RightToLeftEmbedding:
            name = "Right-to-Left Embedding";
            break;
        case UCharacterDirection.RightToLeftOverride:
            name = "Right-to-Left Override";
            break;
        case UCharacterDirection.PopDirectionalFormat:
            name = "Pop Directional Format";
            break;
        case UCharacterDirection.DirNonSpacingMark:
            name = "Non-Spacing Mark";
            break;
        case UCharacterDirection.BoundaryNeutral:
            name = "Boundary Neutral";
            break;
        case UCharacterDirection.FirstStrongIsolate:
            name = "First Strong Isolate";
            break;
        case UCharacterDirection.LeftToRightIsolate:
            name = "Left-to-Right Isolate";
            break;
        case UCharacterDirection.RightToLeftIsolate:
            name = "Right-to-Left Isolate";
            break;
        case UCharacterDirection.PopDirectionalIsolate:
            name = "Pop Directional Isolate";
            break;
        default:
            // Any value without a dedicated case falls back to this name.
            name = "Unassigned";
            break;
    }

    return name;
}
/*
 * boolean isLabelSeparator(int ch){
 *     int result = getCodePointValue(ch);
 *     if( (result & 0x07) == LABEL_SEPARATOR){
 *         return true;
 *     }
 *     return false;
 * }
 */

/*
 * 1) Map -- For each character in the input, check if it has a mapping
 *    and, if so, replace it with its mapping.
 *
 * 2) Normalize -- Possibly normalize the result of step 1 using Unicode
 *    normalization.
 *
 * 3) Prohibit -- Check for any characters that are not allowed in the
 *    output. If any are found, return an error.
 *
 * 4) Check bidi -- Possibly check for right-to-left characters, and if
 *    any are found, make sure that the whole string satisfies the
 *    requirements for bidirectional strings. If the string does not
 *    satisfy the requirements for bidirectional strings, return an
 *    error.
 *
 * [Unicode3.2] defines several bidirectional categories; each character
 * has one bidirectional category assigned to it. For the purposes of
 * the requirements below, an "RandALCat character" is a character that
 * has Unicode bidirectional categories "R" or "AL"; an "LCat character"
 * is a character that has Unicode bidirectional category "L". Note
 * that there are many characters which fall in neither of the above
 * definitions; Latin digits (<U+0030> through <U+0039>) are examples of
 * this because they have bidirectional category "EN".
 *
 * In any profile that specifies bidirectional character handling, all
 * three of the following requirements MUST be met:
 *
 * 1) The characters in section 5.8 MUST be prohibited.
 *
 * 2) If a string contains any RandALCat character, the string MUST NOT
 *    contain any LCat character.
 *
 * 3) If a string contains any RandALCat character, a RandALCat
 *    character MUST be the first character of the string, and a
 *    RandALCat character MUST be the last character of the string.
 */

/// <summary>
/// Prepares the input for use in applications with the given profile.
/// The input is mapped, normalized (NFKC), and checked for prohibited and BiDi
/// characters, in the order defined by RFC 3454; which of these steps run depends
/// on the options specified in the profile.
/// </summary>
/// <param name="src">A <see cref="UCharacterIterator"/> object containing the source string.</param>
/// <param name="options">A bit set of options:
/// <list type="bullet">
///     <item><term><see cref="StringPrepOptions.Default"/></term><description>Prohibit processing of unassigned code points in the input.</description></item>
///     <item><term><see cref="StringPrepOptions.AllowUnassigned"/></term><description>Treat unassigned code points in the input as normal Unicode code points.</description></item>
/// </list>
/// </param>
/// <returns>A <see cref="StringBuffer"/> containing the output.</returns>
/// <exception cref="StringPrepParseException">
/// The input contains a prohibited code point, or BiDi checking is enabled and the
/// input violates the bidirectional requirements.
/// </exception>
/// <stable>ICU 2.8</stable>
public StringBuffer Prepare(UCharacterIterator src, StringPrepOptions options)
{
    // Step 1: map. Step 2: normalize, only when the profile asks for NFKC.
    StringBuffer mapOut = Map(src, options);
    StringBuffer normOut = doNFKC ? Normalize(mapOut) : mapOut;

    UCharacterIterator iter = UCharacterIterator.GetInstance(normOut);
    Values val = new Values();

    // CharDirectionCount doubles as the "no character seen yet" marker.
#pragma warning disable 612, 618
    UCharacterDirection noDirection = UCharacterDirection.CharDirectionCount;
#pragma warning restore 612, 618
    UCharacterDirection firstCharDir = noDirection;
    UCharacterDirection lastCharDir = noDirection;
    int lastRtlPos = -1;
    int lastLtrPos = -1;
    bool sawRtl = false;
    bool sawLtr = false;

    for (int cp = iter.NextCodePoint(); cp != UCharacterIterator.DONE; cp = iter.NextCodePoint())
    {
        // Step 3: prohibit.
        char codePointValue = GetCodePointValue(cp);
        GetValues(codePointValue, val);
        if (val.type == PROHIBITED)
        {
            // NOTE(review): the last argument is val.value, whereas the BiDi errors
            // below pass a text position -- confirm this is intended.
            throw new StringPrepParseException(
                "A prohibited code point was found in the input",
                StringPrepErrorType.ProhibitedError,
                iter.GetText(),
                val.value);
        }

        if (!checkBiDi)
        {
            continue;
        }

        // After the loop this holds the direction of the final code point.
        lastCharDir = (UCharacterDirection)bdp.GetClass(cp);
        if (firstCharDir == noDirection)
        {
            firstCharDir = lastCharDir;
        }

        if (lastCharDir == UCharacterDirection.LeftToRight)
        {
            sawLtr = true;
            lastLtrPos = iter.Index - 1;
        }
        else if (lastCharDir == UCharacterDirection.RightToLeft
            || lastCharDir == UCharacterDirection.RightToLeftArabic)
        {
            sawRtl = true;
            lastRtlPos = iter.Index - 1;
        }
    }

    // Step 4: check bidi.
    if (checkBiDi)
    {
        bool startsRtl = firstCharDir == UCharacterDirection.RightToLeft
            || firstCharDir == UCharacterDirection.RightToLeftArabic;
        bool endsRtl = lastCharDir == UCharacterDirection.RightToLeft
            || lastCharDir == UCharacterDirection.RightToLeftArabic;

        // Requirement 2: right-to-left and left-to-right characters must not be mixed.
        bool mixesDirections = sawLtr && sawRtl;
        // Requirement 3: right-to-left text must both begin and end with a right-to-left character.
        bool rtlNotAtBothEnds = sawRtl && !(startsRtl && endsRtl);

        if (mixesDirections || rtlNotAtBothEnds)
        {
            int errorPos = lastRtlPos > lastLtrPos ? lastRtlPos : lastLtrPos;
            throw new StringPrepParseException(
                "The input does not conform to the rules for BiDi code points.",
                StringPrepErrorType.CheckBiDiError,
                iter.GetText(),
                errorPos);
        }
    }

    return normOut;
}