C# (CSharp) ICU4N.Text UTF16.ValueOf Examples

Programming Language: C# (CSharp)

Namespace/Package Name: ICU4N.Text

Class/Type: UTF16

Method/Function: ValueOf

Examples at hotexamples.com: 9

C# (CSharp) ICU4N.Text UTF16.ValueOf - 9 examples found. These are the top-rated real-world C# (CSharp) examples of ICU4N.Text.UTF16.ValueOf, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

GetCharCount(18)

CharAt(12)

ValueOf(9)

Append(4)

CountCodePoint(1)

FindOffsetFromCodePoint(1)

IsLeadSurrogate(1)

IsTrailSurrogate(1)

Example #1

Show file

File: UnicodeSetIterator.cs Project: SilentCC/ICU4N

 /// <summary>
 /// Returns the iterator's current element as a string. Only use after calling
 /// <see cref="Next()"/>, not <see cref="NextRange()"/>.
 /// </summary>
 /// <remarks>
 /// When <c>Codepoint</c> holds the <c>IS_STRING</c> marker the <c>String</c> member is returned
 /// unchanged; otherwise <c>Codepoint</c> is converted via <c>UTF16.ValueOf</c>.
 /// </remarks>
 /// <stable>ICU 4.0</stable>
 public virtual string GetString() // ICU4N TODO: API String vs GetString() - confusing. This should be made into String property and the current string property made into a private field.
 {
     return Codepoint == IS_STRING
         ? String
         : UTF16.ValueOf(Codepoint);
 }

Example #2

Show file

        /// <summary>
        /// Adds to <paramref name="output"/> every string obtained by moving one code point of
        /// <paramref name="source"/> to the front and recursively permuting the rest.
        /// </summary>
        /// <param name="source">The string whose code points are permuted.</param>
        /// <param name="skipZeros">
        /// When <c>true</c>, a code point that is not at offset 0 and whose combining class
        /// (per <c>UCharacter.GetCombiningClass</c>) is 0 is never chosen as the leading code point.
        /// </param>
        /// <param name="output">Receives the generated strings.</param>
        public static void Permute(string source, bool skipZeros, ISet <string> output)
        {
            // TODO: optimize

            // Base case: a string of at most one code point is its own only permutation.
            // The cheap length test comes first so code points are not counted for long inputs.
            bool isTrivial = source.Length <= 2 && UTF16.CountCodePoint(source) <= 1;
            if (isTrivial)
            {
                output.Add(source);
                return;
            }

            // Scratch set reused for the recursive result of every candidate leading code point.
            ISet <string> tails = new HashSet <string>();

            int offset = 0;
            while (offset < source.Length)
            {
                int leading    = UTF16.CharAt(source, offset);
                int nextOffset = offset + UTF16.GetCharCount(leading);

                // A combining-class-zero code point past the start is left in place when skipZeros is set.
                bool skipThisOne = skipZeros && offset != 0 && UCharacter.GetCombiningClass(leading) == 0;
                if (!skipThisOne)
                {
                    // Permute everything before and after the chosen code point.
                    tails.Clear();
                    string others = source.Substring(0, offset) + source.Substring(nextOffset);
                    Permute(others, skipZeros, tails);

                    // Prefix the chosen code point to each permutation of the rest.
                    string head = UTF16.ValueOf(source, offset);
                    foreach (string tail in tails)
                    {
                        output.Add(head + tail);
                    }
                }

                offset = nextOffset;
            }
        }

Example #3

Show file

        /// <summary>
        /// Probes every code point from 0 through 0x10FFFF with <paramref name="transform"/> and
        /// records the results in <c>sourceCache</c> (code points) and <c>sourceStrings</c>
        /// (decompositions), then freezes <c>sourceCache</c>.
        /// </summary>
        /// <param name="transform">The transform applied to each code point and to its decomposition.</param>
        /// <param name="normalizer">
        /// May be <c>null</c>. When <c>null</c>, only code points that the transform itself changes are
        /// recorded; otherwise the cache is seeded with <c>[:^ccc=0:]</c> and decompositions and
        /// non-inert code points are considered as well.
        /// </param>
        public SourceTargetUtility(ITransform <string, string> transform, Normalizer2 normalizer)
        {
            this.transform = transform;
            bool useNormalizer = normalizer != null;

            // NOTE: the upstream Java source carried a disabled (commented-out) one-time scan here
            // that collected trailing combining code points into TRAILING_COMBINING; it is not ported.
            sourceCache   = useNormalizer ? new UnicodeSet("[:^ccc=0:]") : new UnicodeSet();
            sourceStrings = new HashSet <string>();

            for (int codePoint = 0; codePoint <= 0x10FFFF; ++codePoint)
            {
                // A code point the transform changes always belongs to the cache.
                string transformed   = transform.Transform(UTF16.ValueOf(codePoint));
                bool   alreadyCached = !CharSequences.Equals(codePoint, transformed);
                if (alreadyCached)
                {
                    sourceCache.Add(codePoint);
                }

                if (!useNormalizer)
                {
                    continue;
                }

                string decomposition = NFC.GetDecomposition(codePoint);
                if (decomposition == null)
                {
                    continue;
                }

                // Remember decompositions that the transform would alter.
                if (!decomposition.Equals(transform.Transform(decomposition)))
                {
                    sourceStrings.Add(decomposition);
                }

                // Code points not yet cached are added when the normalizer does not treat them as inert.
                if (!alreadyCached && !normalizer.IsInert(codePoint))
                {
                    sourceCache.Add(codePoint);
                }

                // NOTE: two further disabled upstream experiments followed at this point (testing
                // TRAILING_COMBINING suffixes, and adding initial substrings of the decomposition).
            }
            sourceCache.Freeze();
        }

Example #4

Show file

File: CaseFoldTransliterator.cs Project: NightOwl888/ICU4N

        /// <summary>
        /// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, TransliterationPosition, bool)"/>
        /// by replacing each code point between <c>offsets.Start</c> and <c>offsets.Limit</c> with the
        /// result of <c>csp.ToFullFolding</c>.
        /// </summary>
        /// <param name="text">The text that is modified in place.</param>
        /// <param name="offsets">Range to process; its start, limit and context limit are updated.</param>
        /// <param name="isIncremental">
        /// When <c>true</c>, processing stops early if the iterator reports that it reached its limit,
        /// leaving <c>offsets.Start</c> at the start of the unfinished code point.
        /// </param>
        protected override void HandleTransliterate(IReplaceable text,
                                                    TransliterationPosition offsets, bool isIncremental)
        {
            lock (syncLock)
            {
                // Nothing to do without case properties or with an empty range.
                if (csp == null || offsets.Start >= offsets.Limit)
                {
                    return;
                }

                iter.SetText(text);
                result.Length = 0;

                // Walk the original string; wherever folding changes a code point,
                // patch the corresponding position in the replaceable.
                iter.SetIndex(offsets.Start);
                iter.SetLimit(offsets.Limit);
                iter.SetContextLimits(offsets.ContextStart, offsets.ContextLimit);

                while (true)
                {
                    int codePoint = iter.NextCaseMapCP();
                    if (codePoint < 0)
                    {
                        break;
                    }

                    int folded = csp.ToFullFolding(codePoint, result, 0);

                    if (iter.DidReachLimit && isIncremental)
                    {
                        // The mapping looked past the context limit: wait for more input.
                        offsets.Start = iter.CaseMapCPStart;
                        return;
                    }

                    // Negative result: the code point maps to itself.
                    if (folded < 0)
                    {
                        continue;
                    }

                    int lengthChange;
                    if (folded > UCaseProperties.MaxStringLength)
                    {
                        // Single code point mapping.
                        lengthChange = iter.Replace(UTF16.ValueOf(folded));
                    }
                    else
                    {
                        // String mapping was written to the shared result buffer.
                        lengthChange  = iter.Replace(result.ToString());
                        result.Length = 0;
                    }

                    if (lengthChange != 0)
                    {
                        offsets.Limit        += lengthChange;
                        offsets.ContextLimit += lengthChange;
                    }
                }

                offsets.Start = offsets.Limit;
            }
        }

Example #5

Show file

File: UnescapeTransliterator.cs Project: SilentCC/ICU4N

        /// <summary>
        /// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, Position, bool)"/>.
        /// Scans <paramref name="text"/> from <c>pos.Start</c> up to <c>pos.Limit</c> and replaces every
        /// escape that matches one of the forms in <c>spec</c> (prefix, digits in the form's radix,
        /// suffix) with the string for the code point value spelled out by the digits.
        /// </summary>
        /// <remarks>
        /// As read by this method, <c>spec</c> is a sequence of entries terminated by <c>END</c>. Each
        /// entry is a five-value header (prefix length, suffix length, radix, minimum digits, maximum
        /// digits) followed by the prefix characters and then the suffix characters.
        /// </remarks>
        /// <param name="text">The text that is modified in place.</param>
        /// <param name="pos">Range to process; <c>Start</c>, <c>Limit</c> and <c>ContextLimit</c> are updated on exit.</param>
        /// <param name="isIncremental">
        /// When <c>true</c>, a partial match that runs into the limit stops processing so that the
        /// caller can supply more input; <c>pos.Start</c> is left at the start of the partial match.
        /// </param>
        protected override void HandleTransliterate(IReplaceable text,
                                                    Position pos, bool isIncremental)
        {
            int start = pos.Start;
            int limit = pos.Limit;
            int i, ipat;

            //loop:
            // (The Java original used a labeled loop; here "goto loop_break" exits this outer loop.)
            while (start < limit)
            {
                // Loop over the forms in spec[].  Exit this loop when we
                // match one of the specs.  Exit the outer loop if a
                // partial match is detected and isIncremental is true.
                for (ipat = 0; spec[ipat] != END;)
                {
                    // Read the header
                    int prefixLen = spec[ipat++];
                    int suffixLen = spec[ipat++];
                    int radix     = spec[ipat++];
                    int minDigits = spec[ipat++];
                    int maxDigits = spec[ipat++];
                    // ipat now indexes the first prefix character of this entry.

                    // s is a copy of start that is advanced over the
                    // characters as we parse them.
                    int  s     = start;
                    bool match = true;

                    // Match the prefix characters one UTF-16 code unit at a time.
                    for (i = 0; i < prefixLen; ++i)
                    {
                        if (s >= limit)
                        {
                            if (i > 0)
                            {
                                // We've already matched a character.  This is
                                // a partial match, so we return if in
                                // incremental mode.  In non-incremental mode,
                                // go to the next spec.
                                if (isIncremental)
                                {
                                    goto loop_break;
                                }
                                match = false;
                                break;
                            }
                            // i == 0 implies s == start, which the outer while condition keeps below limit.
                        }
                        char c = text[s++];
                        if (c != spec[ipat + i])
                        {
                            match = false;
                            break;
                        }
                    }

                    if (match)
                    {
                        // Accumulate up to maxDigits digits of the given radix into u.
                        // NOTE(review): u is not range-checked in this method before UTF16.ValueOf(u);
                        // presumably the digit limits in spec keep it valid — confirm.
                        int u          = 0;
                        int digitCount = 0;
                        for (; ;)
                        {
                            if (s >= limit)
                            {
                                // Check for partial match in incremental mode.
                                if (s > start && isIncremental)
                                {
                                    goto loop_break;
                                }
                                break;
                            }
                            int ch    = text.Char32At(s);
                            int digit = UCharacter.Digit(ch, radix);
                            if (digit < 0)
                            {
                                // Not a digit in this radix: the digit run ends here.
                                break;
                            }
                            s += UTF16.GetCharCount(ch);
                            u  = (u * radix) + digit;
                            if (++digitCount == maxDigits)
                            {
                                break;
                            }
                        }

                        // Too few digits means this form does not match.
                        match = (digitCount >= minDigits);

                        if (match)
                        {
                            // Match the suffix characters, which follow the prefix inside spec.
                            for (i = 0; i < suffixLen; ++i)
                            {
                                if (s >= limit)
                                {
                                    // Check for partial match in incremental mode.
                                    if (s > start && isIncremental)
                                    {
                                        goto loop_break;
                                    }
                                    match = false;
                                    break;
                                }
                                char c = text[s++];
                                if (c != spec[ipat + prefixLen + i])
                                {
                                    match = false;
                                    break;
                                }
                            }

                            if (match)
                            {
                                // At this point, we have a match
                                string str = UTF16.ValueOf(u);
                                text.Replace(start, s, str);
                                // The text shrank by (matched length - replacement length); move limit with it.
                                limit -= s - start - str.Length;
                                // The following break statement leaves the
                                // loop that is traversing the forms in
                                // spec[].  We then parse the next input
                                // character.
                                break;
                            }
                        }
                    }

                    // Skip this entry's prefix and suffix characters to reach the next header.
                    ipat += prefixLen + suffixLen;
                }

                // Step over one code point: either the replacement just written or an unmatched character.
                if (start < limit)
                {
                    start += UTF16.GetCharCount(text.Char32At(start));
                }
            }
            // Target of the "goto loop_break" statements above (exit on a partial match in incremental mode).
            loop_break : { }

            // Publish the adjusted positions back to the caller.
            pos.ContextLimit += limit - pos.Limit;
            pos.Limit         = limit;
            pos.Start         = start;
        }

Example #6

Show file

        /// <summary>
        /// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, Position, bool)"/>.
        /// Title-cases the first cased code point after an uncased, non-case-ignorable one and
        /// lower-cases code points that follow a cased one; case-ignorable code points are left
        /// alone and do not change the mode.
        /// </summary>
        /// <param name="text">The text that is modified in place.</param>
        /// <param name="offsets">Range to process; its start, limit and context limit are updated.</param>
        /// <param name="isIncremental">
        /// When <c>true</c>, processing stops early if the iterator reports that it reached its limit,
        /// leaving <c>offsets.Start</c> at the start of the unfinished code point.
        /// </param>
        protected override void HandleTransliterate(IReplaceable text,
                                                    Position offsets, bool isIncremental)
        {
            // NOTE(review): this locks on `this`; a private lock object is generally preferable,
            // but the class's fields are not visible here, so the lock target is left unchanged.
            lock (this)
            {
                // TODO reimplement, see ustrcase.c
                // using a real word break iterator
                //   instead of just looking for a transition between cased and uncased characters
                // call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap)
                // needs to take isIncremental into account because case mappings are context-sensitive
                //   also detect when lowercasing function did not finish because of context

                if (offsets.Start >= offsets.Limit)
                {
                    return;
                }

                // Current mode: true = title-case the next cased code point, false = lower-case it.
                bool titleNext = true;

                int current;
                // Case type: >0 cased, ==0 uncased, <0 case-ignorable.
                int caseType;

                // Look backwards through the preceding context, skipping case-ignorable code points.
                // The first cased code point found switches to lower-case mode; an uncased one
                // (or running out of context) keeps title-case mode.
                int back = offsets.Start - 1;
                while (back >= offsets.ContextStart)
                {
                    current  = text.Char32At(back);
                    caseType = csp.GetTypeOrIgnorable(current);
                    if (caseType >= 0)
                    {
                        titleNext = caseType == 0;
                        break;
                    }
                    back -= UTF16.GetCharCount(current);
                }

                iter.SetText(text);
                iter.SetIndex(offsets.Start);
                iter.SetLimit(offsets.Limit);
                iter.SetContextLimits(offsets.ContextStart, offsets.ContextLimit);

                result.Length = 0;

                // Walk the original string; wherever the mapping changes a code point,
                // patch the corresponding position in the replaceable.
                while ((current = iter.NextCaseMapCP()) >= 0)
                {
                    caseType = csp.GetTypeOrIgnorable(current);
                    if (caseType < 0)
                    {
                        // Case-ignorable: copied as-is, mode unchanged.
                        continue;
                    }

                    int mapped = titleNext
                        ? csp.ToFullTitle(current, iter, result, caseLocale)
                        : csp.ToFullLower(current, iter, result, caseLocale);

                    // After an uncased code point the next cased one gets title-cased.
                    titleNext = caseType == 0;

                    if (iter.DidReachLimit && isIncremental)
                    {
                        // The mapping looked past the context limit: wait for more input.
                        offsets.Start = iter.CaseMapCPStart;
                        return;
                    }

                    // Negative result: the code point maps to itself.
                    if (mapped < 0)
                    {
                        continue;
                    }

                    int lengthChange;
                    if (mapped > UCaseProps.MAX_STRING_LENGTH)
                    {
                        // Single code point mapping.
                        lengthChange = iter.Replace(UTF16.ValueOf(mapped));
                    }
                    else
                    {
                        // String mapping was written to the shared result buffer.
                        lengthChange  = iter.Replace(result.ToString());
                        result.Length = 0;
                    }

                    if (lengthChange != 0)
                    {
                        offsets.Limit        += lengthChange;
                        offsets.ContextLimit += lengthChange;
                    }
                }

                offsets.Start = offsets.Limit;
            }
        }

Example #7

Show file

        /// <summary>
        /// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, Position, bool)"/>.
        /// Looks for character names enclosed between the open pattern (<c>OPEN_PAT</c>) and
        /// <c>CLOSE_DELIM</c> and, when <c>UCharacter.GetCharFromExtendedName</c> resolves the name,
        /// replaces the whole delimited span with that code point.
        /// </summary>
        /// <param name="text">The text that is modified in place.</param>
        /// <param name="offsets">Range to process; <c>Start</c>, <c>Limit</c> and <c>ContextLimit</c> are updated on exit.</param>
        /// <param name="isIncremental">
        /// When <c>true</c> and an open-delimiter candidate is still pending at the end,
        /// <c>offsets.Start</c> is set to that candidate's position instead of the cursor.
        /// </param>
        protected override void HandleTransliterate(IReplaceable text,
                                                    Position offsets, bool isIncremental)
        {
            int maxLen = UCharacterName.Instance.MaxCharNameLength + 1; // allow for temporary trailing space

            // Accumulates the candidate name (whitespace runs collapsed to a single SPACE).
            StringBuffer name = new StringBuffer(maxLen);

            // Get the legal character set
            UnicodeSet legal = new UnicodeSet();

            UCharacterName.Instance.GetCharNameCharacters(legal);

            int cursor = offsets.Start;
            int limit  = offsets.Limit;

            // Modes:
            // 0 - looking for open delimiter
            // 1 - after open delimiter
            int mode    = 0;
            int openPos = -1; // open delim candidate pos

            int c;

            while (cursor < limit)
            {
                c = text.Char32At(cursor);

                switch (mode)
                {
                case 0:   // looking for open delimiter
                    if (c == OPEN_DELIM)
                    {     // quick check first
                        openPos = cursor;
                        // NOTE(review): presumably returns the index just past the matched pattern,
                        // or a negative value when there is no match — confirm against Utility.ParsePattern.
                        int i = Utility.ParsePattern(OPEN_PAT, text, cursor, limit);
                        if (i >= 0 && i < limit)
                        {
                            mode        = 1;
                            name.Length = 0;
                            cursor      = i;
                            continue;     // *** reprocess char32At(cursor)
                        }
                    }
                    break;

                case 1:     // after open delimiter
                            // Look for legal chars.  If \s+ is found, convert it
                            // to a single space.  If closeDelimiter is found, exit
                            // the loop.  If any other character is found, exit the
                            // loop.  If the limit is reached, exit the loop.

                    // Convert \s+ => SPACE.  This assumes there are no
                    // runs of >1 space characters in names.
                    if (PatternProps.IsWhiteSpace(c))
                    {
                        // Ignore leading whitespace
                        if (name.Length > 0 &&
                            name[name.Length - 1] != SPACE)
                        {
                            name.Append(SPACE);
                            // If we are too long then abort.  maxLen includes
                            // temporary trailing space, so use '>'.
                            if (name.Length > maxLen)
                            {
                                mode = 0;
                            }
                        }
                        break;
                    }

                    if (c == CLOSE_DELIM)
                    {
                        int len = name.Length;

                        // Delete trailing space, if any
                        if (len > 0 &&
                            name[len - 1] == SPACE)
                        {
                            name.Length = --len;
                        }

                        // c is reused for the looked-up code point; -1 means the name was not found.
                        c = UCharacter.GetCharFromExtendedName(name.ToString());
                        if (c != -1)
                        {
                            // Lookup succeeded

                            // assert(UTF16.getCharCount(CLOSE_DELIM) == 1);
                            cursor++;     // advance over CLOSE_DELIM

                            string str = UTF16.ValueOf(c);
                            text.Replace(openPos, cursor, str);

                            // Adjust indices for the change in the length of
                            // the string.  Do not assume that str.length() ==
                            // 1, in case of surrogates.
                            int delta = cursor - openPos - str.Length;
                            cursor -= delta;
                            limit  -= delta;
                            // assert(cursor == openPos + str.length());
                        }
                        // If the lookup failed, we leave things as-is and
                        // still switch to mode 0 and continue.
                        mode    = 0;
                        openPos = -1; // close off candidate
                        continue;     // *** reprocess char32At(cursor)
                    }

                    if (legal.Contains(c))
                    {
                        UTF16.Append(name, c);
                        // If we go past the longest possible name then abort.
                        // maxLen includes temporary trailing space, so use '>='.
                        if (name.Length >= maxLen)
                        {
                            mode = 0;
                        }
                    }

                    // Invalid character
                    else
                    {
                        // The decrement is partly or fully offset by the
                        // "cursor += GetCharCount(c)" after the switch.
                        --cursor;     // Backup and reprocess this character
                        mode = 0;
                    }

                    break;
                }

                // Advance past the code point just examined (skipped by the "continue" paths above).
                cursor += UTF16.GetCharCount(c);
            }

            // Publish the adjusted positions back to the caller.
            offsets.ContextLimit += limit - offsets.Limit;
            offsets.Limit         = limit;
            // In incremental mode, only advance the cursor up to the last
            // open delimiter candidate.
            offsets.Start = (isIncremental && openPos >= 0) ? openPos : cursor;
        }

Example #8

Show file

        /// <summary>
        /// Checks whether the decomposition of <paramref name="comp"/> can be found in
        /// <paramref name="segment"/> starting at <paramref name="segmentPos"/> (allowing for
        /// canonical rearrangement). If so, the leftover code points are collected and their
        /// equivalents returned.
        /// </summary>
        /// <param name="comp">Code point whose decomposition is matched; it is used as-is when it has no decomposition.</param>
        /// <param name="segment">The string being searched.</param>
        /// <param name="segmentPos">Offset in <paramref name="segment"/> at which matching starts.</param>
        /// <param name="buf">Working buffer shared among callees; it is cleared and then filled with the leftover code points.</param>
        /// <returns>
        /// <c>null</c> when the decomposition is not fully matched or the candidate is not canonically
        /// equivalent to the segment tail; <c>SET_WITH_NULL_STRING</c> when nothing is left over;
        /// otherwise the result of <c>GetEquivalents2</c> on the leftover string.
        /// </returns>
        private ISet <string> Extract(int comp, string segment, int segmentPos, StringBuffer buf)
        {
            if (PROGRESS)
            {
                Console.Out.WriteLine(" extract: " + Utility.Hex(UTF16.ValueOf(comp))
                                      + ", " + Utility.Hex(segment.Substring(segmentPos)));
            }

            // Fall back to the code point itself when it does not decompose.
            string decomp = nfcImpl.GetDecomposition(comp) ?? UTF16.ValueOf(comp);

            // Code point of the decomposition we are currently looking for, and the offset just past it.
            int expected  = UTF16.CharAt(decomp, 0);
            int decompPos = UTF16.GetCharCount(expected);

            buf.Length = 0; // initialize working buffer, shared among callees

            bool matchedAll = false;
            int  index      = segmentPos;
            while (index < segment.Length)
            {
                int current = UTF16.CharAt(segment, index);
                int step    = UTF16.GetCharCount(current);

                if (current != expected)
                {
                    if (PROGRESS)
                    {
                        Console.Out.WriteLine("  buffer: " + Utility.Hex(UTF16.ValueOf(current)));
                    }
                    // Brute force: anything that is not the expected code point goes to the remainder.
                    // TODO: optimize using the combining classes (they increase monotonically after
                    // a zero, so some mismatches could fail immediately).
                    UTF16.Append(buf, current);
                }
                else
                {
                    if (PROGRESS)
                    {
                        Console.Out.WriteLine("  matches: " + Utility.Hex(UTF16.ValueOf(current)));
                    }
                    if (decompPos == decomp.Length)
                    {
                        // Whole decomposition consumed: the rest of the segment is remainder too.
                        buf.Append(segment.Substring(index + step));
                        matchedAll = true;
                        break;
                    }
                    // Move on to the next code point of the decomposition.
                    expected   = UTF16.CharAt(decomp, decompPos);
                    decompPos += UTF16.GetCharCount(expected);
                }

                index += step;
            }

            if (!matchedAll)
            {
                return null; // we failed, characters left over
            }
            if (PROGRESS)
            {
                Console.Out.WriteLine("Matches");
            }
            if (buf.Length == 0)
            {
                return SET_WITH_NULL_STRING; // succeed, but no remainder
            }

            string remainder = buf.ToString();

            // Brute-force check that the composed candidate is canonically equivalent to the segment tail.
            string candidate   = UTF16.ValueOf(comp) + remainder;
            string segmentTail = segment.Substring(segmentPos);
            if (Normalizer.Compare(candidate, segmentTail, 0) != 0)
            {
                return null;
            }

            // get the remaining combinations
            return GetEquivalents2(remainder);
        }

Example #9

Show file

        /// <summary>
        /// Collects <paramref name="segment"/> together with every string produced by replacing a
        /// run starting at some code point with a code point from that position's canonical start
        /// set, when <c>Extract</c> reports a match.
        /// </summary>
        /// <param name="segment">The string to expand.</param>
        /// <returns>A set that always contains <paramref name="segment"/> itself plus any variants found.</returns>
        private ISet <string> GetEquivalents2(string segment)
        {
            if (PROGRESS)
            {
                Console.Out.WriteLine("Adding: " + Utility.Hex(segment));
            }

            ISet <string> equivalents = new HashSet <string>();
            equivalents.Add(segment);

            StringBuffer scratch     = new StringBuffer(); // working buffer handed to Extract
            UnicodeSet   canonStarts = new UnicodeSet();   // refilled for each code point

            int offset = 0;
            while (offset < segment.Length)
            {
                int cp = segment.CodePointAt(offset);

                // Only code points that start some decomposition are of interest.
                if (nfcImpl.GetCanonStartSet(cp, canonStarts))
                {
                    UnicodeSetIterator candidates = new UnicodeSetIterator(canonStarts);
                    while (candidates.Next())
                    {
                        int           composite = candidates.Codepoint;
                        ISet <string> tails     = Extract(composite, segment, offset, scratch);
                        if (tails == null)
                        {
                            continue;
                        }

                        // Matches found: combine the unchanged prefix, the composite and each tail.
                        string head = segment.Substring(0, offset) + UTF16.ValueOf(composite);
                        foreach (string tail in tails)
                        {
                            equivalents.Add(head + tail);
                        }
                    }
                }

                offset += Character.CharCount(cp);
            }

            return equivalents;
        }