Esempio n. 1
0
	    // ---------------------------------------------------------------------------------
	    //
	    // scanSet: Build a UnicodeSet from the rule text that begins at the current
	    // scan position, then move the scan position to the first character
	    // following the set.
	    //
	    // A new RBBI setRef node that refers to the set is pushed onto the node
	    // stack.
	    //
	    // Rule parsing normally drives the scan position through its state
	    // machine. UnicodeSet patterns are the exception: they are parsed by the
	    // UnicodeSet constructor rather than by the RBBI rule parser.
	    //
	    // ---------------------------------------------------------------------------------
	    internal void ScanSet() {
	        int startPos = fScanIndex;
	        ILOG.J2CsMapping.Text.ParsePosition pos = new ILOG.J2CsMapping.Text.ParsePosition(startPos);

	        UnicodeSet uset = null;
	        try {
	            uset = new UnicodeSet(fRB.fRules, pos, fSymbolTable,
	                    IBM.ICU.Text.UnicodeSet.IGNORE_SPACE);
	        } catch (Exception) { // TODO: catch fewer exception types.
	            // Any UnicodeSet failure is reported through the rule builder's
	            // own error channel so that it carries location info.
	            Error(IBM.ICU.Text.RBBIRuleBuilder.U_BRK_MALFORMED_SET);
	        }

	        // An empty set is reported as an error: it is almost certainly not
	        // what the rule author meant, and rejecting it spares the later tree
	        // manipulation code from handling that corner case.
	        // TODO: this shouldn't be an error; it does happen.
	        if (uset.IsEmpty()) {
	            Error(IBM.ICU.Text.RBBIRuleBuilder.U_BRK_RULE_EMPTY_SET);
	        }

	        // Step over the UnicodeSet pattern one character at a time instead of
	        // assigning fScanIndex directly; that keeps the line/char positions
	        // used for error reporting in sync.
	        int setEnd = pos.GetIndex();
	        while (fNextIndex < setEnd) {
	            NextCharLL();
	        }

	        RBBINode n = PushNewNode(IBM.ICU.Text.RBBINode.setRef);
	        n.fFirstPos = startPos;
	        n.fLastPos = fNextIndex;
	        n.fText = fRB.fRules.Substring(n.fFirstPos, n.fLastPos - n.fFirstPos);

	        // FindSetFor() does several jobs here:
	        // - takes over storage of the UnicodeSet;
	        // - maintains the collection of all sets in use, needed later to
	        //   establish character categories for the run time engine;
	        // - eliminates multiple instances of the same set;
	        // - creates a new uset node if this set is not a duplicate.
	        FindSetFor(n.fText, n, uset);
	    }
 /// <summary>
 /// Creates an iterator over <paramref name="text_0"/> that begins at the
 /// index held by <paramref name="pos_2"/>.
 /// </summary>
 ///
 /// <param name="text_0">the text to be iterated</param>
 /// <param name="sym_1">the symbol table, or null if there is none. If sym is null, then variables will not be dereferenced, even if the PARSE_VARIABLES option is set.</param>
 /// <param name="pos_2">upon input, the index of the next character to return. If a variable has been dereferenced, then pos will <em>not</em> increment as characters of the variable value are iterated.</param>
 /// <exception cref="ArgumentException">if the text is null or the start index lies beyond the end of the text</exception>
 public RuleCharacterIterator(String text_0, SymbolTable sym_1, ILOG.J2CsMapping.Text.ParsePosition pos_2)
 {
     // Reject a missing text first so the length check below is safe.
     if (text_0 == null)
     {
         throw new ArgumentException();
     }
     if (pos_2.GetIndex() > text_0.Length)
     {
         throw new ArgumentException();
     }

     this.buf  = null;
     this.text = text_0;
     this.sym  = sym_1;
     this.pos  = pos_2;
 }
Esempio n. 3
0
            /*
             * (non-Javadoc)
             *
             * @see com.ibm.icu.text.SymbolTable#parseReference(java.lang.String,
             * java.text.ParsePosition, int)
             *
             * Consumes the run of Unicode identifier-part code points that starts
             * at pos and ends before limit, moves pos past that run, and returns
             * the consumed text (empty when nothing qualifies).
             */
            public virtual String ParseReference(String text, ILOG.J2CsMapping.Text.ParsePosition pos, int limit)
            {
                int begin = pos.GetIndex();
                int end   = begin;

                while (end < limit)
                {
                    int codePoint = IBM.ICU.Text.UTF16.CharAt(text, end);
                    if (!IBM.ICU.Lang.UCharacter.IsUnicodeIdentifierPart(codePoint))
                    {
                        break;
                    }
                    // Step by one or two UTF-16 units depending on the code point.
                    end += IBM.ICU.Text.UTF16.GetCharCount(codePoint);
                }

                pos.SetIndex(end);
                return text.Substring(begin, end - begin);
            }
Esempio n. 4
0
        // Parses "1.1.9" with a Japanese-calendar "y.M.d" pattern and checks that
        // the resulting date reports year 1 of the calendar's CURRENT_ERA.
        public void Test3860()
        {
            ULocale japanese = new ULocale("ja_JP@calendar=japanese");

            IBM.ICU.Util.Calendar cal = new IBM.ICU.Util.JapaneseCalendar(japanese);
            DateFormat englishFormat  = cal.GetDateTimeFormat(0, 0,
                                            new ULocale("en_JP@calendar=japanese"));
            DateFormat format         = cal.GetDateTimeFormat(0, 0, japanese);

            // Note: just 'y' doesn't work here.
            ((SimpleDateFormat)format).ApplyPattern("y.M.d");

            // The 9th of January is after the start of the heisei accession.
            // Jan 1, 1H wouldn't work because it is actually showa 64.
            ILOG.J2CsMapping.Text.ParsePosition pos = new ILOG.J2CsMapping.Text.ParsePosition(0);
            DateTime aDate = format.Parse("1.1.9", pos);
            String inEn = englishFormat.Format(aDate);

            cal.Clear();
            cal.SetTime(aDate);
            int gotYear = cal.Get(IBM.ICU.Util.Calendar.YEAR);
            int gotEra  = cal.Get(IBM.ICU.Util.Calendar.ERA);

            int expectYear = 1;
            int expectEra  = IBM.ICU.Util.JapaneseCalendar.CURRENT_ERA;

            bool asExpected = (gotYear == expectYear) && (gotEra == expectEra);
            if (asExpected)
            {
                Logln("Got year " + gotYear + " and era " + gotEra + ", == " + inEn);
            }
            else
            {
                Errln("Expected year " + expectYear + ", era " + expectEra
                      + ", but got year " + gotYear + " and era " + gotEra
                      + ", == " + inEn);
            }
        }
Esempio n. 5
0
        //
        // RBBISymbolTable::parseReference
        // Part of the abstract symbol table interface; used by the UnicodeSet
        // parser. Scans the source text for a $variable name without looking
        // the name up. The first code point must be an identifier start and
        // every code point must be an identifier part.
        //
        // Returns the scanned name and advances pos past it, or returns the
        // empty string and leaves pos untouched when no name is found.
        //
        public virtual String ParseReference(String text, ParsePosition pos, int limit)
        {
            int first  = pos.GetIndex();
            int cursor = first;

            while (cursor < limit)
            {
                int cp = IBM.ICU.Text.UTF16.CharAt(text, cursor);
                bool nameChar =
                    (cursor != first || IBM.ICU.Lang.UCharacter.IsUnicodeIdentifierStart(cp)) &&
                    IBM.ICU.Lang.UCharacter.IsUnicodeIdentifierPart(cp);
                if (!nameChar)
                {
                    break;
                }
                cursor += IBM.ICU.Text.UTF16.GetCharCount(cp);
            }

            if (cursor == first)
            {
                // No valid name chars: signal failure with an empty string.
                return "";
            }

            pos.SetIndex(cursor);
            return text.Substring(first, cursor - first);
        }
Esempio n. 6
0
        /// <exclude/>
        /// <summary>
        /// Attempt to parse the given string as a currency, either as a display name
        /// in the given locale, or as a 3-letter ISO 4217 code. If multiple display
        /// names match, then the longest one is selected. If both a display name and
        /// a 3-letter ISO code match, then the display name is preferred, unless
        /// its length is less than 3.
        /// </summary>
        ///
        /// <param name="locale">the locale of the display names to match</param>
        /// <param name="text">the text to parse</param>
        /// <param name="pos">input-output position; on input, the position within text to match; must have 0 &lt;= pos.GetIndex() &lt; text.Length; on output, the position after the last matched character. If the parse fails, the position is unchanged upon output.</param>
        /// <returns>the ISO 4217 code, as a string, of the best match, or null if
        /// there is no match</returns>
        public static String Parse(ULocale locale, String text, ILOG.J2CsMapping.Text.ParsePosition pos)
        {
            // TODO: There is a slight problem with the pseudo-multi-level
            // fallback implemented here. More-specific locales don't
            // properly shield duplicate entries in less-specific locales.
            // This problem will go away when real multi-level fallback is
            // implemented. We could also fix this by recording (in a
            // hash) which codes are used at each level of fallback, but
            // this doesn't seem warranted.

            int    start    = pos.GetIndex();
            String fragment = text.Substring(start);

            String iso = null;
            int    max = 0;   // length, in UTF-16 units, of the best match so far

            // Look up the Currencies resource for the given locale. The
            // Currencies locale data looks like this:
            // |en {
            // | Currencies {
            // | USD { "US$", "US Dollar" }
            // | CHF { "Sw F", "Swiss Franc" }
            // | INR { "=0#Rs|1#Re|1<Rs", "=0#Rupees|1#Rupee|1<Rupees" }
            // | //...
            // | }
            // |}

            // In the future, resource bundles may implement multi-level
            // fallback. That is, if a currency is not found in the en_US
            // Currencies data, then the en Currencies data will be searched.
            // Currently, if a Currencies datum exists in en_US and en, the
            // en_US entry hides that in en.

            // We want multi-level fallback for this resource, so we implement
            // it manually.

            // Multi-level resource inheritance fallback loop
            while (locale != null)
            {
                UResourceBundle rb = IBM.ICU.Util.UResourceBundle.GetBundleInstance(
                    IBM.ICU.Impl.ICUResourceBundle.ICU_BASE_NAME, locale);
                // We can't cast this to String[][]; the cast has to happen later

                try
                {
                    UResourceBundle currencies = rb.Get("Currencies");
                    // Do a linear search
                    for (int i = 0; i < currencies.GetSize(); ++i)
                    {
                        UResourceBundle item = currencies.Get(i);
                        String          name = item.GetString(0);
                        if (name.Length < 1)
                        {
                            // Ignore zero-length names -- later, change this
                            // when zero-length is used to mean something.
                            continue;
                        }

                        if (name[0] == '=')
                        {
                            // A single leading '=' marks a ChoiceFormat pattern;
                            // a doubled "==" is an escaped literal '=' and is
                            // matched as plain text below (after dropping one '=').
                            name = name.Substring(1);
                            if (name.Length > 0 && name[0] != '=')
                            {
                                ChoiceFormat choice = new ChoiceFormat(name);
                                choice.Parse(text, pos);
                                int len = pos.GetIndex() - start;
                                if (len > max)
                                {
                                    iso = item.GetKey();
                                    max = len;
                                }
                                // Rewind so the next candidate starts from the
                                // same place.
                                pos.SetIndex(start);
                                continue;
                            }
                        }

                        // Ordinal comparison is required here: max is used as a
                        // count of UTF-16 units to advance pos by, and a
                        // culture-sensitive StartsWith can succeed on a different
                        // number of units than name.Length.
                        if (name.Length > max && fragment.StartsWith(name, StringComparison.Ordinal))
                        {
                            iso = item.GetKey();
                            max = name.Length;
                        }
                    }
                }
                catch (MissingManifestResourceException)
                {
                    // A resource lookup failed at this fallback level. This is
                    // deliberately ignored so the search continues with the
                    // next fallback locale.
                }

                locale = locale.GetFallback();
            }

            /*
             * Outline of the algorithm above:
             * 1. Look at the Currencies array from the locale.
             *    1a. Iterate through it, and check each row to see if row[1] matches.
             *        1a1. If row[1] is a pattern, use ChoiceFormat to attempt a parse.
             *    1b. Upon a match, return the ISO code stored at row[0].
             * 2. If there is no match, fall back to "en" and try again.
             * 3. If there is no match, fall back to root and try again.
             * 4. If still no match, parse 3-letter ISO {this code is probably
             *    unchanged}.
             */
            // If display name parse fails or if it matches fewer than 3
            // characters, try to parse 3-letter ISO. Do this after the
            // display name processing so 3-letter display names are
            // preferred. Consider /[A-Z]{3}/ to be valid ISO, and parse
            // it manually--UnicodeSet/regex are too slow and heavy.
            if (max < 3 && (text.Length - start) >= 3)
            {
                bool valid = true;
                for (int k = 0; k < 3; ++k)
                {
                    char ch = text[start + k];     // 16-bit ok
                    if (ch < 'A' || ch > 'Z')
                    {
                        valid = false;
                        break;
                    }
                }
                if (valid)
                {
                    iso = text.Substring(start, 3);
                    max = 3;
                }
            }

            // With no match max is still 0, so pos is left where it started.
            pos.SetIndex(start + max);
            return(iso);
        }
        // ----------------------------------------------------------------
        // Private implementation
        // ----------------------------------------------------------------

        /// <summary>
        /// Splits an ID into its component pieces. Accepted forms are T, T/V, S-T,
        /// S-T/V, and S/V-T. A missing source is reported as ANY.
        /// </summary>
        ///
        /// <param name="id">the id string, in any of several forms</param>
        /// <param name="pos">INPUT-OUTPUT parameter. On input, pos[0] is the offset of the first character to parse in id. On output, pos[0] is the offset after the last parsed character. If no source or target was found, pos[0] is restored to its input value.</param>
        /// <param name="allowFilter">if true, a UnicodeSet pattern is allowed at any location between specs or delimiters, and is returned as the filter of the Specs object.</param>
        /// <returns>a Specs object, or null if neither source nor target was seen
        /// in the parsed id. If allowFilter is true, the parsed filter pattern
        /// (if any) is returned in the Specs object; otherwise the returned
        /// filter reference is null.</returns>
        private static TransliteratorIDParser.Specs ParseFilterID(String id, int[] pos, bool allowFilter)
        {
            int entryOffset = pos[0];

            String bareSpec    = null;       // spec seen with no delimiter in front of it
            String sourceSpec  = null;
            String targetSpec  = null;
            String variantSpec = null;
            String filterText  = null;
            char   pendingSep  = (char)(0);  // delimiter consumed but not yet followed by a spec
            int    specsSeen   = 0;

            // Each pass consumes exactly one of: a filter, a delimiter
            // character ('-' or '/'), or a spec (source, target, or variant).
            while (true)
            {
                IBM.ICU.Impl.Utility.SkipWhitespace(id, pos);
                if (pos[0] == id.Length)
                {
                    break;
                }

                // Filter: only the first one is accepted, and only when allowed.
                if (allowFilter && filterText == null &&
                    IBM.ICU.Text.UnicodeSet.ResemblesPattern(id, pos[0]))
                {
                    int filterStart = pos[0];
                    ILOG.J2CsMapping.Text.ParsePosition setPos = new ILOG.J2CsMapping.Text.ParsePosition(filterStart);
                    // The set is constructed only to learn where the pattern ends.
                    new UnicodeSet(id, setPos, null);
                    int filterEnd = setPos.GetIndex();
                    filterText = id.Substring(filterStart, filterEnd - filterStart);
                    pos[0]     = filterEnd;
                    continue;
                }

                if (pendingSep == 0)
                {
                    char c = id[pos[0]];
                    bool opensTarget  = (c == TARGET_SEP && targetSpec == null);
                    bool opensVariant = (c == VARIANT_SEP && variantSpec == null);
                    if (opensTarget || opensVariant)
                    {
                        pendingSep = c;
                        ++pos[0];
                        continue;
                    }

                    // A spec without a leading delimiter is only legal as the
                    // very first spec; anything later ends the parse.
                    if (specsSeen > 0)
                    {
                        break;
                    }
                }

                String spec = IBM.ICU.Impl.Utility.ParseUnicodeIdentifier(id, pos);
                if (spec == null)
                {
                    // A trailing delimiter has already been consumed at this
                    // point, so Foo-, Foo/, Foo-Bar/, and Foo/Bar- are legal.
                    break;
                }

                if (pendingSep == 0)
                {
                    bareSpec = spec;
                }
                else if (pendingSep == TARGET_SEP)
                {
                    targetSpec = spec;
                }
                else if (pendingSep == VARIANT_SEP)
                {
                    variantSpec = spec;
                }
                ++specsSeen;
                pendingSep = (char)(0);
            }

            // The undelimited spec is the source when an explicit "-target"
            // was also seen; otherwise it is the target.
            if (bareSpec != null)
            {
                if (targetSpec != null)
                {
                    sourceSpec = bareSpec;
                }
                else
                {
                    targetSpec = bareSpec;
                }
            }

            // Must have either source or target
            if (sourceSpec == null && targetSpec == null)
            {
                pos[0] = entryOffset;
                return null;
            }

            // Empty source or target defaults to ANY
            bool sawSource = (sourceSpec != null);
            if (!sawSource)
            {
                sourceSpec = ANY;
            }
            if (targetSpec == null)
            {
                targetSpec = ANY;
            }

            return new TransliteratorIDParser.Specs(sourceSpec, targetSpec, variantSpec, sawSource, filterText);
        }
        /// <summary>
        /// Parses a global filter written as "[f]" or "([f])", as selected by
        /// 'withParens'.
        /// </summary>
        ///
        /// <param name="id">the pattern to parse</param>
        /// <param name="pos">INPUT-OUTPUT parameter. On input, the position of the first character to parse. On output, the position after the last character parsed.</param>
        /// <param name="dir">the direction.</param>
        /// <param name="withParens">INPUT-OUTPUT parameter. On entry, if withParens[0] is 0, then parens are disallowed. If it is 1, then parens are required. If it is -1, then parens are optional, and the return result will be set to 0 or 1.</param>
        /// <param name="canonID_0">OUTPUT parameter. The pattern for the filter added to the canonID, either at the end, if dir is FORWARD, or at the start, if dir is REVERSE. The pattern will be enclosed in parentheses if appropriate, and will be suffixed with an ID_DELIM character. May be null.</param>
        /// <returns>a UnicodeSet object or null. A non-null result indicates a
        /// successful parse, regardless of whether the filter applies to the
        /// given direction. The caller should discard it if withParens !=
        /// (dir == REVERSE).</returns>
        public static UnicodeSet ParseGlobalFilter(String id, int[] pos, int dir,
                                                   int[] withParens, StringBuilder canonID_0)
        {
            int entryOffset = pos[0];

            // Resolve the opening paren: optional (-1) records what was found,
            // required (1) fails the parse when it is absent.
            if (withParens[0] == -1)
            {
                bool sawOpen = IBM.ICU.Impl.Utility.ParseChar(id, pos, OPEN_REV);
                withParens[0] = sawOpen ? 1 : 0;
            }
            else if (withParens[0] == 1 && !IBM.ICU.Impl.Utility.ParseChar(id, pos, OPEN_REV))
            {
                pos[0] = entryOffset;
                return null;
            }

            IBM.ICU.Impl.Utility.SkipWhitespace(id, pos);

            if (!IBM.ICU.Text.UnicodeSet.ResemblesPattern(id, pos[0]))
            {
                // Nothing that looks like a set pattern here: no filter.
                return null;
            }

            int patternStart = pos[0];
            ILOG.J2CsMapping.Text.ParsePosition setPos = new ILOG.J2CsMapping.Text.ParsePosition(patternStart);
            UnicodeSet parsedFilter;
            try
            {
                parsedFilter = new UnicodeSet(id, setPos, null);
            }
            catch (ArgumentException)
            {
                pos[0] = entryOffset;
                return null;
            }

            int patternEnd = setPos.GetIndex();
            String pattern = id.Substring(patternStart, patternEnd - patternStart);
            pos[0] = patternEnd;

            if (withParens[0] == 1 && !IBM.ICU.Impl.Utility.ParseChar(id, pos, CLOSE_REV))
            {
                pos[0] = entryOffset;
                return null;
            }

            // In the forward direction, append the pattern to the canonID.
            // In the reverse, insert it at zero, and invert the presence of
            // parens ("A" <-> "(A)").
            if (canonID_0 != null)
            {
                bool forward       = (dir == FORWARD);
                int  wrapWhenEqual = forward ? 1 : 0;
                if (withParens[0] == wrapWhenEqual)
                {
                    pattern = OPEN_REV.ToString() + pattern
                              + CLOSE_REV;
                }

                if (forward)
                {
                    canonID_0.Append(pattern + ID_DELIM);
                }
                else
                {
                    canonID_0.Insert(0, pattern + ID_DELIM);
                }
            }

            return parsedFilter;
        }
Esempio n. 9
0
        /*
         * public int next2() { boolean backedupBefore = backedup; int result =
         * next(); System.out.println(toString(result, backedupBefore)); return
         * result; }
         */

        /// <summary>
        /// Scans the next token from <c>source</c> starting at <c>index</c> and
        /// returns its token code, which is also remembered in <c>lastValue</c>.
        /// </summary>
        /// <remarks>
        /// If <c>backedup</c> is set, no scanning happens: the flag is cleared,
        /// <c>index</c> is restored from <c>nextIndex</c>, and the previous
        /// <c>lastValue</c> is returned again.
        /// Otherwise whitespace and '#' comments (which run until a code point in
        /// NEWLINE) are skipped, and one token is scanned.
        /// </remarks>
        /// <returns>
        /// DONE when the input is exhausted before a token starts;
        /// UNICODESET when the token starts with '[' (the set is left in <c>unicodeSet</c>);
        /// the code point itself when it is contained in <c>syntax</c>;
        /// NUMBER for a run of decimal digits (value left in <c>number</c>);
        /// otherwise STRING (text left in <c>buffer</c>), or UNTERMINATED_QUOTE
        /// when scanning stops with <c>status</c> greater than IN_STRING.
        /// </returns>
        public int Next()
        {
            if (backedup)
            {
                backedup = false;
                index    = nextIndex;
                return(lastValue);
            }
            int  cp        = 0;
            bool inComment = false;

            // clean off any leading whitespace or comments
            while (true)
            {
                if (index >= source.Length)
                {
                    return(lastValue = DONE);
                }
                // NOTE(review): NextChar() presumably returns the code point at
                // index and advances index past it -- confirm against its definition.
                cp = NextChar();
                if (inComment)
                {
                    // A comment ends at the first code point contained in NEWLINE.
                    if (NEWLINE.Contains(cp))
                    {
                        inComment = false;
                    }
                }
                else
                {
                    if (cp == '#')
                    {
                        inComment = true;
                    }
                    else if (!whiteSpace.Contains(cp))
                    {
                        // First significant code point of the token.
                        break;
                    }
                }
            }
            // record the last index in case we have to backup
            lastIndex = index;

            if (cp == '[')
            {
                // Hand the whole set pattern to the UnicodeSet parser. The parse
                // starts at index - 1, i.e. one UTF-16 unit before the current index.
                ILOG.J2CsMapping.Text.ParsePosition pos = new ILOG.J2CsMapping.Text.ParsePosition(index - 1);
                unicodeSet = new UnicodeSet(source, pos, symbolTable);
                index      = pos.GetIndex();
                return(lastValue = UNICODESET);
            }
            // get syntax character
            if (syntax.Contains(cp))
            {
                return(lastValue = cp);
            }

            // get number, if there is one
            if (IBM.ICU.Lang.UCharacter.GetType(cp) == ILOG.J2CsMapping.Util.Character.DECIMAL_DIGIT_NUMBER)
            {
                // Accumulate the decimal value digit by digit; there is no
                // overflow check on number here.
                number = IBM.ICU.Lang.UCharacter.GetNumericValue(cp);
                while (index < source.Length)
                {
                    cp = NextChar();
                    if (IBM.ICU.Lang.UCharacter.GetType(cp) != ILOG.J2CsMapping.Util.Character.DECIMAL_DIGIT_NUMBER)
                    {
                        index -= IBM.ICU.Text.UTF16.GetCharCount(cp);     // BACKUP!
                        break;
                    }
                    number *= 10;
                    number += IBM.ICU.Lang.UCharacter.GetNumericValue(cp);
                }
                return(lastValue = NUMBER);
            }
            // Anything else is scanned as a string token, collected in buffer
            // by the small state machine below.
            buffer.Length = 0;
            int status = IN_STRING;

            // Note: no goto in this method targets the "main" label; the exit
            // jump used below is "gotomain", declared after this block.
            main : {
                while (true)
                {
                    switch (status)
                    {
                    case AFTER_QUOTE :    // check for double ''?
                        if (cp == QUOTE)
                        {
                            // A second quote right after a closing quote is a
                            // literal quote; stay inside the quoted section.
                            IBM.ICU.Text.UTF16.Append(buffer, QUOTE);
                            status = IN_QUOTE;
                            break;
                        }
                        // The block below repeats the IN_STRING handling (it
                        // appears to emulate a fall-through into that case).
                        // Its first test cannot succeed because cp == QUOTE was
                        // handled above, and status stays AFTER_QUOTE when the
                        // code point is simply appended.
                        {
                            if (cp == QUOTE)
                            {
                                status = IN_QUOTE;
                            }
                            else if (cp == BSLASH)
                            {
                                status = AFTER_BSLASH;
                            }
                            else if (non_string.Contains(cp))
                            {
                                index -= IBM.ICU.Text.UTF16.GetCharCount(cp);
                                goto gotomain;
                            }
                            else
                            {
                                IBM.ICU.Text.UTF16.Append(buffer, cp);
                            }
                            break;
                        }
                        // Not reached: the block above always ends in break or goto.
                        break;

                    // Plain (unquoted) string text.
                    case IN_STRING:
                        if (cp == QUOTE)
                        {
                            status = IN_QUOTE;
                        }
                        else if (cp == BSLASH)
                        {
                            status = AFTER_BSLASH;
                        }
                        else if (non_string.Contains(cp))
                        {
                            // The string ends before this code point; un-read it.
                            index -= IBM.ICU.Text.UTF16.GetCharCount(cp);     // BACKUP!
                            goto gotomain;
                        }
                        else
                        {
                            IBM.ICU.Text.UTF16.Append(buffer, cp);
                        }
                        break;

                    // Inside quotes every code point except QUOTE is taken literally.
                    case IN_QUOTE:
                        if (cp == QUOTE)
                        {
                            status = AFTER_QUOTE;
                        }
                        else
                        {
                            IBM.ICU.Text.UTF16.Append(buffer, cp);
                        }
                        break;

                    // After a backslash: n, r and t map to newline, carriage
                    // return and tab; any other code point is appended as-is.
                    case AFTER_BSLASH:
                        switch (cp)
                        {
                        case 'n':
                            cp = '\n';
                            break;

                        case 'r':
                            cp = '\r';
                            break;

                        case 't':
                            cp = '\t';
                            break;
                        }
                        IBM.ICU.Text.UTF16.Append(buffer, cp);
                        status = IN_STRING;
                        break;

                    default:
                        throw new ArgumentException("Internal Error");
                    }
                    // Stop at end of input; otherwise fetch the next code point
                    // for the next pass through the state machine.
                    if (index >= source.Length)
                    {
                        break;
                    }
                    cp = NextChar();
                }
            }
gotomain:
            ;
            // NOTE(review): this relies on the numeric ordering of the state
            // constants; presumably the quote/backslash states are numbered
            // above IN_STRING -- confirm against their definitions.
            if (status > IN_STRING)
            {
                return(lastValue = UNTERMINATED_QUOTE);
            }
            return(lastValue = STRING);
        }
Esempio n. 10
0
 /// <summary>
 /// DurationFormat cannot parse, by default. This method always throws a
 /// <see cref="NotSupportedException"/>.
 /// </summary>
 ///
 /// <param name="source">the text to parse; not used by this implementation</param>
 /// <param name="pos">the parse position; not used by this implementation</param>
 /// <exception cref="NotSupportedException">always thrown</exception>
 /// @draft ICU 3.8
 /// @provisional This API might change or be removed in a future release.
 public override Object ParseObject(String source, ILOG.J2CsMapping.Text.ParsePosition pos)
 {
     throw new NotSupportedException();
 }
Esempio n. 11
0
 /*
  * (non-Javadoc)
  *
  * @see com.ibm.icu.text.DateFormat#parse(java.lang.String,
  * com.ibm.icu.util.Calendar, java.text.ParsePosition)
  *
  * Parsing is not implemented in this override: every call throws
  * NotSupportedException and none of the arguments are read.
  */
 public override void Parse(String text, Calendar cal, ILOG.J2CsMapping.Text.ParsePosition pos)
 {
     throw new NotSupportedException(
               "Relative Date parse is not implemented yet");
 }