Example #1
0
	    // ----------------------------------------------------------------------------------------
	    //
	    // Constructor.
	    //
	    // ----------------------------------------------------------------------------------------
	    // Creates a scanner bound to the rule builder `rb`: allocates the parse
	    // stacks, the set table and the table of constant Unicode sets, then
	    // builds the symbol table over rb.fRules.
	    internal RBBIRuleScanner(RBBIRuleBuilder rb) {
	        // Both stacks are sized from kStackSize, so it has to be set first.
	        kStackSize = 100;
	        fC = new RBBIRuleScanner.RBBIRuleChar ();
	        fStack = new short[kStackSize];
	        fNodeStack = new RBBINode[kStackSize];
	        fSetTable = new Hashtable();
	        fRuleSets = new UnicodeSet[10];
	        fRB = rb;
	        fLineNum = 1;
	
	        // Each kRuleSet_* id is shifted down by this amount to get its slot
	        // in fRuleSets (presumably the ids start at 128 -- confirm against
	        // RBBIRuleParseTable).
	        const int ruleSetIdOffset = 128;
	
	        // The constant Unicode sets used while scanning. They are rebuilt
	        // for every scanner instance; they could instead be static and
	        // shared between instances.
	        fRuleSets[IBM.ICU.Text.RBBIRuleParseTable.kRuleSet_rule_char - ruleSetIdOffset] =
	                new UnicodeSet(gRuleSet_rule_char_pattern);
	        fRuleSets[IBM.ICU.Text.RBBIRuleParseTable.kRuleSet_white_space - ruleSetIdOffset] =
	                new UnicodeSet(gRuleSet_white_space_pattern);
	        fRuleSets[IBM.ICU.Text.RBBIRuleParseTable.kRuleSet_name_char - ruleSetIdOffset] =
	                new UnicodeSet(gRuleSet_name_char_pattern);
	        fRuleSets[IBM.ICU.Text.RBBIRuleParseTable.kRuleSet_name_start_char - ruleSetIdOffset] =
	                new UnicodeSet(gRuleSet_name_start_char_pattern);
	        fRuleSets[IBM.ICU.Text.RBBIRuleParseTable.kRuleSet_digit_char - ruleSetIdOffset] =
	                new UnicodeSet(gRuleSet_digit_char_pattern);
	
	        // The symbol table gets a reference back to this scanner plus the
	        // rule source held by the builder.
	        fSymbolTable = new RBBISymbolTable(this, rb.fRules);
	    }
Example #2
0
	    // ---------------------------------------------------------------------------------
	    //
	    // nextChar for rules scanning. At this level, we handle stripping
	    // out comments and processing backslash character escapes.
	    // The rest of the rules grammar is handled at the next level up.
	    //
	    // ---------------------------------------------------------------------------------
	    // Reads the next logical character of the rule source into `c`.
	    //
	    //   c - receives the character in c.fChar and an "escaped" flag in
	    //       c.fEscaped.
	    //
	    // On top of the raw reader NextCharLL() this handles, in order:
	    //   1. the '' sequence (an escaped single quote, in any context),
	    //   2. a lone ' which toggles quote mode and is reported as '(' or ')',
	    //   3. '#' comments (outside quote mode only),
	    //   4. backslash escapes (outside quote mode only).
	    internal void NextChar(RBBIRuleScanner.RBBIRuleChar  c) {
	        // Remember where this character started before advancing.
	        fScanIndex = fNextIndex;
	        c.fChar = NextCharLL();
	        c.fEscaped = false;
	
	        if (c.fChar == '\'') {
	            bool isDoubledQuote =
	                    IBM.ICU.Text.UTF16.CharAt(fRB.fRules, fNextIndex) == '\'';
	            if (!isDoubledQuote) {
	                // A single quote on its own flips quote mode. Quoted text
	                // is grouped, so the caller sees an opening paren when the
	                // mode is entered and a closing paren when it is left.
	                // The returned paren is never marked as escaped.
	                fQuoteMode = !fQuoteMode;
	                c.fChar = fQuoteMode ? '(' : ')';
	                c.fEscaped = false;
	                return;
	            }
	
	            // '' was seen: consume the second quote through NextCharLL()
	            // so the position counters stay correct, and report it as an
	            // escaped character.
	            c.fChar = NextCharLL();
	            c.fEscaped = true;
	        }
	
	        if (fQuoteMode) {
	            // Everything inside a quoted region counts as escaped; no
	            // comment or backslash processing happens there.
	            c.fEscaped = true;
	            return;
	        }
	
	        // From here on we are outside any quoted region.
	        if (c.fChar == '#') {
	            // Comment: skip forward until end of input (-1) or a line
	            // terminator. The terminator itself is what gets returned, so
	            // it can act as white space and keep the tokens on either side
	            // of the comment from running together.
	            bool atCommentEnd;
	            do {
	                c.fChar = NextCharLL();
	                atCommentEnd = c.fChar == -1
	                        || c.fChar == '\r' || c.fChar == '\n'
	                        || c.fChar == chNEL || c.fChar == chLS;
	            } while (!atCommentEnd);
	        }
	
	        if (c.fChar == -1) {
	            // End of input.
	            return;
	        }
	
	        if (c.fChar != '\\') {
	            // Ordinary character, nothing more to do.
	            return;
	        }
	
	        // Backslash escape: let Utility.UnescapeAt() decode it. The
	        // one-element array carries the position in and the position just
	        // past the escape back out.
	        c.fEscaped = true;
	        int[] escapeCursor = { fNextIndex };
	        c.fChar = IBM.ICU.Impl.Utility.UnescapeAt(fRB.fRules, escapeCursor);
	        if (escapeCursor[0] == fNextIndex) {
	            // The cursor did not move, so the escape could not be decoded.
	            Error(IBM.ICU.Text.RBBIRuleBuilder.U_BRK_HEX_DIGITS_EXPECTED);
	        }
	
	        // Account for the characters the escape consumed.
	        fCharNum += escapeCursor[0] - fNextIndex;
	        fNextIndex = escapeCursor[0];
	    }