Пример #1
0
        public void TestGetTitleInstance()
        {
            BreakIterator bi = BreakIterator.GetTitleInstance(new CultureInfo("en-CA"));

            TestFmwk.assertNotEquals("Title instance break iterator not correctly instantiated", bi.First(), null);
            bi.SetText("Here is some Text");
            TestFmwk.assertEquals("Title instance break iterator not correctly instantiated", bi.First(), 0);
        }
Пример #2
0
 public void Init()
 {
     characterBreak = BreakIterator.GetCharacterInstance();
     wordBreak      = BreakIterator.GetWordInstance();
     lineBreak      = BreakIterator.GetLineInstance();
     //Logln("Creating sentence iterator...");
     sentenceBreak = BreakIterator.GetSentenceInstance();
     //Logln("Finished creating sentence iterator...");
     titleBreak = BreakIterator.GetTitleInstance();
 }
Пример #3
0
        public void TestExtended()
        {
            TestParams tp = new TestParams();


            //
            //  Open and read the test data file.
            //
            StringBuilder testFileBuf = new StringBuilder();
            Stream        @is         = null;

            try
            {
                @is = typeof(RBBITestExtended).GetTypeInfo().Assembly.GetManifestResourceStream("ICU4N.Dev.Test.Rbbi.rbbitst.txt");
                if (@is == null)
                {
                    Errln("Could not open test data file rbbitst.txt");
                    return;
                }
                StreamReader isr = new StreamReader(@is, Encoding.UTF8);
                try
                {
                    int c;
                    int count = 0;
                    for (; ;)
                    {
                        c = isr.Read();
                        if (c < 0)
                        {
                            break;
                        }
                        count++;
                        if (c == 0xFEFF && count == 1)
                        {
                            // BOM in the test data file. Discard it.
                            continue;
                        }

                        testFileBuf.AppendCodePoint(c);
                    }
                }
                finally
                {
                    isr.Dispose();
                }
            }
            catch (IOException e)
            {
                Errln(e.ToString());
                try
                {
                    @is.Dispose();
                }
                catch (IOException ignored)
                {
                }
                return;
            }

            String testString = testFileBuf.ToString();


            const int PARSE_COMMENT = 1;
            const int PARSE_TAG     = 2;
            const int PARSE_DATA    = 3;
            const int PARSE_NUM     = 4;
            const int PARSE_RULES   = 5;

            int parseState = PARSE_TAG;

            int savedState = PARSE_TAG;

            int lineNum  = 1;
            int colStart = 0;
            int column   = 0;
            int charIdx  = 0;
            int i;

            int tagValue = 0;                                   // The numeric value of a <nnn> tag.

            StringBuilder rules          = new StringBuilder(); // Holds rules from a <rules> ... </rules> block
            int           rulesFirstLine = 0;                   // Line number of the start of current <rules> block

            int len = testString.Length;

            for (charIdx = 0; charIdx < len;)
            {
                int c = testString.CodePointAt(charIdx);
                charIdx++;
                if (c == '\r' && charIdx < len && testString[charIdx] == '\n')
                {
                    // treat CRLF as a unit
                    c = '\n';
                    charIdx++;
                }
                if (c == '\n' || c == '\r')
                {
                    lineNum++;
                    colStart = charIdx;
                }
                column = charIdx - colStart + 1;

                switch (parseState)
                {
                case PARSE_COMMENT:
                    if (c == 0x0a || c == 0x0d)
                    {
                        parseState = savedState;
                    }
                    break;

                case PARSE_TAG:
                {
                    if (c == '#')
                    {
                        parseState = PARSE_COMMENT;
                        savedState = PARSE_TAG;
                        break;
                    }
                    if (UCharacter.IsWhitespace(c))
                    {
                        break;
                    }
                    if (testString.StartsWith("<word>", charIdx - 1))
                    {
                        tp.bi    = BreakIterator.GetWordInstance(tp.currentLocale);
                        charIdx += 5;
                        break;
                    }
                    if (testString.StartsWith("<char>", charIdx - 1))
                    {
                        tp.bi    = BreakIterator.GetCharacterInstance(tp.currentLocale);
                        charIdx += 5;
                        break;
                    }
                    if (testString.StartsWith("<line>", charIdx - 1))
                    {
                        tp.bi    = BreakIterator.GetLineInstance(tp.currentLocale);
                        charIdx += 5;
                        break;
                    }
                    if (testString.StartsWith("<sent>", charIdx - 1))
                    {
                        tp.bi    = BreakIterator.GetSentenceInstance(tp.currentLocale);
                        charIdx += 5;
                        break;
                    }
                    if (testString.StartsWith("<title>", charIdx - 1))
                    {
                        tp.bi    = BreakIterator.GetTitleInstance(tp.currentLocale);
                        charIdx += 6;
                        break;
                    }
                    if (testString.StartsWith("<rules>", charIdx - 1) ||
                        testString.StartsWith("<badrules>", charIdx - 1))
                    {
                        charIdx        = testString.IndexOf('>', charIdx) + 1;
                        parseState     = PARSE_RULES;
                        rules.Length   = (0);
                        rulesFirstLine = lineNum;
                        break;
                    }

                    if (testString.StartsWith("<locale ", charIdx - 1))
                    {
                        int closeIndex = testString.IndexOf(">", charIdx);
                        if (closeIndex < 0)
                        {
                            Errln("line" + lineNum + ": missing close on <locale  tag.");
                            break;
                        }
                        String localeName = testString.Substring(charIdx + 6, closeIndex - (charIdx + 6));         // ICU4N: Corrected 2nd parameter
                        localeName       = localeName.Trim();
                        tp.currentLocale = new ULocale(localeName);
                        charIdx          = closeIndex + 1;
                        break;
                    }
                    if (testString.StartsWith("<data>", charIdx - 1))
                    {
                        parseState            = PARSE_DATA;
                        charIdx              += 5;
                        tp.dataToBreak.Length = (0);
                        Arrays.Fill(tp.expectedBreaks, 0);
                        Arrays.Fill(tp.srcCol, 0);
                        Arrays.Fill(tp.srcLine, 0);
                        break;
                    }

                    Errln("line" + lineNum + ": Tag expected in test file.");
                    return;
                    //parseState = PARSE_COMMENT;
                    //savedState = PARSE_DATA;
                }

                case PARSE_RULES:
                    if (testString.StartsWith("</rules>", charIdx - 1))
                    {
                        charIdx   += 7;
                        parseState = PARSE_TAG;
                        try
                        {
                            tp.bi = new RuleBasedBreakIterator(rules.ToString());
                        }
                        catch (ArgumentException e)
                        {
                            Errln(String.Format("rbbitst.txt:{0}  Error creating break iterator from rules.  {1}", lineNum, e));
                        }
                    }
                    else if (testString.StartsWith("</badrules>", charIdx - 1))
                    {
                        charIdx   += 10;
                        parseState = PARSE_TAG;
                        bool goodRules = true;
                        try
                        {
                            new RuleBasedBreakIterator(rules.ToString());
                        }
                        catch (ArgumentException e)
                        {
                            goodRules = false;
                        }
                        if (goodRules)
                        {
                            Errln(String.Format(
                                      "rbbitst.txt:{0}  Expected, but did not get, a failure creating break iterator from rules.",
                                      lineNum));
                        }
                    }
                    else
                    {
                        rules.AppendCodePoint(c);
                    }
                    break;

                case PARSE_DATA:
                    if (c == '•')
                    {
                        int breakIdx = tp.dataToBreak.Length;
                        tp.expectedBreaks[breakIdx] = -1;
                        tp.srcLine[breakIdx]        = lineNum;
                        tp.srcCol[breakIdx]         = column;
                        break;
                    }

                    if (testString.StartsWith("</data>", charIdx - 1))
                    {
                        // Add final entry to mappings from break location to source file position.
                        //  Need one extra because last break position returned is after the
                        //    last char in the data, not at the last char.
                        int idx = tp.dataToBreak.Length;
                        tp.srcLine[idx] = lineNum;
                        tp.srcCol[idx]  = column;

                        parseState = PARSE_TAG;
                        charIdx   += 6;

                        // RUN THE TEST!
                        executeTest(tp);
                        break;
                    }

                    if (testString.StartsWith("\\N{", charIdx - 1))
                    {
                        int nameEndIdx = testString.IndexOf('}', charIdx);
                        if (nameEndIdx == -1)
                        {
                            Errln("Error in named character in test file at line " + lineNum +
                                  ", col " + column);
                        }
                        // Named character, e.g. \N{COMBINING GRAVE ACCENT}
                        // Get the code point from the name and insert it into the test data.
                        String charName = testString.Substring(charIdx + 2, nameEndIdx - (charIdx + 2));     // ICU4N: Corrected 2nd parameter
                        c = UCharacter.GetCharFromName(charName);
                        if (c == -1)
                        {
                            Errln("Error in named character in test file at line " + lineNum +
                                  ", col " + column);
                        }
                        else
                        {
                            // Named code point was recognized.  Insert it
                            //   into the test data.
                            tp.dataToBreak.AppendCodePoint(c);
                            for (i = tp.dataToBreak.Length - 1; i >= 0 && tp.srcLine[i] == 0; i--)
                            {
                                tp.srcLine[i] = lineNum;
                                tp.srcCol[i]  = column;
                            }
                        }
                        if (nameEndIdx > charIdx)
                        {
                            charIdx = nameEndIdx + 1;
                        }
                        break;
                    }

                    if (testString.StartsWith("<>", charIdx - 1))
                    {
                        charIdx++;
                        int breakIdx = tp.dataToBreak.Length;
                        tp.expectedBreaks[breakIdx] = -1;
                        tp.srcLine[breakIdx]        = lineNum;
                        tp.srcCol[breakIdx]         = column;
                        break;
                    }

                    if (c == '<')
                    {
                        tagValue   = 0;
                        parseState = PARSE_NUM;
                        break;
                    }

                    if (c == '#' && column == 3)
                    {       // TODO:  why is column off so far?
                        parseState = PARSE_COMMENT;
                        savedState = PARSE_DATA;
                        break;
                    }

                    if (c == '\\')
                    {
                        // Check for \ at end of line, a line continuation.
                        //     Advance over (discard) the newline
                        int cp = testString.CodePointAt(charIdx);
                        if (cp == '\r' && charIdx < len && testString.CodePointAt(charIdx + 1) == '\n')
                        {
                            // We have a CR LF
                            //  Need an extra increment of the input ptr to move over both of them
                            charIdx++;
                        }
                        if (cp == '\n' || cp == '\r')
                        {
                            lineNum++;
                            column = 0;
                            charIdx++;
                            colStart = charIdx;
                            break;
                        }

                        // Let unescape handle the back slash.
                        int[] charIdxAr = new int[1];
                        charIdxAr[0] = charIdx;
                        cp           = Utility.UnescapeAt(testString, charIdxAr);
                        if (cp != -1)
                        {
                            // Escape sequence was recognized.  Insert the char
                            //   into the test data.
                            charIdx = charIdxAr[0];
                            tp.dataToBreak.AppendCodePoint(cp);
                            for (i = tp.dataToBreak.Length - 1; i >= 0 && tp.srcLine[i] == 0; i--)
                            {
                                tp.srcLine[i] = lineNum;
                                tp.srcCol[i]  = column;
                            }

                            break;
                        }


                        // Not a recognized backslash escape sequence.
                        // Take the next char as a literal.
                        //  TODO:  Should this be an error?
                        c       = testString.CodePointAt(charIdx);
                        charIdx = testString.OffsetByCodePoints(charIdx, 1);
                    }

                    // Normal, non-escaped data char.
                    tp.dataToBreak.AppendCodePoint(c);

                    // Save the mapping from offset in the data to line/column numbers in
                    //   the original input file.  Will be used for better error messages only.
                    //   If there's an expected break before this char, the slot in the mapping
                    //     vector will already be set for this char; don't overwrite it.
                    for (i = tp.dataToBreak.Length - 1; i >= 0 && tp.srcLine[i] == 0; i--)
                    {
                        tp.srcLine[i] = lineNum;
                        tp.srcCol[i]  = column;
                    }
                    break;


                case PARSE_NUM:
                    // We are parsing an expected numeric tag value, like <1234>,
                    //   within a chunk of data.
                    if (UCharacter.IsWhitespace(c))
                    {
                        break;
                    }

                    if (c == '>')
                    {
                        // Finished the number.  Add the info to the expected break data,
                        //   and switch parse state back to doing plain data.
                        parseState = PARSE_DATA;
                        if (tagValue == 0)
                        {
                            tagValue = -1;
                        }
                        int breakIdx = tp.dataToBreak.Length;
                        tp.expectedBreaks[breakIdx] = tagValue;
                        tp.srcLine[breakIdx]        = lineNum;
                        tp.srcCol[breakIdx]         = column;
                        break;
                    }

                    if (UCharacter.IsDigit(c))
                    {
                        tagValue = tagValue * 10 + UCharacter.Digit(c);
                        break;
                    }

                    Errln(String.Format("Syntax Error in rbbitst.txt at line {0}, col {1}", lineNum, column));
                    return;
                }
            }

            // Reached end of test file. Raise an error if parseState indicates that we are
            //   within a block that should have been terminated.
            if (parseState == PARSE_RULES)
            {
                Errln(String.Format("rbbitst.txt:{0} <rules> block beginning at line {1} is not closed.",
                                    lineNum, rulesFirstLine));
            }
            if (parseState == PARSE_DATA)
            {
                Errln(String.Format("rbbitst.txt:{0} <data> block not closed.", lineNum));
            }
        }
Пример #4
0
        public void TestNullLocale()
        {
            CultureInfo loc  = null;
            ULocale     uloc = null;

            BreakIterator brk;

            // Character
            try
            {
                brk = BreakIterator.GetCharacterInstance(loc);
                Errln("getCharacterInstance((Locale)null) did not throw NPE.");
            }
            catch (ArgumentNullException e) { /* OK */ }
            try
            {
                brk = BreakIterator.GetCharacterInstance(uloc);
                Errln("getCharacterInstance((ULocale)null) did not throw NPE.");
            }
            catch (ArgumentNullException e) { /* OK */ }

            // Line
            try
            {
                brk = BreakIterator.GetLineInstance(loc);
                Errln("getLineInstance((Locale)null) did not throw NPE.");
            }
            catch (ArgumentNullException e) { /* OK */ }
            try
            {
                brk = BreakIterator.GetLineInstance(uloc);
                Errln("getLineInstance((ULocale)null) did not throw NPE.");
            }
            catch (ArgumentNullException e) { /* OK */ }

            // Sentence
            try
            {
                brk = BreakIterator.GetSentenceInstance(loc);
                Errln("getSentenceInstance((Locale)null) did not throw NPE.");
            }
            catch (ArgumentNullException e) { /* OK */ }
            try
            {
                brk = BreakIterator.GetSentenceInstance(uloc);
                Errln("getSentenceInstance((ULocale)null) did not throw NPE.");
            }
            catch (ArgumentNullException e) { /* OK */ }

            // Title
            try
            {
                brk = BreakIterator.GetTitleInstance(loc);
                Errln("getTitleInstance((Locale)null) did not throw NPE.");
            }
            catch (ArgumentNullException e) { /* OK */ }
            try
            {
                brk = BreakIterator.GetTitleInstance(uloc);
                Errln("getTitleInstance((ULocale)null) did not throw NPE.");
            }
            catch (ArgumentNullException e) { /* OK */ }

            // Word
            try
            {
                brk = BreakIterator.GetWordInstance(loc);
                Errln("getWordInstance((Locale)null) did not throw NPE.");
            }
            catch (ArgumentNullException e) { /* OK */ }
            try
            {
                brk = BreakIterator.GetWordInstance(uloc);
                Errln("getWordInstance((ULocale)null) did not throw NPE.");
            }
            catch (ArgumentNullException e) { /* OK */ }
        }