Пример #1
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: static void assertLegalOffsets(String in) throws Exception
        /// <summary>
        /// Drains an <c>HTMLStripCharFilter</c> built over <paramref name="in"/> and asserts that
        /// the corrected offset reported for every output position does not exceed the input length.
        /// </summary>
        /// <param name="in">Text fed to the filter.</param>
        internal static void assertLegalOffsets(string @in)
        {
            int length = @in.Length;

            // StreamReader wraps a Stream or a file path, never another reader, so the
            // StringReader is handed to the filter directly.
            HTMLStripCharFilter reader = new HTMLStripCharFilter(new StringReader(@in));
            int off = 0; // index of the character just read from the filter

            while (reader.read() != -1)
            {
                int correction = reader.correctOffset(off);
                assertTrue("invalid offset correction: " + off + "->" + correction + " for doc of length: " + length, correction <= length);
                off++;
            }
        }
Пример #2
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testMSWord14GeneratedHTML() throws Exception
        /// <summary>
        /// Runs the "MS-Word 14 generated.htm" resource through an <c>HTMLStripCharFilter</c>
        /// and asserts that the trimmed output equals "This is a test".
        /// </summary>
        public virtual void testMSWord14GeneratedHTML()
        {
            System.IO.Stream resource = this.GetType().getResourceAsStream("MS-Word 14 generated.htm");
            HTMLStripCharFilter filter = new HTMLStripCharFilter(new System.IO.StreamReader(resource, Encoding.UTF8));
            string expected = "This is a test";
            StringBuilder stripped = new StringBuilder();

            // Copy the filter output character by character until it reports end of input (-1).
            for (int c = filter.read(); c != -1; c = filter.read())
            {
                stripped.Append((char)c);
            }

            // Only the trimmed output is compared against the expected text.
            string actual = stripped.ToString().Trim();
            assertEquals("'" + actual + "' is not equal to '" + expected + "'", expected, actual);
        }
Пример #3
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testRandomText() throws Exception
        /// <summary>
        /// Builds a space-separated document of random words and reads it to the end through an
        /// <c>HTMLStripCharFilter</c>. The test makes no assertions; it only drains the filter.
        /// </summary>
        public virtual void testRandomText()
        {
            const int fewestWords  = 10;
            const int mostWords    = 10000;
            const int shortestWord = 3;
            const int longestWord  = 20;

            StringBuilder document  = new StringBuilder();
            int           wordCount = TestUtil.Next(random(), fewestWords, mostWords);
            int           flavour   = TestUtil.Next(random(), 0, 4);

            if (flavour == 0)
            {
                // Arbitrary unicode words, bounded only by the maximum length.
                for (int i = 0; i < wordCount; i++)
                {
                    document.Append(TestUtil.randomUnicodeString(random(), longestWord)).Append(' ');
                }
            }
            else if (flavour == 1)
            {
                // "Realistic" unicode words within the min/max length bounds.
                for (int i = 0; i < wordCount; i++)
                {
                    document.Append(TestUtil.randomRealisticUnicodeString(random(), shortestWord, longestWord)).Append(' ');
                }
            }
            else
            {
                // Every other selector value: simple strings.
                for (int i = 0; i < wordCount; i++)
                {
                    document.Append(TestUtil.randomSimpleString(random())).Append(' ');
                }
            }

            Reader filter = new HTMLStripCharFilter(new StringReader(document.ToString()));

            // Drain the filter; the characters themselves are discarded.
            int c;
            do
            {
                c = filter.read();
            }
            while (c != -1);
        }
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void doTestOffsets(String in) throws Exception
        /// <summary>
        /// Reads <paramref name="in"/> through an <c>HTMLStripCharFilter</c> and, each time the
        /// filter emits an 'X', asserts that the corrected offset of that output position equals
        /// the index of the next 'X' in the original string.
        /// </summary>
        /// <param name="in">Input text; its 'X' characters act as offset markers.</param>
        public virtual void doTestOffsets(string @in)
        {
            // StreamReader wraps a Stream or a file path, never another reader, so the
            // StringReader is handed to the filter directly.
            HTMLStripCharFilter reader = new HTMLStripCharFilter(new StringReader(@in));
            int ch;
            int off = 0;     // offset in the reader
            int strOff = -1; // offset in the original string

            while ((ch = reader.read()) != -1)
            {
                int correctedOff = reader.correctOffset(off);

                if (ch == 'X')
                {
                    // Search resumes just past the previously matched 'X'.
                    strOff = @in.IndexOf('X', strOff + 1);
                    assertEquals(strOff, correctedOff);
                }

                off++;
            }
        }
Пример #5
0
        //Some sanity checks, but not a full-fledged check
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testHTML() throws Exception
        /// <summary>
        /// Sanity-checks the output of <c>HTMLStripCharFilter</c> for the
        /// "htmlStripReaderTest.html" resource: no "&amp;lt;" entity remains, neither "forrest" nor
        /// "Forrest" appears, and the trimmed text starts with "Welcome to Solr" and ends with
        /// "Foundation.".
        /// </summary>
        public virtual void testHTML()
        {
            System.IO.Stream    stream  = this.GetType().getResourceAsStream("htmlStripReaderTest.html");
            HTMLStripCharFilter reader  = new HTMLStripCharFilter(new System.IO.StreamReader(stream, Encoding.UTF8));
            StringBuilder       builder = new StringBuilder();
            int ch;

            // Collect the whole filtered output; read() returns -1 at end of input.
            while ((ch = reader.read()) != -1)
            {
                builder.Append((char)ch);
            }

            string str     = builder.ToString();
            string trimmed = str.Trim(); // trimmed once, shared by the start/end checks

            assertTrue("Entity not properly escaped", str.IndexOf("&lt;", StringComparison.Ordinal) == -1);     // original note: there is one > in the text
            assertTrue("Forrest should have been stripped out", str.IndexOf("forrest", StringComparison.Ordinal) == -1 && str.IndexOf("Forrest", StringComparison.Ordinal) == -1);
            assertTrue("File should start with 'Welcome to Solr' after trimming", trimmed.StartsWith("Welcome to Solr", StringComparison.Ordinal));

            // The message previously said "start with" although the check is on the end of the text.
            assertTrue("File should end with 'Foundation.' after trimming", trimmed.EndsWith("Foundation.", StringComparison.Ordinal));
        }
Пример #6
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void doTestOffsets(String in) throws Exception
        /// <summary>
        /// Feeds <paramref name="in"/> to an <c>HTMLStripCharFilter</c> and checks offset
        /// correction at every 'X' the filter emits: the corrected offset must equal the index
        /// of the next 'X' in the original string.
        /// </summary>
        /// <param name="in">Input text; its 'X' characters act as offset markers.</param>
        public virtual void doTestOffsets(string @in)
        {
            // The filter receives the StringReader itself: StreamReader has no constructor
            // that accepts another reader (only a Stream or a file path).
            HTMLStripCharFilter reader = new HTMLStripCharFilter(new StringReader(@in));
            int ch;
            int off    = 0;  // offset in the reader
            int strOff = -1; // offset in the original string

            while ((ch = reader.read()) != -1)
            {
                int correctedOff = reader.correctOffset(off);

                if (ch == 'X')
                {
                    // Continue the search just past the previously matched 'X'.
                    strOff = @in.IndexOf('X', strOff + 1);
                    assertEquals(strOff, correctedOff);
                }

                off++;
            }
        }
Пример #7
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testReserved() throws Exception
        /// <summary>
        /// Constructs an <c>HTMLStripCharFilter</c> with a set containing "reserved" as its second
        /// argument and asserts that "reserved" occurs at offsets 9, 38 and 54 of the output while
        /// "other" does not occur at all.
        /// </summary>
        public virtual void testReserved()
        {
            string        input     = "aaa bbb <reserved ccc=\"ddddd\"> eeee </reserved> ffff <reserved ggg=\"hhhh\"/> <other/>";
            ISet <string> keepTags  = new HashSet <string> { "reserved" };
            Reader        filter    = new HTMLStripCharFilter(new StringReader(input), keepTags);
            StringBuilder collected = new StringBuilder();

            for (int c = filter.read(); c != -1; c = filter.read())
            {
                collected.Append((char)c);
            }

            string result = collected.ToString();

            // Each position is looked up immediately before its own assertion, so a failing
            // earlier check is reported before any later lookup runs.
            int firstAt = result.IndexOf("reserved", StringComparison.Ordinal);
            assertTrue("Escaped tag not preserved: " + firstAt, firstAt == 9);

            int secondAt = result.IndexOf("reserved", 15, StringComparison.Ordinal);
            assertTrue("Escaped tag not preserved: " + secondAt, secondAt == 38);

            int thirdAt = result.IndexOf("reserved", 41, StringComparison.Ordinal);
            assertTrue("Escaped tag not preserved: " + thirdAt, thirdAt == 54);

            int otherAt = result.IndexOf("other", StringComparison.Ordinal);
            assertTrue("Other tag should be removed", otherAt == -1);
        }
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: static void assertLegalOffsets(String in) throws Exception
 /// <summary>
 /// Reads <paramref name="in"/> to the end through an <c>HTMLStripCharFilter</c> and asserts,
 /// for each output position, that the corrected offset is no greater than the input length.
 /// </summary>
 /// <param name="in">Text fed to the filter.</param>
 internal static void assertLegalOffsets(string @in)
 {
     int length = @in.Length;

     // The filter receives the StringReader itself: StreamReader has no constructor
     // that accepts another reader (only a Stream or a file path).
     HTMLStripCharFilter reader = new HTMLStripCharFilter(new StringReader(@in));
     int off = 0; // index of the character just read from the filter

     while (reader.read() != -1)
     {
         int correction = reader.correctOffset(off);
         assertTrue("invalid offset correction: " + off + "->" + correction + " for doc of length: " + length, correction <= length);
         off++;
     }
 }
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void testReserved() throws Exception
 /// <summary>
 /// Passes a set containing "reserved" as the second constructor argument of
 /// <c>HTMLStripCharFilter</c>, then asserts that the output contains "reserved" at
 /// offsets 9, 38 and 54 and does not contain "other".
 /// </summary>
 public virtual void testReserved()
 {
     string markup = "aaa bbb <reserved ccc=\"ddddd\"> eeee </reserved> ffff <reserved ggg=\"hhhh\"/> <other/>";
     ISet<string> escaped = new HashSet<string> { "reserved" };
     Reader source = new HTMLStripCharFilter(new StringReader(markup), escaped);
     StringBuilder output = new StringBuilder();

     int next = source.read();
     while (next != -1)
     {
         output.Append((char)next);
         next = source.read();
     }

     string result = output.ToString();

     // Look each position up right before asserting on it, so an earlier failure is
     // reported before any later lookup runs.
     int pos = result.IndexOf("reserved", StringComparison.Ordinal);
     assertTrue("Escaped tag not preserved: " + pos, pos == 9);

     pos = result.IndexOf("reserved", 15, StringComparison.Ordinal);
     assertTrue("Escaped tag not preserved: " + pos, pos == 38);

     pos = result.IndexOf("reserved", 41, StringComparison.Ordinal);
     assertTrue("Escaped tag not preserved: " + pos, pos == 54);

     bool otherGone = result.IndexOf("other", StringComparison.Ordinal) == -1;
     assertTrue("Other tag should be removed", otherGone);
 }
Пример #10
0
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void testRandomText() throws Exception
 /// <summary>
 /// Generates a random, space-separated document and reads it completely through an
 /// <c>HTMLStripCharFilter</c>. Nothing is asserted; the filter is only drained.
 /// </summary>
 public virtual void testRandomText()
 {
     const int minNumWords = 10;
     const int maxNumWords = 10000;
     const int minWordLength = 3;
     const int maxWordLength = 20;

     StringBuilder words = new StringBuilder();
     int total = TestUtil.Next(random(), minNumWords, maxNumWords);
     int kind = TestUtil.Next(random(), 0, 4);

     if (kind == 0)
     {
         // Arbitrary unicode words, bounded only by the maximum length.
         for (int n = 0; n < total; n++)
         {
             words.Append(TestUtil.randomUnicodeString(random(), maxWordLength));
             words.Append(' ');
         }
     }
     else if (kind == 1)
     {
         // "Realistic" unicode words within the min/max length bounds.
         for (int n = 0; n < total; n++)
         {
             words.Append(TestUtil.randomRealisticUnicodeString(random(), minWordLength, maxWordLength));
             words.Append(' ');
         }
     }
     else
     {
         // All remaining selector values: simple strings.
         for (int n = 0; n < total; n++)
         {
             words.Append(TestUtil.randomSimpleString(random()));
             words.Append(' ');
         }
     }

     Reader stripper = new HTMLStripCharFilter(new StringReader(words.ToString()));

     // Consume the filter up to end of input (-1); the output is intentionally ignored.
     while (stripper.read() != -1)
     {
     }
 }
Пример #11
0
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void testMSWord14GeneratedHTML() throws Exception
 /// <summary>
 /// Strips the "MS-Word 14 generated.htm" resource with an <c>HTMLStripCharFilter</c> and
 /// asserts that the trimmed result equals "This is a test".
 /// </summary>
 public virtual void testMSWord14GeneratedHTML()
 {
     string gold = "This is a test";
     System.IO.Stream input = this.GetType().getResourceAsStream("MS-Word 14 generated.htm");
     HTMLStripCharFilter stripper = new HTMLStripCharFilter(new System.IO.StreamReader(input, Encoding.UTF8));
     StringBuilder text = new StringBuilder();

     // Accumulate the filtered characters until read() signals end of input with -1.
     int next = stripper.read();
     while (next != -1)
     {
         text.Append((char)next);
         next = stripper.read();
     }

     // Compare trimmed output to gold.
     string trimmed = text.ToString().Trim();
     assertEquals("'" + trimmed + "' is not equal to '" + gold + "'", gold, trimmed);
 }
Пример #12
0
        //Some sanity checks, but not a full-fledged check
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testHTML() throws Exception
        /// <summary>
        /// Sanity-checks what <c>HTMLStripCharFilter</c> produces for the
        /// "htmlStripReaderTest.html" resource: the output must contain no "&amp;lt;" entity and
        /// neither "forrest" nor "Forrest", and after trimming it must start with
        /// "Welcome to Solr" and end with "Foundation.".
        /// </summary>
        public virtual void testHTML()
        {
            System.IO.Stream stream = this.GetType().getResourceAsStream("htmlStripReaderTest.html");
            HTMLStripCharFilter reader = new HTMLStripCharFilter(new System.IO.StreamReader(stream, Encoding.UTF8));
            StringBuilder builder = new StringBuilder();
            int ch;

            // Collect the whole filtered output; read() returns -1 at end of input.
            while ((ch = reader.read()) != -1)
            {
                builder.Append((char)ch);
            }

            string str = builder.ToString();
            string trimmed = str.Trim(); // trimmed once, shared by the start/end checks

            assertTrue("Entity not properly escaped", str.IndexOf("&lt;", StringComparison.Ordinal) == -1); // original note: there is one > in the text
            assertTrue("Forrest should have been stripped out", str.IndexOf("forrest", StringComparison.Ordinal) == -1 && str.IndexOf("Forrest", StringComparison.Ordinal) == -1);
            assertTrue("File should start with 'Welcome to Solr' after trimming", trimmed.StartsWith("Welcome to Solr", StringComparison.Ordinal));

            // The message previously said "start with" although the check is on the end of the text.
            assertTrue("File should end with 'Foundation.' after trimming", trimmed.EndsWith("Foundation.", StringComparison.Ordinal));
        }