//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testOffsetCorrection() throws Exception
/// <summary>
/// Verifies that offset corrections made by MappingCharFilter survive
/// PatternTokenizer: the char filter rewrites the HTML entity "&amp;uuml;"
/// to the single character "ü", and token offsets must still point into the
/// ORIGINAL (entity-encoded) input text.
/// </summary>
public virtual void testOffsetCorrection()
{
    // FIX(review): the Java->C# conversion decoded the "&uuml;" HTML entities
    // into literal "ü" characters, breaking the test: the asserted offsets
    // (end offset 33 == INPUT.Length) only hold for the entity-encoded input
    // "G&uuml;nther G&uuml;nther is here" (length 33), not the decoded form
    // (length 23). The entity-encoded strings are restored here.
    // Also removed the unused local `mappingRules` (dead code — it was built
    // but never passed to anything).
    const string INPUT = "G&uuml;nther G&uuml;nther is here";

    // create MappingCharFilter that normalizes the entity to the real character
    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    builder.add("&uuml;", "ü");
    NormalizeCharMap normMap = builder.build();
    CharFilter charStream = new MappingCharFilter(normMap, new StringReader(INPUT));

    // create PatternTokenizer splitting on [,;/] and whitespace (group -1 = split mode);
    // offsets below index into the raw 33-char entity-encoded INPUT
    TokenStream stream = new PatternTokenizer(charStream, Pattern.compile("[,;/\\s]+"), -1);
    assertTokenStreamContents(stream,
        new string[] {"Günther", "Günther", "is", "here"},
        new int[] {0, 13, 26, 29},
        new int[] {12, 25, 28, 33},
        INPUT.Length);

    // group 0: the whole pattern match itself becomes the token
    charStream = new MappingCharFilter(normMap, new StringReader(INPUT));
    stream = new PatternTokenizer(charStream, Pattern.compile("Günther"), 0);
    assertTokenStreamContents(stream,
        new string[] {"Günther", "Günther"},
        new int[] {0, 13},
        new int[] {12, 25},
        INPUT.Length);
}
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testOffsetCorrection() throws Exception
/// <summary>
/// Verifies that offset corrections made by MappingCharFilter survive
/// PatternTokenizer: the char filter rewrites the HTML entity "&amp;uuml;"
/// to the single character "ü", and token offsets must still point into the
/// ORIGINAL (entity-encoded) input text.
/// </summary>
public virtual void testOffsetCorrection()
{
    // FIX(review): the Java->C# conversion decoded the "&uuml;" HTML entities
    // into literal "ü" characters, breaking the test: the asserted offsets
    // (end offset 33 == INPUT.Length) only hold for the entity-encoded input
    // "G&uuml;nther G&uuml;nther is here" (length 33), not the decoded form
    // (length 23). The entity-encoded strings are restored here.
    // Also removed the unused local `mappingRules` (dead code — it was built
    // but never passed to anything).
    const string INPUT = "G&uuml;nther G&uuml;nther is here";

    // create MappingCharFilter that normalizes the entity to the real character
    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    builder.add("&uuml;", "ü");
    NormalizeCharMap normMap = builder.build();
    CharFilter charStream = new MappingCharFilter(normMap, new StringReader(INPUT));

    // create PatternTokenizer splitting on [,;/] and whitespace (group -1 = split mode);
    // offsets below index into the raw 33-char entity-encoded INPUT
    TokenStream stream = new PatternTokenizer(charStream, Pattern.compile("[,;/\\s]+"), -1);
    assertTokenStreamContents(stream,
        new string[] { "Günther", "Günther", "is", "here" },
        new int[] { 0, 13, 26, 29 },
        new int[] { 12, 25, 28, 33 },
        INPUT.Length);

    // group 0: the whole pattern match itself becomes the token
    charStream = new MappingCharFilter(normMap, new StringReader(INPUT));
    stream = new PatternTokenizer(charStream, Pattern.compile("Günther"), 0);
    assertTokenStreamContents(stream,
        new string[] { "Günther", "Günther" },
        new int[] { 0, 13 },
        new int[] { 12, 25 },
        INPUT.Length);
}
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testSplitting() throws Exception
/// <summary>
/// Table-driven check of PatternTokenizer: each case supplies a capture
/// group, a regex, an input, and the expected space-joined token output.
/// Group -1 means "split on matches"; 0 tokenizes the whole match; N &gt; 0
/// tokenizes capture group N.
/// </summary>
public virtual void testSplitting()
{
    string qpattern = "\\'([^\\']+)\\'"; // get stuff between "'"

    // { capture group, regex pattern, input text, expected tokens }
    string[][] cases =
    {
        new string[] { "-1", "--", "aaa--bbb--ccc", "aaa bbb ccc" },
        new string[] { "-1", ":", "aaa:bbb:ccc", "aaa bbb ccc" },
        new string[] { "-1", "\\p{Space}", "aaa bbb \t\tccc ", "aaa bbb ccc" },
        new string[] { "-1", ":", "boo:and:foo", "boo and foo" },
        new string[] { "-1", "o", "boo:and:foo", "b :and:f" },
        new string[] { "0", ":", "boo:and:foo", ": :" },
        new string[] { "0", qpattern, "aaa 'bbb' 'ccc'", "'bbb' 'ccc'" },
        new string[] { "1", qpattern, "aaa 'bbb' 'ccc'", "bbb ccc" }
    };

    foreach (string[] tc in cases)
    {
        int group = int.Parse(tc[0]);
        TokenStream ts = new PatternTokenizer(new StringReader(tc[2]), Pattern.compile(tc[1]), group);
        string actual = tsToString(ts);
        assertEquals("pattern: " + tc[1] + " with input: " + tc[2], tc[3], actual);
        // A comparison against String.split() was disabled upstream because
        // this tokenizer intentionally drops empty tokens.
    }
}
/// <summary>
/// Builds the analysis chain for this analyzer: a PatternTokenizer that
/// emits each literal "a" match (capture group 0) as a token.
/// </summary>
protected internal override Analyzer.TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    var source = new PatternTokenizer(reader, Pattern.compile("a"), 0);
    return new Analyzer.TokenStreamComponents(source);
}
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testSplitting() throws Exception
/// <summary>
/// Table-driven check of PatternTokenizer: each case supplies a capture
/// group, a regex, an input, and the expected space-joined token output.
/// Group -1 means "split on matches"; 0 tokenizes the whole match; N &gt; 0
/// tokenizes capture group N.
/// </summary>
public virtual void testSplitting()
{
    string qpattern = "\\'([^\\']+)\\'"; // get stuff between "'"

    // { capture group, regex pattern, input text, expected tokens }
    string[][] cases =
    {
        new string[] { "-1", "--", "aaa--bbb--ccc", "aaa bbb ccc" },
        new string[] { "-1", ":", "aaa:bbb:ccc", "aaa bbb ccc" },
        new string[] { "-1", "\\p{Space}", "aaa bbb \t\tccc ", "aaa bbb ccc" },
        new string[] { "-1", ":", "boo:and:foo", "boo and foo" },
        new string[] { "-1", "o", "boo:and:foo", "b :and:f" },
        new string[] { "0", ":", "boo:and:foo", ": :" },
        new string[] { "0", qpattern, "aaa 'bbb' 'ccc'", "'bbb' 'ccc'" },
        new string[] { "1", qpattern, "aaa 'bbb' 'ccc'", "bbb ccc" }
    };

    foreach (string[] tc in cases)
    {
        int group = int.Parse(tc[0]);
        TokenStream ts = new PatternTokenizer(new StringReader(tc[2]), Pattern.compile(tc[1]), group);
        string actual = tsToString(ts);
        assertEquals("pattern: " + tc[1] + " with input: " + tc[2], tc[3], actual);
        // A comparison against String.split() was disabled upstream because
        // this tokenizer intentionally drops empty tokens.
    }
}
/// <summary>
/// Builds the analysis chain for this analyzer: a PatternTokenizer that
/// emits each literal "a" match (capture group 0) as a token.
/// </summary>
protected internal override Analyzer.TokenStreamComponents createComponents(string fieldName, Reader reader)
{
    var source = new PatternTokenizer(reader, Pattern.compile("a"), 0);
    return new Analyzer.TokenStreamComponents(source);
}