/// <summary>
/// Test that offsets are correct when a MappingCharFilter has previously been applied.
/// </summary>
/// <remarks>
/// Converted from Java; the original method declared <c>throws java.io.IOException</c>,
/// which has no .NET equivalent.
/// </remarks>
public virtual void testChangedOffsets()
{
    // Each single-character input is mapped to a two-character replacement.
    NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
    mapBuilder.add("a", "一二");
    mapBuilder.add("b", "二三");
    NormalizeCharMap charMap = mapBuilder.build();

    // The helper analyzer is declared outside this block; it receives the char map built above.
    Analyzer mappedAnalyzer = new AnalyzerAnonymousInnerClassHelper(this, charMap);

    // note: offsets are strange since this is how the charfilter maps them...
    // before bigramming, the 4 tokens look like:
    //   { 0, 0, 1, 1 },
    //   { 0, 1, 1, 2 }
    string[] expectedTerms = new string[] { "一二", "二二", "二三" };
    int[] expectedStartOffsets = new int[] { 0, 0, 1 };
    int[] expectedEndOffsets = new int[] { 1, 1, 2 };

    assertAnalyzesTo(mappedAnalyzer, "ab", expectedTerms, expectedStartOffsets, expectedEndOffsets);
}
/// <summary>
/// Runs a Windows-style path through a MappingCharFilter that rewrites "\" to "/" and
/// checks the tokens a PathHierarchyTokenizer produces from the filtered reader.
/// </summary>
/// <remarks>
/// Converted from Java; the original method declared <c>throws Exception</c>,
/// which has no .NET equivalent.
/// </remarks>
public virtual void testNormalizeWinDelimToLinuxDelim()
{
    // Single rule: backslash becomes forward slash.
    NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
    mapBuilder.add("\\", "/");
    NormalizeCharMap slashMap = mapBuilder.build();

    string path = "c:\\a\\b\\c";
    Reader filtered = new MappingCharFilter(slashMap, new StringReader(path));
    PathHierarchyTokenizer tokenizer = new PathHierarchyTokenizer(filtered);

    // Argument roles follow the assertTokenStreamContents helper, which is declared outside
    // this block: terms, start offsets, end offsets, position increments, final offset.
    string[] expectedTerms = new string[] { "c:", "c:/a", "c:/a/b", "c:/a/b/c" };
    int[] expectedStartOffsets = new int[] { 0, 0, 0, 0 };
    int[] expectedEndOffsets = new int[] { 2, 4, 6, 8 };
    int[] expectedPosIncrements = new int[] { 1, 0, 0, 0 };

    assertTokenStreamContents(tokenizer, expectedTerms, expectedStartOffsets, expectedEndOffsets, expectedPosIncrements, path.Length);
}
/// <summary>
/// Registers every mapping rule in <paramref name="rules"/> with <paramref name="builder"/>.
/// </summary>
/// <remarks>
/// Each rule is matched against the pattern <c>p</c> (declared outside this block). On a match,
/// capture groups 1 and 2 are each passed through <c>parseString</c> and added to the builder
/// as one mapping entry.
/// </remarks>
/// <param name="rules">The rule strings to process, in order.</param>
/// <param name="builder">The char-map builder that receives one entry per rule.</param>
/// <exception cref="System.ArgumentException">
/// Thrown for the first rule the pattern cannot find a match in; the message names the rule
/// and the <c>mapping</c> member.
/// </exception>
protected internal virtual void parseRules(IList <string> rules, NormalizeCharMap.Builder builder)
{
    foreach (string ruleText in rules)
    {
        Matcher ruleMatch = p.matcher(ruleText);
        if (ruleMatch.find())
        {
            builder.add(parseString(ruleMatch.group(1)), parseString(ruleMatch.group(2)));
        }
        else
        {
            string message = "Invalid Mapping Rule : [" + ruleText + "], file = " + mapping;
            throw new System.ArgumentException(message);
        }
    }
}
/// <summary>
/// Builds a CharArraySet and a NormalizeCharMap from fixed entries, hands both to the helper
/// analyzer, and runs <c>checkAnalysisConsistency</c> on a fixed input string.
/// </summary>
/// <remarks>
/// Converted from Java; the original method declared <c>throws Exception</c>,
/// which has no .NET equivalent.
/// </remarks>
public virtual void test()
{
    CharArraySet wordSet = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
    foreach (string word in new string[] { "jjp", "wlmwoknt", "tcgyreo" })
    {
        wordSet.add(word);
    }

    // Three mappings; the first one replaces its source with the empty string.
    NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
    mapBuilder.add("mtqlpi", "");
    mapBuilder.add("mwoknt", "jjp");
    mapBuilder.add("tcgyreo", "zpfpajyws");
    NormalizeCharMap charMap = mapBuilder.build();

    // The helper analyzer is declared outside this block; it receives the set and the map.
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, wordSet, charMap);

    checkAnalysisConsistency(random(), analyzer, false, "wmgddzunizdomqyj");
}
/// <summary>
/// Checks that PatternTokenizer reports offsets into the original, unfiltered input when a
/// MappingCharFilter has shortened the text before tokenization.
/// </summary>
/// <remarks>
/// The input spells the u-umlaut as a six-character HTML entity, and the char filter replaces
/// it with the single character "ü". Each "Günther" token is therefore 7 characters long after
/// filtering but spans 12 characters of the original input, which is what the expected
/// offsets encode. Converted from Java; the original method declared <c>throws Exception</c>,
/// which has no .NET equivalent.
/// </remarks>
public virtual void testOffsetCorrection()
{
    // The entity must stay encoded here. With a literal "ü" the input is only 23 characters
    // long and the mapping below is an identity, so the expected offsets (up to 33) cannot hold.
    const string INPUT = "G&uuml;nther G&uuml;nther is here";

    // create MappingCharFilter (rule-file form of this mapping: "&uuml;" => "ü")
    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    builder.add("&uuml;", "ü");
    NormalizeCharMap normMap = builder.build();
    CharFilter charStream = new MappingCharFilter(normMap, new StringReader(INPUT));

    // create PatternTokenizer: split on runs of separators (group -1)
    TokenStream stream = new PatternTokenizer(charStream, Pattern.compile("[,;/\\s]+"), -1);
    assertTokenStreamContents(stream,
        new string[] { "Günther", "Günther", "is", "here" },
        new int[] { 0, 13, 26, 29 },
        new int[] { 12, 25, 28, 33 },
        INPUT.Length);

    // Same input with a fresh filter, this time emitting whole-pattern matches (group 0).
    // The pattern runs on the filtered text, so it contains the already-mapped "ü".
    charStream = new MappingCharFilter(normMap, new StringReader(INPUT));
    stream = new PatternTokenizer(charStream, Pattern.compile("Günther"), 0);
    assertTokenStreamContents(stream,
        new string[] { "Günther", "Günther" },
        new int[] { 0, 13 },
        new int[] { 12, 25 },
        INPUT.Length);
}