/// <summary>
/// Runs <c>CheckAnalysisConsistency</c> over the fixed text "wmgddzunizdomqyj" using an
/// analyzer assembled from a <c>MockCharFilter</c> + <c>MappingCharFilter</c> reader chain,
/// a <c>MockTokenizer</c> whose reader is wrapped in
/// <c>CheckThatYouDidntReadAnythingReaderWrapper</c>, and a <c>CommonGramsFilter</c>.
/// </summary>
public virtual void Test()
{
    // Common-grams word set, populated in the same order as before.
    CharArraySet commonWords = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
    foreach (string word in new string[] { "jjp", "wlmwoknt", "tcgyreo" })
    {
        commonWords.add(word);
    }

    // Character mappings applied before tokenization.
    NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
    mapBuilder.Add("mtqlpi", "");
    mapBuilder.Add("mwoknt", "jjp");
    mapBuilder.Add("tcgyreo", "zpfpajyws");
    NormalizeCharMap map = mapBuilder.Build();

    Analyzer analyzer = Analyzer.NewAnonymous(
        createComponents: (fieldName, reader) =>
        {
            Tokenizer tokenizer = new MockTokenizer(
                new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(reader),
                MockTokenFilter.ENGLISH_STOPSET,
                false,
                -65);
            TokenFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
            return new TokenStreamComponents(tokenizer, grams);
        },
        initReader: (fieldName, reader) =>
        {
            // MockCharFilter (remainder 0) sits underneath the mapping filter.
            return new MappingCharFilter(map, new MockCharFilter(reader, 0));
        });

    CheckAnalysisConsistency(Random, analyzer, false, "wmgddzunizdomqyj");
}
//
//           0123456789
// (in)      aaaa ll h
// (out-1)   aa llll i
// (out-2)   a llllllll i
//
// aaaa,0,4 => a,0,4
//   ll,5,7 => llllllll,5,7
//    h,8,9 => i,8,9
//
// Original Java signature: public void testChained() throws Exception
/// <summary>
/// Feeds "aaaa ll h" through two stacked <c>MappingCharFilter</c>s built on <c>normMap</c>
/// (defined outside this block) and asserts the whitespace tokens and their offsets
/// against the table above.
/// </summary>
public virtual void testChained()
{
    string input = "aaaa ll h";

    // First pass wraps the raw text; second pass wraps the first.
    CharFilter firstPass = new MappingCharFilter(normMap, new StringReader(input));
    CharFilter secondPass = new MappingCharFilter(normMap, firstPass);
    TokenStream tokens = new MockTokenizer(secondPass, MockTokenizer.WHITESPACE, false);

    string[] expectedTerms = new string[] { "a", "llllllll", "i" };
    int[] expectedStarts = new int[] { 0, 5, 8 };
    int[] expectedEnds = new int[] { 4, 7, 9 };

    assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, input.Length);
}
//
//                 1111111111222
//       01234567890123456789012
// (in)  h i j k ll cccc bbb aa
//
//                 1111111111222
//       01234567890123456789012
// (out) i i jj kkk llll cc b a
//
//    h, 0, 1 =>    i, 0, 1
//    i, 2, 3 =>    i, 2, 3
//    j, 4, 5 =>   jj, 4, 5
//    k, 6, 7 =>  kkk, 6, 7
//   ll, 8,10 => llll, 8,10
// cccc,11,15 =>   cc,11,15
//  bbb,16,19 =>    b,16,19
//   aa,20,22 =>    a,20,22
//
// Original Java signature: public void testTokenStream() throws Exception
/// <summary>
/// Tokenizes "h i j k ll cccc bbb aa" on whitespace through a <c>MappingCharFilter</c>
/// built on <c>normMap</c> (defined outside this block) and asserts the terms and
/// offsets listed in the table above.
/// </summary>
public virtual void testTokenStream()
{
    string input = "h i j k ll cccc bbb aa";

    CharFilter mapped = new MappingCharFilter(normMap, new StringReader(input));
    TokenStream tokens = new MockTokenizer(mapped, MockTokenizer.WHITESPACE, false);

    string[] expectedTerms = new string[] { "i", "i", "jj", "kkk", "llll", "cc", "b", "a" };
    int[] expectedStarts = new int[] { 0, 2, 4, 6, 8, 11, 16, 20 };
    int[] expectedEnds = new int[] { 1, 3, 5, 7, 10, 15, 19, 22 };

    assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, input.Length);
}
// Original Java signature: public void testNormalizeWinDelimToLinuxDelim() throws Exception
/// <summary>
/// Maps every backslash to a forward slash with a <c>MappingCharFilter</c>, then checks
/// that <c>PathHierarchyTokenizer</c> emits the cumulative path prefixes of "c:\a\b\c"
/// using '/' as the separator in the terms.
/// </summary>
public virtual void testNormalizeWinDelimToLinuxDelim()
{
    NormalizeCharMap.Builder slashBuilder = new NormalizeCharMap.Builder();
    slashBuilder.add("\\", "/");
    NormalizeCharMap slashMap = slashBuilder.build();

    string path = "c:\\a\\b\\c";
    Reader normalized = new MappingCharFilter(slashMap, new StringReader(path));
    PathHierarchyTokenizer tokenizer = new PathHierarchyTokenizer(normalized);

    string[] expectedTerms = new string[] { "c:", "c:/a", "c:/a/b", "c:/a/b/c" };
    int[] expectedStarts = new int[] { 0, 0, 0, 0 };
    int[] expectedEnds = new int[] { 2, 4, 6, 8 };
    // Presumably position increments -- confirm against the helper's signature.
    int[] expectedPosIncs = new int[] { 1, 0, 0, 0 };

    assertTokenStreamContents(tokenizer, expectedTerms, expectedStarts, expectedEnds, expectedPosIncs, path.Length);
}
// Original Java signature: public void testReaderReset() throws Exception
/// <summary>
/// Reads the one-character input "x" through a <c>MappingCharFilter</c> until end of
/// input, calls <c>reset()</c>, and asserts the same character can be read again.
/// </summary>
public virtual void testReaderReset()
{
    CharFilter filter = new MappingCharFilter(normMap, new StringReader("x"));
    char[] chars = new char[10];

    // First read returns the single character.
    int firstRead = filter.read(chars, 0, 10);
    assertEquals(1, firstRead);
    assertEquals('x', chars[0]);

    // Second read reports end of input.
    int exhausted = filter.read(chars, 0, 10);
    assertEquals(-1, exhausted);

    // rewind
    filter.reset();

    int afterReset = filter.read(chars, 0, 10);
    assertEquals(1, afterReset);
    assertEquals('x', chars[0]);
}
/// <summary>
/// Wraps <paramref name="reader"/> in a <c>MappingCharFilter</c> that normalizes several
/// apostrophe-like characters to <c>'</c>, removes U+0301 and U+00AD, and replaces
/// ґ/Ґ with г/Г.
/// </summary>
/// <param name="fieldName">Not used by this implementation.</param>
/// <param name="reader">The reader to wrap.</param>
/// <returns>The mapping char filter reading from <paramref name="reader"/>.</returns>
protected override TextReader InitReader(string fieldName, TextReader reader)
{
    // Each row is { source, replacement }; rows are registered top to bottom.
    string[,] rules = new string[,]
    {
        // different apostrophes
        { "\u2019", "'" },
        { "\u2018", "'" },
        { "\u02BC", "'" },
        { "`", "'" },
        { "´", "'" },
        // ignored characters
        { "\u0301", "" },
        { "\u00AD", "" },
        // letter replacements
        { "ґ", "г" },
        { "Ґ", "Г" }
    };

    NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
    for (int row = 0; row < rules.GetLength(0); row++)
    {
        mapBuilder.Add(rules[row, 0], rules[row, 1]);
    }

    return new MappingCharFilter(mapBuilder.Build(), reader);
}
/// <summary>
/// Verifies that <c>PatternTokenizer</c> reports offsets into the ORIGINAL text when the
/// input is first rewritten by a <c>MappingCharFilter</c> that shortens it
/// (the 6-character entity "&amp;uuml;" becomes the single character "ü").
/// </summary>
/// <remarks>
/// The input must contain the literal entity, not the decoded character: each
/// "G&amp;uuml;nther" is 12 characters long, which is what the asserted offsets
/// (0-12, 13-25, 26-28, 29-33) and the final offset (33 = INPUT.Length) are based on.
/// </remarks>
public virtual void TestOffsetCorrection()
{
    const string INPUT = "G&uuml;nther G&uuml;nther is here";

    // create MappingCharFilter
    // (equivalent mapping rule in text form: "&uuml;" => "ü")
    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    builder.Add("&uuml;", "ü");
    NormalizeCharMap normMap = builder.Build();
    CharFilter charStream = new MappingCharFilter(normMap, new StringReader(INPUT));

    // create PatternTokenizer: group -1 splits on the delimiter pattern
    TokenStream stream = new PatternTokenizer(charStream, new Regex("[,;/\\s]+", RegexOptions.Compiled), -1);
    AssertTokenStreamContents(stream,
        new string[] { "Günther", "Günther", "is", "here" },
        new int[] { 0, 13, 26, 29 },
        new int[] { 12, 25, 28, 33 },
        INPUT.Length);

    // Same input again; group 0 emits each whole match of the pattern, which is
    // matched against the filtered text (hence the decoded "ü" in the regex).
    charStream = new MappingCharFilter(normMap, new StringReader(INPUT));
    stream = new PatternTokenizer(charStream, new Regex("Günther", RegexOptions.Compiled), 0);
    AssertTokenStreamContents(stream,
        new string[] { "Günther", "Günther" },
        new int[] { 0, 13 },
        new int[] { 12, 25 },
        INPUT.Length);
}
// Original Java signature: public void test1to3() throws Exception
/// <summary>
/// Asserts that the one-character input "k" comes out of the <c>MappingCharFilter</c>
/// (built on <c>normMap</c>, defined outside this block) as the single token "kkk"
/// with offsets 0-1.
/// </summary>
public virtual void test1to3()
{
    CharFilter mapped = new MappingCharFilter(normMap, new StringReader("k"));
    TokenStream tokens = new MockTokenizer(mapped, MockTokenizer.WHITESPACE, false);

    string[] expectedTerms = new string[] { "kkk" };
    int[] expectedStarts = new int[] { 0 };
    int[] expectedEnds = new int[] { 1 };

    assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, 1);
}
// Original Java signature: public void test2to1() throws Exception
/// <summary>
/// Asserts that the two-character input "aa" comes out of the <c>MappingCharFilter</c>
/// (built on <c>normMap</c>, defined outside this block) as the single token "a"
/// with offsets 0-2.
/// </summary>
public virtual void test2to1()
{
    CharFilter mapped = new MappingCharFilter(normMap, new StringReader("aa"));
    TokenStream tokens = new MockTokenizer(mapped, MockTokenizer.WHITESPACE, false);

    string[] expectedTerms = new string[] { "a" };
    int[] expectedStarts = new int[] { 0 };
    int[] expectedEnds = new int[] { 2 };

    assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, 2);
}
// Original Java signature: public void testReaderReset() throws Exception
/// <summary>
/// Drains a <c>MappingCharFilter</c> over the input "x", resets it, and asserts that
/// the character is delivered a second time.
/// </summary>
public virtual void testReaderReset()
{
    CharFilter filter = new MappingCharFilter(normMap, new StringReader("x"));
    char[] chars = new char[10];

    // Initial read yields exactly one character: 'x'.
    int readCount = filter.read(chars, 0, 10);
    assertEquals(1, readCount);
    assertEquals('x', chars[0]);

    // Nothing left: the next read signals end of input.
    readCount = filter.read(chars, 0, 10);
    assertEquals(-1, readCount);

    // rewind
    filter.reset();

    // After the reset the same character is readable again.
    readCount = filter.read(chars, 0, 10);
    assertEquals(1, readCount);
    assertEquals('x', chars[0]);
}
//
//                 1111111111222
//       01234567890123456789012
// (in)  h i j k ll cccc bbb aa
//
//                 1111111111222
//       01234567890123456789012
// (out) i i jj kkk llll cc b a
//
//    h, 0, 1 =>    i, 0, 1
//    i, 2, 3 =>    i, 2, 3
//    j, 4, 5 =>   jj, 4, 5
//    k, 6, 7 =>  kkk, 6, 7
//   ll, 8,10 => llll, 8,10
// cccc,11,15 =>   cc,11,15
//  bbb,16,19 =>    b,16,19
//   aa,20,22 =>    a,20,22
//
// Original Java signature: public void testTokenStream() throws Exception
/// <summary>
/// Runs "h i j k ll cccc bbb aa" through a <c>MappingCharFilter</c> built on
/// <c>normMap</c> (defined outside this block), tokenizes on whitespace, and asserts
/// the terms and offsets from the table above.
/// </summary>
public virtual void testTokenStream()
{
    string input = "h i j k ll cccc bbb aa";

    CharFilter mapped = new MappingCharFilter(normMap, new StringReader(input));
    TokenStream tokens = new MockTokenizer(mapped, MockTokenizer.WHITESPACE, false);

    string[] expectedTerms = new string[] { "i", "i", "jj", "kkk", "llll", "cc", "b", "a" };
    int[] expectedStarts = new int[] { 0, 2, 4, 6, 8, 11, 16, 20 };
    int[] expectedEnds = new int[] { 1, 3, 5, 7, 10, 15, 19, 22 };

    assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, input.Length);
}
// Original Java signature: public void testNonBMPChar() throws Exception
/// <summary>
/// Builds a string from the single code point U+1D122 and asserts that, after the
/// <c>MappingCharFilter</c> (built on <c>normMap</c>, defined outside this block), it
/// is tokenized as "fclef" with offsets 0-2.
/// </summary>
public virtual void testNonBMPChar()
{
    int[] codePoints = new int[] { 0x1D122 };
    string input = UnicodeUtil.newString(codePoints, 0, 1);

    CharFilter mapped = new MappingCharFilter(normMap, new StringReader(input));
    TokenStream tokens = new MockTokenizer(mapped, MockTokenizer.WHITESPACE, false);

    string[] expectedTerms = new string[] { "fclef" };
    int[] expectedStarts = new int[] { 0 };
    int[] expectedEnds = new int[] { 2 };

    assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, 2);
}
// Original Java signature: public void testRandomMaps2() throws Exception
// (Converter note: the locals below were declared 'final' in the Java original.)
//
// Randomized test. Each outer iteration builds a random set of string mappings and a
// NormalizeCharMap from them. Each inner iteration then:
//   1. generates random content,
//   2. computes the expected mapped text and, for every output char, the input offset
//      it maps back to, using a brute-force longest-match loop,
//   3. reads the same content through a MappingCharFilter using a random mix of
//      single-char and buffered reads, collecting correctOffset() for every output char,
//   4. asserts that both the text and the offsets agree.
//
// NOTE(review): 'Random random = random();' declares a local with the same name as the
// method it calls -- confirm this resolves as intended in the target project.
public virtual void testRandomMaps2()
{
    Random random = random();

    int numIterations = atLeast(3);
    for (int iter = 0;iter < numIterations;iter++)
    {
        if (VERBOSE)
        {
            Console.WriteLine("\nTEST iter=" + iter);
        }

        // Keys and content are drawn from the letter range 'a'..endLetter.
        char endLetter = (char) TestUtil.Next(random, 'b', 'z');

        // 'map' mirrors what is registered in 'builder' so the reference loop below
        // can enumerate the mappings.
        IDictionary<string, string> map = new Dictionary<string, string>();
        NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
        int numMappings = atLeast(5);
        if (VERBOSE)
        {
            Console.WriteLine(" mappings:");
        }
        while (map.Count < numMappings)
        {
            string key = TestUtil.randomSimpleStringRange(random, 'a', endLetter, 7);
            // Skip empty keys and keys that were already added.
            if (key.Length != 0 && !map.ContainsKey(key))
            {
                string value = TestUtil.randomSimpleString(random);
                map[key] = value;
                builder.add(key, value);
                if (VERBOSE)
                {
                    Console.WriteLine(" " + key + " -> " + value);
                }
            }
        }

        NormalizeCharMap charMap = builder.build();

        if (VERBOSE)
        {
            Console.WriteLine(" test random documents...");
        }

        for (int iter2 = 0;iter2 < 100;iter2++)
        {
            string content = TestUtil.randomSimpleStringRange(random, 'a', endLetter, atLeast(1000));

            if (VERBOSE)
            {
                Console.WriteLine(" content=" + content);
            }

            // Do stupid dog-slow mapping:

            // Output string:
            StringBuilder output = new StringBuilder();

            // Maps output offset to input offset:
            IList<int?> inputOffsets = new List<int?>();

            // cumDiff: running (input chars consumed) - (output chars produced) over
            // all replacements so far; added to an output index to get an input index.
            int cumDiff = 0;
            int charIdx = 0;
            while (charIdx < content.Length)
            {
                // Find the longest mapping key that matches at charIdx (-1 = none).
                int matchLen = -1;
                string matchRepl = null;

                foreach (KeyValuePair<string, string> ent in map.SetOfKeyValuePairs())
                {
                    string match = ent.Key;
                    if (charIdx + match.Length <= content.Length)
                    {
                        int limit = charIdx + match.Length;
                        bool matches = true;
                        for (int charIdx2 = charIdx;charIdx2 < limit;charIdx2++)
                        {
                            if (match[charIdx2 - charIdx] != content[charIdx2])
                            {
                                matches = false;
                                break;
                            }
                        }

                        if (matches)
                        {
                            string repl = ent.Value;
                            if (match.Length > matchLen)
                            {
                                // Greedy: longer match wins
                                matchLen = match.Length;
                                matchRepl = repl;
                            }
                        }
                    }
                }

                if (matchLen != -1)
                {
                    // We found a match here!
                    if (VERBOSE)
                    {
                        Console.WriteLine(" match=" + content.Substring(charIdx, matchLen) + " @ off=" + charIdx + " repl=" + matchRepl);
                    }
                    output.Append(matchRepl);
                    int minLen = Math.Min(matchLen, matchRepl.Length);

                    // Common part, directly maps back to input
                    // offset:
                    for (int outIdx = 0;outIdx < minLen;outIdx++)
                    {
                        inputOffsets.Add(output.Length - matchRepl.Length + outIdx + cumDiff);
                    }

                    cumDiff += matchLen - matchRepl.Length;
                    charIdx += matchLen;

                    if (matchRepl.Length < matchLen)
                    {
                        // Replacement string is shorter than matched
                        // input: nothing to do
                    }
                    else if (matchRepl.Length > matchLen)
                    {
                        // Replacement string is longer than matched
                        // input: for all the "extra" chars we map
                        // back to a single input offset:
                        for (int outIdx = matchLen;outIdx < matchRepl.Length;outIdx++)
                        {
                            inputOffsets.Add(output.Length + cumDiff - 1);
                        }
                    }
                    else
                    {
                        // Same length: no change to offset
                    }

                    // Invariant: exactly one recorded input offset per output char.
                    Debug.Assert(inputOffsets.Count == output.Length, "inputOffsets.size()=" + inputOffsets.Count + " vs output.length()=" + output.Length);
                }
                else
                {
                    // No mapping applies here: copy the char through unchanged.
                    inputOffsets.Add(output.Length + cumDiff);
                    output.Append(content[charIdx]);
                    charIdx++;
                }
            }

            string expected = output.ToString();
            if (VERBOSE)
            {
                Console.Write(" expected:");
                for (int charIdx2 = 0;charIdx2 < expected.Length;charIdx2++)
                {
                    Console.Write(" " + expected[charIdx2] + "/" + inputOffsets[charIdx2]);
                }
                Console.WriteLine();
            }

            MappingCharFilter mapFilter = new MappingCharFilter(charMap, new StringReader(content));

            StringBuilder actualBuilder = new StringBuilder();
            IList<int?> actualInputOffsets = new List<int?>();

            // Now consume the actual mapFilter, somewhat randomly:
            while (true)
            {
                if (random.nextBoolean())
                {
                    // Single-char read; -1 signals end of input.
                    int ch = mapFilter.read();
                    if (ch == -1)
                    {
                        break;
                    }
                    actualBuilder.Append((char) ch);
                }
                else
                {
                    // Buffered read into a random-sized buffer at a random start index.
                    char[] buffer = new char[TestUtil.Next(random, 1, 100)];
                    int off = buffer.Length == 1 ? 0 : random.Next(buffer.Length - 1);
                    int count = mapFilter.read(buffer, off, buffer.Length - off);
                    if (count == -1)
                    {
                        break;
                    }
                    else
                    {
                        actualBuilder.Append(buffer, off, count);
                    }
                }

                // Occasionally query offsets mid-stream rather than only at the end.
                if (random.Next(10) == 7)
                {
                    // Map offsets
                    while (actualInputOffsets.Count < actualBuilder.Length)
                    {
                        actualInputOffsets.Add(mapFilter.correctOffset(actualInputOffsets.Count));
                    }
                }
            }

            // Finish mapping offsets
            while (actualInputOffsets.Count < actualBuilder.Length)
            {
                actualInputOffsets.Add(mapFilter.correctOffset(actualInputOffsets.Count));
            }

            string actual = actualBuilder.ToString();

            // Verify:
            assertEquals(expected, actual);
            // NOTE(review): presumably an element-wise list comparison -- verify the
            // semantics of this assertEquals overload for IList arguments.
            assertEquals(inputOffsets, actualInputOffsets);
        }
    }
}
//
//           0123456789
// (in)      aaaa ll h
// (out-1)   aa llll i
// (out-2)   a llllllll i
//
// aaaa,0,4 => a,0,4
//   ll,5,7 => llllllll,5,7
//    h,8,9 => i,8,9
//
// Original Java signature: public void testChained() throws Exception
/// <summary>
/// Applies <c>normMap</c> (defined outside this block) twice by nesting one
/// <c>MappingCharFilter</c> inside another, then asserts the whitespace tokens of
/// "aaaa ll h" and their offsets against the table above.
/// </summary>
public virtual void testChained()
{
    string input = "aaaa ll h";

    // Inner filter reads the raw text; outer filter reads the inner filter's output.
    CharFilter inner = new MappingCharFilter(normMap, new StringReader(input));
    CharFilter outer = new MappingCharFilter(normMap, inner);
    TokenStream tokens = new MockTokenizer(outer, MockTokenizer.WHITESPACE, false);

    string[] expectedTerms = new string[] { "a", "llllllll", "i" };
    int[] expectedStarts = new int[] { 0, 5, 8 };
    int[] expectedEnds = new int[] { 4, 7, 9 };

    assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, input.Length);
}
// Original Java signature: public void testFullWidthChar() throws Exception
/// <summary>
/// Asserts that the single character U+FF01 comes out of the <c>MappingCharFilter</c>
/// (built on <c>normMap</c>, defined outside this block) as the token
/// "full-width-exclamation" with offsets 0-1.
/// </summary>
public virtual void testFullWidthChar()
{
    CharFilter mapped = new MappingCharFilter(normMap, new StringReader("\uff01"));
    TokenStream tokens = new MockTokenizer(mapped, MockTokenizer.WHITESPACE, false);

    string[] expectedTerms = new string[] { "full-width-exclamation" };
    int[] expectedStarts = new int[] { 0 };
    int[] expectedEnds = new int[] { 1 };

    assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, 1);
}
// Original Java signature: public void test5to0() throws Exception
/// <summary>
/// Asserts that the five-character input "empty" yields no tokens at all after the
/// <c>MappingCharFilter</c> (built on <c>normMap</c>, defined outside this block),
/// while 5 is still passed as the last argument of the assertion.
/// </summary>
public virtual void test5to0()
{
    CharFilter mapped = new MappingCharFilter(normMap, new StringReader("empty"));
    TokenStream tokens = new MockTokenizer(mapped, MockTokenizer.WHITESPACE, false);

    // No tokens expected, so all expectation arrays are empty.
    string[] noTerms = new string[0];
    int[] noStarts = new int[] { };
    int[] noEnds = new int[] { };

    assertTokenStreamContents(tokens, noTerms, noStarts, noEnds, 5);
}
// Wraps the incoming reader in a MockCharFilter (second argument 0) and then in a
// MappingCharFilter driven by 'map' (defined outside this block); returns the
// outermost filter. 'fieldName' is not used.
protected internal override Reader initReader(string fieldName, Reader reader)
{
    Reader mocked = new MockCharFilter(reader, 0);
    return new MappingCharFilter(map, mocked);
}
/// <summary>
/// Stacks a <c>MockCharFilter</c> (second argument 0) and a <c>MappingCharFilter</c>
/// driven by <c>map</c> (defined outside this block) on top of
/// <paramref name="reader"/>.
/// </summary>
/// <param name="fieldName">Not used by this implementation.</param>
/// <param name="reader">The reader to wrap.</param>
/// <returns>The outermost filter, i.e. the <c>MappingCharFilter</c>.</returns>
public override TextReader InitReader(string fieldName, TextReader reader)
{
    TextReader mocked = new MockCharFilter(reader, 0);
    return new MappingCharFilter(map, mocked);
}
// Original Java signature: public void testRandomMaps2() throws Exception
// (Converter note: the locals below were declared 'final' in the Java original.)
//
// Randomized test. Each outer iteration builds a random set of string mappings and a
// NormalizeCharMap from them. Each inner iteration then:
//   1. generates random content,
//   2. computes the expected mapped text and, for every output char, the input offset
//      it maps back to, using a brute-force longest-match loop,
//   3. reads the same content through a MappingCharFilter using a random mix of
//      single-char and buffered reads, collecting correctOffset() for every output char,
//   4. asserts that both the text and the offsets agree.
//
// NOTE(review): 'Random random = random();' declares a local with the same name as the
// method it calls -- confirm this resolves as intended in the target project.
public virtual void testRandomMaps2()
{
    Random random = random();

    int numIterations = atLeast(3);
    for (int iter = 0; iter < numIterations; iter++)
    {
        if (VERBOSE)
        {
            Console.WriteLine("\nTEST iter=" + iter);
        }

        // Keys and content are drawn from the letter range 'a'..endLetter.
        char endLetter = (char)TestUtil.Next(random, 'b', 'z');

        // 'map' mirrors what is registered in 'builder' so the reference loop below
        // can enumerate the mappings.
        IDictionary <string, string> map = new Dictionary <string, string>();
        NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
        int numMappings = atLeast(5);
        if (VERBOSE)
        {
            Console.WriteLine(" mappings:");
        }
        while (map.Count < numMappings)
        {
            string key = TestUtil.randomSimpleStringRange(random, 'a', endLetter, 7);
            // Skip empty keys and keys that were already added.
            if (key.Length != 0 && !map.ContainsKey(key))
            {
                string value = TestUtil.randomSimpleString(random);
                map[key] = value;
                builder.add(key, value);
                if (VERBOSE)
                {
                    Console.WriteLine(" " + key + " -> " + value);
                }
            }
        }

        NormalizeCharMap charMap = builder.build();

        if (VERBOSE)
        {
            Console.WriteLine(" test random documents...");
        }

        for (int iter2 = 0; iter2 < 100; iter2++)
        {
            string content = TestUtil.randomSimpleStringRange(random, 'a', endLetter, atLeast(1000));

            if (VERBOSE)
            {
                Console.WriteLine(" content=" + content);
            }

            // Do stupid dog-slow mapping:

            // Output string:
            StringBuilder output = new StringBuilder();

            // Maps output offset to input offset:
            IList <int?> inputOffsets = new List <int?>();

            // cumDiff: running (input chars consumed) - (output chars produced) over
            // all replacements so far; added to an output index to get an input index.
            int cumDiff = 0;
            int charIdx = 0;
            while (charIdx < content.Length)
            {
                // Find the longest mapping key that matches at charIdx (-1 = none).
                int matchLen = -1;
                string matchRepl = null;

                foreach (KeyValuePair <string, string> ent in map.SetOfKeyValuePairs())
                {
                    string match = ent.Key;
                    if (charIdx + match.Length <= content.Length)
                    {
                        int limit = charIdx + match.Length;
                        bool matches = true;
                        for (int charIdx2 = charIdx; charIdx2 < limit; charIdx2++)
                        {
                            if (match[charIdx2 - charIdx] != content[charIdx2])
                            {
                                matches = false;
                                break;
                            }
                        }

                        if (matches)
                        {
                            string repl = ent.Value;
                            if (match.Length > matchLen)
                            {
                                // Greedy: longer match wins
                                matchLen = match.Length;
                                matchRepl = repl;
                            }
                        }
                    }
                }

                if (matchLen != -1)
                {
                    // We found a match here!
                    if (VERBOSE)
                    {
                        Console.WriteLine(" match=" + content.Substring(charIdx, matchLen) + " @ off=" + charIdx + " repl=" + matchRepl);
                    }
                    output.Append(matchRepl);
                    int minLen = Math.Min(matchLen, matchRepl.Length);

                    // Common part, directly maps back to input
                    // offset:
                    for (int outIdx = 0; outIdx < minLen; outIdx++)
                    {
                        inputOffsets.Add(output.Length - matchRepl.Length + outIdx + cumDiff);
                    }

                    cumDiff += matchLen - matchRepl.Length;
                    charIdx += matchLen;

                    if (matchRepl.Length < matchLen)
                    {
                        // Replacement string is shorter than matched
                        // input: nothing to do
                    }
                    else if (matchRepl.Length > matchLen)
                    {
                        // Replacement string is longer than matched
                        // input: for all the "extra" chars we map
                        // back to a single input offset:
                        for (int outIdx = matchLen; outIdx < matchRepl.Length; outIdx++)
                        {
                            inputOffsets.Add(output.Length + cumDiff - 1);
                        }
                    }
                    else
                    {
                        // Same length: no change to offset
                    }

                    // Invariant: exactly one recorded input offset per output char.
                    Debug.Assert(inputOffsets.Count == output.Length, "inputOffsets.size()=" + inputOffsets.Count + " vs output.length()=" + output.Length);
                }
                else
                {
                    // No mapping applies here: copy the char through unchanged.
                    inputOffsets.Add(output.Length + cumDiff);
                    output.Append(content[charIdx]);
                    charIdx++;
                }
            }

            string expected = output.ToString();
            if (VERBOSE)
            {
                Console.Write(" expected:");
                for (int charIdx2 = 0; charIdx2 < expected.Length; charIdx2++)
                {
                    Console.Write(" " + expected[charIdx2] + "/" + inputOffsets[charIdx2]);
                }
                Console.WriteLine();
            }

            MappingCharFilter mapFilter = new MappingCharFilter(charMap, new StringReader(content));

            StringBuilder actualBuilder = new StringBuilder();
            IList <int?> actualInputOffsets = new List <int?>();

            // Now consume the actual mapFilter, somewhat randomly:
            while (true)
            {
                if (random.nextBoolean())
                {
                    // Single-char read; -1 signals end of input.
                    int ch = mapFilter.read();
                    if (ch == -1)
                    {
                        break;
                    }
                    actualBuilder.Append((char)ch);
                }
                else
                {
                    // Buffered read into a random-sized buffer at a random start index.
                    char[] buffer = new char[TestUtil.Next(random, 1, 100)];
                    int off = buffer.Length == 1 ? 0 : random.Next(buffer.Length - 1);
                    int count = mapFilter.read(buffer, off, buffer.Length - off);
                    if (count == -1)
                    {
                        break;
                    }
                    else
                    {
                        actualBuilder.Append(buffer, off, count);
                    }
                }

                // Occasionally query offsets mid-stream rather than only at the end.
                if (random.Next(10) == 7)
                {
                    // Map offsets
                    while (actualInputOffsets.Count < actualBuilder.Length)
                    {
                        actualInputOffsets.Add(mapFilter.correctOffset(actualInputOffsets.Count));
                    }
                }
            }

            // Finish mapping offsets
            while (actualInputOffsets.Count < actualBuilder.Length)
            {
                actualInputOffsets.Add(mapFilter.correctOffset(actualInputOffsets.Count));
            }

            string actual = actualBuilder.ToString();

            // Verify:
            assertEquals(expected, actual);
            // NOTE(review): presumably an element-wise list comparison -- verify the
            // semantics of this assertEquals overload for IList arguments.
            assertEquals(inputOffsets, actualInputOffsets);
        }
    }
}
/// <summary>
/// Creates the search index named <paramref name="indexName"/> if it doesn't already
/// exist, configuring its suggester, char filters, token filters, tokenizers, custom
/// analyzers and synonym map. Also creates the synonym map if it is missing.
/// </summary>
/// <param name="serviceClient">Search service client used for all index and synonym map calls.</param>
/// <param name="indexName">Name of the index to look up or create.</param>
/// <returns>A task that completes once the index exists; it carries no result.</returns>
/// <exception cref="Exception">Thrown when <c>Disabled</c> is true.</exception>
private async Task CreateIndexIfNotExistsAsync(ISearchServiceClient serviceClient, string indexName)
{
    if (Disabled)
    {
        throw new Exception($"{nameof(AzureEmployerSearchRepository)} is disabled");
    }

    // Nothing to do when the index is already there.
    if (await serviceClient.Indexes.ExistsAsync(indexName))
    {
        return;
    }

    var index = new Index { Name = indexName, Fields = FieldBuilder.BuildForType<EmployerSearchModel>() };

    index.Suggesters = new List<Suggester>
    {
        new Suggester(
            suggestorName,
            nameof(EmployerSearchModel.Name),
            nameof(EmployerSearchModel.PreviousName),
            nameof(EmployerSearchModel.Abbreviations))
    };

    // Char filters: each mapping rule "x=>" maps x to the empty string.
    var charFilterRemoveAmpersand = new MappingCharFilter("gpg_remove_Ampersand", new List<string> { "&=>" });
    var charFilterRemoveDot = new MappingCharFilter("gpg_remove_Dot", new List<string> { ".=>" });

    // case insensitive 'limited' 'ltd', 'llp', ' uk', '(uk)', '-uk' followed by zero or more dots
    // (to cater for ltd. and some mis-punctuated limited..)
    var charFilterRemoveLtdInfoCaseInsensitive = new PatternReplaceCharFilter(
        "gpg_patternReplaceCharFilter_Ltd",
        "(?i)(limited|ltd|llp| uk|\\(uk\\)|-uk)[\\.]*",
        string.Empty);
    var charFilterRemoveWhitespace = new PatternReplaceCharFilter(
        "gpg_patternReplaceCharFilter_removeWhitespace",
        "\\s",
        string.Empty);

    index.CharFilters = new List<CharFilter>
    {
        charFilterRemoveAmpersand,
        charFilterRemoveDot,
        charFilterRemoveLtdInfoCaseInsensitive,
        charFilterRemoveWhitespace
    };

    // Edge n-grams of length 3..300 taken from the front and from the back of a token.
    var edgeNGramTokenFilterFront = new EdgeNGramTokenFilterV2("gpg_edgeNGram_front", 3, 300, EdgeNGramTokenFilterSide.Front);
    var edgeNGramTokenFilterBack = new EdgeNGramTokenFilterV2("gpg_edgeNGram_back", 3, 300, EdgeNGramTokenFilterSide.Back);
    index.TokenFilters = new List<TokenFilter> { edgeNGramTokenFilterFront, edgeNGramTokenFilterBack };

    var standardTokenizer = new StandardTokenizerV2("gpg_standard_v2_tokenizer");
    var keywordTokenizer = new KeywordTokenizerV2("gpg_keyword_v2_tokenizer");
    index.Tokenizers = new List<Tokenizer> { standardTokenizer, keywordTokenizer };

    var suffixAnalyzer = new CustomAnalyzer(
        "gpg_suffix",
        standardTokenizer.Name,
        new List<TokenFilterName> { TokenFilterName.Lowercase, edgeNGramTokenFilterBack.Name },
        new List<CharFilterName> { charFilterRemoveAmpersand.Name, charFilterRemoveLtdInfoCaseInsensitive.Name });

    var completeTokenAnalyzer = new CustomAnalyzer(
        "gpg_prefix_completeToken",
        keywordTokenizer.Name,
        new List<TokenFilterName> { TokenFilterName.Lowercase, edgeNGramTokenFilterFront.Name },
        new List<CharFilterName>
        {
            charFilterRemoveDot.Name,
            charFilterRemoveAmpersand.Name,
            charFilterRemoveLtdInfoCaseInsensitive.Name,
            charFilterRemoveWhitespace.Name
        });

    index.Analyzers = new List<Analyzer> { suffixAnalyzer, completeTokenAnalyzer };

    // Look each field up once, then attach its analyzer and/or synonym map.
    var suffixSearchField = index.Fields.First(f => f.Name == nameof(EmployerSearchModel.PartialNameForSuffixSearches));
    suffixSearchField.Analyzer = suffixAnalyzer.Name;
    suffixSearchField.SynonymMaps = new[] { synonymMapName };

    var completeTokenSearchField = index.Fields.First(f => f.Name == nameof(EmployerSearchModel.PartialNameForCompleteTokenSearches));
    completeTokenSearchField.Analyzer = completeTokenAnalyzer.Name;
    completeTokenSearchField.SynonymMaps = new[] { synonymMapName };

    index.Fields.First(f => f.Name == nameof(EmployerSearchModel.Name)).SynonymMaps = new[] { synonymMapName };
    index.Fields.First(f => f.Name == nameof(EmployerSearchModel.PreviousName)).SynonymMaps = new[] { synonymMapName };

    //Add the synonyms if they dont already exist
    if (!await serviceClient.SynonymMaps.ExistsAsync(synonymMapName))
    {
        // Awaited so the request does not block the calling thread inside this async method.
        await serviceClient.SynonymMaps.CreateOrUpdateAsync(
            new SynonymMap
            {
                Name = synonymMapName,
                //Format = "solr", cannot set after upgrade from v5.03 to version 9.0.0
                Synonyms = "coop, co-operative"
            });
    }

    await serviceClient.Indexes.CreateAsync(index);
}