Пример #1
0
        /// <summary>
        /// Builds an analyzer whose reader is wrapped by a <c>MockCharFilter</c> and a
        /// <c>MappingCharFilter</c>, and whose token chain is a <c>MockTokenizer</c> followed by a
        /// <c>CommonGramsFilter</c>, then runs <c>CheckAnalysisConsistency</c> on one fixed input.
        /// </summary>
        public virtual void Test()
        {
            // Word set handed to the CommonGramsFilter.
            CharArraySet commonWords = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
            commonWords.add("jjp");
            commonWords.add("wlmwoknt");
            commonWords.add("tcgyreo");

            // Character mappings applied before tokenization.
            NormalizeCharMap.Builder mappings = new NormalizeCharMap.Builder();
            mappings.Add("mtqlpi", "");
            mappings.Add("mwoknt", "jjp");
            mappings.Add("tcgyreo", "zpfpajyws");
            NormalizeCharMap charMap = mappings.Build();

            Analyzer analyzer = Analyzer.NewAnonymous(
                createComponents: (fieldName, reader) =>
                {
                    Tokenizer tokenizer = new MockTokenizer(
                        new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(reader),
                        MockTokenFilter.ENGLISH_STOPSET,
                        false,
                        -65);
                    TokenFilter grams = new CommonGramsFilter(TEST_VERSION_CURRENT, tokenizer, commonWords);
                    return new TokenStreamComponents(tokenizer, grams);
                },
                // MockCharFilter is applied first, MappingCharFilter wraps its output.
                initReader: (fieldName, reader) =>
                    new MappingCharFilter(charMap, new MockCharFilter(reader, 0)));

            CheckAnalysisConsistency(Random, analyzer, false, "wmgddzunizdomqyj");
        }
Пример #2
0
        //
        //
        //        0123456789
        //(in)    aaaa ll h
        //(out-1) aa llll i
        //(out-2) a llllllll i
        //
        // aaaa,0,4 => a,0,4
        //   ll,5,7 => llllllll,5,7
        //    h,8,9 => i,8,9
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testChained() throws Exception
        /// <summary>
        /// Stacks two <c>MappingCharFilter</c> instances that share <c>normMap</c> and checks the
        /// terms and start/end offsets produced by a whitespace <c>MockTokenizer</c> on top of them.
        /// </summary>
        public virtual void testChained()
        {
            string input = "aaaa ll h";

            // The inner filter maps the raw text; the outer one maps the inner filter's output.
            CharFilter inner = new MappingCharFilter(normMap, new StringReader(input));
            CharFilter outer = new MappingCharFilter(normMap, inner);
            TokenStream tokens = new MockTokenizer(outer, MockTokenizer.WHITESPACE, false);

            string[] expectedTerms = { "a", "llllllll", "i" };
            int[] expectedStarts = { 0, 5, 8 };
            int[] expectedEnds = { 4, 7, 9 };

            assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, input.Length);
        }
Пример #3
0
        //
        //                1111111111222
        //      01234567890123456789012
        //(in)  h i j k ll cccc bbb aa
        //
        //                1111111111222
        //      01234567890123456789012
        //(out) i i jj kkk llll cc b a
        //
        //    h, 0, 1 =>    i, 0, 1
        //    i, 2, 3 =>    i, 2, 3
        //    j, 4, 5 =>   jj, 4, 5
        //    k, 6, 7 =>  kkk, 6, 7
        //   ll, 8,10 => llll, 8,10
        // cccc,11,15 =>   cc,11,15
        //  bbb,16,19 =>    b,16,19
        //   aa,20,22 =>    a,20,22
        //
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testTokenStream() throws Exception
        /// <summary>
        /// Runs a whitespace-separated sample through a <c>MappingCharFilter</c> built on
        /// <c>normMap</c> and checks every resulting term with its start/end offsets.
        /// </summary>
        public virtual void testTokenStream()
        {
            string input = "h i j k ll cccc bbb aa";

            string[] expectedTerms = { "i", "i", "jj", "kkk", "llll", "cc", "b", "a" };
            int[] expectedStarts = { 0, 2, 4, 6, 8, 11, 16, 20 };
            int[] expectedEnds = { 1, 3, 5, 7, 10, 15, 19, 22 };

            CharFilter mapped = new MappingCharFilter(normMap, new StringReader(input));
            TokenStream tokens = new MockTokenizer(mapped, MockTokenizer.WHITESPACE, false);

            assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, input.Length);
        }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testNormalizeWinDelimToLinuxDelim() throws Exception
        /// <summary>
        /// Maps backslashes to forward slashes with a <c>MappingCharFilter</c> and checks the
        /// terms, offsets and position increments a <c>PathHierarchyTokenizer</c> then produces.
        /// </summary>
        public virtual void testNormalizeWinDelimToLinuxDelim()
        {
            string path = "c:\\a\\b\\c";

            // Single rule: '\' becomes '/'.
            NormalizeCharMap.Builder rules = new NormalizeCharMap.Builder();
            rules.add("\\", "/");
            NormalizeCharMap slashMap = rules.build();

            Reader normalized = new MappingCharFilter(slashMap, new StringReader(path));
            PathHierarchyTokenizer tokenizer = new PathHierarchyTokenizer(normalized);

            string[] expectedTerms = { "c:", "c:/a", "c:/a/b", "c:/a/b/c" };
            int[] expectedStarts = { 0, 0, 0, 0 };
            int[] expectedEnds = { 2, 4, 6, 8 };
            int[] expectedPosIncrements = { 1, 0, 0, 0 };

            assertTokenStreamContents(tokenizer, expectedTerms, expectedStarts, expectedEnds, expectedPosIncrements, path.Length);
        }
Пример #5
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testReaderReset() throws Exception
        /// <summary>
        /// Reads a one-character input through a <c>MappingCharFilter</c> until end of stream,
        /// calls <c>reset()</c>, and checks that the same character can be read again.
        /// </summary>
        public virtual void testReaderReset()
        {
            char[] chars = new char[10];
            CharFilter filter = new MappingCharFilter(normMap, new StringReader("x"));

            // First pass: exactly one char, then -1.
            int read = filter.read(chars, 0, 10);
            assertEquals(1, read);
            assertEquals('x', chars[0]);

            read = filter.read(chars, 0, 10);
            assertEquals(-1, read);

            // Rewind and expect the same single char again.
            filter.reset();

            read = filter.read(chars, 0, 10);
            assertEquals(1, read);
            assertEquals('x', chars[0]);
        }
        /// <summary>
        /// Wraps <paramref name="reader"/> in a <c>MappingCharFilter</c> that normalizes
        /// apostrophe variants, drops two characters, and folds ґ/Ґ to г/Г.
        /// </summary>
        /// <param name="fieldName">Field name; not used by this implementation.</param>
        /// <param name="reader">Reader to wrap.</param>
        /// <returns>The <c>MappingCharFilter</c> wrapping <paramref name="reader"/>.</returns>
        protected override TextReader InitReader(string fieldName, TextReader reader)
        {
            // Each row is { source, replacement }; rows are registered in the order listed.
            string[][] rules =
            {
                // different apostrophes -> plain apostrophe
                new[] { "\u2019", "'" },
                new[] { "\u2018", "'" },
                new[] { "\u02BC", "'" },
                new[] { "`", "'" },
                new[] { "´", "'" },
                // characters mapped to the empty string
                new[] { "\u0301", "" },
                new[] { "\u00AD", "" },
                // letter folding
                new[] { "ґ", "г" },
                new[] { "Ґ", "Г" },
            };

            NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
            foreach (string[] rule in rules)
            {
                mapBuilder.Add(rule[0], rule[1]);
            }

            return new MappingCharFilter(mapBuilder.Build(), reader);
        }
Пример #7
0
        /// <summary>
        /// Checks the token offsets reported by a <c>PatternTokenizer</c> whose input passes
        /// through a <c>MappingCharFilter</c> that replaces the u-umlaut HTML entity with the
        /// single character <c>ü</c>. Expected offsets are positions in the raw, unmapped input.
        /// </summary>
        public virtual void TestOffsetCorrection()
        {
            // The raw input must contain the literal entity text: each "G&uuml;nther" is 12 chars,
            // which is what the expected offsets (0-12, 13-25, 26-28, 29-33) and the final
            // value INPUT.Length == 33 are derived from. With an already-decoded "Günther" the
            // mapping rule below would never fire and none of the offsets would line up.
            const string INPUT = "G&uuml;nther G&uuml;nther is here";

            // create MappingCharFilter
            NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
            builder.Add("&uuml;", "ü");
            NormalizeCharMap normMap    = builder.Build();
            CharFilter       charStream = new MappingCharFilter(normMap, new StringReader(INPUT));

            // create PatternTokenizer: first pass uses a delimiter pattern with group -1
            TokenStream stream = new PatternTokenizer(charStream, new Regex("[,;/\\s]+", RegexOptions.Compiled), -1);

            AssertTokenStreamContents(stream, new string[] { "Günther", "Günther", "is", "here" }, new int[] { 0, 13, 26, 29 }, new int[] { 12, 25, 28, 33 }, INPUT.Length);

            // Second pass on a fresh filter: the pattern matches the mapped word itself (group 0).
            charStream = new MappingCharFilter(normMap, new StringReader(INPUT));
            stream     = new PatternTokenizer(charStream, new Regex("Günther", RegexOptions.Compiled), 0);
            AssertTokenStreamContents(stream, new string[] { "Günther", "Günther" }, new int[] { 0, 13 }, new int[] { 12, 25 }, INPUT.Length);
        }
Пример #8
0
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void test1to3() throws Exception
 /// <summary>
 /// Feeds the single character "k" through a <c>MappingCharFilter</c> built on <c>normMap</c>
 /// and expects the three-character term "kkk" spanning offsets 0..1.
 /// </summary>
 public virtual void test1to3()
 {
     string[] expectedTerms = { "kkk" };
     int[] expectedStarts = { 0 };
     int[] expectedEnds = { 1 };

     CharFilter mapped = new MappingCharFilter(normMap, new StringReader("k"));
     TokenStream tokens = new MockTokenizer(mapped, MockTokenizer.WHITESPACE, false);

     assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, 1);
 }
Пример #9
0
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void test2to1() throws Exception
 /// <summary>
 /// Feeds "aa" through a <c>MappingCharFilter</c> built on <c>normMap</c> and expects the
 /// one-character term "a" spanning offsets 0..2.
 /// </summary>
 public virtual void test2to1()
 {
     string[] expectedTerms = { "a" };
     int[] expectedStarts = { 0 };
     int[] expectedEnds = { 2 };

     CharFilter mapped = new MappingCharFilter(normMap, new StringReader("aa"));
     TokenStream tokens = new MockTokenizer(mapped, MockTokenizer.WHITESPACE, false);

     assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, 2);
 }
Пример #10
0
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testReaderReset() throws Exception
        /// <summary>
        /// Drains a one-character <c>MappingCharFilter</c>, resets it, and verifies the
        /// character is delivered a second time.
        /// </summary>
        public virtual void testReaderReset()
        {
            CharFilter source = new MappingCharFilter(normMap, new StringReader("x"));
            char[] scratch = new char[10];

            // Initial read yields the single 'x'.
            int got = source.read(scratch, 0, 10);
            assertEquals(1, got);
            assertEquals('x', scratch[0]);

            // Nothing left: the next read signals end of stream.
            got = source.read(scratch, 0, 10);
            assertEquals(-1, got);

            // rewind
            source.reset();

            got = source.read(scratch, 0, 10);
            assertEquals(1, got);
            assertEquals('x', scratch[0]);
        }
Пример #11
0
 //
 //                1111111111222
 //      01234567890123456789012
 //(in)  h i j k ll cccc bbb aa
 //
 //                1111111111222
 //      01234567890123456789012
 //(out) i i jj kkk llll cc b a
 //
 //    h, 0, 1 =>    i, 0, 1
 //    i, 2, 3 =>    i, 2, 3
 //    j, 4, 5 =>   jj, 4, 5
 //    k, 6, 7 =>  kkk, 6, 7
 //   ll, 8,10 => llll, 8,10
 // cccc,11,15 =>   cc,11,15
 //  bbb,16,19 =>    b,16,19
 //   aa,20,22 =>    a,20,22
 //
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void testTokenStream() throws Exception
 /// <summary>
 /// Tokenizes a mapped, whitespace-separated sample and checks each term together with its
 /// start and end offsets.
 /// </summary>
 public virtual void testTokenStream()
 {
     string sample = "h i j k ll cccc bbb aa";

     CharFilter filtered = new MappingCharFilter(normMap, new StringReader(sample));
     TokenStream stream = new MockTokenizer(filtered, MockTokenizer.WHITESPACE, false);

     string[] terms = { "i", "i", "jj", "kkk", "llll", "cc", "b", "a" };
     int[] starts = { 0, 2, 4, 6, 8, 11, 16, 20 };
     int[] ends = { 1, 3, 5, 7, 10, 15, 19, 22 };

     assertTokenStreamContents(stream, terms, starts, ends, sample.Length);
 }
Пример #12
0
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void testNonBMPChar() throws Exception
 /// <summary>
 /// Feeds the single code point U+1D122 through a <c>MappingCharFilter</c> built on
 /// <c>normMap</c> and expects the term "fclef" spanning offsets 0..2.
 /// </summary>
 public virtual void testNonBMPChar()
 {
     // One code point, converted to a string by UnicodeUtil.
     int[] codePoints = { 0x1D122 };
     string input = UnicodeUtil.newString(codePoints, 0, 1);

     CharFilter mapped = new MappingCharFilter(normMap, new StringReader(input));
     TokenStream tokens = new MockTokenizer(mapped, MockTokenizer.WHITESPACE, false);

     string[] expectedTerms = { "fclef" };
     int[] expectedStarts = { 0 };
     int[] expectedEnds = { 2 };

     assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, 2);
 }
Пример #13
0
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testRandomMaps2() throws Exception
        /// <summary>
        /// Randomized check of <c>MappingCharFilter</c>. For each iteration it builds a random set
        /// of mappings, then for 100 random documents computes the expected mapped text and the
        /// expected output-to-input offset table with a naive longest-match scan, reads the same
        /// document through the filter using a random mix of single-char and buffered reads, and
        /// asserts that both the text and the <c>correctOffset</c> results agree.
        /// </summary>
        public virtual void testRandomMaps2()
        {
            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final java.util.Random random = random();
            // NOTE(review): converter output - the local has the same name as the random() helper
            // it is initialised from; confirm this resolves as intended in the target project.
            Random random = random();
            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final int numIterations = atLeast(3);
            int numIterations = atLeast(3);
            for (int iter = 0;iter < numIterations;iter++)
            {

              if (VERBOSE)
              {
            Console.WriteLine("\nTEST iter=" + iter);
              }

            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final char endLetter = (char) org.apache.lucene.util.TestUtil.nextInt(random, 'b', 'z');
              // Upper bound of the alphabet used for both mapping keys and document content.
              char endLetter = (char) TestUtil.Next(random, 'b', 'z');

            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final java.util.Map<String,String> map = new java.util.HashMap<>();
              // Reference copy of the mappings, used by the naive scan below.
              IDictionary<string, string> map = new Dictionary<string, string>();
            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
              NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final int numMappings = atLeast(5);
              int numMappings = atLeast(5);
              if (VERBOSE)
              {
            Console.WriteLine("  mappings:");
              }
              // Keep drawing keys until enough distinct, non-empty ones have been registered.
              while (map.Count < numMappings)
              {
            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final String key = org.apache.lucene.util.TestUtil.randomSimpleStringRange(random, 'a', endLetter, 7);
            string key = TestUtil.randomSimpleStringRange(random, 'a', endLetter, 7);
            if (key.Length != 0 && !map.ContainsKey(key))
            {
            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final String value = org.apache.lucene.util.TestUtil.randomSimpleString(random);
              string value = TestUtil.randomSimpleString(random);
              // The same pair goes into both the reference map and the filter's builder.
              map[key] = value;
              builder.add(key, value);
              if (VERBOSE)
              {
                Console.WriteLine("    " + key + " -> " + value);
              }
            }
              }

            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final NormalizeCharMap charMap = builder.build();
              NormalizeCharMap charMap = builder.build();

              if (VERBOSE)
              {
            Console.WriteLine("  test random documents...");
              }

              for (int iter2 = 0;iter2 < 100;iter2++)
              {
            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final String content = org.apache.lucene.util.TestUtil.randomSimpleStringRange(random, 'a', endLetter, atLeast(1000));
            string content = TestUtil.randomSimpleStringRange(random, 'a', endLetter, atLeast(1000));

            if (VERBOSE)
            {
              Console.WriteLine("  content=" + content);
            }

            // Do stupid dog-slow mapping:

            // Output string:
            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final StringBuilder output = new StringBuilder();
            StringBuilder output = new StringBuilder();

            // Maps output offset to input offset:
            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final java.util.List<Integer> inputOffsets = new java.util.ArrayList<>();
            IList<int?> inputOffsets = new List<int?>();

            // cumDiff accumulates (matched length - replacement length) over all matches so far,
            // so that output.Length + cumDiff equals charIdx at the top of each loop pass.
            int cumDiff = 0;
            int charIdx = 0;
            while (charIdx < content.Length)
            {

              // -1 means "no mapping key matches at charIdx".
              int matchLen = -1;
              string matchRepl = null;

              // NOTE(review): SetOfKeyValuePairs() is not defined in this block; presumably a
              // converter helper that enumerates the dictionary entries - confirm.
              foreach (KeyValuePair<string, string> ent in map.SetOfKeyValuePairs())
              {
            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final String match = ent.getKey();
                string match = ent.Key;
                // Only keys that fit in the remaining content can match.
                if (charIdx + match.Length <= content.Length)
                {
            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final int limit = charIdx+match.length();
                  int limit = charIdx + match.Length;
                  bool matches = true;
                  // Char-by-char comparison of the key against content starting at charIdx.
                  for (int charIdx2 = charIdx;charIdx2 < limit;charIdx2++)
                  {
                    if (match[charIdx2 - charIdx] != content[charIdx2])
                    {
                      matches = false;
                      break;
                    }
                  }

                  if (matches)
                  {
            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final String repl = ent.getValue();
                    string repl = ent.Value;
                    if (match.Length > matchLen)
                    {
                      // Greedy: longer match wins
                      matchLen = match.Length;
                      matchRepl = repl;
                    }
                  }
                }
              }

              if (matchLen != -1)
              {
                // We found a match here!
                if (VERBOSE)
                {
                  Console.WriteLine("    match=" + content.Substring(charIdx, matchLen) + " @ off=" + charIdx + " repl=" + matchRepl);
                }
                output.Append(matchRepl);
            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final int minLen = Math.min(matchLen, matchRepl.length());
                int minLen = Math.Min(matchLen, matchRepl.Length);

                // Common part, directly maps back to input
                // offset:
                // (output.Length - matchRepl.Length) is the output length before the append, so
                // with the not-yet-updated cumDiff this evaluates to charIdx + outIdx.
                for (int outIdx = 0;outIdx < minLen;outIdx++)
                {
                  inputOffsets.Add(output.Length - matchRepl.Length + outIdx + cumDiff);
                }

                cumDiff += matchLen - matchRepl.Length;
                charIdx += matchLen;

                if (matchRepl.Length < matchLen)
                {
                  // Replacement string is shorter than matched
                  // input: nothing to do
                }
                else if (matchRepl.Length > matchLen)
                {
                  // Replacement string is longer than matched
                  // input: for all the "extra" chars we map
                  // back to a single input offset:
                  // output.Length + cumDiff now equals the input index just past the match, so
                  // subtracting 1 yields the index of the last matched input char.
                  for (int outIdx = matchLen;outIdx < matchRepl.Length;outIdx++)
                  {
                    inputOffsets.Add(output.Length + cumDiff - 1);
                  }
                }
                else
                {
                  // Same length: no change to offset
                }

                // Every output char must have exactly one recorded input offset.
                Debug.Assert(inputOffsets.Count == output.Length, "inputOffsets.size()=" + inputOffsets.Count + " vs output.length()=" + output.Length);
              }
              else
              {
                // No mapping applies: copy the char through and record its input position.
                inputOffsets.Add(output.Length + cumDiff);
                output.Append(content[charIdx]);
                charIdx++;
              }
            }

            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final String expected = output.toString();
            string expected = output.ToString();
            if (VERBOSE)
            {
              Console.Write("    expected:");
              for (int charIdx2 = 0;charIdx2 < expected.Length;charIdx2++)
              {
                Console.Write(" " + expected[charIdx2] + "/" + inputOffsets[charIdx2]);
              }
              Console.WriteLine();
            }

            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final MappingCharFilter mapFilter = new MappingCharFilter(charMap, new java.io.StringReader(content));
            MappingCharFilter mapFilter = new MappingCharFilter(charMap, new StringReader(content));

            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final StringBuilder actualBuilder = new StringBuilder();
            StringBuilder actualBuilder = new StringBuilder();
            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final java.util.List<Integer> actualInputOffsets = new java.util.ArrayList<>();
            IList<int?> actualInputOffsets = new List<int?>();

            // Now consume the actual mapFilter, somewhat randomly:
            while (true)
            {
              if (random.nextBoolean())
              {
            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final int ch = mapFilter.read();
                // Single-char read; -1 ends the consumption loop.
                int ch = mapFilter.read();
                if (ch == -1)
                {
                  break;
                }
                actualBuilder.Append((char) ch);
              }
              else
              {
            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final char[] buffer = new char[org.apache.lucene.util.TestUtil.nextInt(random, 1, 100)];
                // Buffered read into a randomly sized buffer at a random starting offset.
                char[] buffer = new char[TestUtil.Next(random, 1, 100)];
            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final int off = buffer.length == 1 ? 0 : random.nextInt(buffer.length-1);
                int off = buffer.Length == 1 ? 0 : random.Next(buffer.Length - 1);
            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final int count = mapFilter.read(buffer, off, buffer.length-off);
                int count = mapFilter.read(buffer, off, buffer.Length - off);
                if (count == -1)
                {
                  break;
                }
                else
                {
                  actualBuilder.Append(buffer, off, count);
                }
              }

              // Occasionally (when the draw from 0..9 equals 7) resolve offsets mid-stream
              // rather than only at the end.
              if (random.Next(10) == 7)
              {
                // Map offsets
                while (actualInputOffsets.Count < actualBuilder.Length)
                {
                  actualInputOffsets.Add(mapFilter.correctOffset(actualInputOffsets.Count));
                }
              }
            }

            // Finish mapping offsets
            while (actualInputOffsets.Count < actualBuilder.Length)
            {
              actualInputOffsets.Add(mapFilter.correctOffset(actualInputOffsets.Count));
            }

            //JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
            //ORIGINAL LINE: final String actual = actualBuilder.toString();
            string actual = actualBuilder.ToString();

            // Verify:
            assertEquals(expected, actual);
            assertEquals(inputOffsets, actualInputOffsets);
              }
            }
        }
Пример #14
0
 //
 //
 //        0123456789
 //(in)    aaaa ll h
 //(out-1) aa llll i
 //(out-2) a llllllll i
 //
 // aaaa,0,4 => a,0,4
 //   ll,5,7 => llllllll,5,7
 //    h,8,9 => i,8,9
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void testChained() throws Exception
 /// <summary>
 /// Applies <c>normMap</c> twice by nesting two <c>MappingCharFilter</c> instances and checks
 /// the resulting terms and their start/end offsets.
 /// </summary>
 public virtual void testChained()
 {
     string sample = "aaaa ll h";

     // First mapping pass over the raw text, second pass over the result of the first.
     CharFilter firstPass = new MappingCharFilter(normMap, new StringReader(sample));
     CharFilter secondPass = new MappingCharFilter(normMap, firstPass);
     TokenStream stream = new MockTokenizer(secondPass, MockTokenizer.WHITESPACE, false);

     string[] terms = { "a", "llllllll", "i" };
     int[] starts = { 0, 5, 8 };
     int[] ends = { 4, 7, 9 };

     assertTokenStreamContents(stream, terms, starts, ends, sample.Length);
 }
Пример #15
0
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void testFullWidthChar() throws Exception
 /// <summary>
 /// Feeds the single character U+FF01 through a <c>MappingCharFilter</c> built on
 /// <c>normMap</c> and expects the term "full-width-exclamation" spanning offsets 0..1.
 /// </summary>
 public virtual void testFullWidthChar()
 {
     string[] expectedTerms = { "full-width-exclamation" };
     int[] expectedStarts = { 0 };
     int[] expectedEnds = { 1 };

     CharFilter mapped = new MappingCharFilter(normMap, new StringReader("\uff01"));
     TokenStream tokens = new MockTokenizer(mapped, MockTokenizer.WHITESPACE, false);

     assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, 1);
 }
Пример #16
0
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void test5to0() throws Exception
 /// <summary>
 /// Feeds "empty" through a <c>MappingCharFilter</c> built on <c>normMap</c> and expects the
 /// tokenizer to emit no tokens at all.
 /// </summary>
 public virtual void test5to0()
 {
     // No terms are expected, hence three empty expectation arrays.
     string[] noTerms = new string[0];
     int[] noStarts = new int[0];
     int[] noEnds = new int[0];

     CharFilter mapped = new MappingCharFilter(normMap, new StringReader("empty"));
     TokenStream tokens = new MockTokenizer(mapped, MockTokenizer.WHITESPACE, false);

     assertTokenStreamContents(tokens, noTerms, noStarts, noEnds, 5);
 }
Пример #17
0
 /// <summary>
 /// Wraps <paramref name="reader"/> first in a <c>MockCharFilter</c> and then in a
 /// <c>MappingCharFilter</c> built on <c>map</c>.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Reader to wrap.</param>
 /// <returns>The outermost (mapping) filter.</returns>
 protected internal override Reader initReader(string fieldName, Reader reader)
 {
     return new MappingCharFilter(map, new MockCharFilter(reader, 0));
 }
Пример #18
0
 /// <summary>
 /// Wraps <paramref name="reader"/> first in a <c>MockCharFilter</c> and then in a
 /// <c>MappingCharFilter</c> built on <c>map</c>.
 /// </summary>
 /// <param name="fieldName">Field name; not used by this implementation.</param>
 /// <param name="reader">Reader to wrap.</param>
 /// <returns>The outermost (mapping) filter.</returns>
 public override TextReader InitReader(string fieldName, TextReader reader)
 {
     return new MappingCharFilter(map, new MockCharFilter(reader, 0));
 }
Пример #19
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testRandomMaps2() throws Exception
        public virtual void testRandomMaps2()
        {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final java.util.Random random = random();
            Random random = random();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int numIterations = atLeast(3);
            int numIterations = atLeast(3);

            for (int iter = 0; iter < numIterations; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST iter=" + iter);
                }

//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final char endLetter = (char) org.apache.lucene.util.TestUtil.nextInt(random, 'b', 'z');
                char endLetter = (char)TestUtil.Next(random, 'b', 'z');

//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final java.util.Map<String,String> map = new java.util.HashMap<>();
                IDictionary <string, string> map = new Dictionary <string, string>();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
                NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int numMappings = atLeast(5);
                int numMappings = atLeast(5);
                if (VERBOSE)
                {
                    Console.WriteLine("  mappings:");
                }
                while (map.Count < numMappings)
                {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final String key = org.apache.lucene.util.TestUtil.randomSimpleStringRange(random, 'a', endLetter, 7);
                    string key = TestUtil.randomSimpleStringRange(random, 'a', endLetter, 7);
                    if (key.Length != 0 && !map.ContainsKey(key))
                    {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final String value = org.apache.lucene.util.TestUtil.randomSimpleString(random);
                        string value = TestUtil.randomSimpleString(random);
                        map[key] = value;
                        builder.add(key, value);
                        if (VERBOSE)
                        {
                            Console.WriteLine("    " + key + " -> " + value);
                        }
                    }
                }

//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final NormalizeCharMap charMap = builder.build();
                NormalizeCharMap charMap = builder.build();

                if (VERBOSE)
                {
                    Console.WriteLine("  test random documents...");
                }

                for (int iter2 = 0; iter2 < 100; iter2++)
                {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final String content = org.apache.lucene.util.TestUtil.randomSimpleStringRange(random, 'a', endLetter, atLeast(1000));
                    string content = TestUtil.randomSimpleStringRange(random, 'a', endLetter, atLeast(1000));

                    if (VERBOSE)
                    {
                        Console.WriteLine("  content=" + content);
                    }

                    // Do stupid dog-slow mapping:

                    // Output string:
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final StringBuilder output = new StringBuilder();
                    StringBuilder output = new StringBuilder();

                    // Maps output offset to input offset:
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final java.util.List<Integer> inputOffsets = new java.util.ArrayList<>();
                    IList <int?> inputOffsets = new List <int?>();

                    int cumDiff = 0;
                    int charIdx = 0;
                    while (charIdx < content.Length)
                    {
                        int    matchLen  = -1;
                        string matchRepl = null;

                        foreach (KeyValuePair <string, string> ent in map.SetOfKeyValuePairs())
                        {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final String match = ent.getKey();
                            string match = ent.Key;
                            if (charIdx + match.Length <= content.Length)
                            {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int limit = charIdx+match.length();
                                int  limit   = charIdx + match.Length;
                                bool matches = true;
                                for (int charIdx2 = charIdx; charIdx2 < limit; charIdx2++)
                                {
                                    if (match[charIdx2 - charIdx] != content[charIdx2])
                                    {
                                        matches = false;
                                        break;
                                    }
                                }

                                if (matches)
                                {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final String repl = ent.getValue();
                                    string repl = ent.Value;
                                    if (match.Length > matchLen)
                                    {
                                        // Greedy: longer match wins
                                        matchLen  = match.Length;
                                        matchRepl = repl;
                                    }
                                }
                            }
                        }

                        if (matchLen != -1)
                        {
                            // We found a match here!
                            if (VERBOSE)
                            {
                                Console.WriteLine("    match=" + content.Substring(charIdx, matchLen) + " @ off=" + charIdx + " repl=" + matchRepl);
                            }
                            output.Append(matchRepl);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int minLen = Math.min(matchLen, matchRepl.length());
                            int minLen = Math.Min(matchLen, matchRepl.Length);

                            // Common part, directly maps back to input
                            // offset:
                            for (int outIdx = 0; outIdx < minLen; outIdx++)
                            {
                                inputOffsets.Add(output.Length - matchRepl.Length + outIdx + cumDiff);
                            }

                            cumDiff += matchLen - matchRepl.Length;
                            charIdx += matchLen;

                            if (matchRepl.Length < matchLen)
                            {
                                // Replacement string is shorter than matched
                                // input: nothing to do
                            }
                            else if (matchRepl.Length > matchLen)
                            {
                                // Replacement string is longer than matched
                                // input: for all the "extra" chars we map
                                // back to a single input offset:
                                for (int outIdx = matchLen; outIdx < matchRepl.Length; outIdx++)
                                {
                                    inputOffsets.Add(output.Length + cumDiff - 1);
                                }
                            }
                            else
                            {
                                // Same length: no change to offset
                            }

                            Debug.Assert(inputOffsets.Count == output.Length, "inputOffsets.size()=" + inputOffsets.Count + " vs output.length()=" + output.Length);
                        }
                        else
                        {
                            inputOffsets.Add(output.Length + cumDiff);
                            output.Append(content[charIdx]);
                            charIdx++;
                        }
                    }

//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final String expected = output.toString();
                    string expected = output.ToString();
                    if (VERBOSE)
                    {
                        Console.Write("    expected:");
                        for (int charIdx2 = 0; charIdx2 < expected.Length; charIdx2++)
                        {
                            Console.Write(" " + expected[charIdx2] + "/" + inputOffsets[charIdx2]);
                        }
                        Console.WriteLine();
                    }

//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final MappingCharFilter mapFilter = new MappingCharFilter(charMap, new java.io.StringReader(content));
                    MappingCharFilter mapFilter = new MappingCharFilter(charMap, new StringReader(content));

//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final StringBuilder actualBuilder = new StringBuilder();
                    StringBuilder actualBuilder = new StringBuilder();
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final java.util.List<Integer> actualInputOffsets = new java.util.ArrayList<>();
                    IList <int?> actualInputOffsets = new List <int?>();

                    // Now consume the actual mapFilter, somewhat randomly:
                    while (true)
                    {
                        if (random.nextBoolean())
                        {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int ch = mapFilter.read();
                            int ch = mapFilter.read();
                            if (ch == -1)
                            {
                                break;
                            }
                            actualBuilder.Append((char)ch);
                        }
                        else
                        {
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final char[] buffer = new char[org.apache.lucene.util.TestUtil.nextInt(random, 1, 100)];
                            char[] buffer = new char[TestUtil.Next(random, 1, 100)];
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int off = buffer.length == 1 ? 0 : random.nextInt(buffer.length-1);
                            int off = buffer.Length == 1 ? 0 : random.Next(buffer.Length - 1);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final int count = mapFilter.read(buffer, off, buffer.length-off);
                            int count = mapFilter.read(buffer, off, buffer.Length - off);
                            if (count == -1)
                            {
                                break;
                            }
                            else
                            {
                                actualBuilder.Append(buffer, off, count);
                            }
                        }

                        if (random.Next(10) == 7)
                        {
                            // Map offsets
                            while (actualInputOffsets.Count < actualBuilder.Length)
                            {
                                actualInputOffsets.Add(mapFilter.correctOffset(actualInputOffsets.Count));
                            }
                        }
                    }

                    // Finish mapping offsets
                    while (actualInputOffsets.Count < actualBuilder.Length)
                    {
                        actualInputOffsets.Add(mapFilter.correctOffset(actualInputOffsets.Count));
                    }

//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
//ORIGINAL LINE: final String actual = actualBuilder.toString();
                    string actual = actualBuilder.ToString();

                    // Verify:
                    assertEquals(expected, actual);
                    assertEquals(inputOffsets, actualInputOffsets);
                }
            }
        }
        /// <summary>
        ///     Creates the search index named <paramref name="indexName" /> if it does not already exist.
        /// </summary>
        /// <remarks>
        ///     When the index is missing it is built from <see cref="EmployerSearchModel" /> and configured with a
        ///     suggester, char filters, edge n-gram token filters, two custom analyzers and the synonym map
        ///     (the synonym map itself is created first if it is missing). An existing index is left untouched.
        /// </remarks>
        /// <param name="serviceClient">Client used to look up and create indexes and synonym maps.</param>
        /// <param name="indexName">Name of the index to look up or create.</param>
        /// <returns>A task that completes once the index has been found or created.</returns>
        /// <exception cref="Exception">Thrown when the repository is disabled.</exception>
        private async Task CreateIndexIfNotExistsAsync(ISearchServiceClient serviceClient, string indexName)
        {
            if (Disabled)
            {
                throw new Exception($"{nameof(AzureEmployerSearchRepository)} is disabled");
            }

            if (await serviceClient.Indexes.ExistsAsync(indexName))
            {
                return;
            }

            var index = new Index
            {
                Name = indexName,
                Fields = FieldBuilder.BuildForType<EmployerSearchModel>()
            };

            index.Suggesters = new List<Suggester>
            {
                new Suggester(
                    suggestorName,
                    nameof(EmployerSearchModel.Name),
                    nameof(EmployerSearchModel.PreviousName),
                    nameof(EmployerSearchModel.Abbreviations))
            };

            // Char filters: each one strips something from the text before tokenization.
            var charFilterRemoveAmpersand = new MappingCharFilter("gpg_remove_Ampersand", new List<string> { "&=>" });
            var charFilterRemoveDot = new MappingCharFilter("gpg_remove_Dot", new List<string> { ".=>" });

            // Case insensitive 'limited', 'ltd', 'llp', ' uk', '(uk)', '-uk' followed by zero or more dots
            // (to cater for "ltd." and some mis-punctuated "limited..").
            var charFilterRemoveLtdInfoCaseInsensitive = new PatternReplaceCharFilter(
                "gpg_patternReplaceCharFilter_Ltd",
                "(?i)(limited|ltd|llp| uk|\\(uk\\)|-uk)[\\.]*",
                string.Empty);
            var charFilterRemoveWhitespace = new PatternReplaceCharFilter(
                "gpg_patternReplaceCharFilter_removeWhitespace",
                "\\s",
                string.Empty);

            index.CharFilters = new List<CharFilter>
            {
                charFilterRemoveAmpersand,
                charFilterRemoveDot,
                charFilterRemoveLtdInfoCaseInsensitive,
                charFilterRemoveWhitespace
            };

            var edgeNGramTokenFilterFront =
                new EdgeNGramTokenFilterV2("gpg_edgeNGram_front", 3, 300, EdgeNGramTokenFilterSide.Front);
            var edgeNGramTokenFilterBack =
                new EdgeNGramTokenFilterV2("gpg_edgeNGram_back", 3, 300, EdgeNGramTokenFilterSide.Back);

            index.TokenFilters = new List<TokenFilter> { edgeNGramTokenFilterFront, edgeNGramTokenFilterBack };

            var standardTokenizer = new StandardTokenizerV2("gpg_standard_v2_tokenizer");
            var keywordTokenizer = new KeywordTokenizerV2("gpg_keyword_v2_tokenizer");

            index.Tokenizers = new List<Tokenizer> { standardTokenizer, keywordTokenizer };

            // Suffix analyzer: standard tokenizer, lowercased, back edge n-grams.
            var suffixAnalyzer = new CustomAnalyzer(
                "gpg_suffix",
                standardTokenizer.Name,
                new List<TokenFilterName> { TokenFilterName.Lowercase, edgeNGramTokenFilterBack.Name },
                new List<CharFilterName>
                {
                    charFilterRemoveAmpersand.Name,
                    charFilterRemoveLtdInfoCaseInsensitive.Name
                });

            // Complete-token analyzer: keyword tokenizer, lowercased, front edge n-grams.
            var completeTokenAnalyzer = new CustomAnalyzer(
                "gpg_prefix_completeToken",
                keywordTokenizer.Name,
                new List<TokenFilterName> { TokenFilterName.Lowercase, edgeNGramTokenFilterFront.Name },
                new List<CharFilterName>
                {
                    charFilterRemoveDot.Name,
                    charFilterRemoveAmpersand.Name,
                    charFilterRemoveLtdInfoCaseInsensitive.Name,
                    charFilterRemoveWhitespace.Name
                });

            index.Analyzers = new List<Analyzer> { suffixAnalyzer, completeTokenAnalyzer };

            // Resolve each field once instead of searching the field list for every assignment.
            var suffixSearchField = index.Fields.First(
                f => f.Name == nameof(EmployerSearchModel.PartialNameForSuffixSearches));
            suffixSearchField.Analyzer = suffixAnalyzer.Name;
            suffixSearchField.SynonymMaps = new[] { synonymMapName };

            var completeTokenSearchField = index.Fields.First(
                f => f.Name == nameof(EmployerSearchModel.PartialNameForCompleteTokenSearches));
            completeTokenSearchField.Analyzer = completeTokenAnalyzer.Name;
            completeTokenSearchField.SynonymMaps = new[] { synonymMapName };

            var nameField = index.Fields.First(f => f.Name == nameof(EmployerSearchModel.Name));
            nameField.SynonymMaps = new[] { synonymMapName };

            var previousNameField = index.Fields.First(f => f.Name == nameof(EmployerSearchModel.PreviousName));
            previousNameField.SynonymMaps = new[] { synonymMapName };

            // Add the synonyms if they don't already exist. Awaited so the async method never blocks
            // its thread on the service call.
            if (!await serviceClient.SynonymMaps.ExistsAsync(synonymMapName))
            {
                await serviceClient.SynonymMaps.CreateOrUpdateAsync(
                    new SynonymMap
                    {
                        Name = synonymMapName,
                        //Format = "solr", cannot set after upgrade from v5.03 to version 9.0.0
                        Synonyms = "coop, co-operative"
                    });
            }

            await serviceClient.Indexes.CreateAsync(index);
        }