Holds a map of String input to String output, to be used with MappingCharFilter. Use the Builder to create this.
	  // TODO: this should use inputstreams from the loader, not File!
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
	  /// <summary>
	  /// Reads the mapping rules named by <c>mapping</c> and builds <c>normMap</c> from them.
	  /// Does nothing when <c>mapping</c> is null.
	  /// </summary>
	  /// <remarks>
	  /// If <c>mapping</c> names an existing file it is read as a single rules file; otherwise it is
	  /// split with <c>splitFileNames</c> and the lines of every listed file are concatenated in order.
	  /// When the built map has no inner FST, <c>normMap</c> is reset to null.
	  /// </remarks>
	  /// <param name="loader">Resource loader handed to <c>getLines</c> for each rules file.</param>
	  public virtual void inform(ResourceLoader loader)
	  {
		if (mapping == null)
		{
		  return;
		}

		IList<string> wlist;
		// System.IO.File is static in .NET, so the Java "new File(path).exists()" idiom becomes File.Exists.
		if (System.IO.File.Exists(mapping))
		{
		  wlist = getLines(loader, mapping);
		}
		else
		{
		  // Accumulate into a concrete List<string>: IList<string> does not expose AddRange.
		  List<string> merged = new List<string>();
		  IList<string> files = splitFileNames(mapping);
		  foreach (string file in files)
		  {
			IList<string> lines = getLines(loader, file.Trim());
			merged.AddRange(lines);
		  }
		  wlist = merged;
		}

		NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
		parseRules(wlist, builder);
		normMap = builder.build();
		if (normMap.map == null)
		{
		  // A null inner FST means the map accepts nothing (e.g. the rules file is empty),
		  // so drop the whole map.
		  normMap = null;
		}
	  }
Ejemplo n.º 2
0
        //@Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testRandomMaps() throws Exception
        /// <summary>
        /// Runs <c>checkRandomData</c> against analyzers built from several random char maps.
        /// </summary>
        public virtual void testRandomMaps()
        {
            const int roundsPerMap = 100;

            // One fresh random map (and analyzer wrapping it) per iteration.
            for (int remaining = atLeast(3); remaining > 0; remaining--)
            {
                Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this, randomMap());
                checkRandomData(random(), analyzer, roundsPerMap);
            }
        }
Ejemplo n.º 3
0
	  /// <summary>
	  /// Creates a filter over <paramref name="in"/> that uses the FST and cached root arcs
	  /// held by <paramref name="normMap"/>.
	  /// </summary>
	  /// <param name="normMap">Source of the <c>map</c> and <c>cachedRootArcs</c> used by this filter.</param>
	  /// <param name="in">Reader to wrap; also used to reset the internal buffer.</param>
	  public MappingCharFilter(NormalizeCharMap normMap, TextReader @in) : base(@in)
	  {
		buffer.Reset(@in);

		map = normMap.map;
		cachedRootArcs = normMap.cachedRootArcs;

		// No FST means there is nothing to read arcs from, so no reader is obtained.
		fstReader = (map != null) ? map.BytesReader : null;
	  }
Ejemplo n.º 4
0
        //@Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testFinalOffsetSpecialCase() throws Exception
        /// <summary>
        /// Checks analysis consistency on one fixed input with a two-rule char map
        /// (see the LUCENE-3971 reference above this method).
        /// </summary>
        public virtual void testFinalOffsetSpecialCase()
        {
            NormalizeCharMap.Builder rules = new NormalizeCharMap.Builder();
            rules.add("t", "");
            // This second rule has no effect on the input below, yet the test passes if it is removed.
            rules.add("tmakdbl", "c");

            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, rules.build());

            checkAnalysisConsistency(random(), analyzer, false, "gzw f quaxot");
        }
Ejemplo n.º 5
0
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: @Override public void setUp() throws Exception
        /// <summary>
        /// Builds the shared <c>normMap</c> fixture: shrinking, growing, deleting and
        /// non-ASCII mappings.
        /// </summary>
        public override void setUp()
        {
            base.setUp();

            // Plain string rules as { input, output }, registered in this order.
            string[][] plainRules =
            {
                new[] { "aa", "a" },
                new[] { "bbb", "b" },
                new[] { "cccc", "cc" },
                new[] { "h", "i" },
                new[] { "j", "jj" },
                new[] { "k", "kkk" },
                new[] { "ll", "llll" },
                new[] { "empty", "" },
            };

            NormalizeCharMap.Builder rules = new NormalizeCharMap.Builder();
            foreach (string[] rule in plainRules)
            {
                rules.add(rule[0], rule[1]);
            }

            // Supplementary (non-BMP) code point U+1D122, i.e. a surrogate pair in UTF-16:
            string fClef = UnicodeUtil.newString(new int[] { 0x1D122 }, 0, 1);
            rules.add(fClef, "fclef");

            rules.add("\uff01", "full-width-exclamation");

            normMap = rules.build();
        }
Ejemplo n.º 6
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public void setUp() throws Exception
        /// <summary>
        /// Prepares <c>normMap</c> with the fixed set of mapping rules used by the tests.
        /// </summary>
        public override void setUp()
        {
            base.setUp();

            // Alternating input/output entries; consumed two at a time, in order.
            string[] asciiPairs =
            {
                "aa", "a",
                "bbb", "b",
                "cccc", "cc",
                "h", "i",
                "j", "jj",
                "k", "kkk",
                "ll", "llll",
                "empty", "",
            };

            NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
            for (int idx = 0; idx < asciiPairs.Length; idx += 2)
            {
                mapBuilder.add(asciiPairs[idx], asciiPairs[idx + 1]);
            }

            // Code point 0x1D122 lies outside the BMP, so the key is a surrogate pair:
            mapBuilder.add(UnicodeUtil.newString(new int[] { 0x1D122 }, 0, 1), "fclef");

            // U+FF01:
            mapBuilder.add("\uff01", "full-width-exclamation");

            normMap = mapBuilder.build();
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Stores the enclosing test instance and the char map for later use by this analyzer.
 /// </summary>
 public AnalyzerAnonymousInnerClassHelper3(TestMappingCharFilter outerInstance, NormalizeCharMap map)
 {
     this.map = map;
     this.outerInstance = outerInstance;
 }
Ejemplo n.º 8
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testRandomMaps2() throws Exception
        /// <summary>
        /// Randomized cross-check of <c>MappingCharFilter</c>: for several random rule sets it
        /// computes the expected output text and output-to-input offset table with a brute-force
        /// longest-match scan, then reads the same content through the filter (mixing single-char
        /// and bulk reads) and asserts that both the text and the corrected offsets agree.
        /// </summary>
        public virtual void testRandomMaps2()
        {
            // NOTE(review): the local 'random' has the same name as the random() method it is
            // initialized from - confirm this resolves as intended in C#.
            Random random = random();
            int numIterations = atLeast(3);

            for (int iter = 0; iter < numIterations; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST iter=" + iter);
                }

                // Upper bound of the letter range used for both keys and content in this iteration.
                char endLetter = (char)TestUtil.Next(random, 'b', 'z');

                // 'map' mirrors every rule handed to 'builder' so the brute-force pass below can use them.
                IDictionary <string, string> map = new Dictionary <string, string>();
                NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
                int numMappings = atLeast(5);
                if (VERBOSE)
                {
                    Console.WriteLine("  mappings:");
                }
                // Keep drawing keys until enough distinct, non-empty ones have been registered.
                while (map.Count < numMappings)
                {
                    string key = TestUtil.randomSimpleStringRange(random, 'a', endLetter, 7);
                    if (key.Length != 0 && !map.ContainsKey(key))
                    {
                        string value = TestUtil.randomSimpleString(random);
                        map[key] = value;
                        builder.add(key, value);
                        if (VERBOSE)
                        {
                            Console.WriteLine("    " + key + " -> " + value);
                        }
                    }
                }

                NormalizeCharMap charMap = builder.build();

                if (VERBOSE)
                {
                    Console.WriteLine("  test random documents...");
                }

                for (int iter2 = 0; iter2 < 100; iter2++)
                {
                    string content = TestUtil.randomSimpleStringRange(random, 'a', endLetter, atLeast(1000));

                    if (VERBOSE)
                    {
                        Console.WriteLine("  content=" + content);
                    }

                    // Brute-force reference mapping (deliberately simple and slow):

                    // Output string:
                    StringBuilder output = new StringBuilder();

                    // Maps output offset to input offset:
                    IList <int?> inputOffsets = new List <int?>();

                    // cumDiff tracks (input chars consumed) - (output chars produced) so far.
                    int cumDiff = 0;
                    int charIdx = 0;
                    while (charIdx < content.Length)
                    {
                        // -1 means "no rule matches at charIdx".
                        int    matchLen  = -1;
                        string matchRepl = null;

                        // NOTE(review): SetOfKeyValuePairs() is not a BCL member - presumably a
                        // converter-supplied helper that enumerates the entries; confirm.
                        foreach (KeyValuePair <string, string> ent in map.SetOfKeyValuePairs())
                        {
                            string match = ent.Key;
                            // Only keys that still fit in the remaining content can match.
                            if (charIdx + match.Length <= content.Length)
                            {
                                int  limit   = charIdx + match.Length;
                                bool matches = true;
                                // Char-by-char comparison of the key against content at charIdx.
                                for (int charIdx2 = charIdx; charIdx2 < limit; charIdx2++)
                                {
                                    if (match[charIdx2 - charIdx] != content[charIdx2])
                                    {
                                        matches = false;
                                        break;
                                    }
                                }

                                if (matches)
                                {
                                    string repl = ent.Value;
                                    if (match.Length > matchLen)
                                    {
                                        // Greedy: longer match wins
                                        matchLen  = match.Length;
                                        matchRepl = repl;
                                    }
                                }
                            }
                        }

                        if (matchLen != -1)
                        {
                            // We found a match here!
                            if (VERBOSE)
                            {
                                Console.WriteLine("    match=" + content.Substring(charIdx, matchLen) + " @ off=" + charIdx + " repl=" + matchRepl);
                            }
                            output.Append(matchRepl);
                            int minLen = Math.Min(matchLen, matchRepl.Length);

                            // Common part, directly maps back to input
                            // offset: (start of replacement in output) + outIdx, shifted by cumDiff.
                            for (int outIdx = 0; outIdx < minLen; outIdx++)
                            {
                                inputOffsets.Add(output.Length - matchRepl.Length + outIdx + cumDiff);
                            }

                            cumDiff += matchLen - matchRepl.Length;
                            charIdx += matchLen;

                            if (matchRepl.Length < matchLen)
                            {
                                // Replacement string is shorter than matched
                                // input: nothing to do
                            }
                            else if (matchRepl.Length > matchLen)
                            {
                                // Replacement string is longer than matched
                                // input: for all the "extra" chars we map
                                // back to a single input offset (with the updated cumDiff this
                                // is charIdx - 1, the last input char of the match):
                                for (int outIdx = matchLen; outIdx < matchRepl.Length; outIdx++)
                                {
                                    inputOffsets.Add(output.Length + cumDiff - 1);
                                }
                            }
                            else
                            {
                                // Same length: no change to offset
                            }

                            // Exactly one offset entry must exist per output char.
                            Debug.Assert(inputOffsets.Count == output.Length, "inputOffsets.size()=" + inputOffsets.Count + " vs output.length()=" + output.Length);
                        }
                        else
                        {
                            // No rule applies: copy the char through unchanged and record its input offset.
                            inputOffsets.Add(output.Length + cumDiff);
                            output.Append(content[charIdx]);
                            charIdx++;
                        }
                    }

                    string expected = output.ToString();
                    if (VERBOSE)
                    {
                        // Dump each expected output char together with its input offset.
                        Console.Write("    expected:");
                        for (int charIdx2 = 0; charIdx2 < expected.Length; charIdx2++)
                        {
                            Console.Write(" " + expected[charIdx2] + "/" + inputOffsets[charIdx2]);
                        }
                        Console.WriteLine();
                    }

                    MappingCharFilter mapFilter = new MappingCharFilter(charMap, new StringReader(content));

                    StringBuilder actualBuilder = new StringBuilder();
                    IList <int?> actualInputOffsets = new List <int?>();

                    // Now consume the actual mapFilter, somewhat randomly:
                    while (true)
                    {
                        // NOTE(review): nextBoolean() is the java.util.Random name - presumably
                        // provided by a project extension; confirm.
                        if (random.nextBoolean())
                        {
                            // Single-char read; -1 signals end of input.
                            int ch = mapFilter.read();
                            if (ch == -1)
                            {
                                break;
                            }
                            actualBuilder.Append((char)ch);
                        }
                        else
                        {
                            // Bulk read into a randomly sized buffer, starting at a random offset within it.
                            char[] buffer = new char[TestUtil.Next(random, 1, 100)];
                            int off = buffer.Length == 1 ? 0 : random.Next(buffer.Length - 1);
                            int count = mapFilter.read(buffer, off, buffer.Length - off);
                            if (count == -1)
                            {
                                break;
                            }
                            else
                            {
                                actualBuilder.Append(buffer, off, count);
                            }
                        }

                        // Occasionally (when the draw from [0, 10) is 7) map offsets mid-stream.
                        if (random.Next(10) == 7)
                        {
                            // Map offsets
                            while (actualInputOffsets.Count < actualBuilder.Length)
                            {
                                actualInputOffsets.Add(mapFilter.correctOffset(actualInputOffsets.Count));
                            }
                        }
                    }

                    // Finish mapping offsets
                    while (actualInputOffsets.Count < actualBuilder.Length)
                    {
                        actualInputOffsets.Add(mapFilter.correctOffset(actualInputOffsets.Count));
                    }

                    string actual = actualBuilder.ToString();

                    // Verify:
                    assertEquals(expected, actual);
                    assertEquals(inputOffsets, actualInputOffsets);
                }
            }
        }
Ejemplo n.º 9
0
 /// <summary>
 /// Captures the outer test instance and the normalization map in this helper's fields.
 /// </summary>
 /// <param name="outerInstance">The enclosing <c>TestMappingCharFilter</c>.</param>
 /// <param name="map">The char map this analyzer keeps a reference to.</param>
 public AnalyzerAnonymousInnerClassHelper3(TestMappingCharFilter outerInstance, NormalizeCharMap map)
 {
     this.map = map;
     this.outerInstance = outerInstance;
 }
	  /// <summary>
	  /// Parses each rule line with the pattern <c>p</c> and registers the resulting
	  /// input/output pair on <paramref name="builder"/>.
	  /// </summary>
	  /// <param name="rules">Raw rule lines, processed in order.</param>
	  /// <param name="builder">Builder that receives one mapping per rule.</param>
	  /// <exception cref="System.ArgumentException">A rule line does not match the pattern.</exception>
	  protected internal virtual void parseRules(IList<string> rules, NormalizeCharMap.Builder builder)
	  {
		foreach (string line in rules)
		{
		  Matcher matcher = p.matcher(line);
		  if (m_found(matcher))
		  {
			// Group 1 is the input side, group 2 the output side; both go through parseString.
			string input = parseString(matcher.group(1));
			string output = parseString(matcher.group(2));
			builder.add(input, output);
			continue;
		  }

		  throw new System.ArgumentException("Invalid Mapping Rule : [" + line + "], file = " + mapping);
		}
	  }

	  // Runs the matcher's search and reports whether the rule line matched the pattern.
	  private static bool m_found(Matcher matcher)
	  {
		return matcher.find();
	  }