// TODO: this should use inputstreams from the loader, not File!
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
	  /// <summary>
	  /// Reads the mapping rules named by <c>mapping</c> and builds <c>normMap</c> from them.
	  /// Does nothing when <c>mapping</c> is null.
	  /// </summary>
	  /// <remarks>
	  /// If <c>mapping</c> names an existing file on disk, its lines are read directly.
	  /// Otherwise <c>mapping</c> is split with <c>splitFileNames</c> and the lines of every
	  /// (trimmed) entry are concatenated in order.
	  /// </remarks>
	  /// <param name="loader">Resource loader handed through to <c>getLines</c>.</param>
	  public virtual void inform(ResourceLoader loader)
	  {
		if (mapping == null)
		{
		  return;
		}

		IList<string> wlist;
		// System.IO.File is fully qualified so the check does not depend on a using directive
		// or on a Java-style File shim being present.
		if (System.IO.File.Exists(mapping))
		{
		  wlist = getLines(loader, mapping);
		}
		else
		{
		  // Accumulate into a concrete List<string>: IList<string> has no AddRange.
		  List<string> merged = new List<string>();
		  foreach (string file in splitFileNames(mapping))
		  {
			merged.AddRange(getLines(loader, file.Trim()));
		  }
		  wlist = merged;
		}

		NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
		parseRules(wlist, builder);
		normMap = builder.build();
		if (normMap.map == null)
		{
		  // A null inner FST means the map accepts nothing (e.g. the rule file was empty),
		  // so drop the whole map instead of keeping an empty one.
		  normMap = null;
		}
	  }
        // TODO: this should use inputstreams from the loader, not File!
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
        /// <summary>
        /// Reads the mapping rules named by <c>mapping</c> and builds <c>normMap</c> from them.
        /// Does nothing when <c>mapping</c> is null.
        /// </summary>
        /// <remarks>
        /// If <c>mapping</c> names an existing file on disk, its lines are read directly.
        /// Otherwise <c>mapping</c> is split with <c>splitFileNames</c> and the lines of every
        /// (trimmed) entry are concatenated in order.
        /// </remarks>
        /// <param name="loader">Resource loader handed through to <c>getLines</c>.</param>
        public virtual void inform(ResourceLoader loader)
        {
            if (mapping == null)
            {
                return;
            }

            IList<string> wlist;
            // System.IO.File is fully qualified so the check does not depend on a using directive
            // or on a Java-style File shim being present.
            if (System.IO.File.Exists(mapping))
            {
                wlist = getLines(loader, mapping);
            }
            else
            {
                // Accumulate into a concrete List<string>: IList<string> has no AddRange.
                List<string> merged = new List<string>();
                foreach (string file in splitFileNames(mapping))
                {
                    merged.AddRange(getLines(loader, file.Trim()));
                }
                wlist = merged;
            }

            NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
            parseRules(wlist, builder);
            normMap = builder.build();
            if (normMap.map == null)
            {
                // A null inner FST means the map accepts nothing (e.g. the rule file was empty),
                // so drop the whole map instead of keeping an empty one.
                normMap = null;
            }
        }
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testNormalizeWinDelimToLinuxDelim() throws Exception
        /// <summary>
        /// Rewrites backslashes to forward slashes with a <c>MappingCharFilter</c> and checks
        /// what <c>PathHierarchyTokenizer</c> produces for a Windows-style path.
        /// </summary>
        public virtual void testNormalizeWinDelimToLinuxDelim()
        {
            string windowsPath = "c:\\a\\b\\c";

            // Single rule: "\" -> "/".
            NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
            mapBuilder.add("\\", "/");
            NormalizeCharMap slashMap = mapBuilder.build();

            Reader filtered = new MappingCharFilter(slashMap, new StringReader(windowsPath));
            PathHierarchyTokenizer tokenizer = new PathHierarchyTokenizer(filtered);

            string[] expectedTokens = new string[] { "c:", "c:/a", "c:/a/b", "c:/a/b/c" };
            assertTokenStreamContents(
                tokenizer,
                expectedTokens,
                new int[] { 0, 0, 0, 0 },
                new int[] { 2, 4, 6, 8 },
                new int[] { 1, 0, 0, 0 },
                windowsPath.Length);
        }
Esempio n. 4
0
        /// <summary>
        /// test that offsets are correct when mappingcharfilter is previously applied </summary>
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testChangedOffsets() throws java.io.IOException
        public virtual void testChangedOffsets()
        {
            // Char map under test: "a" -> "一二" and "b" -> "二三".
            NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
            mapBuilder.add("a", "一二");
            mapBuilder.add("b", "二三");
            NormalizeCharMap charMap = mapBuilder.build();

            Analyzer mappedAnalyzer = new AnalyzerAnonymousInnerClassHelper(this, charMap);

            string[] expectedTokens = new string[] { "一二", "二二", "二三" };
            assertAnalyzesTo(
                mappedAnalyzer,
                "ab",
                expectedTokens,
                new int[] { 0, 0, 1 },
                new int[] { 1, 1, 2 });

            // The offsets above look odd because they are whatever the char filter maps
            // them to. Before bigramming, the 4 tokens carry:
            //   { 0, 0, 1, 1 },
            //   { 0, 1, 1, 2 }
        }
Esempio n. 5
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testOffsetCorrection() throws Exception
        /// <summary>
        /// Checks the offsets reported by <c>PatternTokenizer</c> when a <c>MappingCharFilter</c>
        /// has replaced "&amp;uuml;" with "ü" upstream: the expected offsets are positions in the
        /// unmapped input (each "G&amp;uuml;nther" spans 12 characters), not in the mapped text.
        /// </summary>
        public virtual void testOffsetCorrection()
        {
            const string Input = "G&uuml;nther G&uuml;nther is here";

            // create MappingCharFilter
            // (textual form of the single rule: "&uuml;" => "ü")
            NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
            builder.add("&uuml;", "ü");
            NormalizeCharMap normMap    = builder.build();
            CharFilter       charStream = new MappingCharFilter(normMap, new StringReader(Input));

            // create PatternTokenizer
            // NOTE(review): the third argument (-1 here, 0 below) is presumably the capture
            // group selector - confirm against PatternTokenizer.
            TokenStream stream = new PatternTokenizer(charStream, Pattern.compile("[,;/\\s]+"), -1);

            assertTokenStreamContents(stream, new string[] { "Günther", "Günther", "is", "here" }, new int[] { 0, 13, 26, 29 }, new int[] { 12, 25, 28, 33 }, Input.Length);

            // Second pass over a fresh filter, this time matching the mapped word itself.
            charStream = new MappingCharFilter(normMap, new StringReader(Input));
            stream     = new PatternTokenizer(charStream, Pattern.compile("Günther"), 0);
            assertTokenStreamContents(stream, new string[] { "Günther", "Günther" }, new int[] { 0, 13 }, new int[] { 12, 25 }, Input.Length);
        }
Esempio n. 6
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void test() throws Exception
        /// <summary>
        /// Builds an analyzer from a three-entry <c>CharArraySet</c> and a three-rule
        /// <c>NormalizeCharMap</c>, then runs <c>checkAnalysisConsistency</c> on one fixed input.
        /// </summary>
        public virtual void test()
        {
            CharArraySet wordSet = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
            foreach (string word in new string[] { "jjp", "wlmwoknt", "tcgyreo" })
            {
                wordSet.add(word);
            }

            NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
            mapBuilder.add("mtqlpi", "");
            mapBuilder.add("mwoknt", "jjp");
            mapBuilder.add("tcgyreo", "zpfpajyws");
            NormalizeCharMap charMap = mapBuilder.build();

            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, wordSet, charMap);

            checkAnalysisConsistency(random(), analyzer, false, "wmgddzunizdomqyj");
        }