//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void testNormalizeWinDelimToLinuxDelim() throws Exception
 /// <summary>
 /// Rewrites backslashes to forward slashes with a MappingCharFilter and checks what
 /// PathHierarchyTokenizer emits for a Windows-style path read through that filter.
 /// </summary>
 public virtual void testNormalizeWinDelimToLinuxDelim()
 {
     // Char map with a single rule: "\" becomes "/".
     var slashMapBuilder = new NormalizeCharMap.Builder();
     slashMapBuilder.add("\\", "/");
     NormalizeCharMap slashMap = slashMapBuilder.build();

     string windowsPath = "c:\\a\\b\\c";
     Reader filteredInput = new MappingCharFilter(slashMap, new StringReader(windowsPath));
     PathHierarchyTokenizer tokenizer = new PathHierarchyTokenizer(filteredInput);

     // Every token starts at 0 and extends one path segment further than the previous one.
     string[] expectedTerms = { "c:", "c:/a", "c:/a/b", "c:/a/b/c" };
     int[] expectedStartOffsets = { 0, 0, 0, 0 };
     int[] expectedEndOffsets = { 2, 4, 6, 8 };
     int[] expectedPosIncrements = { 1, 0, 0, 0 };

     assertTokenStreamContents(
         tokenizer,
         expectedTerms,
         expectedStartOffsets,
         expectedEndOffsets,
         expectedPosIncrements,
         windowsPath.Length);
 }
        // SOLR-2891
        // *CompoundWordTokenFilter blindly adds term length to offset, but this can take things out of bounds
        // wrt original text if a previous filter increases the length of the word (in this case ü -> ue)
        // so in this case we behave like WDF, and preserve any modified offsets
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testInvalidOffsets() throws Exception
        /// <summary>
        /// Analyzes "banküberfall" through an analyzer built from a "fall" dictionary and a
        /// char map that expands "ü" to "ue", and expects both resulting tokens to report
        /// offsets 0-12.
        /// </summary>
        public virtual void testInvalidOffsets()
        {
            // Dictionary built from the single word "fall".
            CharArraySet dictionary = makeDictionary("fall");

            // Char map that lengthens the text: one character "ü" becomes two, "ue".
            var umlautMapBuilder = new NormalizeCharMap.Builder();
            umlautMapBuilder.add("ü", "ue");
            NormalizeCharMap umlautMap = umlautMapBuilder.build();

            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, dictionary, umlautMap);

            // The original input is 12 characters long, so no offset may exceed 12.
            string[] expectedTerms = { "bankueberfall", "fall" };
            int[] expectedStartOffsets = { 0, 0 };
            int[] expectedEndOffsets = { 12, 12 };
            assertAnalyzesTo(analyzer, "banküberfall", expectedTerms, expectedStartOffsets, expectedEndOffsets);
        }
        // Example #3
        // 0
        /// <summary>
        /// test that offsets are correct when mappingcharfilter is previously applied </summary>
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testChangedOffsets() throws java.io.IOException
        public virtual void testChangedOffsets()
        {
            // Each single-character input is mapped to a two-character replacement.
            var mapBuilder = new NormalizeCharMap.Builder();
            mapBuilder.add("a", "一二");
            mapBuilder.add("b", "二三");
            NormalizeCharMap charMap = mapBuilder.build();

            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, charMap);

            // note: offsets are strange since this is how the charfilter maps them...
            // before bigramming, the 4 tokens look like:
            //   { 0, 0, 1, 1 },
            //   { 0, 1, 1, 2 }
            string[] expectedTerms = { "一二", "二二", "二三" };
            int[] expectedStartOffsets = { 0, 0, 1 };
            int[] expectedEndOffsets = { 1, 1, 2 };

            assertAnalyzesTo(analyzer, "ab", expectedTerms, expectedStartOffsets, expectedEndOffsets);
        }
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testOffsetCorrection() throws Exception
        /// <summary>
        /// Checks that PatternTokenizer reports offsets in terms of the original input when a
        /// MappingCharFilter has first replaced the six-character HTML entity for u-umlaut
        /// with the single character "ü".
        /// </summary>
        public virtual void testOffsetCorrection()
        {
            const string INPUT = "G&uuml;nther G&uuml;nther is here";

            // create MappingCharFilter
            // (the previously declared "mappingRules" list was never read, so it has been removed)
            NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
            builder.add("&uuml;", "ü");
            NormalizeCharMap normMap = builder.build();
            CharFilter charStream = new MappingCharFilter(normMap, new StringReader(INPUT));

            // create PatternTokenizer
            // First pass uses group -1 with a delimiter pattern; the expected tokens are the
            // pieces between delimiters, with offsets pointing into the unfiltered INPUT
            // (each "G&uuml;nther" spans 12 original characters).
            TokenStream stream = new PatternTokenizer(charStream, Pattern.compile("[,;/\\s]+"), -1);
            assertTokenStreamContents(stream, new string[] {"Günther", "Günther", "is", "here"}, new int[] {0, 13, 26, 29}, new int[] {12, 25, 28, 33}, INPUT.Length);

            // Second pass uses group 0 with a pattern matching the filtered text; a fresh
            // char filter is needed because the first one has already been consumed.
            charStream = new MappingCharFilter(normMap, new StringReader(INPUT));
            stream = new PatternTokenizer(charStream, Pattern.compile("Günther"), 0);
            assertTokenStreamContents(stream, new string[] {"Günther", "Günther"}, new int[] {0, 13}, new int[] {12, 25}, INPUT.Length);
        }
        // Example #5
        // 0
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void test() throws Exception
        /// <summary>
        /// Builds an analyzer from a three-entry CharArraySet and a three-rule
        /// NormalizeCharMap, then runs checkAnalysisConsistency on a fixed input string.
        /// </summary>
        public virtual void test()
        {
            // Word set with the three fixed entries, added in the same order as before.
            CharArraySet wordSet = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
            string[] wordSetEntries = { "jjp", "wlmwoknt", "tcgyreo" };
            foreach (string entry in wordSetEntries)
            {
                wordSet.add(entry);
            }

            // Char map rules: one deletion (empty replacement) and two substitutions.
            var charMapBuilder = new NormalizeCharMap.Builder();
            charMapBuilder.add("mtqlpi", "");
            charMapBuilder.add("mwoknt", "jjp");
            charMapBuilder.add("tcgyreo", "zpfpajyws");
            NormalizeCharMap charMap = charMapBuilder.build();

            Analyzer analyzerUnderTest = new AnalyzerAnonymousInnerClassHelper(this, wordSet, charMap);
            checkAnalysisConsistency(random(), analyzerUnderTest, false, "wmgddzunizdomqyj");
        }
        // SOLR-2891
        // *CompoundWordTokenFilter blindly adds term length to offset, but this can take things out of bounds
        // wrt original text if a previous filter increases the length of the word (in this case ü -> ue)
        // so in this case we behave like WDF, and preserve any modified offsets
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: public void testInvalidOffsets() throws Exception
        /// <summary>
        /// Runs "banküberfall" through an analyzer combining a "fall" dictionary with a char
        /// map that turns "ü" into "ue"; both expected tokens must keep offsets 0-12.
        /// </summary>
        public virtual void testInvalidOffsets()
        {
            // Dictionary built from the single word "fall".
            CharArraySet compoundDictionary = makeDictionary("fall");

            // Mapping that makes the filtered text one character longer than the input.
            var expansionBuilder = new NormalizeCharMap.Builder();
            expansionBuilder.add("ü", "ue");
            NormalizeCharMap expansionMap = expansionBuilder.build();

            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, compoundDictionary, expansionMap);

            // The end offsets stay at 12, the length of the unfiltered input.
            string[] expectedTerms = { "bankueberfall", "fall" };
            int[] expectedStartOffsets = { 0, 0 };
            int[] expectedEndOffsets = { 12, 12 };
            assertAnalyzesTo(analyzer, "banküberfall", expectedTerms, expectedStartOffsets, expectedEndOffsets);
        }