Abstract parent class for analysis factories that create Tokenizer instances.
Inheritance: AbstractAnalysisFactory
Example No. 1
0
        /// <summary>
        /// Looking up a tokenizer factory class by a nonsense name must fail with
        /// an exception equivalent to Java's IllegalArgumentException.
        /// </summary>
        public virtual void TestBogusLookupTokenizerClass()
        {
            foreach (string bogusName in new[] { "sdfsdfsdfdsfsdfsdf", "!(**#$U*#$*" })
            {
                try
                {
                    TokenizerFactory.LookupClass(bogusName);
                    fail();
                }
                catch (Exception e) when (e.IsIllegalArgumentException())
                {
                    // expected
                }
            }
        }
Example No. 2
0
        /// <summary>
        /// Instantiating a tokenizer factory by a nonsense name must fail with
        /// an exception equivalent to Java's IllegalArgumentException.
        /// </summary>
        public virtual void TestBogusLookupTokenizer()
        {
            foreach (string bogusName in new[] { "sdfsdfsdfdsfsdfsdf", "!(**#$U*#$*" })
            {
                try
                {
                    TokenizerFactory.ForName(bogusName, new Dictionary<string, string>());
                    fail();
                }
                catch (Exception e) when (e.IsIllegalArgumentException())
                {
                    // expected
                }
            }
        }
Example No. 3
0
        /// <summary>
        /// Looking up a tokenizer factory class by a nonsense name must throw
        /// <see cref="ArgumentException"/>.
        /// </summary>
        public virtual void TestBogusLookupTokenizerClass()
        {
            foreach (string bogusName in new[] { "sdfsdfsdfdsfsdfsdf", "!(**#$U*#$*" })
            {
                try
                {
                    TokenizerFactory.LookupClass(bogusName);
                    fail();
                }
                catch (ArgumentException)
                {
                    // expected
                }
            }
        }
Example No. 4
0
        /// <summary>
        /// Instantiating a tokenizer factory by a nonsense name must throw
        /// <see cref="System.ArgumentException"/>.
        /// </summary>
        public virtual void TestBogusLookupTokenizer()
        {
            foreach (string bogusName in new[] { "sdfsdfsdfdsfsdfsdf", "!(**#$U*#$*" })
            {
                try
                {
                    TokenizerFactory.ForName(bogusName, new Dictionary<string, string>());
                    fail();
                }
                catch (System.ArgumentException)
                {
                    // expected
                }
            }
        }
Example No. 5
0
 /// <summary>
 /// Class lookup by factory name is case-insensitive: every casing of
 /// "whitespace" must resolve to <see cref="WhitespaceTokenizerFactory"/>.
 /// </summary>
 public virtual void TestLookupTokenizerClass()
 {
     foreach (string name in new[] { "Whitespace", "WHITESPACE", "whitespace" })
     {
         assertSame(typeof(WhitespaceTokenizerFactory), TokenizerFactory.LookupClass(name));
     }
 }
Example No. 6
0
 /// <summary>
 /// Factory instantiation by name is case-insensitive: every casing of
 /// "whitespace" must produce a <see cref="WhitespaceTokenizerFactory"/>.
 /// </summary>
 public virtual void TestLookupTokenizer()
 {
     foreach (string name in new[] { "Whitespace", "WHITESPACE", "whitespace" })
     {
         assertSame(typeof(WhitespaceTokenizerFactory), TokenizerFactory.ForName(name, VersionArgOnly()).GetType());
     }
 }
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: private static java.util.List<String> splitByTokenizer(String source, TokenizerFactory tokFactory) throws java.io.IOException
 /// <summary>
 /// Tokenizes <paramref name="source"/> with the given factory and returns
 /// every non-empty term as a string, in token order.
 /// </summary>
 /// <param name="source">The text to tokenize.</param>
 /// <param name="tokFactory">Factory used to create the tokenizer.</param>
 /// <returns>A list of the non-empty token strings.</returns>
 private static IList<string> splitByTokenizer(string source, TokenizerFactory tokFactory)
 {
     StringReader reader = new StringReader(source);
     TokenStream ts = loadTokenizer(tokFactory, reader);
     IList<string> tokList = new List<string>();
     try
     {
         // BUGFIX: replaced leftover Java API calls from the automatic
         // conversion (addAttribute(typeof(...)), reset(), incrementToken(),
         // length(), close()) with the Lucene.NET / .NET equivalents, matching
         // the ported SplitByTokenizer implementation elsewhere in this file.
         ICharTermAttribute termAtt = ts.AddAttribute<ICharTermAttribute>();
         ts.Reset();
         while (ts.IncrementToken())
         {
             if (termAtt.Length > 0)
             {
                 tokList.Add(termAtt.ToString());
             }
         }
     }
     finally
     {
         reader.Dispose();
     }
     return tokList;
 }
 /// <summary>
 /// Creates a <c>TokenStream</c> over <paramref name="reader"/> using the
 /// given tokenizer factory.
 /// </summary>
 // BUGFIX: the auto-converted Java signature used the nonexistent `Reader`
 // type and lowercase `create`; use TextReader / Create as in the ported
 // LoadTokenizer elsewhere in this file. StringReader callers still compile
 // because StringReader derives from TextReader.
 private static TokenStream loadTokenizer(TokenizerFactory tokFactory, TextReader reader)
 {
     return tokFactory.Create(reader);
 }
 // a , b c , d e f => [[a],[b,c],[d,e,f]]
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: private static java.util.List<java.util.List<String>> getSynList(String str, String separator, TokenizerFactory tokFactory) throws java.io.IOException
 /// <summary>
 /// Splits <paramref name="str"/> on <paramref name="separator"/> and then
 /// tokenizes each piece, e.g. "a , b c , d e f" => [[a],[b,c],[d,e,f]].
 /// Uses whitespace splitting when no tokenizer factory is supplied.
 /// </summary>
 private static IList<IList<string>> getSynList(string str, string separator, TokenizerFactory tokFactory)
 {
     IList<string> pieces = splitSmart(str, separator, false);
     IList<IList<string>> result = new List<IList<string>>();
     foreach (string piece in pieces)
     {
         // Tokenize each separated piece into its individual terms.
         IList<string> terms;
         if (tokFactory == null)
         {
             terms = splitWS(piece, true);
         }
         else
         {
             terms = splitByTokenizer(piece, tokFactory);
         }
         result.Add(terms);
     }
     return result;
 }
Example No. 10
0
        //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
        //ORIGINAL LINE: static void parseRules(Iterable<String> rules, SlowSynonymMap map, String mappingSep, String synSep, boolean expansion, TokenizerFactory tokFactory) throws java.io.IOException
        /// <summary>
        /// Parses synonym rules of the form "a, b => c, d" into <paramref name="map"/>.
        /// </summary>
        /// <param name="rules">The raw rule lines to parse.</param>
        /// <param name="map">Destination synonym map.</param>
        /// <param name="mappingSep">Separator between source and target lists (e.g. "=>").</param>
        /// <param name="synSep">Separator between synonyms within a list (e.g. ",").</param>
        /// <param name="expansion">If true, a rule without a mapping separator expands to all
        /// of its terms; otherwise it reduces to the first term only.</param>
        /// <param name="tokFactory">Optional tokenizer used to split terms; whitespace when null.</param>
        /// <exception cref="System.ArgumentException">If a rule contains more than one mapping separator.</exception>
        internal static void parseRules(IEnumerable<string> rules, SlowSynonymMap map, string mappingSep, string synSep, bool expansion, TokenizerFactory tokFactory)
        {
            int count = 0;
            foreach (string rule in rules)
            {
                // To use regexes, we need an expression that specifies an odd number of chars.
                // This can't really be done with string.split(), and since we need to
                // do unescaping at some point anyway, we wouldn't be saving any effort
                // by using regexes.

                IList<string> mapping = splitSmart(rule, mappingSep, false);

                IList<IList<string>> source;
                IList<IList<string>> target;

                if (mapping.Count > 2)
                {
                    throw new System.ArgumentException("Invalid Synonym Rule:" + rule);
                }
                else if (mapping.Count == 2)
                {
                    source = getSynList(mapping[0], synSep, tokFactory);
                    target = getSynList(mapping[1], synSep, tokFactory);
                }
                else
                {
                    source = getSynList(mapping[0], synSep, tokFactory);
                    if (expansion)
                    {
                        // expand to all arguments
                        target = source;
                    }
                    else
                    {
                        // reduce to first argument
                        // BUGFIX: was `new List<>(1)` — Java diamond syntax, invalid in C#.
                        target = new List<IList<string>>(1);
                        target.Add(source[0]);
                    }
                }

                bool includeOrig = false;
                foreach (IList<string> fromToks in source)
                {
                    count++;
                    foreach (IList<string> toToks in target)
                    {
                        map.add(fromToks, SlowSynonymMap.makeTokens(toToks), includeOrig, true);
                    }
                }
            }
        }
Example No. 11
0
 /// <summary>
 /// Builds an analyzer from explicit factory components. The tokenizer is
 /// required; the char filter and token filter may be null.
 /// </summary>
 internal FactoryAnalyzer(TokenizerFactory tokenizer, TokenFilterFactory tokenfilter, CharFilterFactory charFilter)
 {
     Debug.Assert(tokenizer != null);
     this.tokenfilter = tokenfilter;
     this.charFilter = charFilter;
     this.tokenizer = tokenizer;
 }
 /// <summary>
 /// Captures the outer factory instance and the tokenizer factory for use by
 /// the anonymous analyzer.
 /// </summary>
 public AnalyzerAnonymousInnerClassHelper(FSTSynonymFilterFactory outerInstance, TokenizerFactory factory)
 {
     this.factory = factory;
     this.outerInstance = outerInstance;
 }
 /// <summary>
 /// Creates a token stream over <paramref name="reader"/> via the factory.
 /// </summary>
 private static TokenStream LoadTokenizer(TokenizerFactory tokFactory, TextReader reader)
 {
     TokenStream stream = tokFactory.Create(reader);
     return stream;
 }
 /// <summary>
 /// Tokenizes <paramref name="source"/> with the given factory and returns
 /// every non-empty term as a string, in token order.
 /// </summary>
 private static IList<string> SplitByTokenizer(string source, TokenizerFactory tokFactory)
 {
     StringReader reader = new StringReader(source);
     TokenStream tokenStream = LoadTokenizer(tokFactory, reader);
     IList<string> tokens = new List<string>();
     try
     {
         ICharTermAttribute termAttribute = tokenStream.AddAttribute<ICharTermAttribute>();
         tokenStream.Reset();
         while (tokenStream.IncrementToken())
         {
             // Skip zero-length terms.
             if (termAttribute.Length > 0)
             {
                 tokens.Add(termAttribute.ToString());
             }
         }
     }
     finally
     {
         // Ensure the reader is released even if token consumption throws.
         reader.Dispose();
     }
     return tokens;
 }