/// <summary>
/// LookupClass must reject names that do not resolve to a registered tokenizer factory,
/// whether they are plausible-looking identifiers or pure garbage.
/// </summary>
public virtual void TestBogusLookupTokenizerClass()
{
    foreach (string bogusName in new[] { "sdfsdfsdfdsfsdfsdf", "!(**#$U*#$*" })
    {
        try
        {
            TokenizerFactory.LookupClass(bogusName);
            fail();
        }
        catch (Exception expected) when (expected.IsIllegalArgumentException())
        {
            // expected: unknown factory name
        }
    }
}
/// <summary>
/// ForName must reject names that do not resolve to a registered tokenizer factory.
/// </summary>
public virtual void TestBogusLookupTokenizer()
{
    foreach (string bogusName in new[] { "sdfsdfsdfdsfsdfsdf", "!(**#$U*#$*" })
    {
        try
        {
            TokenizerFactory.ForName(bogusName, new Dictionary<string, string>());
            fail();
        }
        catch (Exception expected) when (expected.IsIllegalArgumentException())
        {
            // expected: unknown factory name
        }
    }
}
/// <summary>
/// LookupClass must throw <see cref="ArgumentException"/> for names that do not
/// resolve to a registered tokenizer factory.
/// </summary>
public virtual void TestBogusLookupTokenizerClass()
{
    foreach (string bogusName in new[] { "sdfsdfsdfdsfsdfsdf", "!(**#$U*#$*" })
    {
        try
        {
            TokenizerFactory.LookupClass(bogusName);
            fail();
        }
        catch (ArgumentException)
        {
            // expected: unknown factory name
        }
    }
}
/// <summary>
/// ForName must throw <see cref="System.ArgumentException"/> for names that do not
/// resolve to a registered tokenizer factory.
/// </summary>
public virtual void TestBogusLookupTokenizer()
{
    foreach (string bogusName in new[] { "sdfsdfsdfdsfsdfsdf", "!(**#$U*#$*" })
    {
        try
        {
            TokenizerFactory.ForName(bogusName, new Dictionary<string, string>());
            fail();
        }
        catch (System.ArgumentException)
        {
            // expected: unknown factory name
        }
    }
}
/// <summary>
/// LookupClass must resolve the whitespace tokenizer factory case-insensitively.
/// </summary>
public virtual void TestLookupTokenizerClass()
{
    foreach (string name in new[] { "Whitespace", "WHITESPACE", "whitespace" })
    {
        assertSame(typeof(WhitespaceTokenizerFactory), TokenizerFactory.LookupClass(name));
    }
}
/// <summary>
/// ForName must instantiate the whitespace tokenizer factory case-insensitively.
/// </summary>
public virtual void TestLookupTokenizer()
{
    foreach (string name in new[] { "Whitespace", "WHITESPACE", "whitespace" })
    {
        assertSame(typeof(WhitespaceTokenizerFactory), TokenizerFactory.ForName(name, VersionArgOnly()).GetType());
    }
}
/// <summary>
/// Tokenizes <paramref name="source"/> with the given factory and returns the
/// non-empty terms in the order produced.
/// </summary>
/// <param name="source">Raw text to tokenize.</param>
/// <param name="tokFactory">Factory used to create the tokenizer.</param>
/// <returns>List of non-empty term strings.</returns>
private static IList<string> splitByTokenizer(string source, TokenizerFactory tokFactory)
{
    StringReader reader = new StringReader(source);
    TokenStream ts = loadTokenizer(tokFactory, reader);
    IList<string> tokList = new List<string>();
    try
    {
        // Ported from the Java converter output: .NET Lucene uses the generic
        // AddAttribute<T>()/Reset()/IncrementToken() API, not the Java names.
        ICharTermAttribute termAtt = ts.AddAttribute<ICharTermAttribute>();
        ts.Reset();
        while (ts.IncrementToken())
        {
            if (termAtt.Length > 0)
            {
                tokList.Add(termAtt.ToString());
            }
        }
    }
    finally
    {
        // .NET TextReader has Dispose(), not Java's close()
        reader.Dispose();
    }
    return tokList;
}
/// <summary>
/// Creates a token stream from <paramref name="reader"/> using the supplied factory.
/// Ported from converter output: Java's <c>Reader</c>/<c>create()</c> do not exist
/// in .NET; the factory method is <c>Create(TextReader)</c>.
/// </summary>
private static TokenStream loadTokenizer(TokenizerFactory tokFactory, TextReader reader)
{
    return tokFactory.Create(reader);
}
// a , b c , d e f => [[a],[b,c],[d,e,f]]
/// <summary>
/// Splits <paramref name="str"/> on <paramref name="separator"/>, then splits each
/// piece into tokens — via the tokenizer factory when one is supplied, otherwise on
/// whitespace — producing one token list per piece.
/// </summary>
private static IList<IList<string>> getSynList(string str, string separator, TokenizerFactory tokFactory)
{
    IList<string> pieces = splitSmart(str, separator, false);

    // now split on whitespace to get a list of token strings
    IList<IList<string>> result = new List<IList<string>>();
    foreach (string piece in pieces)
    {
        IList<string> tokens;
        if (tokFactory == null)
        {
            tokens = splitWS(piece, true);
        }
        else
        {
            tokens = splitByTokenizer(piece, tokFactory);
        }
        result.Add(tokens);
    }
    return result;
}
/// <summary>
/// Parses synonym rules into <paramref name="map"/>. Each rule is split on
/// <paramref name="mappingSep"/> into at most two sides; a one-sided rule either
/// expands to all of its terms (<paramref name="expansion"/> true) or reduces to
/// its first term. Tokens are produced via <paramref name="tokFactory"/> when
/// supplied, otherwise by whitespace splitting.
/// </summary>
/// <exception cref="System.ArgumentException">if a rule has more than two sides</exception>
internal static void parseRules(IEnumerable<string> rules, SlowSynonymMap map, string mappingSep, string synSep, bool expansion, TokenizerFactory tokFactory)
{
    int count = 0;
    foreach (string rule in rules)
    {
        // To use regexes, we need an expression that specifies an odd number of chars.
        // This can't really be done with string.split(), and since we need to
        // do unescaping at some point anyway, we wouldn't be saving any effort
        // by using regexes.
        IList<string> mapping = splitSmart(rule, mappingSep, false);

        IList<IList<string>> source;
        IList<IList<string>> target;

        if (mapping.Count > 2)
        {
            throw new System.ArgumentException("Invalid Synonym Rule:" + rule);
        }
        else if (mapping.Count == 2)
        {
            source = getSynList(mapping[0], synSep, tokFactory);
            target = getSynList(mapping[1], synSep, tokFactory);
        }
        else
        {
            source = getSynList(mapping[0], synSep, tokFactory);
            if (expansion)
            {
                // expand to all arguments
                target = source;
            }
            else
            {
                // reduce to first argument
                // BUGFIX: "new List<>(1)" is Java diamond syntax, invalid in C#;
                // the element type must be stated explicitly.
                target = new List<IList<string>>(1);
                target.Add(source[0]);
            }
        }

        bool includeOrig = false;
        foreach (IList<string> fromToks in source)
        {
            count++;
            foreach (IList<string> toToks in target)
            {
                map.add(fromToks, SlowSynonymMap.makeTokens(toToks), includeOrig, true);
            }
        }
    }
}
/// <summary>
/// Wires up the component factories this analyzer is assembled from.
/// Only the tokenizer factory is mandatory; the filter factories may be null.
/// </summary>
internal FactoryAnalyzer(TokenizerFactory tokenizer, TokenFilterFactory tokenfilter, CharFilterFactory charFilter)
{
    Debug.Assert(tokenizer != null);
    this.charFilter = charFilter;
    this.tokenfilter = tokenfilter;
    this.tokenizer = tokenizer;
}
/// <summary>
/// Captures the enclosing factory instance and the tokenizer factory this
/// anonymous-class helper delegates to.
/// </summary>
public AnalyzerAnonymousInnerClassHelper(FSTSynonymFilterFactory outerInstance, TokenizerFactory factory)
{
    this.factory = factory;
    this.outerInstance = outerInstance;
}
/// <summary>
/// Creates a token stream over <paramref name="reader"/> using the supplied factory.
/// </summary>
private static TokenStream LoadTokenizer(TokenizerFactory tokFactory, TextReader reader) => tokFactory.Create(reader);
/// <summary>
/// Runs <paramref name="source"/> through the factory's tokenizer and returns the
/// non-empty terms in the order they were produced.
/// </summary>
private static IList<string> SplitByTokenizer(string source, TokenizerFactory tokFactory)
{
    StringReader input = new StringReader(source);
    TokenStream stream = LoadTokenizer(tokFactory, input);
    IList<string> terms = new List<string>();
    try
    {
        ICharTermAttribute termAtt = stream.AddAttribute<ICharTermAttribute>();
        stream.Reset();
        while (stream.IncrementToken())
        {
            if (termAtt.Length == 0)
            {
                continue; // skip empty terms
            }
            terms.Add(termAtt.ToString());
        }
    }
    finally
    {
        input.Dispose();
    }
    return terms;
}