/// <summary>
/// Creates a new instance of the <see cref="WikipediaTokenizer"/> and attaches the
/// <paramref name="input"/> to the newly created JFlex scanner. Uses the given
/// <see cref="AttributeFactory"/>.
/// </summary>
/// <param name="factory"> The attribute factory; forwarded unchanged to the base constructor </param>
/// <param name="input"> The input; forwarded to the base constructor </param>
/// <param name="tokenOutput"> One of <see cref="TOKENS_ONLY"/>, <see cref="UNTOKENIZED_ONLY"/>, <see cref="BOTH"/> </param>
/// <param name="untokenizedTypes"> Handed to <c>init</c> together with <paramref name="tokenOutput"/>;
/// presumably the token types that should be emitted untokenized (confirm against <c>init</c>) </param>
public WikipediaTokenizer(AttributeFactory factory, Reader input, int tokenOutput, HashSet<string> untokenizedTypes)
    : base(factory, input)
{
    // The scanner is built from the member this.input, not from the constructor
    // parameter of the same name (the member is presumably populated by the base constructor).
    scanner = new WikipediaTokenizerImpl(this.input);

    init(tokenOutput, untokenizedTypes);
}
/// <summary>
/// Creates a new instance of the <see cref="WikipediaTokenizer"/> and attaches the
/// <paramref name="input"/> to the newly created JFlex scanner. Uses the given
/// <see cref="AttributeFactory"/>.
/// </summary>
/// <param name="factory"> The attribute factory; forwarded unchanged to the base constructor </param>
/// <param name="input"> The input; forwarded to the base constructor </param>
/// <param name="tokenOutput"> One of <see cref="TOKENS_ONLY"/>, <see cref="UNTOKENIZED_ONLY"/>, <see cref="BOTH"/> </param>
/// <param name="untokenizedTypes"> Handed to <c>Init</c> together with <paramref name="tokenOutput"/>;
/// presumably the token types that should be emitted untokenized (confirm against <c>Init</c>) </param>
public WikipediaTokenizer(AttributeFactory factory, TextReader input, int tokenOutput, IEnumerable<string> untokenizedTypes)
    : base(factory, input)
{
    // The scanner is built from the member this.input, not from the constructor
    // parameter of the same name (the member is presumably populated by the base constructor).
    scanner = new WikipediaTokenizerImpl(this.input);

    Init(tokenOutput, untokenizedTypes);
}