/// <summary>
/// Creates a new instance of the <see cref="WikipediaTokenizer"/>. The
/// <paramref name="factory"/> and <paramref name="input"/> are forwarded to the base
/// constructor, and a new <see cref="WikipediaTokenizerImpl"/> scanner is attached to the input.
/// </summary>
/// <param name="factory"> The <see cref="AttributeFactory"/> handed to the base constructor </param>
/// <param name="input"> The input </param>
/// <param name="tokenOutput"> One of <see cref="TOKENS_ONLY"/>, <see cref="UNTOKENIZED_ONLY"/>, <see cref="BOTH"/> </param>
/// <param name="untokenizedTypes"> Passed to <c>Init</c> together with <paramref name="tokenOutput"/> </param>
public WikipediaTokenizer(
    AttributeFactory factory,
    TextReader input,
    int tokenOutput,
    IEnumerable<string> untokenizedTypes)
    : base(factory, input)
{
    // "this.input" is the instance member, deliberately not the constructor
    // parameter of the same name (presumably assigned by the base constructor - confirm).
    scanner = new WikipediaTokenizerImpl(this.input);

    Init(tokenOutput, untokenizedTypes);
}
/// <summary>
/// Creates a new instance of the <see cref="WikipediaTokenizer"/>. The
/// <paramref name="factory"/> and <paramref name="input"/> are forwarded to the base
/// constructor, and a new <see cref="WikipediaTokenizerImpl"/> scanner is attached to the input.
/// </summary>
/// <param name="factory"> The <see cref="AttributeFactory"/> handed to the base constructor </param>
/// <param name="input"> The input </param>
/// <param name="tokenOutput"> One of <see cref="TOKENS_ONLY"/>, <see cref="UNTOKENIZED_ONLY"/>, <see cref="BOTH"/> </param>
/// <param name="untokenizedTypes"> Passed to <c>init</c> together with <paramref name="tokenOutput"/> </param>
public WikipediaTokenizer(
    AttributeFactory factory,
    Reader input,
    int tokenOutput,
    HashSet<string> untokenizedTypes)
    : base(factory, input)
{
    // NOTE(review): "Reader" and the lower-case "init" look like untranslated Java
    // names - confirm that both resolve in this project.

    // "this.input" is the instance member, deliberately not the constructor
    // parameter of the same name (presumably assigned by the base constructor - confirm).
    scanner = new WikipediaTokenizerImpl(this.input);

    init(tokenOutput, untokenizedTypes);
}
/// <summary>
/// Creates a new instance of the <see cref="WikipediaTokenizer"/>. Passes
/// <paramref name="input"/> to the base constructor and attaches the newly created
/// <see cref="WikipediaTokenizerImpl"/> scanner to the <c>m_input</c> member.
/// </summary>
/// <param name="input"> The input </param>
/// <param name="tokenOutput"> One of <see cref="TOKENS_ONLY"/>, <see cref="UNTOKENIZED_ONLY"/>, <see cref="BOTH"/> </param>
/// <param name="untokenizedTypes"> Untokenized types </param>
public WikipediaTokenizer(
    TextReader input,
    int tokenOutput,
    ICollection<string> untokenizedTypes)
    : base(input)
{
    // The scanner is built over the m_input member rather than the raw parameter.
    scanner = new WikipediaTokenizerImpl(m_input);

    Init(tokenOutput, untokenizedTypes);
}
/// <summary>
/// Creates a new instance of the <see cref="WikipediaTokenizer"/>. The
/// <paramref name="factory"/> and <paramref name="input"/> are forwarded to the base
/// constructor, and a new <see cref="WikipediaTokenizerImpl"/> scanner is attached to the input.
/// </summary>
/// <param name="factory"> The <see cref="AttributeFactory"/> handed to the base constructor </param>
/// <param name="input"> The input </param>
/// <param name="tokenOutput"> One of <see cref="TOKENS_ONLY"/>, <see cref="UNTOKENIZED_ONLY"/>, <see cref="BOTH"/> </param>
/// <param name="untokenizedTypes"> Passed to <c>init</c> together with <paramref name="tokenOutput"/> </param>
public WikipediaTokenizer(
    AttributeFactory factory,
    Reader input,
    int tokenOutput,
    HashSet<string> untokenizedTypes)
    : base(factory, input)
{
    // NOTE(review): "Reader" and the lower-case "init" look like untranslated Java
    // names - confirm that both resolve in this project.

    // "this.input" is the instance member, deliberately not the constructor
    // parameter of the same name (presumably assigned by the base constructor - confirm).
    scanner = new WikipediaTokenizerImpl(this.input);

    init(tokenOutput, untokenizedTypes);
}