/// <summary>
/// Creates a new instance of the
/// <see cref="Lucene.Net.Analysis.Standard.StandardTokenizer"/> and attaches
/// <paramref name="input"/> to a newly created JFlex scanner.
/// </summary>
/// <param name="input">The input reader.</param>
/// <param name="replaceInvalidAcronym">
/// Set to true to replace mischaracterized acronyms with HOST.
/// See http://issues.apache.org/jira/browse/LUCENE-1068
/// </param>
public StandardTokenizer(System.IO.TextReader input, bool replaceInvalidAcronym)
{
    InitBlock();

    // Remember the acronym-handling flag and the reader on this instance.
    this.replaceInvalidAcronym = replaceInvalidAcronym;
    this.input = input;

    // Every tokenizer built through this constructor gets its own scanner.
    scanner = new StandardTokenizerImpl(input);
}
/// <summary>
/// Returns the shared <c>impl</c> scanner bound to <paramref name="reader"/>.
/// The scanner is created on the first call; on later calls the existing
/// instance is reset onto the new reader via <c>yyreset</c>.
/// </summary>
/// <param name="reader">The reader the scanner should consume.</param>
/// <returns>The shared scanner instance held in <c>impl</c>.</returns>
internal static StandardTokenizerImpl GetStandardTokenizerImpl(System.IO.TextReader reader)
{
    // NOTE(review): the same instance is handed to every caller and no
    // synchronization is visible here - confirm callers never overlap.
    if (impl != null)
    {
        impl.yyreset(reader);
        return impl;
    }

    impl = new StandardTokenizerImpl(reader);
    return impl;
}
/// <summary>
/// Pulls the next token from <paramref name="scanner"/>.
/// </summary>
/// <param name="scanner">The scanner to read the next token from.</param>
/// <returns>
/// <c>null</c> when the scanner reports <c>StandardTokenizerImpl.YYEOF</c>;
/// otherwise a new <c>Token</c> built from <c>yytext()</c>, the position
/// returned by <c>yychar()</c>, that position plus the text length, and the
/// <c>TOKEN_TYPES</c> entry for the returned token type.
/// </returns>
public static Token Next(StandardTokenizerImpl scanner)
{
    int kind = scanner.GetNextToken();

    if (kind != StandardTokenizerImpl.YYEOF)
    {
        // Position first, then text: same query order as before.
        int begin = scanner.yychar();
        string text = scanner.yytext();
        int end = begin + text.Length;

        return new Token(text, begin, end, StandardTokenizerImpl.TOKEN_TYPES[kind]);
    }

    // The scanner has no more tokens.
    return null;
}
/// <summary>
/// Creates a new StandardTokenizer with a given
/// <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory"/>.
/// </summary>
/// <param name="matchVersion">Version value forwarded to <c>Init</c>.</param>
/// <param name="factory">Attribute factory passed to the base constructor.</param>
/// <param name="input">Reader handed to the new scanner and to <c>Init</c>.</param>
public StandardTokenizer(Version matchVersion, AttributeFactory factory, System.IO.TextReader input)
    : base(factory)
{
    InitBlock();

    // Build the scanner first, then let Init finish the setup.
    scanner = new StandardTokenizerImpl(input);
    Init(input, matchVersion);
}
/// <summary>
/// Creates a new StandardTokenizer with a given
/// <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory"/>.
/// </summary>
/// <param name="factory">Attribute factory passed to the base constructor.</param>
/// <param name="input">Reader handed to the new scanner and to <c>Init</c>.</param>
/// <param name="replaceInvalidAcronym">Flag forwarded unchanged to <c>Init</c>.</param>
public StandardTokenizer(AttributeFactory factory, System.IO.TextReader input, bool replaceInvalidAcronym)
    : base(factory)
{
    InitBlock();

    // Build the scanner first, then let Init finish the setup.
    scanner = new StandardTokenizerImpl(input);
    Init(input, replaceInvalidAcronym);
}
/// <summary>
/// Creates a new instance of the <see cref="StandardTokenizer"/> and attaches
/// <paramref name="input"/> to a newly created JFlex scanner.
/// </summary>
/// <param name="input">The reader to tokenize.</param>
public StandardTokenizer(System.IO.TextReader input)
{
    InitBlock();

    // Keep the reader on this instance and give it a dedicated scanner.
    this.input = input;
    scanner = new StandardTokenizerImpl(input);
}
/// <summary>
/// Creates a new StandardTokenizer with a given
/// <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory"/>.
/// </summary>
/// <param name="matchVersion">Version value forwarded to <c>Init</c>.</param>
/// <param name="factory">Attribute factory passed to the base constructor.</param>
/// <param name="input">Reader used for the new scanner and forwarded to <c>Init</c>.</param>
public StandardTokenizer(Version matchVersion, AttributeFactory factory, System.IO.TextReader input)
    : base(factory)
{
    InitBlock();

    // A fresh scanner is created before Init runs.
    scanner = new StandardTokenizerImpl(input);
    Init(input, matchVersion);
}
/// <summary>
/// Creates a new StandardTokenizer, passing <paramref name="factory"/> to the
/// base constructor and attaching <paramref name="input"/> to a new scanner.
/// </summary>
/// <param name="factory">Attribute factory passed to the base constructor.</param>
/// <param name="input">Reader used for the new scanner and forwarded to <c>Init</c>.</param>
/// <param name="replaceInvalidAcronym">Flag forwarded unchanged to <c>Init</c>.</param>
public StandardTokenizer(AttributeFactory factory, System.IO.TextReader input, bool replaceInvalidAcronym)
    : base(factory)
{
    InitBlock();

    // A fresh scanner is created before Init runs.
    scanner = new StandardTokenizerImpl(input);
    Init(input, replaceInvalidAcronym);
}
/// <summary>
/// Constant with value 8; flagged as deprecated by the original author.
/// </summary>
// NOTE(review): presumably a token-type id - confirm against the other type constants.
public const int ACRONYM_DEP = 8; /* deprecated */

/// <summary>Constructs a tokenizer for this Reader.</summary>
/// <param name="reader">
/// Reader passed to the base constructor and to
/// <c>StandardTokenizerImpl.GetStandardTokenizerImpl</c>.
/// </param>
public StandardTokenizer(System.IO.TextReader reader)
    : base(reader)
{
    // The scanner comes from the static accessor instead of being constructed here.
    scanner = StandardTokenizerImpl.GetStandardTokenizerImpl(reader);
}
/// <summary>
/// Hands back the shared <c>impl</c> scanner, pointed at
/// <paramref name="reader"/>. A scanner is constructed when none exists yet;
/// otherwise the existing one is reset onto the reader with <c>yyreset</c>.
/// </summary>
/// <param name="reader">The reader the scanner should consume.</param>
/// <returns>The scanner instance held in <c>impl</c>.</returns>
internal static StandardTokenizerImpl GetStandardTokenizerImpl(System.IO.TextReader reader)
{
    // First use: nothing to recycle, so build the scanner.
    if (impl == null)
    {
        impl = new StandardTokenizerImpl(reader);
        return impl;
    }

    // Later uses: re-target the existing scanner rather than allocating.
    impl.yyreset(reader);
    return impl;
}