/// <summary>
/// Creates a <see cref="FilteringTokenFilter"/> with an explicit position-increment setting.
/// </summary>
/// <param name="version">the Lucene match version</param>
/// <param name="enablePositionIncrements">validated via <c>CheckPositionIncrement</c> and stored</param>
/// <param name="input">the <see cref="TokenStream"/> to consume</param>
public FilteringTokenFilter(Lucene.Net.Util.LuceneVersion version, bool enablePositionIncrements, TokenStream input)
    : this(version, input)
{
    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    // Validate before storing: CheckPositionIncrement may reject the combination.
    CheckPositionIncrement(version, enablePositionIncrements);
    this.enablePositionIncrements = enablePositionIncrements;
}
/// <summary>
/// Parses <paramref name="tokens"/> as an expression and stores the parse result
/// in the scenario context under <paramref name="key"/>.
/// </summary>
public void WhenParseExpressionFromAndSaveResultAs(Token[] tokens, string key)
{
    var tokenStream = new TokenStream(tokens);
    var expressionParser = new ExpressionParser(Settings);
    ScenarioContext.Current.Set(expressionParser.Parse(tokenStream), key);
}
/// <summary>
/// Supplies outlining data for the token at the stream position: curly braces
/// start/end a "CodeBlock" node, multi-line comment tokens start/end a
/// "MultiLineComment" node.
/// </summary>
/// <param name="tokenStream">stream positioned at the token requiring outlining data</param>
/// <param name="outliningKey">receives the outlining node key, or stays null</param>
/// <param name="tokenAction">receives the action (start/end) for the token</param>
public override void GetTokenOutliningAction(TokenStream tokenStream, ref string outliningKey, ref OutliningNodeAction tokenAction)
{
    // Fix: cleaned decompiler artifacts — use the Key property instead of the
    // raw get_Key() accessor, and the OutliningNodeAction enum members instead
    // of untyped integer literals (1 = Start, 2 = End, matching the other
    // GetTokenOutliningAction implementations in this file).
    string key = tokenStream.Peek().Key;
    switch (key)
    {
        case "OpenCurlyBraceToken":
            outliningKey = "CodeBlock";
            tokenAction = OutliningNodeAction.Start;
            break;
        case "CloseCurlyBraceToken":
            outliningKey = "CodeBlock";
            tokenAction = OutliningNodeAction.End;
            break;
        case "MultiLineCommentStartToken":
            outliningKey = "MultiLineComment";
            tokenAction = OutliningNodeAction.Start;
            break;
        case "MultiLineCommentEndToken":
            outliningKey = "MultiLineComment";
            tokenAction = OutliningNodeAction.End;
            break;
        // null / any other key: leave outliningKey null (no outlining node).
    }
}
/// <summary>
/// Creates a <see cref="TypeTokenFilter"/> that filters tokens by their type attribute.
/// </summary>
/// <param name="version">the Lucene match version</param>
/// <param name="enablePositionIncrements">forwarded to the base filter</param>
/// <param name="input">the <see cref="TokenStream"/> to consume</param>
/// <param name="stopTypes">the token types to match</param>
/// <param name="useWhiteList">true keeps matching tokens; false removes them</param>
public TypeTokenFilter(Version version, bool enablePositionIncrements, TokenStream input, HashSet<string> stopTypes, bool useWhiteList)
    : base(version, enablePositionIncrements, input)
{
    typeAttribute = AddAttribute<ITypeAttribute>();
    this.stopTypes = stopTypes;
    this.useWhiteList = useWhiteList;
}
/// <summary>
/// Creates a new <see cref="FilteringTokenFilter"/> over <paramref name="in"/>,
/// with position increments enabled by default.
/// </summary>
/// <param name="version">the Lucene match version</param>
/// <param name="in">the <see cref="TokenStream"/> to consume</param>
public FilteringTokenFilter(LuceneVersion version, TokenStream @in)
    : base(@in)
{
    posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    this.version = version;
    this.enablePositionIncrements = true;
}
/// <summary>
/// Creates a new <see cref="PatternKeywordMarkerFilter"/> that marks the current
/// token as a keyword (via the <see cref="KeywordAttribute"/>) when the term
/// buffer matches <paramref name="pattern"/>.
/// </summary>
/// <param name="in">TokenStream to filter</param>
/// <param name="pattern">the pattern to apply to the incoming term buffer</param>
public PatternKeywordMarkerFilter(TokenStream @in, Pattern pattern)
    : base(@in)
{
    termAtt = AddAttribute<ICharTermAttribute>();
    // A single matcher is created once on an empty input and reset per token.
    this.matcher = pattern.matcher("");
}
/// <summary>
/// Creates a new IndonesianStemFilter.
/// <para>
/// When <paramref name="stemDerivational"/> is false, only inflectional
/// suffixes (particles and possessive pronouns) are stemmed.
/// </para>
/// </summary>
public IndonesianStemFilter(TokenStream input, bool stemDerivational)
    : base(input)
{
    this.stemDerivational = stemDerivational;
    termAtt = AddAttribute<ICharTermAttribute>();
    keywordAtt = AddAttribute<IKeywordAttribute>();
}
/// <summary>
/// Inserts implicit multiplication operators into an infix token stream:
/// between an operand and a following opening parenthesis, and between a
/// numeric constant and a following variable.
/// </summary>
/// <param name="tokens">infix token stream to rewrite</param>
/// <returns>a new infix stream with explicit "*" tokens inserted</returns>
/// <exception cref="MathExpressionException">if the stream is not infix</exception>
public override TokenStream Rewrite(TokenStream tokens)
{
    // Implicit multiplication only makes sense in infix notation.
    if (tokens.Notation != TokenNotation.Infix)
    {
        throw new MathExpressionException("Token stream is not in infix notation.");
    }

    var rewritten = new List<Token>();
    for (int i = 0; i < tokens.Count; i++)
    {
        Token current = tokens[i];
        if (i > 0)
        {
            Token previous = tokens[i - 1];
            // The two trigger cases are mutually exclusive (a token cannot be
            // both an opening parenthesis and a variable), so a combined check
            // inserts at most one "*" per position, as before.
            bool operandThenParen = current.Type == TokenType.OpeningParenthesis && previous.IsOperand();
            bool numberThenVariable = current.Type == TokenType.Variable && previous.Type == TokenType.Numeric;
            if (operandThenParen || numberThenVariable)
            {
                rewritten.Add(new Token(TokenType.Multiplication, "*"));
            }
        }
        rewritten.Add(current);
    }
    return new TokenStream(rewritten, TokenNotation.Infix);
}
/// <summary>
/// Parses <paramref name="Script"/> as a macro declaration, tags it with the
/// current context's ID, and queues it for emission.
/// </summary>
public void AddScriptMacro(String Script)
{
    var tokenStream = new TokenStream(new StringIterator(Script), Context);
    var macro = Parse.ParseMacroDeclaration(tokenStream, Context);
    macro.OwnerContextID = Context.ID;
    Context.PendingEmission.Add(macro);
}
/// <summary>
/// Determines whether the text between the nearest curly-brace boundary and
/// the token at <paramref name="currentTokenIndex"/> parses as a complete R
/// expression.
/// </summary>
public bool IsCompleteExpression(int currentTokenIndex) {
    // Within the current scope find if text between scope start and the current
    // token position is a complete expression. We preserve user indentation
    // in multiline expressions so we need to know if a particular position
    // is in the middle of an expression. Simple cases like when the previous token was
    // an operator are handled directly. In more complex cases such as scope-less
    // function definitions we need to parse the statement.
    int startIndex = 0;
    // Walk backwards to the closest open/close curly brace; the scope of
    // interest starts just after it (or at 0 if none is found).
    for (int i = currentTokenIndex - 1; i >= 0; i--) {
        if (_tokens[i].TokenType == RTokenType.OpenCurlyBrace || _tokens[i].TokenType == RTokenType.CloseCurlyBrace) {
            startIndex = i + 1;
            break;
        }
    }
    if (startIndex < currentTokenIndex) {
        var startToken = _tokens[startIndex];
        var currentToken = _tokens[currentTokenIndex];
        // Limit token stream since parser may not necessarily stop at the supplied text range end.
        // NOTE(review): `list` below appears unused (a fresh List<RToken> is
        // passed to RParser.Parse directly) — confirm before removing.
        var list = new List<RToken>();
        var tokens = _tokens.Skip(startIndex).Take(currentTokenIndex - startIndex);
        var ts = new TokenStream<RToken>(new TextRangeCollection<RToken>(tokens), RToken.EndOfStreamToken);
        // At end-of-stream the token has no meaningful Start; fall back to the
        // end of the text provider.
        var end = currentToken.TokenType != RTokenType.EndOfStream ? currentToken.Start : _textProvider.Length;
        var ast = RParser.Parse(_textProvider, TextRange.FromBounds(startToken.Start, end), ts, new List<RToken>(), null);
        return ast.IsCompleteExpression();
    }
    return true;
}
/// <summary>
/// Initializes the compound-word decomposition base filter: registers the
/// term, offset and position-increment attributes, validates the size limits,
/// and stores the configuration.
/// </summary>
/// <exception cref="System.ArgumentException">if any size parameter is negative</exception>
protected CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch)
    : base(input)
{
    termAtt = AddAttribute<ICharTermAttribute>() as CharTermAttribute;
    offsetAtt = AddAttribute<IOffsetAttribute>();
    posIncAtt = AddAttribute<IPositionIncrementAttribute>();

    this.matchVersion = matchVersion;
    this.tokens = new LinkedList<CompoundToken>();

    // Each limit is validated immediately before it is stored, preserving the
    // original order of side effects if an exception is thrown midway.
    if (minWordSize < 0)
    {
        throw new System.ArgumentException("minWordSize cannot be negative");
    }
    this.minWordSize = minWordSize;

    if (minSubwordSize < 0)
    {
        throw new System.ArgumentException("minSubwordSize cannot be negative");
    }
    this.minSubwordSize = minSubwordSize;

    if (maxSubwordSize < 0)
    {
        throw new System.ArgumentException("maxSubwordSize cannot be negative");
    }
    this.maxSubwordSize = maxSubwordSize;

    this.onlyLongestMatch = onlyLongestMatch;
    this.dictionary = dictionary;
}
// we only check a few core attributes here.
// TODO: test other things
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void assertEquals(String s, org.apache.lucene.analysis.TokenStream left, org.apache.lucene.analysis.TokenStream right) throws Exception
// NOTE(review): Java-converter artifacts remain (lowercase reset/addAttribute/
// incrementToken and typeof-based attribute lookup); verify against the actual
// TokenStream API before modernizing.
/// <summary>
/// Asserts that two token streams produce identical token sequences for input
/// <paramref name="s"/>: same terms, position increments, and offsets, the same
/// number of tokens, and the same final offset after end().
/// </summary>
public virtual void assertEquals(string s, TokenStream left, TokenStream right)
{
    left.reset();
    right.reset();
    CharTermAttribute leftTerm = left.addAttribute(typeof(CharTermAttribute));
    CharTermAttribute rightTerm = right.addAttribute(typeof(CharTermAttribute));
    OffsetAttribute leftOffset = left.addAttribute(typeof(OffsetAttribute));
    OffsetAttribute rightOffset = right.addAttribute(typeof(OffsetAttribute));
    PositionIncrementAttribute leftPos = left.addAttribute(typeof(PositionIncrementAttribute));
    PositionIncrementAttribute rightPos = right.addAttribute(typeof(PositionIncrementAttribute));
    // Drive the left stream; the right stream must keep pace token-for-token.
    while (left.incrementToken())
    {
        assertTrue("wrong number of tokens for input: " + s, right.incrementToken());
        assertEquals("wrong term text for input: " + s, leftTerm.ToString(), rightTerm.ToString());
        assertEquals("wrong position for input: " + s, leftPos.PositionIncrement, rightPos.PositionIncrement);
        assertEquals("wrong start offset for input: " + s, leftOffset.startOffset(), rightOffset.startOffset());
        assertEquals("wrong end offset for input: " + s, leftOffset.endOffset(), rightOffset.endOffset());
    };
    // The right stream must be exhausted at the same time as the left one.
    assertFalse("wrong number of tokens for input: " + s, right.incrementToken());
    left.end();
    right.end();
    assertEquals("wrong final offset for input: " + s, leftOffset.endOffset(), rightOffset.endOffset());
    left.close();
    right.close();
}
/// <summary>
/// Creates a new <see cref="CodepointCountFilter"/> that removes tokens whose
/// code point count is below <paramref name="min"/> or above <paramref name="max"/>.
/// </summary>
/// <param name="version">the Lucene match version</param>
/// <param name="in">the <see cref="TokenStream"/> to consume</param>
/// <param name="min">the minimum length</param>
/// <param name="max">the maximum length</param>
public CodepointCountFilter(LuceneVersion version, TokenStream @in, int min, int max)
    : base(version, @in)
{
    this.min = min;
    this.max = max;
    termAtt = AddAttribute<ICharTermAttribute>();
}
/// <summary>
/// Parses an import statement of the form
/// <c>[from &lt;dot-chain&gt;] import &lt;dot-chain&gt; [as &lt;name&gt;]</c>.
/// </summary>
/// <returns>an <c>ImportStatement</c> describing the parsed import</returns>
/// <exception cref="ParserException">if the 'as' clause names more than one token</exception>
private Executable ParseImport(TokenStream tokens, int indention)
{
    tokens.SkipWhitespace();

    List<Token> fromChain = null;
    List<Token> asChain = null;
    Token firstToken = null;

    // Optional "from <chain>" prefix; its leading token anchors the statement.
    if (tokens.PeekValue() == "from")
    {
        firstToken = tokens.Pop();
        fromChain = ParseDotChainForImport(tokens);
    }

    // Without a "from" clause, "import" itself is the anchoring token.
    firstToken = firstToken ?? tokens.PopExpected("import");
    List<Token> importChain = ParseDotChainForImport(tokens);

    // Optional "as <name>" — the alias must be a single identifier.
    if (tokens.PopIfPresent("as"))
    {
        asChain = ParseDotChainForImport(tokens);
        if (asChain.Count > 1)
        {
            throw new ParserException(asChain[0], "Expected: variable");
        }
    }

    return new ImportStatement(firstToken, importChain, fromChain, asChain == null ? null : asChain[0]);
}
/// <summary>
/// Creates NGramTokenFilter with given min and max n-grams. </summary>
/// <param name="version"> Lucene version to enable correct position increments.
/// See <a href="#version">above</a> for details. </param>
/// <param name="input"> <seealso cref="TokenStream"/> holding the input to be tokenized </param>
/// <param name="minGram"> the smallest n-gram to generate </param>
/// <param name="maxGram"> the largest n-gram to generate </param>
// NOTE(review): Java-converter artifacts remain here (lowercase onOrAfter/
// getInstance/addAttribute(typeof(...))); compare with the LuceneVersion-based
// overload elsewhere in this file before modernizing the API casing.
public NGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram)
    : base(new CodepointCountFilter(version, input, minGram, int.MaxValue))
{
    this.version = version;
    // 4.4+ uses version-aware CharacterUtils; earlier versions keep the Java-4 behavior.
    this.charUtils = version.onOrAfter(Version.LUCENE_44) ? CharacterUtils.getInstance(version) : CharacterUtils.Java4Instance;
    if (minGram < 1)
    {
        throw new System.ArgumentException("minGram must be greater than zero");
    }
    if (minGram > maxGram)
    {
        throw new System.ArgumentException("minGram must not be greater than maxGram");
    }
    this.minGram = minGram;
    this.maxGram = maxGram;
    if (version.onOrAfter(Version.LUCENE_44))
    {
        posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
        posLenAtt = addAttribute(typeof(PositionLengthAttribute));
    }
    else
    {
        // Pre-4.4 streams use stand-in attribute implementations (defined elsewhere).
        posIncAtt = new PositionIncrementAttributeAnonymousInnerClassHelper(this);
        posLenAtt = new PositionLengthAttributeAnonymousInnerClassHelper(this);
    }
}
/// <summary>
/// Creates a <see cref="StandardFilter"/>: stores the match version and
/// registers the type and term attributes.
/// </summary>
/// <param name="matchVersion">the Lucene match version</param>
/// <param name="in">the <see cref="TokenStream"/> to filter</param>
public StandardFilter(LuceneVersion matchVersion, TokenStream @in)
    : base(@in)
{
    this.matchVersion = matchVersion;
    typeAtt = AddAttribute<ITypeAttribute>();
    termAtt = AddAttribute<ICharTermAttribute>();
}
/// <summary>
/// Builds a GermanStemFilter that uses an exclusion table.
/// </summary>
/// <param name="_in">the <see cref="TokenStream"/> to filter</param>
/// <param name="exclusiontable">terms exempted from stemming</param>
/// <param name="normalizeDin2">Specifies if the DIN-2007-2 style stemmer should be used in addition to DIN1. This
/// will cause words with 'ae', 'ue', or 'oe' in them (expanded umlauts) to be first converted to 'a', 'u', and 'o'
/// respectively, before the DIN1 stemmer is invoked.</param>
public GermanStemFilter(TokenStream _in, ISet<string> exclusiontable, bool normalizeDin2)
    : base(_in)
{
    exclusionSet = exclusiontable;
    stemmer = normalizeDin2 ? new GermanDIN2Stemmer() : new GermanStemmer();
    termAtt = AddAttribute<ITermAttribute>();
}
/// <summary>
/// Create a new UpperCaseFilter, that normalizes token text to upper case.
/// </summary>
/// <param name="matchVersion"> See <a href="#version">above</a> </param>
/// <param name="in"> TokenStream to filter </param>
public UpperCaseFilter(LuceneVersion matchVersion, TokenStream @in)
    : base(@in)
{
    // Fix: the original assigned termAtt = AddAttribute<ICharTermAttribute>()
    // twice in a row; the second statement was a verbatim duplicate of the
    // first and has been removed.
    termAtt = AddAttribute<ICharTermAttribute>();
    charUtils = CharacterUtils.GetInstance(matchVersion);
}
/// <summary>
/// Creates NGramTokenFilter with given min and max n-grams. </summary>
/// <param name="version"> Lucene version to enable correct position increments.
/// See <a href="#version">above</a> for details. </param>
/// <param name="input"> <seealso cref="TokenStream"/> holding the input to be tokenized </param>
/// <param name="minGram"> the smallest n-gram to generate </param>
/// <param name="maxGram"> the largest n-gram to generate </param>
public NGramTokenFilter(LuceneVersion version, TokenStream input, int minGram, int maxGram)
    : base(new CodepointCountFilter(version, input, minGram, int.MaxValue))
{
    this.version = version;
    // LUCENE_44 is marked obsolete, hence the warning suppressions around its uses.
    this.charUtils = version.OnOrAfter(
#pragma warning disable 612, 618
        LuceneVersion.LUCENE_44) ?
#pragma warning restore 612, 618
        CharacterUtils.GetInstance(version) : CharacterUtils.Java4Instance;
    if (minGram < 1)
    {
        throw new System.ArgumentException("minGram must be greater than zero");
    }
    if (minGram > maxGram)
    {
        throw new System.ArgumentException("minGram must not be greater than maxGram");
    }
    this.minGram = minGram;
    this.maxGram = maxGram;
#pragma warning disable 612, 618
    if (version.OnOrAfter(LuceneVersion.LUCENE_44))
#pragma warning restore 612, 618
    {
        posIncAtt = AddAttribute<IPositionIncrementAttribute>();
        posLenAtt = AddAttribute<IPositionLengthAttribute>();
    }
    else
    {
        // Pre-4.4 streams use stand-in attribute implementations (defined elsewhere).
        posIncAtt = new PositionIncrementAttributeAnonymousInnerClassHelper(this);
        posLenAtt = new PositionLengthAttributeAnonymousInnerClassHelper(this);
    }
    termAtt = AddAttribute<ICharTermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
}
/// <summary>
/// Creates a <see cref="ChineseFilter"/>: builds the stop-word table from
/// STOP_WORDS (case-sensitive) and registers the term attribute.
/// </summary>
/// <param name="in">the <see cref="TokenStream"/> to filter</param>
public ChineseFilter(TokenStream @in)
    : base(@in)
{
    stopTable = new CharArraySet(LuceneVersion.LUCENE_CURRENT, Arrays.AsList(STOP_WORDS), false);
    termAtt = AddAttribute<ICharTermAttribute>();
}
/////////////////////////////////////////////////////////////////////////////////////////////////////
// PUBLIC PROCEDURES
/////////////////////////////////////////////////////////////////////////////////////////////////////

/// <summary>
/// Returns token parsing information for automatic outlining that determines if the current <see cref="IToken"/>
/// in the <see cref="TokenStream"/> starts or ends an outlining node.
/// </summary>
/// <param name="tokenStream">A <see cref="TokenStream"/> that is positioned at the <see cref="IToken"/> requiring outlining data.</param>
/// <param name="outliningKey">Returns the outlining node key to assign. A <see langword="null"/> should be returned if the token doesn't start or end a node.</param>
/// <param name="tokenAction">Returns the <see cref="OutliningNodeAction"/> to take for the token.</param>
public override void GetTokenOutliningAction(TokenStream tokenStream, ref string outliningKey, ref OutliningNodeAction tokenAction)
{
    // Inspect the token at the stream position and map its key to an
    // outlining node boundary, if any.
    string key = tokenStream.Peek().Key;
    if (key == "MultiLineCommentStartToken")
    {
        outliningKey = "MultiLineComment";
        tokenAction = OutliningNodeAction.Start;
    }
    else if (key == "MultiLineCommentEndToken")
    {
        outliningKey = "MultiLineComment";
        tokenAction = OutliningNodeAction.End;
    }
    else if (key == "RegionStartToken")
    {
        outliningKey = "Region";
        tokenAction = OutliningNodeAction.Start;
    }
    else if (key == "EndRegionStartToken")
    {
        outliningKey = "Region";
        tokenAction = OutliningNodeAction.End;
    }
}
/// <summary>
/// Advancing a token stream must yield new stream objects while leaving every
/// previously obtained stream's Current token unchanged, and advancing past
/// end-of-input must return the same terminal stream.
/// </summary>
public void DoesNotChangeStateAsUnderlyingEnumeratorIsTraversed()
{
    var s0 = new TokenStream(Tokens());
    s0.Current.ShouldBe(upper, "ABC", 1, 1);

    var s1 = s0.Advance();
    s0.Current.ShouldBe(upper, "ABC", 1, 1);
    s1.Current.ShouldBe(lower, "def", 1, 4);

    var s2 = s1.Advance();
    s0.Current.ShouldBe(upper, "ABC", 1, 1);
    s1.Current.ShouldBe(lower, "def", 1, 4);
    s2.Current.ShouldBe(upper, "GHI", 1, 7);

    var s3 = s2.Advance();
    s0.Current.ShouldBe(upper, "ABC", 1, 1);
    s1.Current.ShouldBe(lower, "def", 1, 4);
    s2.Current.ShouldBe(upper, "GHI", 1, 7);
    s3.Current.ShouldBe(TokenKind.EndOfInput, "", 1, 10);

    s3.Advance().ShouldBeSameAs(s3);
}
/// <summary>
/// Creates a new NorwegianMinimalStemFilter. </summary>
/// <param name="input">the <seealso cref="TokenStream"/> to filter</param>
/// <param name="flags"> set to <seealso cref="NorwegianLightStemmer#BOKMAAL"/>,
/// <seealso cref="NorwegianLightStemmer#NYNORSK"/>, or both. </param>
public NorwegianMinimalStemFilter(TokenStream input, int flags)
    : base(input)
{
    this.stemmer = new NorwegianMinimalStemmer(flags);
    termAtt = AddAttribute<ICharTermAttribute>();
    keywordAttr = AddAttribute<IKeywordAttribute>();
}
/// <summary>
/// Creates an EdgeNGramTokenFilter producing edge n-grams of sizes
/// <paramref name="minGram"/>..<paramref name="maxGram"/> from one side of each token.
/// </summary>
/// <exception cref="System.ArgumentException">on a null version/side, on Side.BACK
/// with Lucene 4.4+, or on invalid gram sizes</exception>
public EdgeNGramTokenFilter(LuceneVersion version, TokenStream input, Side side, int minGram, int maxGram)
    : base(input)
{
    if (version == null)
    {
        throw new System.ArgumentException("version must not be null");
    }
    if (version.OnOrAfter(LuceneVersion.LUCENE_44) && side == Side.BACK)
    {
        throw new System.ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4, use ReverseStringFilter up-front and afterward");
    }
    if (side == null)
    {
        throw new System.ArgumentException("sideLabel must be either front or back");
    }
    if (minGram < 1)
    {
        throw new System.ArgumentException("minGram must be greater than zero");
    }
    if (minGram > maxGram)
    {
        throw new System.ArgumentException("minGram must not be greater than maxGram");
    }
    this.version = version;
    // Fix: use the .NET API casing consistently (OnOrAfter / GetInstance) — the
    // original mixed in the Java spellings onOrAfter/getInstance, inconsistent
    // with the check above and with the NGramTokenFilter overload in this file.
    this.charUtils = version.OnOrAfter(LuceneVersion.LUCENE_44) ? CharacterUtils.GetInstance(version) : CharacterUtils.Java4Instance;
    this.minGram = minGram;
    this.maxGram = maxGram;
    this.side = side;
}
/// <summary>
/// Supplies outlining data for the current token: braces map to a "CodeBlock"
/// node and region tokens to a "CodeRegion" node. Brace-based folding is
/// skipped entirely when it is disabled in the editor configuration.
/// </summary>
public override void GetTokenOutliningAction(TokenStream tokenStream, ref string outliningKey, ref OutliningNodeAction tokenAction)
{
    string key = tokenStream.Peek().Key;

    // Honor the editor setting: no brace folding when b_Ed_CodeFold is off.
    bool isBrace = key == "OpenCurlyBraceToken" || key == "CloseCurlyBraceToken";
    if (isBrace && g.Config.b_Ed_CodeFold == false)
    {
        return;
    }

    if (key == "OpenCurlyBraceToken")
    {
        outliningKey = "CodeBlock";
        tokenAction = OutliningNodeAction.Start;
    }
    else if (key == "CloseCurlyBraceToken")
    {
        outliningKey = "CodeBlock";
        tokenAction = OutliningNodeAction.End;
    }
    else if (key == "RegionStartToken")
    {
        outliningKey = "CodeRegion";
        tokenAction = OutliningNodeAction.Start;
    }
    else if (key == "RegionEndToken")
    {
        outliningKey = "CodeRegion";
        tokenAction = OutliningNodeAction.End;
    }
}
/// <summary>
/// Creates a new <see cref="TypeTokenFilter"/>. </summary>
/// <param name="version">the Lucene match version</param>
/// <param name="input">the <see cref="TokenStream"/> to consume</param>
/// <param name="stopTypes">the types to filter</param>
/// <param name="useWhiteList">if true, tokens whose type is in stopTypes are
/// kept; otherwise they are filtered out</param>
public TypeTokenFilter(LuceneVersion version, TokenStream input, IEnumerable<string> stopTypes, bool useWhiteList)
    : base(version, input)
{
    typeAttribute = AddAttribute<ITypeAttribute>();
    // Copy into a set for O(1) type lookups regardless of the input collection.
    this.stopTypes = new HashSet<string>(stopTypes);
    this.useWhiteList = useWhiteList;
}
/// <summary>
/// Creates a <see cref="SnowballFilter"/> that stems tokens with the supplied
/// Snowball stemmer.
/// </summary>
/// <param name="input">the <see cref="TokenStream"/> to filter</param>
/// <param name="stemmer">the Snowball stemmer program to apply</param>
public SnowballFilter(TokenStream input, SnowballProgram stemmer)
    : base(input)
{
    this.stemmer = stemmer;
    this.termAtt = AddAttribute<ICharTermAttribute>();
    this.keywordAttr = AddAttribute<IKeywordAttribute>();
}
/// <summary>
/// Creates a CapitalizationFilter with the specified parameters. </summary>
/// <param name="in"> input tokenstream </param>
/// <param name="onlyFirstWord"> should each word be capitalized or all of the words? </param>
/// <param name="keep"> a keep word list. Each word that should be kept separated by whitespace. </param>
/// <param name="forceFirstLetter"> Force the first letter to be capitalized even if it is in the keep list. </param>
/// <param name="okPrefix"> do not change word capitalization if a word begins with something in this list. </param>
/// <param name="minWordLength"> how long the word needs to be to get capitalization applied. If the
/// minWordLength is 3, "and" > "And" but "or" stays "or". </param>
/// <param name="maxWordCount"> if the token contains more then maxWordCount words, the capitalization is
/// assumed to be correct. </param>
/// <param name="maxTokenLength"> maximum token length — TODO confirm exact semantics </param>
/// <exception cref="ArgumentOutOfRangeException">if minWordLength is negative, or
/// maxWordCount/maxTokenLength is less than one</exception>
public CapitalizationFilter(TokenStream @in, bool onlyFirstWord, CharArraySet keep, bool forceFirstLetter, ICollection<char[]> okPrefix, int minWordLength, int maxWordCount, int maxTokenLength)
    : base(@in)
{
    // LUCENENET: The guard clauses were copied here from the version of Lucene.
    // Apparently, the tests were not ported from 4.8.0 because they expected this and the
    // original tests did not. Adding them anyway because there is no downside to this.
    // Fix: the single-string ArgumentOutOfRangeException constructor treats its
    // argument as the *parameter name*, not the message — use the
    // (paramName, message) overload so the message is actually reported.
    if (minWordLength < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(minWordLength), "minWordLength must be greater than or equal to zero");
    }
    if (maxWordCount < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(maxWordCount), "maxWordCount must be greater than zero");
    }
    if (maxTokenLength < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(maxTokenLength), "maxTokenLength must be greater than zero");
    }
    this.onlyFirstWord = onlyFirstWord;
    this.keep = keep;
    this.forceFirstLetter = forceFirstLetter;
    this.okPrefix = okPrefix;
    this.minWordLength = minWordLength;
    this.maxWordCount = maxWordCount;
    this.maxTokenLength = maxTokenLength;
    termAtt = AddAttribute<ICharTermAttribute>();
}
/// <summary>
/// Parses the token stream into a single expression using the operator/operand
/// stacks, wrapping unexpected failures as <c>SyntaxError</c>s in release builds.
/// </summary>
public LaxExpression Parse(TokenStream reader)
{
    var start = reader.Next.CodeRange;
    try
    {
        ParseStream(reader);
        //Finish the stack
        while (opStack.Count > 0)
        {
            CompleteStack();
        }
        // NOTE(review): message says "operator" but the check counts operands —
        // confirm the intended wording before changing it.
        if (operandStack.Count != 1) throw new SyntaxError(start, "Expected operator");
    }
    catch (SyntaxError)
    {
        // Syntax errors propagate unchanged.
        throw;
    }
#if !DEBUG
    // In release builds, any other failure is surfaced as a SyntaxError at the
    // current position; in debug builds it propagates for easier diagnosis.
    catch (Exception ex)
    {
        throw new SyntaxError(reader.Current.CodeRange, ex);
    }
#endif
    // NOTE(review): this re-checks the same condition as inside the try block;
    // it only fires if the catch path swallowed the first throw — confirm.
    if (operandStack.Count != 1) throw new SyntaxError(start, "Expected only one operator left");
    return operandStack.Pop();
}
/// <summary>
/// Wraps <paramref name="input"/> in a <see cref="TrimFilter"/> configured with
/// this factory's match version and updateOffsets setting (obsolete overload,
/// hence the suppressed warnings).
/// </summary>
public override TokenStream Create(TokenStream input)
{
#pragma warning disable 612, 618
    return new TrimFilter(luceneMatchVersion, input, updateOffsets);
#pragma warning restore 612, 618
}
/// <summary>
/// Creates a <see cref="PayloadFilter"/>: records the field name and registers
/// the payload attribute.
/// </summary>
public PayloadFilter(TokenStream input, string fieldName)
    : base(input)
{
    this.FieldName = fieldName;
    PayloadAtt = AddAttribute<IPayloadAttribute>();
}
/// <summary>Wraps <paramref name="input"/> in a <see cref="HungarianLightStemFilter"/>.</summary>
public override TokenStream Create(TokenStream input)
{
    return new HungarianLightStemFilter(input);
}
/// <summary>
/// Creates a <see cref="KStemFilter"/>: registers the term and keyword attributes.
/// </summary>
/// <param name="in">the <see cref="TokenStream"/> to filter</param>
public KStemFilter(TokenStream @in)
    : base(@in)
{
    termAttribute = AddAttribute<ICharTermAttribute>();
    keywordAtt = AddAttribute<IKeywordAttribute>();
}
// Pass-through construction: no attributes are registered here; the wrapped
// input stream is simply forwarded to the base class.
internal SopTokenFilter(TokenStream input)
    : base(input)
{
}
/// <summary>
/// Creates a <see cref="HindiNormalizationFilter"/>: registers the term and
/// keyword attributes.
/// </summary>
public HindiNormalizationFilter(TokenStream input)
    : base(input)
{
    termAtt = AddAttribute<ICharTermAttribute>();
    keywordAtt = AddAttribute<IKeywordAttribute>();
}
/// <summary>
/// Create a new <see cref="TrimFilter"/> on top of <paramref name="in"/>.
/// Delegates to the three-argument constructor with the third argument false
/// (that overload is obsolete, hence the suppressed 612/618 warnings).
/// </summary>
public TrimFilter(LuceneVersion version, TokenStream @in)
#pragma warning disable 612, 618
    : this(version, @in, false)
#pragma warning restore 612, 618
{
}
/// <summary>
/// Creates a <see cref="PayloadSetter"/>: initializes instance fields and
/// registers the payload attribute.
/// </summary>
public PayloadSetter(TokenStream input)
    : base(input)
{
    InitializeInstanceFields();
    payloadAtt = AddAttribute<IPayloadAttribute>();
}
// ANTLR-generated left-recursive rule parser for 'leftRecExpr' (rule index 4).
// Machine-generated code — do not hand-edit; regenerate from the grammar.
// The six switch alternatives implement precedence climbing via Precpred:
// 1: ^ (right-assoc, prec 6), 2: * / % (prec 5), 3: + - (prec 4),
// 4: == != (prec 3), 5: && (prec 2), 6: || (prec 1).
private LeftRecExprContext leftRecExpr(int _p) { ParserRuleContext _parentctx = Context; int _parentState = State; LeftRecExprContext _localctx = new LeftRecExprContext(Context, _parentState); LeftRecExprContext _prevctx = _localctx; int _startState = 4; EnterRecursionRule(_localctx, 4, RULE_leftRecExpr, _p); int _la; try { int _alt; EnterOuterAlt(_localctx, 1); { { State = 27; atom(); } Context.Stop = TokenStream.LT(-1); State = 49; ErrorHandler.Sync(this); _alt = Interpreter.AdaptivePredict(TokenStream, 1, Context); while (_alt != 2 && _alt != AntlrStandard::Antlr4.Runtime.Atn.ATN.INVALID_ALT_NUMBER) { if (_alt == 1) { if (ParseListeners != null) { TriggerExitRuleEvent(); } _prevctx = _localctx; { State = 47; ErrorHandler.Sync(this); switch (Interpreter.AdaptivePredict(TokenStream, 0, Context)) { case 1: { _localctx = new LeftRecExprContext(_parentctx, _parentState); PushNewRecursionContext(_localctx, _startState, RULE_leftRecExpr); State = 29; if (!(Precpred(Context, 6))) { throw new FailedPredicateException(this, "Precpred(Context, 6)"); } State = 30; Match(CARET); State = 31; leftRecExpr(6); } break; case 2: { _localctx = new LeftRecExprContext(_parentctx, _parentState); PushNewRecursionContext(_localctx, _startState, RULE_leftRecExpr); State = 32; if (!(Precpred(Context, 5))) { throw new FailedPredicateException(this, "Precpred(Context, 5)"); } State = 33; _la = TokenStream.LA(1); if (!((((_la) & ~0x3f) == 0 && ((1L << _la) & ((1L << STAR) | (1L << SLASH) | (1L << PERCENT))) != 0))) { ErrorHandler.RecoverInline(this); } else { ErrorHandler.ReportMatch(this); Consume(); } State = 34; leftRecExpr(6); } break; case 3: { _localctx = new LeftRecExprContext(_parentctx, _parentState); PushNewRecursionContext(_localctx, _startState, RULE_leftRecExpr); State = 35; if (!(Precpred(Context, 4))) { throw new FailedPredicateException(this, "Precpred(Context, 4)"); } State = 36; _la = TokenStream.LA(1); if (!(_la == PLUS || _la == MINUS)) { ErrorHandler.RecoverInline(this);
} else { ErrorHandler.ReportMatch(this); Consume(); } State = 37; leftRecExpr(5); } break; case 4: { _localctx = new LeftRecExprContext(_parentctx, _parentState); PushNewRecursionContext(_localctx, _startState, RULE_leftRecExpr); State = 38; if (!(Precpred(Context, 3))) { throw new FailedPredicateException(this, "Precpred(Context, 3)"); } State = 39; _la = TokenStream.LA(1); if (!(_la == EQUAL || _la == NOT_EQUAL)) { ErrorHandler.RecoverInline(this); } else { ErrorHandler.ReportMatch(this); Consume(); } State = 40; leftRecExpr(4); } break; case 5: { _localctx = new LeftRecExprContext(_parentctx, _parentState); PushNewRecursionContext(_localctx, _startState, RULE_leftRecExpr); State = 41; if (!(Precpred(Context, 2))) { throw new FailedPredicateException(this, "Precpred(Context, 2)"); } State = 42; Match(LOGICAL_AND); State = 43; leftRecExpr(3); } break; case 6: { _localctx = new LeftRecExprContext(_parentctx, _parentState); PushNewRecursionContext(_localctx, _startState, RULE_leftRecExpr); State = 44; if (!(Precpred(Context, 1))) { throw new FailedPredicateException(this, "Precpred(Context, 1)"); } State = 45; Match(LOGICAL_OR); State = 46; leftRecExpr(2); } break; } } } State = 51; ErrorHandler.Sync(this); _alt = Interpreter.AdaptivePredict(TokenStream, 1, Context); } } } catch (RecognitionException re) { _localctx.exception = re; ErrorHandler.ReportError(this, re); ErrorHandler.Recover(this, re); } finally { UnrollRecursionContexts(_parentctx); } return(_localctx); }
/// <summary>
/// Chains three streams so tokens are produced as prefix, then input, then suffix.
/// </summary>
public PrefixAndSuffixAwareTokenFilter(TokenStream prefix, TokenStream input, TokenStream suffix)
    : base(suffix)
{
    // NOTE: the 'prefix' *parameter* is deliberately reassigned — it becomes a
    // filter producing prefix-then-input, which is then used as the prefix side
    // of the suffix-aware filter below. Only 'suffix' is stored on the instance.
    prefix = new PrefixAwareTokenFilterAnonymousInnerClassHelper(this, prefix, input);
    this.suffix = new PrefixAwareTokenFilterAnonymousInnerClassHelper2(this, prefix, suffix);
}
/// <summary>Wraps <paramref name="in"/> in a <see cref="BrazilianStemFilter"/>.</summary>
public override TokenStream Create(TokenStream @in)
{
    return new BrazilianStemFilter(@in);
}
/// <summary>
/// (non-Javadoc)
/// @see org.apache.lucene.xmlparser.QueryObjectBuilder#process(org.w3c.dom.Element)
/// Builds a <see cref="MoreLikeThisQuery"/> from the XML element: reads the
/// optional "fieldNames" and "stopWords" attributes (tokenizing the stop-word
/// text through each field's analyzer) plus the numeric tuning attributes.
/// </summary>
public virtual Query GetQuery(XmlElement e)
{
    string fieldsList = e.GetAttribute("fieldNames"); //a comma-delimited list of fields
    string[] fields = defaultFieldNames;
    if ((fieldsList != null) && (fieldsList.Trim().Length > 0))
    {
        // NOTE(review): TrimEnd() here is an array-level helper (presumably
        // dropping trailing empty entries) — confirm; per-element trimming
        // happens in the loop below.
        fields = fieldsList.Trim().Split(',').TrimEnd();
        //trim the fieldnames
        for (int i = 0; i < fields.Length; i++)
        {
            fields[i] = fields[i].Trim();
        }
    }
    //Parse any "stopWords" attribute
    //TODO MoreLikeThis needs to ideally have per-field stopWords lists - until then
    //I use all analyzers/fields to generate multi-field compatible stop list
    string stopWords = e.GetAttribute("stopWords");
    ISet<string> stopWordsSet = null;
    if ((stopWords != null) && (fields != null))
    {
        stopWordsSet = new JCG.HashSet<string>();
        foreach (string field in fields)
        {
            TokenStream ts = null;
            try
            {
                // Run the stop-word text through this field's analyzer and
                // collect each produced term.
                ts = analyzer.GetTokenStream(field, stopWords);
                ICharTermAttribute termAtt = ts.AddAttribute<ICharTermAttribute>();
                ts.Reset();
                while (ts.IncrementToken())
                {
                    stopWordsSet.Add(termAtt.ToString());
                }
                ts.End();
            }
            catch (IOException ioe)
            {
                // NOTE(review): only the message is preserved here; the inner
                // exception is dropped — confirm whether it should be wrapped.
                throw new ParserException("IoException parsing stop words list in " + GetType().Name + ":" + ioe.Message);
            }
            finally
            {
                IOUtils.DisposeWhileHandlingException(ts);
            }
        }
    }
    MoreLikeThisQuery mlt = new MoreLikeThisQuery(DOMUtils.GetText(e), fields, analyzer, fields[0]);
    mlt.MaxQueryTerms = DOMUtils.GetAttribute(e, "maxQueryTerms", DEFAULT_MAX_QUERY_TERMS);
    mlt.MinTermFrequency = DOMUtils.GetAttribute(e, "minTermFrequency", DEFAULT_MIN_TERM_FREQUENCY);
    mlt.PercentTermsToMatch = DOMUtils.GetAttribute(e, "percentTermsToMatch", DEFAULT_PERCENT_TERMS_TO_MATCH) / 100;
    mlt.StopWords = stopWordsSet;
    int minDocFreq = DOMUtils.GetAttribute(e, "minDocFreq", -1);
    if (minDocFreq >= 0)
    {
        mlt.MinDocFreq = minDocFreq;
    }
    mlt.Boost = DOMUtils.GetAttribute(e, "boost", 1.0f);
    return (mlt);
}
/// <summary>
/// Builds the suffix-side prefix-aware filter and records the owning
/// <see cref="PrefixAndSuffixAwareTokenFilter"/> instance.
/// </summary>
public PrefixAwareTokenFilterAnonymousInnerClassHelper2(PrefixAndSuffixAwareTokenFilter outerInstance, TokenStream prefix, TokenStream suffix)
    : base(prefix, suffix)
{
    this.outerInstance = outerInstance;
}
// Forwards the lexer and lookahead depth (k_) to the base parser; adds no state.
public ParseTreeDebugParser(TokenStream lexer, int k_)
    : base(lexer, k_)
{
}
/// <summary>
/// Instantiates a new <see cref="TeeSinkTokenFilter"/> wrapping
/// <paramref name="input"/>; no additional state is initialized here.
/// </summary>
public TeeSinkTokenFilter(TokenStream input)
    : base(input)
{
}
/// <summary>
/// Creates a <see cref="SpanishLightStemFilter"/>: registers the term and
/// keyword attributes.
/// </summary>
public SpanishLightStemFilter(TokenStream input)
    : base(input)
{
    termAtt = AddAttribute<ICharTermAttribute>();
    keywordAttr = AddAttribute<IKeywordAttribute>();
}
/// <summary>
/// Creates a <see cref="MockRetainAttributeFilter"/>: registers the mock
/// retain attribute on the stream.
/// </summary>
internal MockRetainAttributeFilter(TokenStream input)
    : base(input)
{
    retainAtt = AddAttribute<IMockRetainAttribute>();
}
/// <summary>Wraps <paramref name="in"/> in a <see cref="ChineseFilter"/>.</summary>
public override TokenStream Create(TokenStream @in)
{
    return new ChineseFilter(@in);
}
/// <summary>
/// Build a filter that only accepts tokens up to and including the given maximum position.
/// This filter will not consume any tokens with position greater than the maxTokenPosition limit.
/// Delegates to the three-argument overload with the third argument false.
/// </summary>
/// <param name="in"> the stream to wrap </param>
/// <param name="maxTokenPosition"> max position of tokens to produce (1st token always has position 1)
/// </param>
/// <seealso cref= #LimitTokenPositionFilter(TokenStream,int,boolean) </seealso>
public LimitTokenPositionFilter(TokenStream @in, int maxTokenPosition)
    : this(@in, maxTokenPosition, false)
{
    // NOTE(review): the delegated constructor may already register this
    // attribute, making this a harmless re-assignment — confirm.
    posIncAtt = AddAttribute<IPositionIncrementAttribute>();
}
/// <summary>
/// Creates an <see cref="T:IDictionary{string, WeightedSpanTerm}"/> from the given
/// <see cref="Query"/> and <see cref="Analysis.TokenStream"/>, with no field-name restriction.
/// </summary>
/// <param name="query"><see cref="Query"/> that caused hit</param>
/// <param name="tokenStream"><see cref="Analysis.TokenStream"/> of text to be highlighted</param>
/// <returns>Map containing <see cref="WeightedSpanTerm"/>s</returns>
/// <exception cref="IOException">If there is a low-level I/O error</exception>
public virtual IDictionary<string, WeightedSpanTerm> GetWeightedSpanTerms(Query query, TokenStream tokenStream)
{
    return GetWeightedSpanTerms(query, tokenStream, null);
}
/// <summary>
/// Creates an <see cref="ArabicStemFilter"/>: instantiates the stemmer and
/// registers the term attribute (legacy typeof-based registration).
/// </summary>
public ArabicStemFilter(TokenStream input)
    : base(input)
{
    stemmer = new ArabicStemmer();
    termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
}
/// <summary>Wraps <paramref name="input"/> in a <see cref="NorwegianMinimalStemFilter"/> using this factory's flags.</summary>
public override TokenStream Create(TokenStream input)
{
    return new NorwegianMinimalStemFilter(input, flags);
}
/// <summary>
/// Creates an <see cref="T:IDictionary{string, WeightedSpanTerm}"/> from the given
/// <see cref="Query"/> and <see cref="Analysis.TokenStream"/>.
/// </summary>
/// <param name="query"><see cref="Query"/> that caused hit</param>
/// <param name="tokenStream"><see cref="Analysis.TokenStream"/> of text to be highlighted</param>
/// <param name="fieldName">restricts Term's used based on field name</param>
/// <returns>Map containing <see cref="WeightedSpanTerm"/>s</returns>
/// <exception cref="IOException">If there is a low-level I/O error</exception>
public virtual IDictionary<string, WeightedSpanTerm> GetWeightedSpanTerms(Query query, TokenStream tokenStream, string fieldName)
{
    // A null field name disables field restriction; otherwise intern for
    // cheap reference comparisons during extraction.
    this.fieldName = fieldName == null ? null : fieldName.Intern();
    this.tokenStream = tokenStream;

    IDictionary<string, WeightedSpanTerm> terms = new PositionCheckingMap<string>();
    try
    {
        Extract(query, terms);
    }
    finally
    {
        // Always release the internal reader, even if extraction throws.
        IOUtils.Dispose(internalReader);
    }
    return terms;
}
/// <summary>
/// Wraps the incoming <seealso cref="TokenStream"/> in a
/// <seealso cref="LimitTokenCountFilter"/> using this factory's token-count limit
/// and consume-all-tokens setting.
/// </summary>
public override TokenStream Create(TokenStream input)
    => new LimitTokenCountFilter(input, maxTokenCount, consumeAllTokens);
/// <summary>
/// Wraps the incoming <seealso cref="TokenStream"/> in a
/// <seealso cref="FinnishLightStemFilter"/>.
/// </summary>
public override TokenStream Create(TokenStream input)
    => new FinnishLightStemFilter(input);
/// <summary>
/// Creates a CapitalizationFilter with the default parameters.
/// <para>
/// Calls {@link #CapitalizationFilter(TokenStream, boolean, CharArraySet, boolean, Collection, int, int, int)
/// CapitalizationFilter(in, true, null, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH)}
/// </para>
/// </summary>
public CapitalizationFilter(TokenStream @in)
    : this(@in, true, null, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH)
{
    // NOTE(review): the delegated full constructor presumably registers this
    // attribute already; AddAttribute conventionally returns the existing instance,
    // so this re-fetch looks redundant but harmless — confirm against that ctor.
    termAtt = AddAttribute<ICharTermAttribute>();
}
/// <summary>
/// Creates a rewrite engine over <paramref name="upstream"/> using a default
/// capacity of 1000.
/// </summary>
/// <param name="upstream">the token stream whose tokens will be rewritten</param>
public TokenStreamRewriteEngine(TokenStream upstream)
    // 1000 is presumably an initial buffer/program capacity — see the two-arg
    // constructor for its exact meaning (TODO confirm).
    : this(upstream, 1000)
{
}
// ANTLR-generated left-recursive rule method for `mainExpr` — machine-generated
// code; do not hand-edit, regenerate from the grammar instead.
//
// Alternatives dispatched by adaptive prediction (as visible below):
//   1: T__0 mainExpr T__1                 -> ParenthesisExpContext   (T__0/T__1 are presumably '(' and ')' — confirm in the grammar)
//   2: PROPERTY OPERATOR NUMBER           -> CompareNumberExpContext
//   3: PROPERTY OPERATOR ESCAPEDSTRING    -> CompareStringExpContext
// Then a loop folds in left-recursive binary operators while prediction says so:
//   mainExpr AND mainExpr (precedence 4)  -> AndExpContext
//   mainExpr OR  mainExpr (precedence 3)  -> OrExpContext
// RecognitionException is reported and recovered via ErrorHandler; recursion
// contexts are always unrolled in the finally block.
private MainExprContext mainExpr(int _p) { ParserRuleContext _parentctx = Context; int _parentState = State; MainExprContext _localctx = new MainExprContext(Context, _parentState); MainExprContext _prevctx = _localctx; int _startState = 2; EnterRecursionRule(_localctx, 2, RULE_mainExpr, _p); try { int _alt; EnterOuterAlt(_localctx, 1); { State = 21; ErrorHandler.Sync(this); switch (Interpreter.AdaptivePredict(TokenStream, 1, Context)) { case 1: { _localctx = new ParenthesisExpContext(_localctx); Context = _localctx; _prevctx = _localctx; State = 11; Match(T__0); State = 12; mainExpr(0); State = 13; Match(T__1); } break; case 2: { _localctx = new CompareNumberExpContext(_localctx); Context = _localctx; _prevctx = _localctx; State = 15; Match(PROPERTY); State = 16; Match(OPERATOR); State = 17; Match(NUMBER); } break; case 3: { _localctx = new CompareStringExpContext(_localctx); Context = _localctx; _prevctx = _localctx; State = 18; Match(PROPERTY); State = 19; Match(OPERATOR); State = 20; Match(ESCAPEDSTRING); } break; } Context.Stop = TokenStream.LT(-1); State = 31; ErrorHandler.Sync(this); _alt = Interpreter.AdaptivePredict(TokenStream, 3, Context); while (_alt != 2 && _alt != global::Antlr4.Runtime.Atn.ATN.INVALID_ALT_NUMBER) { if (_alt == 1) { if (ParseListeners != null) { TriggerExitRuleEvent(); } _prevctx = _localctx; { State = 29; ErrorHandler.Sync(this); switch (Interpreter.AdaptivePredict(TokenStream, 2, Context)) { case 1: { _localctx = new AndExpContext(new MainExprContext(_parentctx, _parentState)); PushNewRecursionContext(_localctx, _startState, RULE_mainExpr); State = 23; if (!(Precpred(Context, 4))) { throw new FailedPredicateException(this, "Precpred(Context, 4)"); } State = 24; Match(AND); State = 25; mainExpr(5); } break; case 2: { _localctx = new OrExpContext(new MainExprContext(_parentctx, _parentState)); PushNewRecursionContext(_localctx, _startState, RULE_mainExpr); State = 26; if (!(Precpred(Context, 3))) { throw new 
FailedPredicateException(this, "Precpred(Context, 3)"); } State = 27; Match(OR); State = 28; mainExpr(4); } break; } } } State = 33; ErrorHandler.Sync(this); _alt = Interpreter.AdaptivePredict(TokenStream, 3, Context); } } } catch (RecognitionException re) { _localctx.exception = re; ErrorHandler.ReportError(this, re); ErrorHandler.Recover(this, re); } finally { UnrollRecursionContexts(_parentctx); } return(_localctx); }
/// <summary>
/// Creates an <seealso cref="EnglishMinimalStemFilter"/> over the given stream.
/// </summary>
/// <param name="input">the <seealso cref="TokenStream"/> to filter</param>
public EnglishMinimalStemFilter(TokenStream input)
    : base(input)
{
    // Term text attribute — presumably stemmed in place by IncrementToken (confirm there).
    termAtt = AddAttribute<ICharTermAttribute>();
    // Keyword attribute — keyword-marked tokens are conventionally left unstemmed
    // by Lucene stem filters; confirm in this filter's IncrementToken.
    keywordAttr = AddAttribute<IKeywordAttribute>();
}
// ANTLR ATN interpreter step: executes the single outgoing transition of ATN
// state `p` (choosing among multiple transitions via adaptive prediction), then
// advances State to the transition's target.
//
// Transition handling, as visible below:
//   Epsilon            — may push a new recursion context at left-recursive rule entry points.
//   Atom               — match the transition's single token.
//   Range/Set/NotSet   — if the lookahead token does not match, attempt inline
//                        recovery; then consume via MatchWildcard.
//   Wildcard           — consume any token.
//   Rule               — enter the target rule (recursion-aware for precedence rules).
//   Predicate          — evaluate Sempred; throw FailedPredicateException on failure.
//   Action             — run the embedded action.
//   Precedence         — evaluate Precpred; throw FailedPredicateException on failure.
// Any other transition type is a programming error (NotSupportedException).
protected internal virtual void VisitState(ATNState p) { int edge; if (p.NumberOfTransitions > 1) { ErrorHandler.Sync(this); edge = Interpreter.AdaptivePredict(TokenStream, ((DecisionState)p).decision, RuleContext); } else { edge = 1; } Transition transition = p.Transition(edge - 1); switch (transition.TransitionType) { case TransitionType.Epsilon: { if (pushRecursionContextStates.Get(p.stateNumber) && !(transition.target is LoopEndState)) { InterpreterRuleContext ctx = new InterpreterRuleContext(_parentContextStack.Peek().Item1, _parentContextStack.Peek().Item2, RuleContext.RuleIndex); PushNewRecursionContext(ctx, _atn.ruleToStartState[p.ruleIndex].stateNumber, RuleContext.RuleIndex); } break; } case TransitionType.Atom: { Match(((AtomTransition)transition).token); break; } case TransitionType.Range: case TransitionType.Set: case TransitionType.NotSet: { if (!transition.Matches(TokenStream.La(1), TokenConstants.MinUserTokenType, 65535)) { ErrorHandler.RecoverInline(this); } MatchWildcard(); break; } case TransitionType.Wildcard: { MatchWildcard(); break; } case TransitionType.Rule: { RuleStartState ruleStartState = (RuleStartState)transition.target; int ruleIndex = ruleStartState.ruleIndex; InterpreterRuleContext ctx_1 = new InterpreterRuleContext(RuleContext, p.stateNumber, ruleIndex); if (ruleStartState.isPrecedenceRule) { EnterRecursionRule(ctx_1, ruleStartState.stateNumber, ruleIndex, ((RuleTransition)transition).precedence); } else { EnterRule(ctx_1, transition.target.stateNumber, ruleIndex); } break; } case TransitionType.Predicate: { PredicateTransition predicateTransition = (PredicateTransition)transition; if (!Sempred(RuleContext, predicateTransition.ruleIndex, predicateTransition.predIndex)) { throw new FailedPredicateException(this); } break; } case TransitionType.Action: { ActionTransition actionTransition = (ActionTransition)transition; Action(RuleContext, actionTransition.ruleIndex, actionTransition.actionIndex); break; } case 
TransitionType.Precedence: { if (!Precpred(RuleContext, ((PrecedencePredicateTransition)transition).precedence)) { throw new FailedPredicateException(this, string.Format("precpred(_ctx, {0})", ((PrecedencePredicateTransition)transition).precedence)); } break; } default: { throw new NotSupportedException("Unrecognized ATN transition type."); } } State = transition.target.stateNumber; }
/// <summary>
/// Wraps the incoming <seealso cref="TokenStream"/> in a
/// <seealso cref="PatternCaptureGroupTokenFilter"/> using this factory's pattern
/// and preserve-original setting.
/// </summary>
public override TokenStream Create(TokenStream input)
    => new PatternCaptureGroupTokenFilter(input, preserveOriginal, pattern);