public Token(string type, string value, TokenPosition position, TokenAttributes attributes)
{
    Type = type;
    Value = value;
    Position = position;
    Attributes = attributes;
}
public void TokenizerNormalizesVersionInput(string text, TokenAttributes expected)
{
    // arrange, act
    var actual = new VersionAnalyzer().Tokenize(text);

    // assert
    Assert.Equal(new[] { expected }, actual);
}
public void TokenizerShinglesAndLowercasesInput(string text, TokenAttributes[] expected)
{
    // arrange, act
    var actual = new ShingledIdentifierAnalyzer().Tokenize(text);

    // assert
    Assert.Equal(expected, actual);
}
public void TokenizerLowercasesAndSplitsInput(string text, TokenAttributes[] expected)
{
    // arrange, act
    var actual = new TagsAnalyzer().Tokenize(text);

    // assert
    Assert.Equal(expected, actual);
}
public void TokenizerLowercasesCamelCasesAndRemovesStopWordsInput(string text, TokenAttributes[] expected)
{
    // arrange, act
    var actual = new DescriptionAnalyzer().Tokenize(text);

    // assert
    Assert.Equal(expected, actual);
}
public void TokenizerLowercasesNGramsAndCamelCasesInput(string text, TokenAttributes[] expected)
{
    // arrange, act
    var actual = new IdentifierAutocompleteAnalyzer().Tokenize(text);

    // assert
    Assert.Equal(expected, actual);
}
public void TokenizerOnlyLowercasesOwnerInput(string text, TokenAttributes expected)
{
    // arrange, act
    var actual = new OwnerAnalyzer().Tokenize(text);

    // assert
    Assert.Equal(new[] { expected }, actual);
}
public List<TokenDefinition> Parse()
{
    var definitions = new List<TokenDefinition>();
    if (!Valid)
        return definitions;

    using (var reader = new StreamReader(Filename))
    {
        while (!reader.EndOfStream)
        {
            string line = reader.ReadLine();
            if (string.IsNullOrEmpty(line))
                continue;

            // Skip annotation/comment lines.
            if (line.StartsWith("//"))
                continue;

            TokenDefinition token;
            if (line.Contains(' '))
            {
                // Format: <name> <regex> [attribute ...]; an attribute containing
                // "ignore" marks the definition as ignored, anything else is stored
                // on the definition's attributes.
                var value = line.Split(' ');
                bool ignored = false;
                var attributes = new TokenAttributes();
                if (value.Length > 2)
                {
                    for (var i = 2; i < value.Length; i++)
                    {
                        if (value[i].ToLower().Contains("ignore"))
                            ignored = true;
                        else if (!string.IsNullOrEmpty(value[i]))
                            attributes.Add(value[i]);
                    }
                }

                token = new TokenDefinition(value[0], new Regex(value[1]), ignored);
                token.Attributes = attributes;
            }
            else
            {
                // A bare line doubles as both the definition name and its pattern.
                token = new TokenDefinition(line, new Regex(line));
            }

            definitions.Add(token);
        }
    }

    return definitions;
}
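// A minimal usage sketch (not from the source): the definition-file grammar below is
// inferred from Parse above; the containing type name "TokenDefinitionFile" and the file
// name "tokens.def" are assumptions for illustration only.
//
// tokens.def:
//   // lines starting with "//" are skipped
//   NUMBER \d+
//   WORD [A-Za-z_][A-Za-z0-9_]* identifier
//   WHITESPACE \s+ ignore
//
// var file = new TokenDefinitionFile("tokens.def");
// List<TokenDefinition> definitions = file.Parse();
// // NUMBER and WORD become regular definitions; WHITESPACE is flagged as ignored,
// // and "identifier" ends up in WORD's TokenAttributes.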
public void AddsCorrectFieldAnalyzers(string field, string text, TokenAttributes[] expected)
{
    // arrange
    var analyzer = new PackageAnalyzer();

    // act
    var tokenStream = analyzer.TokenStream(field, new StringReader(text));
    var actual = tokenStream.Tokenize().ToArray();

    // assert
    Assert.Equal(expected, actual);
}
public void TokenizingReturnsExpectedTerms(string text, TokenAttributes[] expected)
{
    // Arrange
    var tokenStream = new StandardTokenizer(Version.LUCENE_30, new StringReader(text));
    var filter = new ExpandAcronymsFilter(tokenStream, NuGetAcronymExpansionProvider.Instance);

    // Act
    var actual = filter.Tokenize().ToArray();

    // Assert
    Assert.Equal(expected, actual);
}
public static Token Parse(string line)
{
    var pieces = parsePieces(line);

    // A serialized token needs at least a type, a value, and three position fields.
    if (pieces.Count <= 4)
        return null;

    try
    {
        var position = new TokenPosition(int.Parse(pieces[2]), int.Parse(pieces[3]), int.Parse(pieces[4]));

        // Any remaining pieces are treated as attributes.
        var attributes = new TokenAttributes();
        for (int i = 5; i < pieces.Count; i++)
            attributes.Add(pieces[i]);

        return new Token(pieces[0], pieces[1], position, attributes);
    }
    catch (Exception)
    {
        return null;
    }
}
public static IEnumerable<TokenAttributes> Tokenize(this TokenStream tokenStream)
{
    var term = tokenStream.GetAttribute<ITermAttribute>();
    var offset = tokenStream.GetAttribute<IOffsetAttribute>();

    IPositionIncrementAttribute positionIncrement = null;
    if (tokenStream.HasAttribute<IPositionIncrementAttribute>())
    {
        positionIncrement = tokenStream.GetAttribute<IPositionIncrementAttribute>();
    }

    while (tokenStream.IncrementToken())
    {
        var tokenAttributes = new TokenAttributes(term.Term, offset.StartOffset, offset.EndOffset);
        if (positionIncrement != null)
        {
            tokenAttributes.PositionIncrement = positionIncrement.PositionIncrement;
        }

        yield return tokenAttributes;
    }
}
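// A minimal usage sketch (assumed, not part of the source): drains a Lucene.Net 3.0
// analysis chain through the Tokenize extension above and prints each term with its
// offsets and position increment. StandardTokenizer and LowerCaseFilter are standard
// Lucene.Net types; TokenAttributes comes from the code in this section, and this
// helper method itself is hypothetical.
public static void PrintTokens(string text)
{
    TokenStream stream = new LowerCaseFilter(new StandardTokenizer(Version.LUCENE_30, new StringReader(text)));
    foreach (var token in stream.Tokenize())
    {
        Console.WriteLine("{0} [{1}, {2}] +{3}", token.TermBuffer, token.StartOffset, token.EndOffset, token.PositionIncrement);
    }
}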
private void SetAttributes(TokenAttributes next)
{
    _termAttribute.SetTermBuffer(next.TermBuffer);
    _offsetAttribute.SetOffset(next.StartOffset, next.EndOffset);
    _positionIncrementAttribute.PositionIncrement = next.PositionIncrement;
}
public void TokenizingReturnsExpectedTermAndOffsets(string text, TokenAttributes[] expected)
{
    // arrange
    var tokenStream = new StandardTokenizer(Version.LUCENE_30, new StringReader(text));
    var filter = new CamelCaseFilter(tokenStream);

    // act
    var actual = filter.Tokenize().ToArray();

    // assert
    Assert.Equal(expected, actual);
}