public async Task<int> Count(StandardFilter filter)
{
    // Tweak the count query to count rows rather than query actual fields.
    // The in-memory filter makes this query more resource intensive, so for
    // the Active and ActiveAvailable filters we apply the normal filter first,
    // then select distinct LARS codes so that only the latest versions are counted.
    int count;
    var standards = _coursesDataContext.Standards.FilterStandards(filter);

    switch (filter)
    {
        case StandardFilter.Active:
        case StandardFilter.ActiveAvailable:
            count = await standards.Select(c => c.LarsCode).Distinct().CountAsync();
            break;
        default:
            count = await standards.Select(c => c.StandardUId).CountAsync();
            break;
    }

    return count;
}
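A minimal usage sketch (the repository instance name is hypothetical): every version of a standard shares a LarsCode but has its own StandardUId, so the Distinct() branch counts each standard once while the default branch counts every version.

    // Hypothetical caller of the Count method above.
    int activeStandards = await standardRepository.Count(StandardFilter.Active); // one per LarsCode
    int allVersions = await standardRepository.Count(StandardFilter.None);       // one per StandardUId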
public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
{
    // create the tokenizer
    TokenStream result = new StandardTokenizer(CURRENT_VERSION, reader);

    // add in the filters: first normalize the StandardTokenizer output
    result = new StandardFilter(result);

    // make sure everything is lower case
    result = new LowerCaseFilter(result);
    result = new ASCIIFoldingFilter(result);

    // use the default list of stop words, provided by the StopAnalyzer class
    result = new StopFilter(true, result, StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    // inject the synonyms
    result = new SynonymFilter(result, SynonymEngine);

    // return the built token stream
    return result;
}
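A sketch of consuming this analyzer (assuming Lucene.Net 3.x, with ITermAttribute from Lucene.Net.Analysis.Tokenattributes; the analyzer variable stands for whatever instance hosts the override above):

    TokenStream stream = analyzer.TokenStream("content", new StringReader("Résumé writing for engineers"));
    ITermAttribute term = stream.GetAttribute<ITermAttribute>();
    while (stream.IncrementToken())
    {
        // ASCII folding turns "Résumé" into "resume"; "for" is a stop word;
        // any synonyms from SynonymEngine appear as extra tokens.
        Console.WriteLine(term.Term);
    }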
/// <summary>
/// Constructs a <see cref="StandardTokenizer"/> filtered by a
/// <see cref="StandardFilter"/>, a <see cref="LowerCaseFilter"/>, a <see cref="StopFilter"/>,
/// and a <see cref="SnowballFilter"/>
/// </summary>
protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, tokenizer);

    // remove the possessive 's for English stemmers
    if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_31) &&
        (name.Equals("English", StringComparison.Ordinal) ||
         name.Equals("Porter", StringComparison.Ordinal) ||
         name.Equals("Lovins", StringComparison.Ordinal)))
    {
        result = new EnglishPossessiveFilter(result);
    }

    // use a special lowercase filter for Turkish; the stemmer expects it
    if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_31) && name.Equals("Turkish", StringComparison.Ordinal))
    {
        result = new TurkishLowerCaseFilter(result);
    }
    else
    {
        result = new LowerCaseFilter(matchVersion, result);
    }

    if (stopSet != null)
    {
        result = new StopFilter(matchVersion, result, stopSet);
    }

    result = new SnowballFilter(result, name);
    return new TokenStreamComponents(tokenizer, result);
}
/// <summary>
/// Creates
/// <see cref="TokenStreamComponents"/>
/// used to tokenize all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> <see cref="TokenStreamComponents"/>
/// built from a <see cref="StandardTokenizer"/> filtered with
/// <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="ThaiWordFilter"/>, and
/// <see cref="StopFilter"/> </returns>
protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_48))
    {
        Tokenizer source = new ThaiTokenizer(reader);
        TokenStream result = new LowerCaseFilter(matchVersion, source);
        result = new StopFilter(matchVersion, result, stopwords);
        return new TokenStreamComponents(source, result);
    }
    else
    {
        Tokenizer source = new StandardTokenizer(matchVersion, reader);
        TokenStream result = new StandardFilter(matchVersion, source);
        if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
        {
            result = new LowerCaseFilter(matchVersion, result);
        }
        result = new ThaiWordFilter(matchVersion, result);
        return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
    }
}
/// <summary>
/// Creates a
/// <see cref="TokenStreamComponents"/>
/// which tokenizes all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> A
/// <see cref="TokenStreamComponents"/>
/// built from an <see cref="StandardTokenizer"/> filtered with
/// <see cref="StandardFilter"/>, <see cref="ElisionFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>,
/// <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
/// provided and <see cref="ItalianLightStemFilter"/>. </returns>
protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer source = new StandardTokenizer(m_matchVersion, reader);
    TokenStream result = new StandardFilter(m_matchVersion, source);
#pragma warning disable 612, 618
    if (m_matchVersion.OnOrAfter(LuceneVersion.LUCENE_32))
#pragma warning restore 612, 618
    {
        result = new ElisionFilter(result, DEFAULT_ARTICLES);
    }
    result = new LowerCaseFilter(m_matchVersion, result);
    result = new StopFilter(m_matchVersion, result, m_stopwords);
    if (stemExclusionSet.Count > 0)
    {
        result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    }
#pragma warning disable 612, 618
    if (m_matchVersion.OnOrAfter(LuceneVersion.LUCENE_36))
#pragma warning restore 612, 618
    {
        result = new ItalianLightStemFilter(result);
    }
    else
    {
        result = new SnowballFilter(result, new ItalianStemmer());
    }
    return new TokenStreamComponents(source, result);
}
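A quick look at what the Italian chain produces (a sketch assuming Lucene.Net 4.8 and that this method belongs to ItalianAnalyzer; the stems shown are indicative only):

    using (var analyzer = new ItalianAnalyzer(LuceneVersion.LUCENE_48))
    using (TokenStream ts = analyzer.GetTokenStream("body", new StringReader("L'arte dell'opera")))
    {
        ICharTermAttribute term = ts.AddAttribute<ICharTermAttribute>();
        ts.Reset();
        while (ts.IncrementToken())
        {
            // ElisionFilter strips "l'" and "dell'", then the light stemmer
            // trims the endings, giving roughly "art" and "oper".
            Console.WriteLine(term.ToString());
        }
        ts.End();
    }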
/// <summary>
/// Returns a (possibly reused) <seealso cref="TokenStream"/> which tokenizes all the
/// text in the provided <seealso cref="Reader"/>.
/// </summary>
/// <returns> A <seealso cref="TokenStream"/> built from a <seealso cref="StandardTokenizer"/>
/// filtered with <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>,
/// <seealso cref="StopFilter"/>, <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is provided,
/// <seealso cref="StemmerOverrideFilter"/>, and <seealso cref="SnowballFilter"/> </returns>
public override TokenStreamComponents CreateComponents(string fieldName, TextReader aReader)
{
    if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
    {
        Tokenizer source = new StandardTokenizer(matchVersion, aReader);
        TokenStream result = new StandardFilter(matchVersion, source);
        result = new LowerCaseFilter(matchVersion, result);
        result = new StopFilter(matchVersion, result, stoptable);
        if (excltable.Count > 0)
        {
            result = new SetKeywordMarkerFilter(result, excltable);
        }
        if (stemdict != null)
        {
            result = new StemmerOverrideFilter(result, stemdict);
        }
        result = new SnowballFilter(result, new Tartarus.Snowball.Ext.DutchStemmer());
        return new TokenStreamComponents(source, result);
    }
    else
    {
        Tokenizer source = new StandardTokenizer(matchVersion, aReader);
        TokenStream result = new StandardFilter(matchVersion, source);
        result = new StopFilter(matchVersion, result, stoptable);
        if (excltable.Count > 0)
        {
            result = new SetKeywordMarkerFilter(result, excltable);
        }
        result = new DutchStemFilter(result, origStemdict);
        return new TokenStreamComponents(source, result);
    }
}
public static string TextProcessing(string content)
{
    // StandardTokenizer: splits words at punctuation characters, removing punctuation. However,
    // a dot that's not followed by whitespace is considered part of a token.
    // Splits words at hyphens, unless there's a number in the token, in which case the whole token
    // is interpreted as a product number and is not split.
    // Recognizes email addresses and internet hostnames as one token.
    TokenStream tokenStream = new StandardTokenizer(Version.LUCENE_29, new StringReader(content));

    // StandardFilter: normalizes tokens extracted with StandardTokenizer.
    // Removes 's from the end of words and dots from acronyms.
    tokenStream = new StandardFilter(tokenStream);

    // LowerCaseFilter: normalizes token text to lower case.
    tokenStream = new LowerCaseFilter(tokenStream);

    // StopFilter: removes stop words from the token stream.
    tokenStream = new StopFilter(true, tokenStream, StopWordList.EnglishStopWordsSet);

    // PorterStemFilter: transforms the token stream as per the Porter stemming algorithm.
    tokenStream = new PorterStemFilter(tokenStream);

    return TokenToString(tokenStream);
}
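For example (output is indicative and depends on StopWordList.EnglishStopWordsSet):

    string processed = TextProcessing("The Quick Brown Foxes Jumped.");
    // "The" is dropped as a stop word, the rest is lower-cased and
    // Porter-stemmed: processed ≈ "quick brown fox jump"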
/// <summary>
/// Creates
/// <see cref="TokenStreamComponents"/>
/// used to tokenize all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> <see cref="TokenStreamComponents"/>
/// built from a <see cref="StandardTokenizer"/> filtered with
/// <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>,
/// <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
/// provided, and <see cref="SnowballFilter"/> </returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
#pragma warning disable 612, 618
    if (m_matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
#pragma warning restore 612, 618
    {
        Tokenizer source = new StandardTokenizer(m_matchVersion, reader);
        TokenStream result = new StandardFilter(m_matchVersion, source);
        result = new LowerCaseFilter(m_matchVersion, result);
        result = new StopFilter(m_matchVersion, result, m_stopwords);
        if (stemExclusionSet.Count > 0)
        {
            result = new SetKeywordMarkerFilter(result, stemExclusionSet);
        }
        result = new SnowballFilter(result, new Tartarus.Snowball.Ext.RussianStemmer());
        return new TokenStreamComponents(source, result);
    }
    else
    {
#pragma warning disable 612, 618
        Tokenizer source = new RussianLetterTokenizer(m_matchVersion, reader);
#pragma warning restore 612, 618
        TokenStream result = new LowerCaseFilter(m_matchVersion, source);
        result = new StopFilter(m_matchVersion, result, m_stopwords);
        if (stemExclusionSet.Count > 0)
        {
            result = new SetKeywordMarkerFilter(result, stemExclusionSet);
        }
        result = new SnowballFilter(result, new Tartarus.Snowball.Ext.RussianStemmer());
        return new TokenStreamComponents(source, result);
    }
}
public ActionResult GeneralDisplay(StandardFilter standardFilter)
{
    GeneralDisplayViewModel generalDisplayViewModel = CreateGeneralDisplayViewModel(standardFilter);

    // return view
    return View(generalDisplayViewModel);
}
/// <summary>
/// Creates
/// <see cref="TokenStreamComponents"/>
/// used to tokenize all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> <see cref="TokenStreamComponents"/>
/// built from a <see cref="StandardTokenizer"/> filtered with
/// <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>,
/// <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
/// provided, <see cref="GermanNormalizationFilter"/> and <see cref="GermanLightStemFilter"/> </returns>
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer source = new StandardTokenizer(m_matchVersion, reader);
    TokenStream result = new StandardFilter(m_matchVersion, source);
    result = new LowerCaseFilter(m_matchVersion, result);
    result = new StopFilter(m_matchVersion, result, m_stopwords);
    result = new SetKeywordMarkerFilter(result, exclusionSet);
#pragma warning disable 612, 618
    if (m_matchVersion.OnOrAfter(LuceneVersion.LUCENE_36))
    {
        result = new GermanNormalizationFilter(result);
        result = new GermanLightStemFilter(result);
    }
    else if (m_matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
#pragma warning restore 612, 618
    {
        result = new SnowballFilter(result, new German2Stemmer());
    }
    else
    {
        result = new GermanStemFilter(result);
    }
    return new TokenStreamComponents(source, result);
}
public async Task And_No_Keyword_And_Filtering_By_Active_Then_Gets_Standards_From_Repository(
    List<Standard> standardsFromRepo,
    OrderBy orderBy,
    StandardFilter filter,
    [Frozen] Mock<IStandardRepository> mockStandardsRepository,
    [Frozen] Mock<IStandardsSortOrderService> mockSortOrderService,
    StandardsService service)
{
    filter = StandardFilter.Active;
    mockStandardsRepository
        .Setup(repository => repository.GetStandards(new List<int>(), new List<int>(), filter))
        .ReturnsAsync(standardsFromRepo);
    mockSortOrderService
        .Setup(orderService => orderService.OrderBy(standardsFromRepo, It.IsAny<OrderBy>(), It.IsAny<string>()))
        .Returns(standardsFromRepo.OrderBy(standard => standard.SearchScore));

    var result = (await service.GetStandardsList("", new List<int>(), new List<int>(), orderBy, filter)).ToList();

    result.Should().BeEquivalentTo(standardsFromRepo, StandardEquivalencyAssertionOptions.ExcludingFields);
    foreach (var standard in result)
    {
        standard.Route.Should().Be(standardsFromRepo.Single(c => c.LarsCode.Equals(standard.LarsCode)).Route.Name);
    }
}
/// <summary>
/// Tokenizes a field for use in an autocomplete search. Ref DOH-893.
/// Inspiration taken from:
/// https://github.com/Sitecore/autohaus/blob/master/Autohaus.Custom/Indexing/Analyzers/NGramAnalyzer.cs
/// http://stackoverflow.com/a/9183416
/// </summary>
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    // This should be a good tokenizer for most European-language documents:
    // splits words at punctuation characters, removing punctuation;
    // splits words at hyphens, unless there's a number in the token;
    // recognizes email addresses and internet hostnames as one token.
    TokenStream tokenStream = new StandardTokenizer(this.version, reader);

    // apply a set of standard filters
    tokenStream = new StandardFilter(tokenStream);
    tokenStream = new LowerCaseFilter(tokenStream);

    // This class converts alphabetic, numeric, and symbolic Unicode characters
    // which are not in the first 127 ASCII characters (the "Basic Latin" Unicode
    // block) into their ASCII equivalents, if one exists.
    tokenStream = new ASCIIFoldingFilter(tokenStream);

    // remove stop words from the token stream
    tokenStream = new StopFilter(false, tokenStream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    // apply the EdgeNGramTokenFilter:
    // this turns each token into a set of prefixes, e.g.
    // "South Melbourne" will be turned into "Sou Sout South Mel Melb Melbo ..."
    tokenStream = new EdgeNGramTokenFilter(tokenStream, Side.FRONT, this.minGram, this.maxGram);

    return tokenStream;
}
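The payoff at query time, assuming minGram = 3 (the field and term here are purely illustrative): the indexed prefixes let a plain term query behave like a prefix search.

    // "melbourne" was indexed as "mel", "melb", "melbo", ... so the user's
    // partial input matches an indexed term directly.
    var query = new TermQuery(new Term("suburb", "melb"));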
/// <summary>
/// Gets the author entity corresponding to the specified author.
/// </summary>
/// <param name="author">The author or null.</param>
/// <param name="context">The context.</param>
/// <returns>The entity or null.</returns>
/// <exception cref="ArgumentNullException">context</exception>
public static EfAuthor GetEfAuthor(Author author, BiblioDbContext context)
{
    if (context == null)
    {
        throw new ArgumentNullException(nameof(context));
    }
    if (author == null)
    {
        return null;
    }

    EfAuthor ef = author.Id != Guid.Empty
        ? context.Authors.Find(author.Id)
        : null;

    if (ef == null)
    {
        if (author.Last == null)
        {
            return null;
        }
        ef = new EfAuthor();
        context.Authors.Add(ef);
    }

    if (author.Last != null)
    {
        ef.First = author.First;
        ef.Last = author.Last;
        ef.Lastx = StandardFilter.Apply(author.Last, true);
        ef.Suffix = author.Suffix;
    }
    return ef;
}
public async Task<IActionResult> GetList(
    [FromQuery] string keyword,
    [FromQuery] IList<int> routeIds,
    [FromQuery] IList<int> levels,
    [FromQuery] OrderBy orderBy = OrderBy.Score,
    [FromQuery] StandardFilter filter = StandardFilter.ActiveAvailable)
{
    var queryResult = await _mediator.Send(new GetStandardsListQuery
    {
        Keyword = keyword,
        RouteIds = routeIds,
        Levels = levels,
        OrderBy = orderBy,
        Filter = filter
    });

    var response = new GetStandardsListResponse
    {
        Standards = queryResult.Standards.Select(standard => (GetStandardResponse)standard),
        Total = queryResult.Total,
        TotalFiltered = queryResult.TotalFiltered
    };

    return Ok(response);
}
/// <summary>
/// Creates a TokenStream which tokenizes all the text in the provided TextReader.
/// </summary>
/// <returns>A TokenStream built from a YKChineseTokenizer filtered with StandardFilter and StopFilter.</returns>
public override sealed TokenStream TokenStream(String fieldName, TextReader reader)
{
    TokenStream tokenStream = new YKChineseTokenizer(reader);
    tokenStream = new StandardFilter(tokenStream);
    return new StopFilter(true, tokenStream, YKChineseAnalyzer.CHINESE_ENGLISH_STOP_WORDS, true);
}
// For text processing
internal string DeleteInvalidData(string result, string tipoAnalizador)
{
    TokenStream tokenStream = new StandardTokenizer(Version.LUCENE_30, new System.IO.StringReader(result));
    tokenStream = new StandardFilter(tokenStream);  // removes punctuation marks
    tokenStream = new LowerCaseFilter(tokenStream); // converts the content to lower case

    if (tipoAnalizador == "Español")
    {
        // filter the content against the Spanish stop-word list
        tokenStream = new StopFilter(true, tokenStream, StopFilter.MakeStopSet(SpanishAnalyzer.SPANISH_STOP_WORDS));
        // convert characters above 127 in the ASCII table
        tokenStream = new ASCIIFoldingFilter(tokenStream);
        // stemming (lemmatization) of the words
        tokenStream = SpanishSteammer(tokenStream);
    }
    else
    {
        // filter the content against the English stop-word list
        tokenStream = new StopFilter(true, tokenStream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
        // Porter stemming of the words
        tokenStream = new PorterStemFilter(tokenStream);
    }

    return GetDataTokens(tokenStream);
}
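A usage sketch (the hosting instance name is hypothetical; exact stems depend on SpanishSteammer):

    // Spanish branch: stop words removed, accents folded, then stemmed.
    string limpio = indexer.DeleteInvalidData("Los niños corrían rápidamente", "Español");
    // limpio ≈ "nin corr rapid" (illustrative output only)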
/// <summary>
/// Creates
/// <see cref="TokenStreamComponents"/>
/// used to tokenize all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> <see cref="TokenStreamComponents"/>
/// built from a <see cref="StandardTokenizer"/> filtered with
/// <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="ThaiWordFilter"/>, and
/// <see cref="StopFilter"/> </returns>
protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    if (m_matchVersion.OnOrAfter(LuceneVersion.LUCENE_48))
    {
        Tokenizer source = new ThaiTokenizer(reader);
        TokenStream result = new LowerCaseFilter(m_matchVersion, source);
        result = new StopFilter(m_matchVersion, result, m_stopwords);
        return new TokenStreamComponents(source, result);
    }
    else
    {
        Tokenizer source = new StandardTokenizer(m_matchVersion, reader);
        TokenStream result = new StandardFilter(m_matchVersion, source);
#pragma warning disable 612, 618
        if (m_matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
#pragma warning restore 612, 618
        {
            result = new LowerCaseFilter(m_matchVersion, result);
        }
#pragma warning disable 612, 618
        result = new ThaiWordFilter(m_matchVersion, result);
#pragma warning restore 612, 618
        return new TokenStreamComponents(source, new StopFilter(m_matchVersion, result, m_stopwords));
    }
}
public async Task Then_Gets_Standards_List_From_Mediator(
    List<int> routeIds,
    List<int> levels,
    string keyword,
    OrderBy orderBy,
    StandardFilter filter,
    GetStandardsListResult queryResult,
    [Frozen] Mock<IMediator> mockMediator,
    [Greedy] StandardsController controller)
{
    filter = StandardFilter.None;
    mockMediator
        .Setup(mediator => mediator.Send(
            It.Is<GetStandardsListQuery>(query =>
                query.Keyword == keyword
                && query.RouteIds.Equals(routeIds)
                && query.Levels.Equals(levels)
                && query.OrderBy.Equals(orderBy)
                && query.Filter.Equals(filter)),
            It.IsAny<CancellationToken>()))
        .ReturnsAsync(queryResult);

    var controllerResult = await controller.GetList(keyword, routeIds, levels, orderBy, filter) as ObjectResult;
    var model = controllerResult.Value as GetStandardsListResponse;

    controllerResult.StatusCode.Should().Be((int)HttpStatusCode.OK);
    model.Standards.Should().BeEquivalentTo(queryResult.Standards, StandardToGetStandardResponseOptions.Exclusions);
    model.Total.Should().Be(queryResult.Total);
    model.TotalFiltered.Should().Be(queryResult.TotalFiltered);
}
/// <summary>
/// Creates
/// <see cref="TokenStreamComponents"/>
/// used to tokenize all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> <see cref="TokenStreamComponents"/>
/// built from a <see cref="StandardTokenizer"/> filtered with
/// <see cref="StandardFilter"/>, <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>,
/// <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
/// provided, and <see cref="SnowballFilter"/> </returns>
protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
    {
        Tokenizer source = new StandardTokenizer(matchVersion, reader);
        TokenStream result = new StandardFilter(matchVersion, source);
        result = new LowerCaseFilter(matchVersion, result);
        result = new StopFilter(matchVersion, result, stopwords);
        if (stemExclusionSet.Count > 0)
        {
            result = new SetKeywordMarkerFilter(result, stemExclusionSet);
        }
        result = new SnowballFilter(result, new Tartarus.Snowball.Ext.RussianStemmer());
        return new TokenStreamComponents(source, result);
    }
    else
    {
        Tokenizer source = new RussianLetterTokenizer(matchVersion, reader);
        TokenStream result = new LowerCaseFilter(matchVersion, source);
        result = new StopFilter(matchVersion, result, stopwords);
        if (stemExclusionSet.Count > 0)
        {
            result = new SetKeywordMarkerFilter(result, stemExclusionSet);
        }
        result = new SnowballFilter(result, new Tartarus.Snowball.Ext.RussianStemmer());
        return new TokenStreamComponents(source, result);
    }
}
public async Task And_Has_Keyword_Then_Gets_Standards_From_SearchManager(
    string keyword,
    OrderBy orderBy,
    StandardFilter filter,
    List<Standard> standardsFromRepo,
    StandardSearchResultsList searchResult,
    [Frozen] Mock<IStandardRepository> mockStandardsRepository,
    [Frozen] Mock<ISearchManager> mockSearchManager,
    [Frozen] Mock<IStandardsSortOrderService> mockSortOrderService,
    StandardsService service)
{
    searchResult.Standards = new List<StandardSearchResult>
    {
        new StandardSearchResult { StandardUId = standardsFromRepo[0].StandardUId }
    };
    var standardsFoundInSearch = standardsFromRepo
        .Where(standard => searchResult.Standards.Select(result => result.StandardUId).Contains(standard.StandardUId))
        .ToList();
    mockStandardsRepository
        .Setup(repository => repository.GetStandards(new List<int>(), new List<int>(), filter))
        .ReturnsAsync(standardsFromRepo);
    mockSearchManager
        .Setup(manager => manager.Query(keyword))
        .Returns(searchResult);
    mockSortOrderService
        .Setup(orderService => orderService.OrderBy(standardsFoundInSearch, It.IsAny<OrderBy>(), It.IsAny<string>()))
        .Returns(standardsFoundInSearch.OrderBy(standard => standard.SearchScore));

    var standards = await service.GetStandardsList(keyword, new List<int>(), new List<int>(), orderBy, filter);

    standards.Should().BeEquivalentTo(standardsFoundInSearch, StandardEquivalencyAssertionOptions.ExcludingFields);
}
/// <summary>
/// Gets the entity keyword corresponding to the specified keyword.
/// </summary>
/// <param name="keyword">The keyword or null.</param>
/// <param name="context">The context.</param>
/// <returns>The entity or null.</returns>
/// <exception cref="ArgumentNullException">context</exception>
public static EfKeyword GetEfKeyword(Keyword keyword, BiblioDbContext context)
{
    if (context == null)
    {
        throw new ArgumentNullException(nameof(context));
    }
    if (keyword == null)
    {
        return null;
    }

    EfKeyword ef = context.Keywords.FirstOrDefault(
        k => k.Language == keyword.Language && k.Value == keyword.Value);
    if (ef == null)
    {
        ef = new EfKeyword
        {
            Language = keyword.Language,
            Value = keyword.Value,
            Valuex = StandardFilter.Apply(keyword.Value, true)
        };
        context.Keywords.Add(ef);
    }
    return ef;
}
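Valuex stores a normalized shadow of Value for searching; assuming StandardFilter.Apply lowercases the text and strips diacritics and punctuation (an assumption about the filter, which is defined elsewhere in this codebase), the effect looks like:

    // Illustrative only: the normalized form is what gets indexed and compared.
    string valuex = StandardFilter.Apply("Études Célèbres!", true);
    // valuex ≈ "etudes celebres" (assumed normalization)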
private static void PrepareKeywordFilter(KeywordFilter filter)
{
    if (!string.IsNullOrEmpty(filter.Value))
    {
        filter.Value = StandardFilter.Apply(filter.Value, true);
    }
}
private static void PrepareAuthorFilter(AuthorFilter filter)
{
    // apply the filter only when there is a value to normalize
    if (!string.IsNullOrEmpty(filter.Last))
    {
        filter.Last = StandardFilter.Apply(filter.Last, true);
    }
}
public ActionResult GeneralDisplayContent(StandardFilter standardFilter)
{
    // artificial 5-second delay before building the view model
    Task.Delay(5000).Wait();

    GeneralDisplayViewModel generalDisplayViewModel = CreateGeneralDisplayViewModel(standardFilter);

    // return view
    return View(generalDisplayViewModel);
}
public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
{
    TokenStream result = new StandardTokenizer(matchVersion, reader);
    result = new StandardFilter(result);
    result = new ASCIIFoldingFilter(result);
    return result;
}
public override TokenStream TokenStream(String fieldName, TextReader reader)
{
    TokenStream filter = new StandardFilter(new LowerCaseTokenizer(reader));
    filter = new PorterStemFilter(filter);
    return filter;
}
protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    StandardTokenizer standardTokenizer = new StandardTokenizer(matchVersion, reader);
    TokenStream stream = new StandardFilter(matchVersion, standardTokenizer);
    stream = new LowerCaseFilter(matchVersion, stream);
    stream = new StopFilter(matchVersion, stream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    return new TokenStreamComponents(standardTokenizer, stream);
}
private static void AddKeywords(IList<Keyword> keywords, EfWork work, BiblioDbContext context)
{
    // collect the keywords to be assigned, adding the missing ones
    List<EfKeywordWork> requested = new List<EfKeywordWork>();
    foreach (Keyword keyword in keywords)
    {
        // find the keyword by its content, as we have no ID
        EfKeyword efk = context.Keywords.FirstOrDefault(k =>
            k.Value == keyword.Value && k.Language == keyword.Language);

        // if not found, add it
        if (efk == null)
        {
            efk = new EfKeyword
            {
                Language = keyword.Language,
                Value = keyword.Value,
                Valuex = StandardFilter.Apply(keyword.Value, true)
            };
            context.Keywords.Add(efk);
        }
        requested.Add(new EfKeywordWork
        {
            Keyword = efk,
            Work = work
        });
    }

    // remove all the keywords which are no longer requested
    if (work.KeywordWorks != null)
    {
        foreach (EfKeywordWork kw in work.KeywordWorks)
        {
            if (requested.All(r => r.KeywordId != kw.KeywordId))
            {
                context.KeywordWorks.Remove(kw);
            }
        }
    }
    else
    {
        work.KeywordWorks = new List<EfKeywordWork>();
    }

    // add all those which are not yet present
    foreach (EfKeywordWork kw in requested)
    {
        if (work.KeywordWorks.All(r => r.KeywordId != kw.KeywordId))
        {
            work.KeywordWorks.Add(kw);
        }
    }
}
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    TokenStream result = new StandardTokenizer(reader);
    result = new StandardFilter(result);
    result = new LowerCaseFilter(result);
    result = new RuSnowballFilter(result);
    return result;
}
/// <summary>
/// Creates a TokenStream which tokenizes all the text in the provided TextReader.
/// </summary>
/// <param name="fieldName">The name of the field being analyzed.</param>
/// <param name="reader">The reader containing the text to tokenize.</param>
/// <returns>A TokenStream built from a StandardTokenizer filtered with StandardFilter, StopFilter, and DutchStemFilter.</returns>
public override TokenStream TokenStream(String fieldName, TextReader reader)
{
    TokenStream result = new StandardTokenizer(reader);
    result = new StandardFilter(result);
    result = new StopFilter(result, stoptable);
    result = new DutchStemFilter(result, excltable, _stemdict);
    return result;
}
protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer(matchVersion, reader);
    src.MaxTokenLength = maxTokenLength;
    TokenStream tok = new StandardFilter(matchVersion, src);
    tok = new LowerCaseFilter(matchVersion, tok);
    tok = new StopFilter(matchVersion, tok, stopwords);
    return new TokenStreamComponentsAnonymousInnerClassHelper(this, src, tok, reader);
}
public SqlWhere(StandardFilter filter)
{
    Parameters = new List<Literal>();
    Clause = Apply("{0}", filter);
}
/// <summary>
/// Creates a
/// <see cref="TokenStreamComponents"/>
/// which tokenizes all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> A
/// <see cref="TokenStreamComponents"/>
/// built from an <see cref="StandardTokenizer"/> filtered with
/// <see cref="StandardFilter"/>, <see cref="SoraniNormalizationFilter"/>,
/// <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>,
/// <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
/// provided and <see cref="SoraniStemFilter"/>. </returns>
protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, source);
    result = new SoraniNormalizationFilter(result);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if (stemExclusionSet.Count > 0)
    {
        result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    }
    result = new SoraniStemFilter(result);
    return new TokenStreamComponents(source, result);
}
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer(matchVersion, reader);
    src.MaxTokenLength = maxTokenLength;
    TokenStream tok = new StandardFilter(matchVersion, src);
    tok = new LowerCaseFilter(matchVersion, tok);
    tok = new StopFilter(matchVersion, tok, stopwords);
    return new TokenStreamComponentsAnonymousInnerClassHelper(this, src, tok, reader);
}
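What sets this chain apart from the StandardTokenizer pipelines above is the tokenizer itself: UAX29URLEmailTokenizer keeps URLs and e-mail addresses intact as single tokens (illustrative breakdown, with the stop filter removing "or"):

    // "Mail bob@example.com or see https://example.com/docs" tokenizes roughly as
    //   ["mail", "bob@example.com", "see", "https://example.com/docs"]
    // whereas a plain StandardTokenizer would split the address and URL into pieces.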
/// <summary>
/// Creates a
/// <see cref="TokenStreamComponents"/>
/// which tokenizes all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> A
/// <see cref="TokenStreamComponents"/>
/// built from an <see cref="StandardTokenizer"/> filtered with
/// <see cref="StandardFilter"/>, <see cref="ElisionFilter"/>, <see cref="LowerCaseFilter"/>,
/// <see cref="StopFilter"/>, <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
/// provided and <see cref="SnowballFilter"/>. </returns>
protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(matchVersion, source);
    if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_36))
    {
        result = new ElisionFilter(result, DEFAULT_ARTICLES);
    }
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if (stemExclusionSet.Count > 0)
    {
        result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    }
    result = new SnowballFilter(result, new CatalanStemmer());
    return new TokenStreamComponents(source, result);
}
public Not(StandardFilter filter)
{
    this.Filter = filter;
}
public And(StandardFilter leftFilter, StandardFilter rightFilter)
{
    this.LeftFilter = leftFilter;
    this.RightFilter = rightFilter;
}
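Not and And are composite nodes over StandardFilter, so arbitrary boolean trees can be composed and handed to SqlWhere (the leaf filters here are hypothetical):

    // Renders roughly as: WHERE (NOT <left>) AND <right>
    StandardFilter combined = new And(new Not(leftFilter), rightFilter);
    var where = new SqlWhere(combined);
    // where.Clause holds the SQL fragment; where.Parameters collects the literals.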
/// <summary>
/// Creates
/// <see cref="TokenStreamComponents"/>
/// used to tokenize all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> <see cref="TokenStreamComponents"/>
/// built from a <see cref="StandardTokenizer"/> filtered with
/// <see cref="LowerCaseFilter"/>, <see cref="StandardFilter"/>, <see cref="StopFilter"/>,
/// and <see cref="BrazilianStemFilter"/>. </returns>
protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new LowerCaseFilter(matchVersion, source);
    result = new StandardFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if (excltable != null && excltable.Count > 0)
    {
        result = new SetKeywordMarkerFilter(result, excltable);
    }
    return new TokenStreamComponents(source, new BrazilianStemFilter(result));
}
/// <summary>
/// Creates
/// <see cref="TokenStreamComponents"/>
/// used to tokenize all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> <see cref="TokenStreamComponents"/>
/// built from a <see cref="StandardTokenizer"/> filtered with
/// <see cref="GreekLowerCaseFilter"/>, <see cref="StandardFilter"/>,
/// <see cref="StopFilter"/>, and <see cref="GreekStemFilter"/> </returns>
protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new GreekLowerCaseFilter(matchVersion, source);
    if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
    {
        result = new StandardFilter(matchVersion, result);
    }
    result = new StopFilter(matchVersion, result, stopwords);
    if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
    {
        result = new GreekStemFilter(result);
    }
    return new TokenStreamComponents(source, result);
}
/// <summary>
/// Creates
/// <see cref="TokenStreamComponents"/>
/// used to tokenize all the text in the provided <see cref="TextReader"/>.
/// </summary>
/// <returns> <see cref="TokenStreamComponents"/>
/// built from a <see cref="StandardTokenizer"/> filtered with
/// <see cref="StandardFilter"/>, <see cref="ElisionFilter"/>,
/// <see cref="LowerCaseFilter"/>, <see cref="StopFilter"/>,
/// <see cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
/// provided, and <see cref="FrenchLightStemFilter"/> </returns>
protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
    {
        Tokenizer source = new StandardTokenizer(matchVersion, reader);
        TokenStream result = new StandardFilter(matchVersion, source);
        result = new ElisionFilter(result, DEFAULT_ARTICLES);
        result = new LowerCaseFilter(matchVersion, result);
        result = new StopFilter(matchVersion, result, stopwords);
        if (excltable.Count > 0)
        {
            result = new SetKeywordMarkerFilter(result, excltable);
        }
        if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_36))
        {
            result = new FrenchLightStemFilter(result);
        }
        else
        {
            result = new SnowballFilter(result, new Tartarus.Snowball.Ext.FrenchStemmer());
        }
        return new TokenStreamComponents(source, result);
    }
    else
    {
        Tokenizer source = new StandardTokenizer(matchVersion, reader);
        TokenStream result = new StandardFilter(matchVersion, source);
        result = new StopFilter(matchVersion, result, stopwords);
        if (excltable.Count > 0)
        {
            result = new SetKeywordMarkerFilter(result, excltable);
        }
        result = new FrenchStemFilter(result);
        // convert to lowercase after stemming!
        return new TokenStreamComponents(source, new LowerCaseFilter(matchVersion, result));
    }
}