/// <summary>
/// Creates an extractor with its default configuration: <c>StopWords</c> and
/// <c>Distinct</c> are <c>false</c>, the length bounds span the full
/// <see cref="int"/> range, and both <c>Granularity</c> and <c>Filter</c> are
/// set to their <c>Raw</c> values.
/// </summary>
public BracketPipeTextExtractor()
{
    this.StopWords = false;

    // int.MinValue / int.MaxValue: the widest bounds an int can express.
    this.MinimumLength = int.MinValue;
    this.MaximumLength = int.MaxValue;

    this.Granularity = ExtractionGranularity.Raw;
    this.Distinct = false;
    this.Filter = BracketPipeTextExtractorFilterType.Raw;
}
/// <summary>
/// Loads the cached web-resource bytes stored under <paramref name="datahash"/>,
/// runs them through a <c>BracketPipeTextExtractor</c> configured from the
/// remaining arguments, and returns the collected fragments.
/// </summary>
/// <param name="datahash">Hash string; wrapped in an <c>MD5Hash</c> for the database lookup.</param>
/// <param name="filter">Assigned to the extractor's <c>Filter</c> property.</param>
/// <param name="minlen">Assigned to the extractor's <c>MinimumLength</c> property.</param>
/// <param name="maxlen">Assigned to the extractor's <c>MaximumLength</c> property.</param>
/// <param name="distinct">Assigned to the extractor's <c>Distinct</c> property.</param>
/// <param name="stopWords">Assigned to the extractor's <c>StopWords</c> property.</param>
/// <param name="granularity">Assigned to the extractor's <c>Granularity</c> property.</param>
/// <returns>
/// The result of <c>CreateOKResponse</c> with the list of fragments; the result of
/// <c>Create404Response</c> when the database returns <c>null</c> for the hash; or
/// the result of <c>CreateExceptionResponse</c> when any exception is thrown.
/// </returns>
/// <remarks>
/// NOTE(review): <paramref name="distinct"/> and <paramref name="stopWords"/> default
/// to <c>true</c> here, while the extractor's parameterless constructor sets both to
/// <c>false</c> - confirm the difference is intentional.
/// </remarks>
public async Task<HttpResponseMessage> GetWebResourceCacheDataText(
    string datahash,
    BracketPipeTextExtractorFilterType filter = BracketPipeTextExtractorFilterType.Raw,
    int minlen = int.MinValue,
    int maxlen = int.MaxValue,
    bool distinct = true,
    bool stopWords = true,
    ExtractionGranularity granularity = ExtractionGranularity.Raw)
{
    try
    {
        using (var database = new Database())
        {
            var hash = new MD5Hash(datahash);
            byte[] cachedData = await database.GetWebResourceCacheData(hash);

            // Nothing is cached under this hash.
            if (cachedData == null)
            {
                return Create404Response((object)null);
            }

            var fragments = new List<BracketPipeTextFragment>();

            using (var stream = new MemoryStream(cachedData))
            {
                var extractor = new BracketPipeTextExtractor
                {
                    Distinct = distinct,
                    Granularity = granularity,
                    MaximumLength = maxlen,
                    MinimumLength = minlen,
                    StopWords = stopWords,
                    Filter = filter
                };

                // Every fragment handed to the callback is appended to the list.
                extractor.Parse(stream, fragments.Add);
            }

            return CreateOKResponse(fragments);
        }
    }
    catch (Exception failure)
    {
        // Any failure is converted into a response rather than propagated.
        return CreateExceptionResponse(failure);
    }
}
/// <summary>
/// Builds the fragment predicate that corresponds to <paramref name="filterType"/>.
/// </summary>
/// <param name="filterType">The filter type to translate into a predicate.</param>
/// <returns>
/// A predicate that delegates to <c>GenericTagFilter</c> with the tag set matching
/// the filter type (<c>CoreTags</c>, <c>LinkTags</c>, <c>HeaderTags</c> or
/// <c>TimeTags</c>); for <c>NonCore</c> the <c>CoreTags</c> result is negated.
/// <c>Raw</c> and any unrecognised value yield a predicate that always returns
/// <c>true</c>.
/// </returns>
public Func<BracketPipeTextFragment, bool> GetFilter(BracketPipeTextExtractorFilterType filterType)
{
    Func<BracketPipeTextFragment, bool> predicate;

    switch (filterType)
    {
        case BracketPipeTextExtractorFilterType.Core:
            predicate = fragment => GenericTagFilter(CoreTags, fragment);
            break;

        case BracketPipeTextExtractorFilterType.NonCore:
            // Inverse of the Core predicate.
            predicate = fragment => !GenericTagFilter(CoreTags, fragment);
            break;

        case BracketPipeTextExtractorFilterType.Links:
            predicate = fragment => GenericTagFilter(LinkTags, fragment);
            break;

        case BracketPipeTextExtractorFilterType.Headers:
            predicate = fragment => GenericTagFilter(HeaderTags, fragment);
            break;

        case BracketPipeTextExtractorFilterType.Time:
            predicate = fragment => GenericTagFilter(TimeTags, fragment);
            break;

        case BracketPipeTextExtractorFilterType.Raw:
        default:
            // Raw (and anything unknown) accepts every fragment.
            predicate = fragment => true;
            break;
    }

    return predicate;
}