/// <summary>
/// Creates an extractor with every option set to its least restrictive value:
/// the two boolean options off, the widest possible integer length range, and
/// the <c>Raw</c> granularity and filter.
/// </summary>
public BracketPipeTextExtractor()
 {
     StopWords     = false;
     // int.MinValue / int.MaxValue span the whole int range.
     // NOTE(review): presumably this means "no length restriction" — confirm against Parse.
     MinimumLength = int.MinValue;
     MaximumLength = int.MaxValue;
     Granularity   = ExtractionGranularity.Raw;
     Distinct      = false;
     Filter        = BracketPipeTextExtractorFilterType.Raw;
 }
Example #2
0
        /// <summary>
        /// Loads the cached web-resource bytes stored under <paramref name="datahash"/>,
        /// runs them through a <c>BracketPipeTextExtractor</c> configured from the
        /// remaining arguments, and returns the collected text fragments.
        /// </summary>
        /// <param name="datahash">Hash string used to construct the <c>MD5Hash</c> lookup key.</param>
        /// <param name="filter">Forwarded to the extractor's <c>Filter</c> property.</param>
        /// <param name="minlen">Forwarded to the extractor's <c>MinimumLength</c> property.</param>
        /// <param name="maxlen">Forwarded to the extractor's <c>MaximumLength</c> property.</param>
        /// <param name="distinct">Forwarded to the extractor's <c>Distinct</c> property.</param>
        /// <param name="stopWords">Forwarded to the extractor's <c>StopWords</c> property.</param>
        /// <param name="granularity">Forwarded to the extractor's <c>Granularity</c> property.</param>
        /// <returns>
        /// The result of <c>CreateOKResponse</c> wrapping the fragment list, the result of
        /// <c>Create404Response</c> when the database returns no data for the hash, or the
        /// result of <c>CreateExceptionResponse</c> when anything in the body throws.
        /// </returns>
        public async Task<HttpResponseMessage> GetWebResourceCacheDataText(
            string datahash,
            BracketPipeTextExtractorFilterType filter = BracketPipeTextExtractorFilterType.Raw,
            int minlen = int.MinValue,
            int maxlen = int.MaxValue,
            bool distinct = true,
            bool stopWords = true,
            ExtractionGranularity granularity = ExtractionGranularity.Raw)
        {
            try
            {
                using (var database = new Database())
                {
                    var lookupKey = new MD5Hash(datahash);
                    byte[] cachedBytes = await database.GetWebResourceCacheData(lookupKey);

                    // Nothing stored under this hash: report "not found" instead of parsing.
                    if (cachedBytes == null)
                    {
                        return Create404Response((object)null);
                    }

                    var fragments = new List<BracketPipeTextFragment>();
                    using (var stream = new MemoryStream(cachedBytes))
                    {
                        // Properties are assigned in the same order the caller's options
                        // have always been applied.
                        var extractor = new BracketPipeTextExtractor();
                        extractor.Distinct = distinct;
                        extractor.Granularity = granularity;
                        extractor.MaximumLength = maxlen;
                        extractor.MinimumLength = minlen;
                        extractor.StopWords = stopWords;
                        extractor.Filter = filter;

                        // Each fragment the extractor emits is appended to the list.
                        extractor.Parse(stream, fragments.Add);
                    }

                    return CreateOKResponse(fragments);
                }
            }
            catch (Exception error)
            {
                // Any failure (bad hash, database error, parse error) becomes an error response.
                return CreateExceptionResponse(error);
            }
        }
        /// <summary>
        /// Maps a filter type to the predicate that decides whether a fragment is kept.
        /// </summary>
        /// <param name="filterType">The requested filter.</param>
        /// <returns>
        /// A predicate delegating to <c>GenericTagFilter</c> with the tag set matching
        /// <paramref name="filterType"/> (negated for <c>NonCore</c>); for <c>Raw</c> or any
        /// unrecognised value, a predicate that accepts every fragment.
        /// </returns>
        public Func<BracketPipeTextFragment, bool> GetFilter(BracketPipeTextExtractorFilterType filterType)
        {
            if (filterType == BracketPipeTextExtractorFilterType.Core)
            {
                return fragment => GenericTagFilter(CoreTags, fragment);
            }

            if (filterType == BracketPipeTextExtractorFilterType.NonCore)
            {
                // Complement of the Core filter.
                return fragment => !GenericTagFilter(CoreTags, fragment);
            }

            if (filterType == BracketPipeTextExtractorFilterType.Links)
            {
                return fragment => GenericTagFilter(LinkTags, fragment);
            }

            if (filterType == BracketPipeTextExtractorFilterType.Headers)
            {
                return fragment => GenericTagFilter(HeaderTags, fragment);
            }

            if (filterType == BracketPipeTextExtractorFilterType.Time)
            {
                return fragment => GenericTagFilter(TimeTags, fragment);
            }

            // Raw and every other value: keep everything.
            return fragment => true;
        }