Пример #1
0
        /// <summary>
        /// Builds an <see cref="ISiteCrawler"/> for a site, resolving each collaborator from
        /// <c>Container</c> under the registration name configured for that site.
        /// </summary>
        /// <param name="siteParameter">
        /// Site configuration. Its <c>CustomProcessors</c> are queried, keyed by interface name,
        /// for the registration names to resolve.
        /// </param>
        /// <returns>A <see cref="GeneralSiteCrawler"/> built from the resolved page reader and page parser.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="siteParameter"/> is <c>null</c>.</exception>
        public static ISiteCrawler Create(SiteParameter siteParameter)
        {
            if (siteParameter == null)
            {
                throw new ArgumentNullException(nameof(siteParameter));
            }

            // Registration names per collaborator. What GetValueOrDefault yields for a missing
            // key is decided by that helper, which is not visible from this method.
            // The former "IDataService" lookup was dropped: its result was never used here.
            string htmlReaderName = GetValueOrDefault(siteParameter.CustomProcessors, "IHtmlReader");
            string pageParserName = GetValueOrDefault(siteParameter.CustomProcessors, "IPageParser");
            string itemReaderName = GetValueOrDefault(siteParameter.CustomProcessors, "IItemReader");
            string pageReaderName = GetValueOrDefault(siteParameter.CustomProcessors, "IPageReader");

            // Overrides are matched by constructor parameter name, so the string keys below
            // must stay in sync with the parameter names of the resolved implementations.
            ParameterOverride siteParameterParameter = new ParameterOverride("siteParameter", siteParameter);

            IHtmlReader       htmlReader          = Container.Resolve<IHtmlReader>(htmlReaderName);
            ParameterOverride htmlReaderParameter = new ParameterOverride("htmlReader", htmlReader);

            IItemReader       itemReader          = Container.Resolve<IItemReader>(itemReaderName, siteParameterParameter);
            ParameterOverride itemReaderParameter = new ParameterOverride("itemReader", itemReader);

            // The same htmlReader instance is handed to both the page reader and the page parser.
            IPageReader pageReader = Container.Resolve<IPageReader>(pageReaderName, siteParameterParameter, htmlReaderParameter, itemReaderParameter);
            IPageParser pageParser = Container.Resolve<IPageParser>(pageParserName, siteParameterParameter, htmlReaderParameter);

            return new GeneralSiteCrawler(pageReader, pageParser);
        }
Пример #2
0
 /// <summary>
 /// Creates a fetcher and stores the supplied collaborators for later use.
 /// </summary>
 /// <param name="remote">Page reader kept in <c>_remote</c>.</param>
 /// <param name="remotePos">Checkpoint reader kept in <c>_remotePos</c>.</param>
 /// <param name="streamManager">Memory stream manager kept in <c>_streamManager</c>.</param>
 /// <param name="streamName">Value exposed through <c>StreamName</c>.</param>
 public MessageFetcher(
     IPageReader remote,
     ICheckpointReader remotePos,
     IMemoryStreamManager streamManager,
     string streamName)
 {
     // No validation is performed here; arguments are stored as given.
     _remote = remote;
     _remotePos = remotePos;
     _streamManager = streamManager;
     StreamName = streamName;
 }
        /// <summary>
        /// Composes the page's raw image over a white background and returns the result.
        /// </summary>
        /// <param name="pageReader">Source of the raw image bytes and of the page width and height.</param>
        /// <returns>
        /// A new 32bpp ARGB bitmap sized to the page. It is not disposed here, so the caller
        /// is responsible for disposing it.
        /// </returns>
        private static Image GetModifiedImage(IPageReader pageReader)
        {
            var rawBytes = pageReader.GetImage();

            var width  = pageReader.GetPageWidth();
            var height = pageReader.GetPageHeight();

            using (var bmp = new Bitmap(width, height, PixelFormat.Format32bppArgb))
            {
                // Load the page pixels first so a failure here happens before the
                // result bitmap is allocated.
                bmp.AddBytes(rawBytes);

                var background = new Bitmap(width, height, PixelFormat.Format32bppArgb);

                try
                {
                    using (var g = Graphics.FromImage(background))
                    {
                        // White fill first, then the page image drawn on top at the origin.
                        g.Clear(Color.White);
                        g.DrawImageUnscaled(bmp, Point.Empty);
                    }
                }
                catch
                {
                    // Ownership only passes to the caller on success; release on failure.
                    background.Dispose();
                    throw;
                }

                return background;
            }
        }
Пример #4
0
        /// <summary>
        /// Creates a crawler for the given site using the built-in regex-based item reader and
        /// page parser, an <c>HttpClientReader</c> for HTML, and a sequential page reader.
        /// </summary>
        /// <param name="siteParameter">Site configuration passed to the item reader, page reader and page parser.</param>
        public GeneralSiteCrawler(SiteParameter siteParameter)
        {
            IItemReader items = new RegexItemReader(siteParameter);
            IHtmlReader html  = new HttpClientReader();

            // The page reader and the page parser share the same HTML reader instance.
            this.pageReader = new SequentialPageReader(siteParameter, html, items);
            this.pageParser = new RegexPageParser(siteParameter, html);
        }
Пример #5
0
        /// <summary>
        /// Creates a read stream over <paramref name="reader"/> that uses
        /// <paramref name="buffer"/> as its backing storage.
        /// </summary>
        /// <param name="reader">Page reader stored in <c>_reader</c>.</param>
        /// <param name="start">Initial value of <c>_position</c>.</param>
        /// <param name="max">Value stored in <c>_max</c>.</param>
        /// <param name="buffer">Byte array stored in <c>_buffer</c> and wrapped by the internal memory stream.</param>
        public PageReadStream(IPageReader reader, long start, long max, byte[] buffer)
        {
            _reader = reader;
            _max = max;
            _buffer = buffer;
            _position = start;

            // Wrap the caller's buffer, then truncate the logical length to zero so the
            // stream starts out empty while still being backed by that buffer.
            var window = new MemoryStream(buffer);
            window.SetLength(0);
            _mem = window;
        }
Пример #6
0
        /// <summary>
        /// Initializes the stream with its page reader, starting position, limit and
        /// backing buffer.
        /// </summary>
        /// <param name="reader">Page reader assigned to <c>_reader</c>.</param>
        /// <param name="start">Assigned to <c>_position</c>.</param>
        /// <param name="max">Assigned to <c>_max</c>.</param>
        /// <param name="buffer">Assigned to <c>_buffer</c>; also the storage behind <c>_mem</c>.</param>
        public PageReadStream(IPageReader reader, long start, long max, byte[] buffer)
        {
            _reader   = reader;
            _max      = max;
            _buffer   = buffer;
            _position = start;

            // The memory stream is a view over the supplied buffer; its length is reset
            // to zero so it reports no data until something is written into it.
            var view = new MemoryStream(buffer);
            view.SetLength(0);
            _mem = view;
        }
Пример #7
0
        /// <summary>
        /// Creates a target for the given URI, read through the given page reader.
        /// </summary>
        /// <param name="attempts">Value assigned to <c>Attempts</c>.</param>
        /// <param name="uri">Target address; must not be <c>null</c>.</param>
        /// <param name="reader">Page reader used for this target; must not be <c>null</c>.</param>
        /// <param name="lifes">Value assigned to <c>Lifes</c>; defaults to 1.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="uri"/> or <paramref name="reader"/> is <c>null</c>. This derives from
        /// <see cref="ArgumentException"/>, so handlers written for the former exception type still apply.
        /// </exception>
        public DevourTarget(int attempts, Uri uri, IPageReader reader, int lifes = 1)
        {
            // Report which argument was missing rather than a generic failure.
            if (uri == null)
            {
                throw new ArgumentNullException(nameof(uri), "Bad arguments");
            }

            if (reader == null)
            {
                throw new ArgumentNullException(nameof(reader), "Bad arguments");
            }

            Attempts = attempts;
            Lifes    = lifes;
            _uri     = uri;
            _reader  = reader;
        }
Пример #8
0
 /// <summary>
 /// Creates a copier and stores the source-side and target-side collaborators.
 /// </summary>
 /// <param name="sourceReader">Page reader kept in <c>_sourceReader</c>.</param>
 /// <param name="sourcePos">Checkpoint reader exposed through <c>SourcePos</c>.</param>
 /// <param name="streamManager">Memory stream manager kept in <c>_streamManager</c>.</param>
 /// <param name="targetWriter">Page writer kept in <c>_targetWriter</c>.</param>
 /// <param name="targetPos">Checkpoint writer exposed through <c>TargetPos</c>.</param>
 public MessageCopier(
     IPageReader sourceReader, ICheckpointReader sourcePos,
     IMemoryStreamManager streamManager,
     IPageWriter targetWriter, ICheckpointWriter targetPos)
 {
     // Arguments are stored as given; no validation happens here.
     _sourceReader = sourceReader;
     SourcePos = sourcePos;
     _streamManager = streamManager;
     _targetWriter = targetWriter;
     TargetPos = targetPos;
 }
Пример #9
0
        /// <summary>
        /// Renders one page of <c>Context.Document</c> to PNG and stores the encoded bytes
        /// in <c>Context.Result</c>.
        /// </summary>
        /// <param name="pageIndex">Index of the page to render, as understood by the document reader.</param>
        /// <param name="width">First value passed to <c>PageDimensions</c>.</param>
        /// <param name="height">Second value passed to <c>PageDimensions</c>.</param>
        public void PageToImage(int pageIndex, int width, int height)
        {
            // The document reader, page reader, decoded image and scratch stream are all
            // disposable; release each one deterministically once the PNG bytes are copied out.
            using (IDocReader docReader = DocLib.Instance.GetDocReader(Context.Document, new PageDimensions(width, height)))
            using (IPageReader pageReader = docReader.GetPageReader(pageIndex))
            using (Image<Bgra32> image = Image.LoadPixelData<Bgra32>(pageReader.GetImage(), pageReader.GetPageWidth(), pageReader.GetPageHeight()))
            using (MemoryStream mem = new MemoryStream())
            {
                image.SaveAsPng(mem);

                // ToArray copies the bytes, so the result stays valid after the stream is disposed.
                Context.Result = new ImageResult(mem.ToArray(), MimeTypes.Png);
            }
        }
Пример #10
0
 /// <summary>
 /// Creates a crawler from externally supplied collaborators and routes page-parser
 /// errors into the data service's log.
 /// </summary>
 /// <param name="pageReader">Page reader to use.</param>
 /// <param name="pageParser">Page parser to use; its error handler is set by this constructor.</param>
 /// <param name="dataService">Data service that receives a <c>CrawlerLog</c> entry per parser error.</param>
 /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
 public GeneralSiteCrawler(IPageReader pageReader, IPageParser pageParser, IDataService dataService)
 {
     // Arguments are validated in declaration order.
     if (pageReader == null)
     {
         throw new ArgumentNullException(nameof(pageReader));
     }

     if (pageParser == null)
     {
         throw new ArgumentNullException(nameof(pageParser));
     }

     if (dataService == null)
     {
         throw new ArgumentNullException(nameof(dataService));
     }

     this.pageReader  = pageReader;
     this.pageParser  = pageParser;
     this.dataService = dataService;

     // Each reported error becomes one log entry stamped with the local time.
     this.pageParser.SetErrorHandler(
         (url, exception) => this.dataService.AddLog(
             new CrawlerLog { Url = url, LogTime = DateTime.Now, Message = exception.Message }));
 }
Пример #11
0
        /// <summary>
        /// Creates a crawler for the given site with the built-in collaborators: a database-backed
        /// data service, regex-based item reader and page parser, an <c>HttpClientReader</c>, and a
        /// sequential page reader. Page-parser errors are written to the data service's log.
        /// </summary>
        /// <param name="siteParameter">Site configuration passed to the item reader, page reader and page parser.</param>
        public GeneralSiteCrawler(SiteParameter siteParameter)
        {
            this.dataService = new DbDataService(CrawlerDbHelper.GetContext());

            IItemReader items = new RegexItemReader(siteParameter);
            IHtmlReader html  = new HttpClientReader();

            // The page reader and the page parser share the same HTML reader instance.
            this.pageReader = new SequentialPageReader(siteParameter, html, items);
            this.pageParser = new RegexPageParser(siteParameter, html);

            // Each reported error becomes one log entry stamped with the local time.
            this.pageParser.SetErrorHandler(
                (url, exception) => this.dataService.AddLog(
                    new CrawlerLog { Url = url, LogTime = DateTime.Now, Message = exception.Message }));
        }
Пример #12
0
        /// <summary>
        /// Renders the page with annotations in grayscale and returns it encoded as PNG.
        /// </summary>
        /// <param name="pageReader">Source of the raw image bytes and of the page width and height.</param>
        /// <returns>The PNG-encoded image bytes.</returns>
        private static byte[] GetModifiedImage(IPageReader pageReader)
        {
            var pixels = pageReader.GetImage(RenderFlags.RenderAnnotations | RenderFlags.Grayscale);

            using (var bitmap = new Bitmap(pageReader.GetPageWidth(), pageReader.GetPageHeight(), PixelFormat.Format32bppArgb))
            using (var output = new MemoryStream())
            {
                // Load the raw bytes into the page-sized bitmap, then encode.
                bitmap.AddBytes(pixels);
                bitmap.Save(output, ImageFormat.Png);

                return output.ToArray();
            }
        }
Пример #13
0
        /// <summary>
        /// Creates a new instance of <see cref="ContainerReader"/>.
        /// </summary>
        /// <param name="stream">The <see cref="Stream"/> to read.</param>
        /// <param name="closeOnDispose"><c>True</c> to close the stream when disposed, otherwise <c>false</c>.</param>
        /// <remarks>
        /// A seekable stream gets a regular page reader and sets <c>CanSeek</c> to <c>true</c>;
        /// any other stream gets a forward-only page reader and leaves <c>CanSeek</c> untouched.
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
        public ContainerReader(Stream stream, bool closeOnDispose)
        {
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }

            // Created before the page reader because ProcessNewStream is handed to the reader
            // factory below (presumably it records new streams in this list; confirm there).
            _packetProviders = new List<WeakReference<Contracts.IPacketProvider>>();

            if (!stream.CanSeek)
            {
                _reader = CreateForwardOnlyPageReader(stream, closeOnDispose, ProcessNewStream);
                return;
            }

            _reader = CreatePageReader(stream, closeOnDispose, ProcessNewStream);
            CanSeek = true;
        }
Пример #14
0
        /// <summary>
        /// Loads the page's raw image into a page-sized bitmap and returns it encoded as PNG.
        /// </summary>
        /// <param name="pageReader">Source of the raw image bytes and of the page width and height.</param>
        /// <returns>The PNG-encoded image bytes.</returns>
        private static byte[] GetModifiedImage(IPageReader pageReader)
        {
            var rawBytes = pageReader.GetImage();

            var width  = pageReader.GetPageWidth();
            var height = pageReader.GetPageHeight();

            // The page's character list used to be fetched here but nothing consumed it
            // (the only use was commented out), so the lookup was removed.
            using (var bmp = new Bitmap(width, height, PixelFormat.Format32bppArgb))
            {
                bmp.AddBytes(rawBytes);

                using (var stream = new MemoryStream())
                {
                    bmp.Save(stream, ImageFormat.Png);

                    return stream.ToArray();
                }
            }
        }
Пример #15
0
 /// <summary>
 /// Creates a reader over the given checkpoint and page reader and allocates its
 /// working buffer of <c>Limit</c> bytes.
 /// </summary>
 /// <param name="position">Checkpoint reader exposed through <c>Position</c>.</param>
 /// <param name="messages">Page reader exposed through <c>Messages</c>.</param>
 public MessageReader(
     ICheckpointReader position,
     IPageReader messages)
 {
     Position = position;
     Messages = messages;

     // One buffer is allocated up front for the lifetime of this instance.
     _buffer = new byte[Limit];
 }
Пример #16
0
 /// <summary>
 /// Creates a reader over the given checkpoint and page reader and allocates its
 /// working buffer of <c>Limit</c> bytes.
 /// </summary>
 /// <param name="position">Checkpoint reader kept in <c>_position</c>.</param>
 /// <param name="messages">Page reader kept in <c>_messages</c>.</param>
 public MessageReader(ICheckpointReader position, IPageReader messages) {
     // One buffer is allocated up front for the lifetime of this instance.
     _buffer = new byte[Limit];

     _messages = messages;
     _position = position;
 }
Пример #17
0
 /// <summary>
 /// Disposes the underlying reader, if there is one, and clears the reference to it.
 /// Calling this again afterwards does nothing.
 /// </summary>
 public void Dispose()
 {
     if (_reader != null)
     {
         _reader.Dispose();
         _reader = null;
     }
 }
Пример #18
0
 /// <summary>
 /// Sets up the shared fixture: an in-memory stream, a writer for preparing its
 /// contents, and a page reader over the same stream.
 /// </summary>
 public PageReadStreamTests()
 {
     _mem = new MemoryStream();

     // leaveOpen keeps _mem usable after the writer is disposed.
     _given = new BinaryWriter(_mem, Encoding.UTF8, leaveOpen: true);

     _reader = new MemoryPageReader(_mem);
 }
Пример #19
0
        /// <summary>
        /// Creates a target for the given URI, read through the given page reader.
        /// </summary>
        /// <param name="attempts">Value assigned to <c>Attempts</c>.</param>
        /// <param name="uri">Target address; must not be <c>null</c>.</param>
        /// <param name="reader">Page reader used for this target; must not be <c>null</c>.</param>
        /// <param name="lifes">Value assigned to <c>Lifes</c>; defaults to 1.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="uri"/> or <paramref name="reader"/> is <c>null</c>. This derives from
        /// <see cref="ArgumentException"/>, so handlers written for the former exception type still apply.
        /// </exception>
        public DevourTarget(int attempts, Uri uri, IPageReader reader, int lifes = 1)
        {
            // Report which argument was missing rather than a generic failure.
            if (uri == null)
            {
                throw new ArgumentNullException(nameof(uri), "Bad arguments");
            }

            if (reader == null)
            {
                throw new ArgumentNullException(nameof(reader), "Bad arguments");
            }

            Attempts = attempts;
            Lifes = lifes;
            _uri = uri;
            _reader = reader;
        }
Пример #20
0
 /// <summary>
 /// Prepares the test fixture: a memory stream, a binary writer used to stage data
 /// in it, and a memory page reader over that same stream.
 /// </summary>
 public PageReadStreamTests()
 {
     _mem = new MemoryStream();

     // The writer must not close _mem when disposed, hence leaveOpen.
     _given = new BinaryWriter(_mem, Encoding.UTF8, leaveOpen: true);

     _reader = new MemoryPageReader(_mem);
 }
Пример #21
0
 /// <summary>
 /// Creates a crawler from an externally supplied page reader and page parser.
 /// </summary>
 /// <param name="pageReader">Page reader to use.</param>
 /// <param name="pageParser">Page parser to use.</param>
 /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
 public GeneralSiteCrawler(IPageReader pageReader, IPageParser pageParser)
 {
     // Arguments are validated in declaration order.
     if (pageReader == null)
     {
         throw new ArgumentNullException(nameof(pageReader));
     }

     if (pageParser == null)
     {
         throw new ArgumentNullException(nameof(pageParser));
     }

     this.pageReader = pageReader;
     this.pageParser = pageParser;
 }
 /// <summary>
 /// Creates a page reader that keeps references to another page reader and to a
 /// document reader.
 /// </summary>
 /// <param name="pageReader">Page reader stored in the <c>pageReader</c> member.</param>
 /// <param name="doc2">Document reader stored in the <c>doc2</c> member.</param>
 public PageReader(
     IPageReader pageReader,
     IDocReader doc2)
 {
     // Both references are stored as given; no validation happens here.
     this.pageReader = pageReader;
     this.doc2 = doc2;
 }