/// <summary>
/// Builds an <see cref="ISiteCrawler"/> for the given site by resolving each processing
/// component from the container under the name configured in
/// <c>siteParameter.CustomProcessors</c>.
/// </summary>
/// <param name="siteParameter">
/// Site configuration. It is also handed to the resolved components as the
/// <c>siteParameter</c> constructor override.
/// </param>
/// <returns>A <see cref="GeneralSiteCrawler"/> wired with the resolved page reader and page parser.</returns>
/// <exception cref="ArgumentNullException"><paramref name="siteParameter"/> is <c>null</c>.</exception>
public static ISiteCrawler Create(SiteParameter siteParameter)
{
    if (siteParameter == null)
    {
        throw new ArgumentNullException(nameof(siteParameter));
    }

    // Registration names of the components to resolve. The previous "IDataService" lookup
    // was dropped because its result was never used by this factory.
    string htmlReaderName = GetValueOrDefault(siteParameter.CustomProcessors, "IHtmlReader");
    string pageParserName = GetValueOrDefault(siteParameter.CustomProcessors, "IPageParser");
    string itemReaderName = GetValueOrDefault(siteParameter.CustomProcessors, "IItemReader");
    string pageReaderName = GetValueOrDefault(siteParameter.CustomProcessors, "IPageReader");

    // The HTML reader is resolved once and shared by the page reader and the page parser.
    IHtmlReader htmlReader = Container.Resolve<IHtmlReader>(htmlReaderName);
    ParameterOverride htmlReaderParameter = new ParameterOverride("htmlReader", htmlReader);
    ParameterOverride siteParameterParameter = new ParameterOverride("siteParameter", siteParameter);

    IItemReader itemReader = Container.Resolve<IItemReader>(itemReaderName, siteParameterParameter);
    ParameterOverride itemReaderParameter = new ParameterOverride("itemReader", itemReader);

    IPageReader pageReader = Container.Resolve<IPageReader>(
        pageReaderName,
        siteParameterParameter,
        htmlReaderParameter,
        itemReaderParameter);
    IPageParser pageParser = Container.Resolve<IPageParser>(
        pageParserName,
        siteParameterParameter,
        htmlReaderParameter);

    return new GeneralSiteCrawler(pageReader, pageParser);
}
/// <summary>
/// Creates a fetcher over the given remote page reader and checkpoint.
/// The arguments are stored as supplied; no validation is performed here.
/// </summary>
/// <param name="remote">Page reader for the remote source.</param>
/// <param name="remotePos">Checkpoint reader for the remote source.</param>
/// <param name="streamManager">Memory stream manager kept for later use.</param>
/// <param name="streamName">Value exposed through <c>StreamName</c>.</param>
public MessageFetcher(
    IPageReader remote,
    ICheckpointReader remotePos,
    IMemoryStreamManager streamManager,
    string streamName)
{
    this._remote = remote;
    this._remotePos = remotePos;
    this._streamManager = streamManager;
    this.StreamName = streamName;
}
/// <summary>
/// Draws the page image supplied by <paramref name="pageReader"/> onto a white background
/// of the same size.
/// </summary>
/// <param name="pageReader">Source of the raw image bytes and of the page width and height.</param>
/// <returns>
/// A newly created 32bpp ARGB bitmap. Ownership passes to the caller, who must dispose it.
/// </returns>
private static Image GetModifiedImage(IPageReader pageReader)
{
    var rawBytes = pageReader.GetImage();
    var width = pageReader.GetPageWidth();
    var height = pageReader.GetPageHeight();

    // Only consumed by the optional debug overlay that is commented out below.
    var characters = pageReader.GetCharacters();

    using (var bmp = new Bitmap(width, height, PixelFormat.Format32bppArgb))
    {
        var background = new Bitmap(width, height, PixelFormat.Format32bppArgb);

        try
        {
            bmp.AddBytes(rawBytes);

            using (var g = Graphics.FromImage(background))
            {
                g.Clear(Color.White);
                g.DrawImageUnscaled(bmp, Point.Empty);
            }

            //bmp.DrawRectangles(characters);

            return background;
        }
        catch
        {
            // The caller never receives the bitmap on failure, so release it here
            // instead of leaking the underlying GDI+ handle.
            background.Dispose();
            throw;
        }
    }
}
/// <summary>
/// Creates a crawler with the default component set: a <see cref="RegexItemReader"/>,
/// an <see cref="HttpClientReader"/>, a <see cref="SequentialPageReader"/> and a
/// <see cref="RegexPageParser"/>.
/// </summary>
/// <param name="siteParameter">Site configuration passed to the item reader, page reader and page parser.</param>
public GeneralSiteCrawler(SiteParameter siteParameter)
{
    IItemReader items = new RegexItemReader(siteParameter);

    // One HTML reader instance is shared by the page reader and the page parser.
    IHtmlReader html = new HttpClientReader();

    this.pageReader = new SequentialPageReader(siteParameter, html, items);
    this.pageParser = new RegexPageParser(siteParameter, html);
}
/// <summary>
/// Creates a stream over <paramref name="reader"/> that starts at position
/// <paramref name="start"/>.
/// </summary>
/// <param name="reader">Underlying page reader.</param>
/// <param name="start">Initial value of the stream position.</param>
/// <param name="max">Stored as the stream's maximum; how it is enforced is not visible in this constructor.</param>
/// <param name="buffer">Backing array for the internal memory stream.</param>
public PageReadStream(IPageReader reader, long start, long max, byte[] buffer)
{
    _reader = reader;
    _max = max;
    _buffer = buffer;
    _position = start;

    // Wrap the caller's buffer, then truncate so the memory stream starts out empty.
    var window = new MemoryStream(buffer);
    window.SetLength(0);
    _mem = window;
}
/// <summary>
/// Creates a target for the given URI and page reader.
/// </summary>
/// <param name="attempts">Initial value of <c>Attempts</c>.</param>
/// <param name="uri">Target URI; must not be <c>null</c>.</param>
/// <param name="reader">Page reader used for the target; must not be <c>null</c>.</param>
/// <param name="lifes">Initial value of <c>Lifes</c>; defaults to 1.</param>
/// <exception cref="ArgumentException">
/// <paramref name="uri"/> or <paramref name="reader"/> is <c>null</c>.
/// </exception>
public DevourTarget(int attempts, Uri uri, IPageReader reader, int lifes = 1)
{
    if (uri == null)
    {
        throw new ArgumentException("Bad arguments");
    }

    if (reader == null)
    {
        throw new ArgumentException("Bad arguments");
    }

    Attempts = attempts;
    Lifes = lifes;
    _uri = uri;
    _reader = reader;
}
/// <summary>
/// Creates a copier between a source (page reader and checkpoint) and a target
/// (page writer and checkpoint). The arguments are stored as supplied; no validation
/// is performed here.
/// </summary>
/// <param name="sourceReader">Page reader for the source.</param>
/// <param name="sourcePos">Checkpoint reader exposed through <c>SourcePos</c>.</param>
/// <param name="streamManager">Memory stream manager kept for later use.</param>
/// <param name="targetWriter">Page writer for the target.</param>
/// <param name="targetPos">Checkpoint writer exposed through <c>TargetPos</c>.</param>
public MessageCopier(
    IPageReader sourceReader,
    ICheckpointReader sourcePos,
    IMemoryStreamManager streamManager,
    IPageWriter targetWriter,
    ICheckpointWriter targetPos)
{
    this._sourceReader = sourceReader;
    this.SourcePos = sourcePos;
    this._streamManager = streamManager;
    this._targetWriter = targetWriter;
    this.TargetPos = targetPos;
}
/// <summary>
/// Renders one page of <c>Context.Document</c> to a PNG and stores the encoded bytes in
/// <c>Context.Result</c>.
/// </summary>
/// <param name="pageIndex">Index of the page passed to the document reader.</param>
/// <param name="width">Width passed to <see cref="PageDimensions"/>.</param>
/// <param name="height">Height passed to <see cref="PageDimensions"/>.</param>
public void PageToImage(int pageIndex, int width, int height)
{
    // The document reader, page reader, image and stream are all disposable;
    // release them deterministically once the PNG bytes have been copied out.
    using (IDocReader docReader = DocLib.Instance.GetDocReader(Context.Document, new PageDimensions(width, height)))
    using (IPageReader pageReader = docReader.GetPageReader(pageIndex))
    using (MemoryStream mem = new MemoryStream())
    using (Image<Bgra32> image = Image.LoadPixelData<Bgra32>(
        pageReader.GetImage(),
        pageReader.GetPageWidth(),
        pageReader.GetPageHeight()))
    {
        image.SaveAsPng(mem);

        // ToArray copies the buffer, so the result stays valid after the stream is disposed.
        Context.Result = new ImageResult(mem.ToArray(), MimeTypes.Png);
    }
}
/// <summary>
/// Creates a crawler from explicitly supplied components and registers an error handler
/// on the page parser that records each reported error through the data service.
/// </summary>
/// <param name="pageReader">Page reader to use; must not be <c>null</c>.</param>
/// <param name="pageParser">Page parser to use; must not be <c>null</c>.</param>
/// <param name="dataService">Data service that receives the error log entries; must not be <c>null</c>.</param>
/// <exception cref="ArgumentNullException">Any of the arguments is <c>null</c>.</exception>
public GeneralSiteCrawler(IPageReader pageReader, IPageParser pageParser, IDataService dataService)
{
    if (pageReader == null)
    {
        throw new ArgumentNullException(nameof(pageReader));
    }

    if (pageParser == null)
    {
        throw new ArgumentNullException(nameof(pageParser));
    }

    if (dataService == null)
    {
        throw new ArgumentNullException(nameof(dataService));
    }

    this.pageReader = pageReader;
    this.pageParser = pageParser;
    this.dataService = dataService;

    // Each parser error becomes a log entry stamped with the local time at which it was reported.
    this.pageParser.SetErrorHandler(
        (url, exception) => this.dataService.AddLog(
            new CrawlerLog
            {
                Url = url,
                LogTime = DateTime.Now,
                Message = exception.Message
            }));
}
/// <summary>
/// Creates a crawler with the default component set and a database-backed data service,
/// and registers an error handler on the page parser that records each reported error
/// through that data service.
/// </summary>
/// <param name="siteParameter">Site configuration passed to the item reader, page reader and page parser.</param>
public GeneralSiteCrawler(SiteParameter siteParameter)
{
    this.dataService = new DbDataService(CrawlerDbHelper.GetContext());

    IItemReader items = new RegexItemReader(siteParameter);

    // One HTML reader instance is shared by the page reader and the page parser.
    IHtmlReader html = new HttpClientReader();

    this.pageReader = new SequentialPageReader(siteParameter, html, items);
    this.pageParser = new RegexPageParser(siteParameter, html);

    // Each parser error becomes a log entry stamped with the local time at which it was reported.
    this.pageParser.SetErrorHandler(
        (url, exception) => this.dataService.AddLog(
            new CrawlerLog
            {
                Url = url,
                LogTime = DateTime.Now,
                Message = exception.Message
            }));
}
/// <summary>
/// Renders the page with annotations in grayscale and returns it encoded as PNG.
/// </summary>
/// <param name="pageReader">Source of the raw image bytes and of the page width and height.</param>
/// <returns>The PNG-encoded image bytes.</returns>
private static byte[] GetModifiedImage(IPageReader pageReader)
{
    var pixels = pageReader.GetImage(RenderFlags.RenderAnnotations | RenderFlags.Grayscale);
    var pageWidth = pageReader.GetPageWidth();
    var pageHeight = pageReader.GetPageHeight();

    using (var bitmap = new Bitmap(pageWidth, pageHeight, PixelFormat.Format32bppArgb))
    using (var output = new MemoryStream())
    {
        bitmap.AddBytes(pixels);
        bitmap.Save(output, ImageFormat.Png);

        return output.ToArray();
    }
}
/// <summary>
/// Creates a new instance of <see cref="ContainerReader"/>.
/// </summary>
/// <param name="stream">
/// The <see cref="Stream"/> to read. A seekable stream gets a seekable page reader and sets
/// <c>CanSeek</c> to <c>true</c>; any other stream gets a forward-only page reader.
/// </param>
/// <param name="closeOnDispose"><c>True</c> to close the stream when disposed, otherwise <c>false</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
public ContainerReader(Stream stream, bool closeOnDispose)
{
    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }

    // Created before either page reader, which receives ProcessNewStream as its callback.
    _packetProviders = new List<WeakReference<Contracts.IPacketProvider>>();

    if (!stream.CanSeek)
    {
        _reader = CreateForwardOnlyPageReader(stream, closeOnDispose, ProcessNewStream);
    }
    else
    {
        _reader = CreatePageReader(stream, closeOnDispose, ProcessNewStream);
        CanSeek = true;
    }
}
/// <summary>
/// Renders the page image and returns it encoded as PNG.
/// </summary>
/// <param name="pageReader">Source of the raw image bytes and of the page width and height.</param>
/// <returns>The PNG-encoded image bytes.</returns>
private static byte[] GetModifiedImage(IPageReader pageReader)
{
    var pixels = pageReader.GetImage();
    var pageWidth = pageReader.GetPageWidth();
    var pageHeight = pageReader.GetPageHeight();

    // NOTE(review): only the commented-out debug overlay below uses this value.
    var characters = pageReader.GetCharacters();

    using (var bitmap = new Bitmap(pageWidth, pageHeight, PixelFormat.Format32bppArgb))
    {
        bitmap.AddBytes(pixels);

        //bitmap.DrawRectangles(characters);

        using (var output = new MemoryStream())
        {
            bitmap.Save(output, ImageFormat.Png);

            return output.ToArray();
        }
    }
}
/// <summary>
/// Creates a reader over the given checkpoint and message pages and allocates an
/// internal buffer of <c>Limit</c> bytes.
/// </summary>
/// <param name="position">Checkpoint reader exposed through <c>Position</c>.</param>
/// <param name="messages">Page reader exposed through <c>Messages</c>.</param>
public MessageReader(ICheckpointReader position, IPageReader messages)
{
    this.Position = position;
    this.Messages = messages;
    this._buffer = new byte[Limit];
}
/// <summary>
/// Creates a reader over the given checkpoint and message pages and allocates an
/// internal buffer of <c>Limit</c> bytes.
/// </summary>
/// <param name="position">Checkpoint reader kept for later use.</param>
/// <param name="messages">Page reader kept for later use.</param>
public MessageReader(ICheckpointReader position, IPageReader messages)
{
    this._position = position;
    this._messages = messages;
    this._buffer = new byte[Limit];
}
/// <summary>
/// Disposes the underlying reader, if there is one, and clears the reference to it.
/// </summary>
public void Dispose()
{
    // Read the field once so the null check and the call see the same instance.
    var current = _reader;
    if (current != null)
    {
        current.Dispose();
    }

    _reader = null;
}
/// <summary>
/// Sets up the fixture: an in-memory stream, a UTF-8 binary writer used to populate it,
/// and a <see cref="MemoryPageReader"/> over that same stream.
/// </summary>
public PageReadStreamTests()
{
    _mem = new MemoryStream();

    // leaveOpen keeps the shared stream usable if the writer is disposed.
    _given = new BinaryWriter(_mem, Encoding.UTF8, leaveOpen: true);

    _reader = new MemoryPageReader(_mem);
}
/// <summary>
/// Creates a target for the given URI and page reader.
/// </summary>
/// <param name="attempts">Initial value of <c>Attempts</c>.</param>
/// <param name="uri">Target URI; must not be <c>null</c>.</param>
/// <param name="reader">Page reader used for the target; must not be <c>null</c>.</param>
/// <param name="lifes">Initial value of <c>Lifes</c>; defaults to 1.</param>
/// <exception cref="ArgumentException">
/// <paramref name="uri"/> or <paramref name="reader"/> is <c>null</c>.
/// </exception>
public DevourTarget(int attempts, Uri uri, IPageReader reader, int lifes = 1)
{
    bool hasRequiredArguments = uri != null && reader != null;
    if (!hasRequiredArguments)
    {
        throw new ArgumentException("Bad arguments");
    }

    Attempts = attempts;
    Lifes = lifes;
    _uri = uri;
    _reader = reader;
}
/// <summary>
/// Creates a crawler from an explicitly supplied page reader and page parser.
/// </summary>
/// <param name="pageReader">Page reader to use; must not be <c>null</c>.</param>
/// <param name="pageParser">Page parser to use; must not be <c>null</c>.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="pageReader"/> or <paramref name="pageParser"/> is <c>null</c>.
/// </exception>
public GeneralSiteCrawler(IPageReader pageReader, IPageParser pageParser)
{
    if (pageReader == null)
    {
        throw new ArgumentNullException(nameof(pageReader));
    }

    if (pageParser == null)
    {
        throw new ArgumentNullException(nameof(pageParser));
    }

    this.pageReader = pageReader;
    this.pageParser = pageParser;
}
/// <summary>
/// Creates a page reader that keeps the supplied page reader and document reader.
/// The arguments are stored as supplied; no validation is performed here.
/// </summary>
/// <param name="pageReader">Page reader stored in the <c>pageReader</c> field.</param>
/// <param name="doc2">Document reader stored in the <c>doc2</c> field.</param>
public PageReader(IPageReader pageReader, IDocReader doc2)
{
    this.pageReader = pageReader;
    this.doc2 = doc2;
}