Beispiel #1
0
        /// <summary>
        /// Creates the statistics reporter: stores the pipeline components, opens the
        /// stats file for appending, registers one <see cref="StatInfo"/> per reported
        /// column and writes the result of <c>GetStatHeader()</c> as the first line.
        /// </summary>
        /// <param name="filepath">
        /// Requested stats file path. It is passed through
        /// <c>Utility.CreateNewFileOrIndexNameIfExists</c> before the file is opened
        /// (presumably to avoid clobbering an existing file; confirm against that helper).
        /// </param>
        /// <param name="fetcho">Fetch stage whose counters are reported.</param>
        /// <param name="queueo">Queue stage whose counters are reported.</param>
        /// <param name="reado">Link-reading stage whose counters are reported.</param>
        public Stato(string filepath, Fetcho fetcho, Queueo queueo, ReadLinko reado)
        {
            Running     = true;
            this.fetcho = fetcho;
            this.queueo = queueo;
            this.reado  = reado;

            filepath = Utility.CreateNewFileOrIndexNameIfExists(filepath);
            // Opened in append mode; other processes may read the file while it is written.
            writer   = new StreamWriter(new FileStream(filepath, FileMode.Append, FileAccess.Write, FileShare.Read));

            // NOTE: the order of the Stats.Add calls below is the order in which the
            // entries are stored, so do not reorder them casually.

            // fetcho stats
            Stats.Add(new StatInfo()
            {
                Header = "Chk #", Format = "{0,5}", Calculate = () => fetcho.ActiveChunkCount
            });
            Stats.Add(new StatInfo()
            {
                Header = "IP w#", Format = "{0,5}", Calculate = () => fetcho.WaitingFromIPCongestion
            });
            Stats.Add(new StatInfo()
            {
                Header = "Fe t#", Format = "{0,5}", Calculate = () => fetcho.WaitingForFetchTimeout
            });
            Stats.Add(new StatInfo()
            {
                Header = "Act #", Format = "{0,5}", Calculate = () => ResourceFetcher.ActiveFetches
            });
            Stats.Add(new StatInfo()
            {
                Header = "Wr w#", Format = "{0,5}", Calculate = () => ResourceFetcher.WaitingToWrite
            });
            Stats.Add(new StatInfo()
            {
                Header = " Exception #", Format = "{0,12}", Calculate = () => ResourceFetcher.FetchExceptions
            });
            Stats.Add(new StatInfo()
            {
                Header = " Completed #", Format = "{0,12}", Calculate = () => fetcho.CompletedFetches
            });
            Stats.Add(new StatInfo()
            {
                Header = "Uptime          ", Format = "{0,16}", Calculate = () => fetcho.Uptime
            });
            Stats.Add(new StatInfo()
            {
                Header    = "pg/print",
                Format    = "{0,8}",
                Calculate = () => {
                    // Completed fetches since the previous evaluation of this stat.
                    // The counter is read exactly once: reading it a second time to
                    // store the baseline would silently drop any fetch that completed
                    // between the two reads from every future delta.
                    var current = fetcho.CompletedFetches;
                    var diff    = current - completed;
                    completed   = current;
                    return(diff);
                }
            });
            Stats.Add(new StatInfo()
            {
                Header = "tot pg/m", Format = "{0,8}", Calculate = () => fetcho.TotalPagesPerMinute
            });

            // queue-o stats
            Stats.Add(new StatInfo()
            {
                Header = "Q In#", Format = "{0,5}", Calculate = () => queueo.InboxCount
            });
            Stats.Add(new StatInfo()
            {
                Header = "Duplicates #", Format = "{0,12}", Calculate = () => queueo.DuplicatesRejected
            });
            Stats.Add(new StatInfo()
            {
                Header = "Qp t#", Format = "{0,5}", Calculate = () => queueo.ActivePreQueueTasks
            });
            Stats.Add(new StatInfo()
            {
                Header = "Qq i#", Format = "{0,5}", Calculate = () => queueo.BufferCount
            });
            Stats.Add(new StatInfo()
            {
                Header = "DB w#", Format = "{0,5}", Calculate = () => DatabasePool.WaitingForDatabase
            });
            Stats.Add(new StatInfo()
            {
                Header = "Qv t#", Format = "{0,5}", Calculate = () => queueo.ActiveValidationTasks
            });
            Stats.Add(new StatInfo()
            {
                Header = "Qo i#", Format = "{0,5}", Calculate = () => queueo.OutboxCount
            });
            Stats.Add(new StatInfo()
            {
                Header = "    Accepted", Format = "{0,12}", Calculate = () => queueo.LinksAccepted
            });
            Stats.Add(new StatInfo()
            {
                Header = "    Rejected", Format = "{0,12}", Calculate = () => queueo.LinksRejected
            });

            // read-linko stats
            Stats.Add(new StatInfo()
            {
                Header = "DSIdx", Format = "{0,5}", Calculate = () => reado.CurrentDataSourceIndex
            });
            Stats.Add(new StatInfo()
            {
                Header = "PktIx", Format = "{0,5}", Calculate = () => reado.CurrentPacketIndex
            });
            Stats.Add(new StatInfo()
            {
                Header = "    Rsc Proc", Format = "{0,12}", Calculate = () => reado.ResourcesProcessed
            });
            Stats.Add(new StatInfo()
            {
                Header = "   Extracted", Format = "{0,12}", Calculate = () => reado.LinksExtracted
            });
            Stats.Add(new StatInfo()
            {
                Header = "ROut", Format = "{0,4}", Calculate = () => reado.OutboxCount
            });

            // Must come after every Stats.Add call above.
            writer.WriteLine(GetStatHeader());
        }
Beispiel #2
0
        /// <summary>
        /// Program entry point: validates the command line, performs process-wide
        /// setup (logging, certificate policy, console encoding, database), wires the
        /// pipeline stages together through buffer blocks, runs them until all have
        /// completed, and finally closes the writers and the database pool.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c> is handed to <c>SetupConfiguration</c>; <c>args[1]</c> is the
        /// integer start packet index handed to <c>ReadLinko</c>. If fewer than two
        /// arguments are given, or the index is not a valid integer, usage is shown and
        /// the method returns without starting anything.
        /// </param>
        /// <returns>A task that completes when every pipeline task has finished.</returns>
        public static async Task Main(string[] args)
        {
            if (args.Length < 2)
            {
                Usage();
                return;
            }

            string paths = args[0];

            // A malformed index used to be ignored and silently treated as 0, which
            // restarts reading from the first packet. Reject it explicitly instead.
            if (!int.TryParse(args[1], out int startPacketIndex))
            {
                Usage();
                return;
            }

            // turn on log4net
            log4net.Config.XmlConfigurator.Configure();

            // catch all errors and log them
            AppDomain.CurrentDomain.UnhandledException += (sender, eventArgs) => Utility.LogException(eventArgs.ExceptionObject as Exception);

            // ignore all certificate validation issues
            // NOTE(review): this accepts every server certificate for the whole process.
            ServicePointManager.ServerCertificateValidationCallback = (sender, cert, chain, sslPolicyErrors) => true;

            // console encoding will now be unicode
            Console.OutputEncoding = System.Text.Encoding.UTF8;

            // database init
            DatabasePool.Initialise();

            // configure fetcho
            await SetupConfiguration(paths);

            // upgrade the database
            DatabaseUpgrader.Upgrade();

            // buffers to connect the separate tasks together
            BufferBlock <IEnumerable <QueueItem> > prioritisationBuffer = CreateBufferBlock(DefaultBufferBlockLimit);
            // really beef this buffer's max size up: it takes forever to accumulate, so we don't want to lose any
            BufferBlock <IEnumerable <QueueItem> > fetchQueueBuffer = CreateBufferBlock(DefaultBufferBlockLimit * 1000);
            //BufferBlock<IEnumerable<QueueItem>> requeueBuffer = CreateBufferBlock(DefaultBufferBlockLimit);
            ITargetBlock <IEnumerable <QueueItem> > outboxWriter   = CreateOutboxWriter();
            BufferBlock <IWebResourceWriter>        dataWriterPool = CreateDataWriterPool();

            // fetcho!
            var readLinko = new ReadLinko(prioritisationBuffer, startPacketIndex);
            var queueo    = new Queueo(prioritisationBuffer, fetchQueueBuffer, outboxWriter); // DataflowBlock.NullTarget<IEnumerable<QueueItem>>()
            var fetcho    = new Fetcho(fetchQueueBuffer, DataflowBlock.NullTarget <IEnumerable <QueueItem> >(), dataWriterPool);
            var stato     = new Stato("stats.csv", fetcho, queueo, readLinko);
            // The callback handed to Controlo shuts down the other four components.
            var controlo  = new Controlo(prioritisationBuffer, fetchQueueBuffer, dataWriterPool, () =>
            {
                readLinko.Shutdown();
                queueo.Shutdown();
                fetcho.Shutdown();
                stato.Shutdown();
            });
            //var requeueWriter = new BufferBlockObjectFileWriter<IEnumerable<QueueItem>>(cfg.DataSourcePath, "requeue", requeueBuffer);
            //var rejectsWriter = new BufferBlockObjectFileWriter<IEnumerable<QueueItem>>(cfg.DataSourcePath, "rejects", new NullTarget);

            // execute
            // Stages are started consumer-first with a one second pause in between
            // (presumably so each stage is running before the one feeding it; confirm).
            var tasks = new List <Task>();

            tasks.Add(stato.Process());
            tasks.Add(fetcho.Process());
            await Task.Delay(1000);

            tasks.Add(queueo.Process());
            await Task.Delay(1000);

            tasks.Add(readLinko.Process());
            tasks.Add(controlo.Process());

            try
            {
                // WhenAll completes only after every task has finished, so the
                // cleanup below never runs while a stage is still active.
                await Task.WhenAll(tasks).ConfigureAwait(false);
            }
            finally
            {
                // Release writers and database connections even when a stage faulted;
                // previously an exception from WhenAll skipped this cleanup entirely.
                CloseAllWriters(dataWriterPool);
                DatabasePool.DestroyAll();
            }
        }