Beispiel #1
0
        /// <summary>
        /// Re-checks every record of the file database against the files stored in the
        /// directory of the database file and writes the refreshed record back.
        /// </summary>
        /// <remarks>
        /// For a record with a negative size, headers are downloaded for the record's URL
        /// and used to fill in the modification time, size and content type.
        /// A record is flagged available when its file exists on disk, and out of date
        /// when the file's length or last write time differs from the stored values.
        /// </remarks>
        /// <param name="databaseFileName">
        /// Path of the database file. Its directory is used as the base directory for
        /// the record file names and for the <c>WebCrawler</c>.
        /// </param>
        private static void CheckFiles(String databaseFileName)
        {
            var fileDatabase = new FileDatabase(databaseFileName);

            int count = fileDatabase.GetFileCount();

            var baseDirectory = Path.GetDirectoryName(databaseFileName);
            var webCrawler = new WebCrawler(baseDirectory);

            for (var i = 0; i < count; i++)
            {
                var record = fileDatabase.GetFile(i);

                // A negative size is treated as "no metadata yet": fetch it from the headers.
                if (record.Size < 0)
                {
                    var headers = webCrawler.DownloadHeaders(record.Url);
                    record.Modify(headers.LastModified, headers.ContentLength, headers.ContentType, "", false, false);
                }

                var fileInfo = new FileInfo(Path.Combine(baseDirectory, record.FileName));

                if (fileInfo.Exists)
                {
                    record.Available = true;
                    record.OutOfDate = (fileInfo.Length != record.Size) ||
                        (fileInfo.LastWriteTime != record.Modified);
                }
                else
                {
                    // Previously a record kept its old flags when the file had been removed
                    // from disk, so a deleted file was still reported as available.
                    // A missing file is not available and has nothing that could be out of date.
                    record.Available = false;
                    record.OutOfDate = false;
                }

                fileDatabase.AddOrReplaceFile(record);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Re-checks every record of the file database against the files stored in the
        /// directory of the database file and writes the refreshed record back.
        /// </summary>
        /// <remarks>
        /// For a record with a negative size, headers are downloaded for the record's URL
        /// and used to fill in the modification time, size and content type.
        /// A record is flagged available when its file exists on disk, and out of date
        /// when the file's length or last write time differs from the stored values.
        /// </remarks>
        /// <param name="databaseFileName">
        /// Path of the database file. Its directory is used as the base directory for
        /// the record file names and for the <c>WebCrawler</c>.
        /// </param>
        private static void CheckFiles(String databaseFileName)
        {
            var fileDatabase = new FileDatabase(databaseFileName);

            int count = fileDatabase.GetFileCount();

            var baseDirectory = Path.GetDirectoryName(databaseFileName);
            var webCrawler    = new WebCrawler(baseDirectory);

            for (var i = 0; i < count; i++)
            {
                var record = fileDatabase.GetFile(i);

                // A negative size is treated as "no metadata yet": fetch it from the headers.
                if (record.Size < 0)
                {
                    var headers = webCrawler.DownloadHeaders(record.Url);
                    record.Modify(headers.LastModified, headers.ContentLength, headers.ContentType, "", false, false);
                }

                var onDisk   = new FileInfo(Path.Combine(baseDirectory, record.FileName));
                var isOnDisk = onDisk.Exists;

                // Both flags are always rewritten. Previously they were only touched when the
                // file existed, so a file deleted from disk kept its stale "available" state.
                record.Available = isOnDisk;

                // The short-circuit keeps Length/LastWriteTime from being read for a missing file.
                record.OutOfDate = isOnDisk &&
                                   ((onDisk.Length != record.Size) ||
                                    (onDisk.LastWriteTime != record.Modified));

                fileDatabase.AddOrReplaceFile(record);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Creates a crawler that works inside <paramref name="baseDirectory"/>.
        /// </summary>
        /// <remarks>
        /// The base directory and a <c>_temp</c> directory below it are passed to
        /// <c>EnsureDirectoryExists</c>, and the file database <c>_files.sqlite</c>
        /// inside the base directory is opened.
        /// NOTE(review): the parameter defaults to <c>null</c>, but <c>Path.Combine</c>
        /// rejects a null path, so a null base directory cannot get past this
        /// constructor - confirm whether the default is intended.
        /// </remarks>
        /// <param name="baseDirectory">Directory that holds the temp directory and the file database.</param>
        /// <param name="minDelay">Forwarded unchanged to the base constructor.</param>
        /// <param name="maxDelay">Forwarded unchanged to the base constructor.</param>
        public WebCrawler(String baseDirectory = null, Int32 minDelay = 1000, Int32 maxDelay = 3000)
            : base(baseDirectory, minDelay, maxDelay)
        {
            // Base directory first: everything else lives below it.
            _baseDirectory = baseDirectory;
            EnsureDirectoryExists(baseDirectory);

            // Temp directory below the base directory.
            var tempDirectory = Path.Combine(baseDirectory, "_temp");
            _tempDirectory = tempDirectory;
            EnsureDirectoryExists(tempDirectory);

            // The file database sits directly in the base directory.
            _fileDatabase = new FileDatabase(Path.Combine(baseDirectory, "_files.sqlite"));
        }
Beispiel #4
0
        /// <summary>
        /// Creates a crawler that works inside <paramref name="baseDirectory"/>.
        /// </summary>
        /// <remarks>
        /// The base directory and a <c>_temp</c> directory below it are passed to
        /// <c>EnsureDirectoryExists</c>, and the file database <c>_files.sqlite</c>
        /// inside the base directory is opened.
        /// NOTE(review): the parameter defaults to <c>null</c>, but <c>Path.Combine</c>
        /// rejects a null path, so a null base directory cannot get past this
        /// constructor - confirm whether the default is intended.
        /// </remarks>
        /// <param name="baseDirectory">Directory that holds the temp directory and the file database.</param>
        /// <param name="minDelay">Forwarded unchanged to the base constructor.</param>
        /// <param name="maxDelay">Forwarded unchanged to the base constructor.</param>
        public WebCrawler(String baseDirectory = null, Int32 minDelay = 1000, Int32 maxDelay = 3000)
            : base(baseDirectory, minDelay, maxDelay)
        {
            // Base directory first: everything else lives below it.
            _baseDirectory = baseDirectory;
            EnsureDirectoryExists(baseDirectory);

            // Temp directory below the base directory.
            var tempDirectory = Path.Combine(baseDirectory, "_temp");
            _tempDirectory = tempDirectory;
            EnsureDirectoryExists(tempDirectory);

            // The file database sits directly in the base directory.
            var databasePath = Path.Combine(baseDirectory, "_files.sqlite");
            _fileDatabase = new FileDatabase(databasePath);
        }
Beispiel #5
0
        /// <summary>
        /// Prints the total, available, out-of-date and up-to-date file counts of the
        /// file database to the console.
        /// </summary>
        /// <remarks>
        /// The up-to-date count is the available count minus the out-of-date count; its
        /// percentage is relative to the total count and reported as 0 for an empty database.
        /// </remarks>
        /// <param name="databaseFileName">Path of the database file to summarize.</param>
        private static void PrintInfo(String databaseFileName)
        {
            var fileDatabase = new FileDatabase(databaseFileName);

            int count     = fileDatabase.GetFileCount();
            int available = fileDatabase.GetAvailableFileCount();
            int outOfDate = fileDatabase.GetOutOfDateFileCount();
            int upToDate  = available - outOfDate;

            // With no files the floating-point division by zero used to print "NaN%".
            double upToDatePercent = count > 0 ? upToDate * 100.0 / count : 0.0;

            Console.WriteLine("Total files:         {0}", count);
            Console.WriteLine("Available files:     {0}", available);
            Console.WriteLine("Out-of-date files:   {0}", outOfDate);
            Console.WriteLine("Up-to-date files:    {0} ({1:N1}%)", upToDate, upToDatePercent);
        }
Beispiel #6
0
        /// <summary>
        /// Repeatedly asks the file database for the next record that is not available
        /// or out of date and hands it to the crawler, until the database returns none.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the loop only ends when
        /// <c>GetNextNotAvailableOrOutOfDateFile</c> returns null, so it relies on
        /// <c>WebCrawler.UpdateFile</c> changing the record's state - confirm.
        /// </remarks>
        /// <param name="databaseFileName">
        /// Path of the database file. Its directory is passed to the <c>WebCrawler</c>.
        /// </param>
        private static void UpdateFiles(String databaseFileName)
        {
            var fileDatabase = new FileDatabase(databaseFileName);

            var webCrawler = new WebCrawler(Path.GetDirectoryName(databaseFileName));

            // Fetch-process-fetch: stop as soon as the database has nothing left to update.
            var pendingRecord = fileDatabase.GetNextNotAvailableOrOutOfDateFile();
            while (pendingRecord != null)
            {
                webCrawler.UpdateFile(pendingRecord);
                pendingRecord = fileDatabase.GetNextNotAvailableOrOutOfDateFile();
            }
        }
Beispiel #7
0
        /// <summary>
        /// Writes the number of records in the file database to the console, followed
        /// by the fields of every record in index order.
        /// </summary>
        /// <param name="databaseFileName">Path of the database file to list.</param>
        private static void ListFiles(String databaseFileName)
        {
            var database = new FileDatabase(databaseFileName);
            int total = database.GetFileCount();

            // Header line followed by a blank separator.
            Console.WriteLine("{0} files in database:", total);
            Console.WriteLine();

            var index = 0;
            while (index < total)
            {
                var record = database.GetFile(index);
                index++;

                // One labelled line per field, then a blank line between records.
                Console.WriteLine("Url:       {0}", record.Url);
                Console.WriteLine("FileName:  {0}", record.FileName);
                Console.WriteLine("Modified:  {0}", record.Modified);
                Console.WriteLine("Size:      {0}", record.Size);
                Console.WriteLine("Type:      {0}", record.Type);
                Console.WriteLine("Checksum:  {0}", record.Checksum);
                Console.WriteLine("Available: {0}", record.Available);
                Console.WriteLine("OutOfDate: {0}", record.OutOfDate);
                Console.WriteLine();
            }
        }
Beispiel #8
0
        /// <summary>
        /// Writes the number of records in the file database to the console, followed
        /// by the fields of every record in index order.
        /// </summary>
        /// <param name="databaseFileName">Path of the database file to list.</param>
        private static void ListFiles(String databaseFileName)
        {
            var fileDatabase = new FileDatabase(databaseFileName);

            int count = fileDatabase.GetFileCount();

            // Header line followed by a blank separator.
            Console.WriteLine("{0} files in database:", count);
            Console.WriteLine();

            for (var i = 0; i < count; i++)
            {
                var record = fileDatabase.GetFile(i);

                // One labelled line per field, then a blank line between records.
                Console.WriteLine("Url:       {0}", record.Url);
                Console.WriteLine("FileName:  {0}", record.FileName);
                Console.WriteLine("Modified:  {0}", record.Modified);
                Console.WriteLine("Size:      {0}", record.Size);
                Console.WriteLine("Type:      {0}", record.Type);
                Console.WriteLine("Checksum:  {0}", record.Checksum);
                Console.WriteLine("Available: {0}", record.Available);
                Console.WriteLine("OutOfDate: {0}", record.OutOfDate);
                Console.WriteLine();
            }
        }
Beispiel #9
0
        /// <summary>
        /// Prints the total, available, out-of-date and up-to-date file counts of the
        /// file database to the console.
        /// </summary>
        /// <remarks>
        /// The up-to-date count is the available count minus the out-of-date count; its
        /// percentage is relative to the total count and reported as 0 for an empty database.
        /// </remarks>
        /// <param name="databaseFileName">Path of the database file to summarize.</param>
        private static void PrintInfo(String databaseFileName)
        {
            var fileDatabase = new FileDatabase(databaseFileName);

            int count = fileDatabase.GetFileCount();
            int available = fileDatabase.GetAvailableFileCount();
            int outOfDate = fileDatabase.GetOutOfDateFileCount();
            int upToDate = available - outOfDate;

            // With no files the floating-point division by zero used to print "NaN%".
            double upToDatePercent = 0.0;
            if (count > 0)
            {
                upToDatePercent = upToDate * 100.0 / count;
            }

            Console.WriteLine("Total files:         {0}", count);
            Console.WriteLine("Available files:     {0}", available);
            Console.WriteLine("Out-of-date files:   {0}", outOfDate);
            Console.WriteLine("Up-to-date files:    {0} ({1:N1}%)", upToDate, upToDatePercent);
        }
Beispiel #10
0
        /// <summary>
        /// Repeatedly asks the file database for the next record that is not available
        /// or out of date and hands it to the crawler, until the database returns none.
        /// </summary>
        /// <param name="databaseFileName">
        /// Path of the database file. Its directory is passed to the <c>WebCrawler</c>.
        /// </param>
        private static void UpdateFiles(String databaseFileName)
        {
            var database = new FileDatabase(databaseFileName);
            var crawler = new WebCrawler(Path.GetDirectoryName(databaseFileName));

            // The loop ends as soon as the database reports no further pending record.
            for (var pending = database.GetNextNotAvailableOrOutOfDateFile();
                 pending != null;
                 pending = database.GetNextNotAvailableOrOutOfDateFile())
            {
                crawler.UpdateFile(pending);
            }
        }