/// <summary>
/// Loads every index string stored in the ".dat" index file that belongs to
/// <paramref name="crawling"/>.
/// </summary>
/// <param name="crawling">Crawling whose <c>Name</c> determines the index file name.</param>
/// <returns>
/// The strings read from the file, in file order. Empty when the file does not exist.
/// When a record cannot be read, the strings read before it are returned.
/// </returns>
public static List <string> LoadIndexFile(Crawling crawling)
        {
            CreateDirectoryIfNeed(IndexDirectory);

            string        filePath = IndexDirectory + "\\" + crawling.Name + ".dat";
            List <string> indexs   = new List <string>();

            if (!File.Exists(filePath))
            {
                return(indexs);
            }

            // The using block releases the file handle even when reading fails.
            using (BinaryReader fbr = new BinaryReader(File.Open(filePath, FileMode.Open)))
            {
                // Ask the stream where we are instead of estimating it from the
                // string length: ReadString consumes a variable-size length prefix
                // plus encoded bytes, which is not always "characters + 1".
                while (fbr.BaseStream.Position < fbr.BaseStream.Length)
                {
                    try
                    {
                        indexs.Add(fbr.ReadString());
                    }
                    catch
                    {
                        // Best effort: a truncated or corrupt record ends the load
                        // and keeps whatever was read successfully so far.
                        break;
                    }
                }
            }

            return(indexs);
        }
        /// <summary>
        /// Runs one crawling: marks it in progress, crawls, stores the mapped details
        /// and marks it completed. Any exception marks the crawling as failed instead.
        /// </summary>
        /// <param name="crawling">The crawling to execute; its status fields are updated in place.</param>
        /// <param name="repository">Repository used to persist each status change.</param>
        /// <param name="stoppingToken">Token passed through to the crawling service.</param>
        private async Task DoCrawling(
            Crawling crawling,
            ICrawlingRepository repository,
            CancellationToken stoppingToken)
        {
            try
            {
                crawling.Status = CrawlingStatus.InProgress;
                await repository.UpdateCrawlingAsync(crawling);

                var options        = new CrawlingOptions(crawling.Expression, crawling.Url);
                var crawlingResult = await _crawlingService.CrawlAsync(options, stoppingToken);

                crawling.CrawlingDetails = _mapper.MapDetails(crawling.Id, crawlingResult);
                crawling.Status          = CrawlingStatus.Completed;
                await repository.UpdateCrawlingAsync(crawling);
            }
            catch (Exception e)
            {
                // Log first: if persisting the failed state throws as well, the
                // original error has already been recorded. The URL is passed as a
                // template argument so braces inside it cannot break message formatting.
                _logger.LogError(e, "Error while crawling {Url} in background service.", crawling?.Url);

                crawling.Status     = CrawlingStatus.Failed;
                crawling.StatusText = e.ToString();
                await repository.UpdateCrawlingAsync(crawling);
            }
        }
Exemple #3
0
        /// <summary>
        /// Start-button handler: when the input can be created, toggles the
        /// start/stop buttons and launches a crawling from the entered values;
        /// otherwise shows an alert.
        /// </summary>
        private void OnStartCrawling(object sender, EventArgs e)
        {
            if (!CreateInput())
            {
                DisplayAlert("错误", "必须输入参数", "确定");
                return;
            }

            m_start_button.IsEnabled = false;
            m_stop_button.IsEnabled  = true;

            int  firstId  = InputData.CrawlingStartId;
            int? lastId   = InputData.CreateNullInt32(InputData.CrawlingEndId);
            int? exLimit  = InputData.CreateNullInt32(InputData.CrawlingMaxExCount);
            int  workers  = InputData.TaskCount;
            var  mode     = GetCrawlingMoedValue(m_crawling_mode_picker.SelectedItem.ToString());
            var  timeout  = new TimeSpan(0, 0, ConstInfo.CRAWLING_TIMEOUT);

            m_crawling = Crawling.Start(mode, exLimit, firstId, lastId, workers, timeout);

            // Once the crawling task finishes, re-initialise the input view on the main thread.
            m_crawling.Task.ContinueWith(
                finished => MainThread.BeginInvokeOnMainThread(() => InitInputView()));
        }
Exemple #4
0
        // Verifies that Map copies the scalar fields, renders the status as text
        // and reports the sum of the detail hit counts (10 + 11 = 21).
        public void Map_CrawlingIsPassed_ReturnsMappedModel()
        {
            // Arrange
            var createdOn = new DateTime(2021, 3, 21, 1, 2, 3);

            var crawling = new Crawling
            {
                Id              = 200,
                CreatedOn       = createdOn,
                Expression      = "test expression",
                Status          = CrawlingStatus.InProgress,
                Url             = "test url",
                CrawlingDetails = new List <CrawlingDetails>
                {
                    new CrawlingDetails { HitsCount = 10 },
                    new CrawlingDetails { HitsCount = 11 }
                }
            };

            var expected = new CrawlingModel
            {
                Id         = 200,
                CreatedOn  = createdOn,
                Expression = "test expression",
                HitsCount  = 21,
                Status     = "InProgress",
                Url        = "test url"
            };

            // Act
            var mapped = _sut.Map(crawling);

            // Assert
            mapped.Should().BeEquivalentTo(expected);
        }
Exemple #5
0
        /// <summary>
        /// Stamps the crawling with the current UTC time, marks it as updated in the
        /// context and saves the changes.
        /// </summary>
        /// <param name="crawling">The crawling entity to persist.</param>
        public async Task UpdateCrawlingAsync(Crawling crawling)
        {
            DateTime stamp = DateTime.UtcNow;
            crawling.UpdatedOn = stamp;

            var crawlings = _dbContext.Crawlings;
            crawlings.Update(crawling);

            await _dbContext.SaveChangesAsync();
        }
Exemple #6
0
        // Verifies that MapDetails turns every crawling detail into a model
        // carrying the same Url and HitsCount.
        public void MapDetails_CrawlingDetailsContainData_ReturnsMappedCollection()
        {
            // Arrange
            var source = new Crawling
            {
                CrawlingDetails = new List <CrawlingDetails>
                {
                    new CrawlingDetails { Url = "test1.com", HitsCount = 10 },
                    new CrawlingDetails { Url = "test2.com", HitsCount = 20 }
                }
            };

            var expected = new List <CrawlingDetailsModel>
            {
                new CrawlingDetailsModel { Url = "test1.com", HitsCount = 10 },
                new CrawlingDetailsModel { Url = "test2.com", HitsCount = 20 }
            };

            // Act
            var mapped = _sut.MapDetails(source);

            // Assert
            mapped.Should().BeEquivalentTo(expected);
        }
Exemple #7
0
        /// <summary>
        /// Downloads the page for <paramref name="optionUrl"/>, extracts the encoded
        /// words and executes every action of the crawling against them.
        /// </summary>
        /// <param name="optionUrl">Option assigned to the crawling URL before downloading.</param>
        /// <param name="crawling">Crawling definition supplying URL, word lists, XPath and actions.</param>
        /// <param name="indexs">Index values passed on to each executed action.</param>
        /// <returns><c>false</c> when the download is not successful; otherwise <c>true</c>.</returns>
        public static bool StasrtCrawling(string optionUrl, Crawling crawling, List <string> indexs)
        {
            crawling.Url.Option = optionUrl;
            HtmlDocument page = DownloadHtml(crawling.Url.ToString());

            if (IsSuccess(page))
            {
                List <List <string> > encodedRows = EncodingWordHelper.GetEncodedWords(
                    page,
                    crawling.WordList,
                    crawling.OtherCrawlingList,
                    crawling.BaseXPath);

                foreach (BaseAction currentAction in crawling.ActionList)
                {
                    // Own words first, followed by the words of every referenced crawling.
                    var referencedWords = crawling.OtherCrawlingList
                                          .SelectMany(other => other.CrawlingPointer.WordList);
                    List <Word> allWords  = crawling.WordList.Concat(referencedWords).ToList();
                    Word        indexWord = allWords.Find(word => word.IsIndex);

                    ExcuteAction(currentAction, allWords, indexWord, crawling, optionUrl, encodedRows, indexs);
                }

                return(true);
            }

            return(false);
        }
Exemple #8
0
        // Verifies that disposing a Crawling disposes its repository exactly once.
        public void Dispose_IsCalled_OnceCrawling()
        {
            // Arrange
            var repositoryMock = new Mock <IRepository>();
            var sut            = new Crawling(repositoryMock.Object);

            // Act
            sut.Dispose();

            // Assert
            repositoryMock.Verify(repository => repository.Dispose(), Times.Once());
        }
 /// <summary>
 /// Writes this entry as "ListingText: value", where value is read by reflection
 /// from the property named <c>PropertyName</c> on <paramref name="crawl"/>.
 /// The line is drawn black-on-white when <paramref name="selected"/> equals this entry's position.
 /// </summary>
 public void Show(Crawling crawl, int selected)
 {
     bool highlight = selected == position;

     if (highlight)
     {
         Console.BackgroundColor = ConsoleColor.White;
         Console.ForegroundColor = ConsoleColor.Black;
     }

     object currentValue = crawl.GetType().GetProperty(PropertyName).GetValue(crawl);
     Console.WriteLine(ListingText + ": " + currentValue);
     Console.ResetColor();
 }
 /// <summary>
 /// Serializes <paramref name="crawling"/> to "&lt;Name&gt;.cr" inside the crawling
 /// directory. Does nothing when <paramref name="crawling"/> is null.
 /// </summary>
 public static void SaveFile(Crawling crawling)
 {
     if (crawling != null)
     {
         CreateDirectoryIfNeed(CrawlingDirectory);

         string targetPath = CrawlingDirectory + "\\" + crawling.Name + ".cr";
         Serialize(crawling, targetPath);
     }
 }
Exemple #11
0
        /// <summary>
        /// Adds a new crawling in the <c>Scheduled</c> status and saves it.
        /// </summary>
        /// <param name="expression">Value stored in the crawling's <c>Expression</c>.</param>
        /// <param name="url">Value stored in the crawling's <c>Url</c>.</param>
        /// <param name="userId">Value stored in the crawling's <c>ApplicationUserId</c>.</param>
        /// <returns>The <c>Id</c> of the crawling after the changes were saved.</returns>
        public async Task <int> CreateAsync(string expression, string url, string userId)
        {
            var scheduled = new Crawling
            {
                Expression        = expression,
                Url               = url,
                Status            = CrawlingStatus.Scheduled,
                ApplicationUserId = userId
            };

            var crawlings = _dbContext.Crawlings;
            await crawlings.AddAsync(scheduled);
            await _dbContext.SaveChangesAsync();

            return scheduled.Id;
        }
Exemple #12
0
        /// <summary>
        /// Maps a crawling to its model: scalar fields are copied, the status is
        /// rendered with <c>ToString()</c> and the hit count is the sum over all
        /// crawling details (0 when there are no details).
        /// </summary>
        /// <param name="crawling">The crawling to map.</param>
        /// <returns>The mapped model.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="crawling"/> is null.</exception>
        public CrawlingModel Map(Crawling crawling)
        {
            if (crawling == null)
            {
                throw new ArgumentNullException(nameof(crawling), "Unable to map Crawling");
            }

            var model = new CrawlingModel
            {
                Id         = crawling.Id,
                CreatedOn  = crawling.CreatedOn,
                Expression = crawling.Expression,
                Status     = crawling.Status.ToString(),
                Url        = crawling.Url,
                HitsCount  = crawling.CrawlingDetails?.Sum(item => item.HitsCount) ?? 0
            };

            return model;
        }
        /// <summary>
        /// Writes <paramref name="crawling"/> to <paramref name="fileName"/> with
        /// <c>BinaryFormatter</c>, creating or overwriting the file. A
        /// <c>SerializationException</c> is reported in a message box instead of being thrown.
        /// </summary>
        /// <param name="crawling">Object graph to serialize.</param>
        /// <param name="fileName">Path of the file to create.</param>
        public static void Serialize(Crawling crawling, string fileName)
        {
            // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted data;
            // consider migrating this file format to another serializer.
            using (FileStream output = new FileStream(fileName, FileMode.Create))
            {
                BinaryFormatter writer = new BinaryFormatter();

                try
                {
                    writer.Serialize(output, crawling);
                }
                catch (SerializationException failure)
                {
                    MessageBox.Show("Fail: " + failure.Message);
                }
            }
        }
        /// <summary>
        /// Appends <paramref name="index"/> as one length-prefixed string to the
        /// ".dat" index file that belongs to <paramref name="crawling"/>, creating
        /// the file when it does not exist yet.
        /// </summary>
        /// <param name="crawling">Crawling whose <c>Name</c> determines the index file name.</param>
        /// <param name="index">Index value to append.</param>
        public static void SaveIndexFile(Crawling crawling, string index)
        {
            CreateDirectoryIfNeed(IndexDirectory);

            string filePath = IndexDirectory + "\\" + crawling.Name + ".dat";

            // FileMode.Append opens an existing file at its end and creates a missing
            // one, so no separate existence check (and no race between check and open)
            // is needed. The using block closes the file even when Write throws.
            using (BinaryWriter fbw = new BinaryWriter(File.Open(filePath, FileMode.Append)))
            {
                fbw.Write(index);
            }
        }
        /// <summary>
        /// Deserializes every ".cr" file found directly in the crawling directory.
        /// </summary>
        /// <returns>
        /// The crawlings that could be deserialized; files for which
        /// <c>Deserialize</c> returns null are skipped.
        /// </returns>
        public static List <Crawling> LoadFiles()
        {
            CreateDirectoryIfNeed(CrawlingDirectory);
            List <Crawling> crawlings = new List <Crawling>();
            DirectoryInfo   di        = new DirectoryInfo(CrawlingDirectory);

            foreach (FileInfo file in di.GetFiles())
            {
                // File extensions are identifiers, not linguistic text: compare them
                // ordinally and case-insensitively instead of lower-casing with the
                // current culture and using the culture-sensitive CompareTo.
                if (!string.Equals(file.Extension, ".cr", System.StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                Crawling now = Deserialize(file.FullName);
                if (now != null)
                {
                    crawlings.Add(now);
                }
            }
            return(crawlings);
        }
Exemple #16
0
        /// <summary>
        /// Maps the details of a crawling to detail models carrying the same
        /// <c>Url</c> and <c>HitsCount</c>.
        /// </summary>
        /// <param name="crawling">The crawling whose details are mapped.</param>
        /// <returns>
        /// A list with one model per detail, or an empty sequence when the
        /// crawling has no details collection.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="crawling"/> is null.</exception>
        public IEnumerable <CrawlingDetailsModel> MapDetails(Crawling crawling)
        {
            if (crawling == null)
            {
                throw new ArgumentNullException(nameof(crawling), "Unable to map crawling details");
            }

            var source = crawling.CrawlingDetails;
            if (source == null)
            {
                return Enumerable.Empty <CrawlingDetailsModel>();
            }

            var models = new List <CrawlingDetailsModel>();
            foreach (var item in source)
            {
                models.Add(new CrawlingDetailsModel { Url = item.Url, HitsCount = item.HitsCount });
            }

            return models;
        }
Exemple #17
0
        /// <summary>
        /// Downloads the page for <paramref name="urlOption"/> and fills the word
        /// list of <paramref name="crawling"/> from the first node matched by its base XPath.
        /// </summary>
        /// <param name="crawling">Crawling definition; its URL option and word list are modified.</param>
        /// <param name="urlOption">Option assigned to the crawling URL before downloading.</param>
        /// <returns>
        /// The crawling's word list, or a new empty list when <paramref name="crawling"/>
        /// is null or no node was found.
        /// </returns>
        public static List <Word> CrawlingOne(Crawling crawling, string urlOption)
        {
            if (crawling == null)
            {
                return new List <Word>();
            }

            crawling.Url.Option = urlOption;

            var page    = InfiniteDownloadHtml(crawling.Url.ToString());
            var matches = GetResults(page, crawling.BaseXPath);

            bool hasMatch = matches != null && matches.Count >= 1;
            if (!hasMatch)
            {
                return new List <Word>();
            }

            // Only the first matched node is used as the source of the words.
            var targetWords = crawling.WordList;
            EncodingWordHelper.SetOriginalWords(targetWords, matches[0]);
            EncodingWordHelper.SetEncodedWords(crawling.WordList);

            return crawling.WordList;
        }
        /// <summary>
        /// Reads a <c>Crawling</c> from <paramref name="file_name"/> with
        /// <c>BinaryFormatter</c>. A <c>SerializationException</c> is reported in a
        /// message box instead of being thrown.
        /// </summary>
        /// <param name="file_name">Path of the file to read.</param>
        /// <returns>
        /// The deserialized crawling, or null when deserialization failed or the
        /// stored object is not a <c>Crawling</c>.
        /// </returns>
        public static Crawling Deserialize(string file_name)
        {
            Crawling loaded = null;

            // NOTE(review): BinaryFormatter is obsolete and unsafe when the file could
            // come from an untrusted source; consider migrating to another serializer.
            using (FileStream input = new FileStream(file_name, FileMode.Open))
            {
                BinaryFormatter reader = new BinaryFormatter();

                try
                {
                    loaded = reader.Deserialize(input) as Crawling;
                }
                catch (SerializationException failure)
                {
                    MessageBox.Show("Fail: " + failure.Message);
                }
            }

            return loaded;
        }
Exemple #19
0
        /// <summary>
        /// Applies <paramref name="action"/> to every result row: the row's values are
        /// copied into the words' <c>Encoded</c> fields, and when the index word's
        /// value is not yet in <paramref name="indexs"/> the action is triggered and
        /// the value is recorded both in the list and in the index file.
        /// </summary>
        /// <param name="action">Action to run; only the NOTIFITY type triggers notifications here.</param>
        /// <param name="words">Words receiving the encoded values, matched to each row by position.</param>
        /// <param name="indexWord">Word whose encoded value identifies a row; may be null.</param>
        /// <param name="crawling">Crawling the action belongs to.</param>
        /// <param name="option">URL option passed on to the notification helper.</param>
        /// <param name="results">Rows of encoded values.</param>
        /// <param name="indexs">Index values already handled; extended with new ones.</param>
        public static void ExcuteAction(
            BaseAction action,
            List <Word> words,
            Word indexWord,
            Crawling crawling,
            string option,
            List <List <string> > results,
            List <string> indexs)
        {
            if (action.ActionType == Enums.BaseActionType.NOTIFITY)
            {
                NotificationHelper.CreateCollection(crawling.Name, option);
            }

            foreach (var row in results)
            {
                for (int column = 0; column < words.Count; column++)
                {
                    words[column].Encoded = row[column];
                }

                // Skip rows without an index word or whose index value was seen before.
                if (indexWord == null || indexs.Contains(indexWord.Encoded))
                {
                    continue;
                }

                if (action.ActionType == Enums.BaseActionType.NOTIFITY)
                {
                    NotificationHelper.SendToastAsync(action as MyToast, crawling.Name, option);
                }

                indexs.Add(indexWord.Encoded);
                FileHelper.SaveIndexFile(crawling, indexWord.Encoded);
            }
        }
Exemple #20
0
        /// <summary>
        /// Runs the crawling once for every URL option whose schedule has elapsed and,
        /// after each successful run, stores the new last-crawling time to disk.
        /// </summary>
        /// <param name="when">Schedule passed to <c>IsPassedTime</c>.</param>
        /// <param name="crawling">Crawling to run; gets one default URL option when it has none.</param>
        /// <param name="indexs">Index values passed on to the crawling run.</param>
        /// <param name="isBoot">Flag passed to <c>IsPassedTime</c>.</param>
        /// <remarks>
        /// The run is best effort: an exception stops the remaining URL options and is
        /// written to the trace output instead of being propagated.
        /// </remarks>
        public static void StartCrawlings(WhenCrawling when, Crawling crawling, List <string> indexs, bool isBoot)
        {
            if (crawling.UrlOptionLIst.Count == 0)
            {
                crawling.UrlOptionLIst.Add(new UrlOption());
            }

            try
            {
                foreach (UrlOption urlOption in crawling.UrlOptionLIst)
                {
                    if (IsPassedTime(when, urlOption.LastCrawling, isBoot))
                    {
                        bool success = StasrtCrawling(urlOption.Option, crawling, indexs);
                        if (success)
                        {
                            urlOption.LastCrawling = DateTime.Now;
                            FileHelper.SaveFile(crawling);
                        }
                    }
                }
            }
            catch (System.Exception ex)
            {
                // Still best effort, but no longer silent: leave a trace of what failed
                // so a broken crawling can be diagnosed.
                System.Diagnostics.Trace.TraceError("Crawling '{0}' failed: {1}", crawling.Name, ex);
            }
        }
        /// <summary>
        /// Interactively edits the property named <c>PropertyName</c> on
        /// <paramref name="crawl"/> through the console. The editor depends on
        /// <c>Type</c>: 0 reads a string, 1 toggles a bool with the left/right arrow
        /// keys until Enter is pressed, 2 reads and parses an int.
        /// </summary>
        /// <param name="crawl">Object whose property is set by reflection.</param>
        /// <returns>The same <paramref name="crawl"/> instance.</returns>
        public Crawling SelectValue(Crawling crawl)
        {
            switch (this.Type)
            {
            case 0:     // string value
            {
                parent.PrintMenu();
                Console.Write("\nNew Value: ");
                Console.ForegroundColor = ConsoleColor.Green;
                string typedText = Console.ReadLine();
                Console.ResetColor();
                crawl.GetType().GetProperty(PropertyName).SetValue(crawl, typedText);
                break;
            }

            case 1:     // bool value
            {
                bool confirmed = false;
                var  property  = crawl.GetType().GetProperty(PropertyName);
                bool isTrue    = bool.Parse(property.GetValue(crawl).ToString());

                do
                {
                    parent.PrintMenu();
                    Console.WriteLine("\n");

                    // Draw "True   False" with the current choice highlighted.
                    if (isTrue)
                    {
                        Console.BackgroundColor = ConsoleColor.White;
                        Console.ForegroundColor = ConsoleColor.Black;
                    }
                    Console.Write("True");
                    Console.ResetColor();
                    Console.Write("   ");
                    if (!isTrue)
                    {
                        Console.BackgroundColor = ConsoleColor.White;
                        Console.ForegroundColor = ConsoleColor.Black;
                    }
                    Console.WriteLine("False");
                    Console.ResetColor();

                    ConsoleKey pressed = Console.ReadKey().Key;
                    if (pressed == ConsoleKey.LeftArrow)
                    {
                        isTrue = true;
                    }
                    else if (pressed == ConsoleKey.RightArrow)
                    {
                        isTrue = false;
                    }
                    else if (pressed == ConsoleKey.Enter)
                    {
                        crawl.GetType().GetProperty(PropertyName).SetValue(crawl, isTrue);
                        confirmed = true;
                    }
                }
                while (!confirmed);
                break;
            }

            case 2:     // int value
            {
                parent.PrintMenu();
                Console.Write("\nNew Value: ");
                Console.ForegroundColor = ConsoleColor.Green;
                string typedNumber = Console.ReadLine();
                Console.ResetColor();
                crawl.GetType().GetProperty(PropertyName).SetValue(crawl, int.Parse(typedNumber));
                break;
            }
            }

            return crawl;
        }
Exemple #22
0
 /// <summary>
 /// Creates a view model wrapping a newly constructed <c>Crawling</c>.
 /// </summary>
 public CrawlingViewModel()
 {
     this.Crawling = new Crawling();
 }
Exemple #23
0
 /// <summary>
 /// Creates a view model around an existing crawling and assigns its name.
 /// </summary>
 /// <param name="name">Name written to the wrapped crawling.</param>
 /// <param name="crawling">Crawling to wrap.</param>
 public CrawlingViewModel(string name, Crawling crawling)
 {
     this.Crawling      = crawling;
     this.Crawling.Name = name;
 }
 /// <summary>
 /// Runs the parameterless constructor, then stores the current crawling and
 /// the list of crawling view models.
 /// </summary>
 /// <param name="nowCrawling">Crawling stored in the <c>nowCrawling</c> field.</param>
 /// <param name="crawlingList">Collection stored in the <c>_crawlingList</c> field.</param>
 public CustomWordCollectionViewModel(Crawling nowCrawling, ObservableCollection <CrawlingViewModel> crawlingList)
     : this()
 {
     _crawlingList    = crawlingList;
     this.nowCrawling = nowCrawling;
 }