public static List<string> LoadIndexFile(Crawling crawling)
{
    CreateDirectoryIfNeed(IndexDirectory);
    string filePath = IndexDirectory + "\\" + crawling.Name + ".dat";
    List<string> indexs = new List<string>();
    FileInfo fi = new FileInfo(filePath);
    if (!fi.Exists)
    {
        return indexs;
    }

    // Read length-prefixed strings until the end of the stream. The stream position is the
    // reliable cursor here; a string's character count does not match the bytes BinaryWriter wrote.
    using (BinaryReader fbr = new BinaryReader(File.Open(filePath, FileMode.Open)))
    {
        while (fbr.BaseStream.Position < fbr.BaseStream.Length)
        {
            try
            {
                indexs.Add(fbr.ReadString());
            }
            catch
            {
                break;
            }
        }
    }

    return indexs;
}
private async Task DoCrawling(Crawling crawling, ICrawlingRepository repository, CancellationToken stoppingToken)
{
    try
    {
        crawling.Status = CrawlingStatus.InProgress;
        await repository.UpdateCrawlingAsync(crawling);

        var options = new CrawlingOptions(crawling.Expression, crawling.Url);
        var crawlingResult = await _crawlingService.CrawlAsync(options, stoppingToken);

        crawling.CrawlingDetails = _mapper.MapDetails(crawling.Id, crawlingResult);
        crawling.Status = CrawlingStatus.Completed;
        await repository.UpdateCrawlingAsync(crawling);
    }
    catch (Exception e)
    {
        crawling.Status = CrawlingStatus.Failed;
        crawling.StatusText = e.ToString();
        await repository.UpdateCrawlingAsync(crawling);

        // Use a structured-logging message template instead of string interpolation.
        _logger.LogError(e, "Error while crawling {Url} in background service.", crawling.Url);
    }
}
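A minimal sketch, not taken from the original source, of how DoCrawling could be driven from a hosted BackgroundService loop; _scopeFactory, the ten-second polling delay, and GetPendingCrawlingsAsync are assumed names used only for illustration.

// Hypothetical host loop (sketch only): poll for scheduled crawlings and hand each one to DoCrawling.
// _scopeFactory (IServiceScopeFactory) and GetPendingCrawlingsAsync are assumptions, not source code.
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
    while (!stoppingToken.IsCancellationRequested)
    {
        using (var scope = _scopeFactory.CreateScope())
        {
            var repository = scope.ServiceProvider.GetRequiredService<ICrawlingRepository>();
            foreach (var crawling in await repository.GetPendingCrawlingsAsync())
            {
                await DoCrawling(crawling, repository, stoppingToken);
            }
        }
        await Task.Delay(TimeSpan.FromSeconds(10), stoppingToken);
    }
}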
private void OnStartCrawling(object sender, EventArgs e)
{
    if (CreateInput())
    {
        m_start_button.IsEnabled = false;
        m_stop_button.IsEnabled = true;

        int id = InputData.CrawlingStartId;
        int? endId = InputData.CreateNullInt32(InputData.CrawlingEndId);
        int? maxExCount = InputData.CreateNullInt32(InputData.CrawlingMaxExCount);
        int taskCount = InputData.TaskCount;
        var mode = GetCrawlingMoedValue(m_crawling_mode_picker.SelectedItem.ToString());

        m_crawling = Crawling.Start(mode, maxExCount, id, endId, taskCount, new TimeSpan(0, 0, ConstInfo.CRAWLING_TIMEOUT));
        m_crawling.Task.ContinueWith((t) => MainThread.BeginInvokeOnMainThread(() => InitInputView()));
    }
    else
    {
        // "错误" / "必须输入参数" / "确定" = "Error" / "Parameters are required" / "OK"
        DisplayAlert("错误", "必须输入参数", "确定");
    }
}
public void Map_CrawlingIsPassed_ReturnsMappedModel()
{
    // HitsCount on the model is the sum of the detail hit counts (10 + 11 = 21).
    var expectedResult = new CrawlingModel
    {
        Id = 200,
        CreatedOn = new DateTime(2021, 3, 21, 1, 2, 3),
        Expression = "test expression",
        HitsCount = 21,
        Status = "InProgress",
        Url = "test url"
    };
    var input = new Crawling
    {
        Id = 200,
        CreatedOn = new DateTime(2021, 3, 21, 1, 2, 3),
        Expression = "test expression",
        Status = CrawlingStatus.InProgress,
        Url = "test url",
        CrawlingDetails = new List<CrawlingDetails>
        {
            new CrawlingDetails { HitsCount = 10 },
            new CrawlingDetails { HitsCount = 11 }
        }
    };

    var actualResult = _sut.Map(input);

    actualResult.Should().BeEquivalentTo(expectedResult);
}
public async Task UpdateCrawlingAsync(Crawling crawling)
{
    crawling.UpdatedOn = DateTime.UtcNow;
    _dbContext.Crawlings.Update(crawling);
    await _dbContext.SaveChangesAsync();
}
public void MapDetails_CrawlingDetailsContainData_ReturnsMappedCollection()
{
    var expectedResult = new List<CrawlingDetailsModel>
    {
        new CrawlingDetailsModel { Url = "test1.com", HitsCount = 10 },
        new CrawlingDetailsModel { Url = "test2.com", HitsCount = 20 }
    };
    var crawling = new Crawling
    {
        CrawlingDetails = new List<CrawlingDetails>
        {
            new CrawlingDetails { Url = "test1.com", HitsCount = 10 },
            new CrawlingDetails { Url = "test2.com", HitsCount = 20 }
        }
    };

    var actualResult = _sut.MapDetails(crawling);

    actualResult.Should().BeEquivalentTo(expectedResult);
}
public static bool StartCrawling(string optionUrl, Crawling crawling, List<string> indexs)
{
    crawling.Url.Option = optionUrl;
    HtmlDocument html = DownloadHtml(crawling.Url.ToString());
    if (!IsSuccess(html))
    {
        return false;
    }

    List<List<string>> results = EncodingWordHelper.GetEncodedWords(
        html, crawling.WordList, crawling.OtherCrawlingList, crawling.BaseXPath);

    foreach (BaseAction action in crawling.ActionList)
    {
        // Combine this crawling's words with the words of every linked crawling before running the action.
        List<Word> words = crawling.WordList
            .Concat(crawling.OtherCrawlingList.SelectMany(x => x.CrawlingPointer.WordList))
            .ToList();
        ExecuteAction(action, words, words.Find(x => x.IsIndex), crawling, optionUrl, results, indexs);
    }

    return true;
}
public void Dispose_IsCalled_OnceCrawling()
{
    var moqRepo = new Mock<IRepository>();
    var crawler = new Crawling(moqRepo.Object);

    crawler.Dispose();

    moqRepo.Verify(i => i.Dispose(), Times.Once());
}
public void Show(Crawling crawl, int selected)
{
    if (selected == position)
    {
        Console.BackgroundColor = ConsoleColor.White;
        Console.ForegroundColor = ConsoleColor.Black;
    }
    Console.WriteLine(ListingText + ": " + crawl.GetType().GetProperty(PropertyName).GetValue(crawl));
    Console.ResetColor();
}
public static void SaveFile(Crawling crawling)
{
    if (crawling == null)
    {
        return;
    }
    CreateDirectoryIfNeed(CrawlingDirectory);
    Serialize(crawling, CrawlingDirectory + "\\" + crawling.Name + ".cr");
}
public async Task<int> CreateAsync(string expression, string url, string userId)
{
    var crawling = new Crawling
    {
        Expression = expression,
        Url = url,
        Status = CrawlingStatus.Scheduled,
        ApplicationUserId = userId
    };

    await _dbContext.Crawlings.AddAsync(crawling);
    await _dbContext.SaveChangesAsync();

    return crawling.Id;
}
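A hedged sketch of an API entry point that could sit in front of CreateAsync: the crawling is stored with Scheduled status and later picked up by the background crawler (see DoCrawling above). The controller shape, the _crawlingRepository field, and the claim lookup are assumptions, not part of the original source.

// Hypothetical controller action (sketch only); names below are not confirmed by the source.
[HttpPost]
public async Task<IActionResult> Create(string expression, string url)
{
    string userId = User.FindFirstValue(ClaimTypes.NameIdentifier); // current user's id claim
    int id = await _crawlingRepository.CreateAsync(expression, url, userId);
    return Ok(id);
}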
public CrawlingModel Map(Crawling crawling)
{
    if (crawling == null)
    {
        throw new ArgumentNullException(nameof(crawling), "Unable to map Crawling");
    }

    return new CrawlingModel
    {
        Id = crawling.Id,
        CreatedOn = crawling.CreatedOn,
        Expression = crawling.Expression,
        Status = crawling.Status.ToString(),
        Url = crawling.Url,
        HitsCount = crawling.CrawlingDetails?.Sum(detail => detail.HitsCount) ?? 0
    };
}
public static void Serialize(Crawling crawling, string fileName)
{
    using (FileStream fs = new FileStream(fileName, FileMode.Create))
    {
        BinaryFormatter formatter = new BinaryFormatter();
        try
        {
            formatter.Serialize(fs, crawling);
        }
        catch (SerializationException e)
        {
            MessageBox.Show("Fail: " + e.Message);
        }
    }
}
public static void SaveIndexFile(Crawling crawling, string index)
{
    CreateDirectoryIfNeed(IndexDirectory);
    string filePath = IndexDirectory + "\\" + crawling.Name + ".dat";
    FileInfo fi = new FileInfo(filePath);

    // Create the file on first use, otherwise append the new index entry to the end.
    using (BinaryWriter fbw = new BinaryWriter(File.Open(filePath, fi.Exists ? FileMode.Append : FileMode.Create)))
    {
        fbw.Write(index);
    }
}
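A brief usage sketch of the index-file round trip formed by SaveIndexFile and LoadIndexFile; FileHelper as the containing class and the literal "12345" value are assumptions for illustration.

// Usage sketch (illustrative only): reload previously saved entries, then append a value once.
public static void IndexFileRoundTripExample(Crawling crawling)
{
    List<string> indexs = FileHelper.LoadIndexFile(crawling); // empty list when no .dat file exists yet
    if (!indexs.Contains("12345"))
    {
        FileHelper.SaveIndexFile(crawling, "12345");           // writes one length-prefixed string
        indexs.Add("12345");
    }
}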
public static List<Crawling> LoadFiles()
{
    CreateDirectoryIfNeed(CrawlingDirectory);
    List<Crawling> crawlings = new List<Crawling>();
    DirectoryInfo di = new DirectoryInfo(CrawlingDirectory);
    foreach (FileInfo file in di.GetFiles())
    {
        if (string.Equals(file.Extension, ".cr", StringComparison.OrdinalIgnoreCase))
        {
            Crawling now = Deserialize(file.FullName);
            if (now != null)
            {
                crawlings.Add(now);
            }
        }
    }
    return crawlings;
}
public IEnumerable<CrawlingDetailsModel> MapDetails(Crawling crawling)
{
    if (crawling == null)
    {
        throw new ArgumentNullException(nameof(crawling), "Unable to map crawling details");
    }
    if (crawling.CrawlingDetails == null)
    {
        return Enumerable.Empty<CrawlingDetailsModel>();
    }

    return crawling
        .CrawlingDetails
        .Select(details => new CrawlingDetailsModel
        {
            Url = details.Url,
            HitsCount = details.HitsCount
        })
        .ToList();
}
public static List<Word> CrawlingOne(Crawling crawling, string urlOption)
{
    if (crawling == null)
    {
        return new List<Word>();
    }

    crawling.Url.Option = urlOption;
    var html = InfiniteDownloadHtml(crawling.Url.ToString());
    var nodes = GetResults(html, crawling.BaseXPath);
    if (nodes == null || nodes.Count < 1)
    {
        return new List<Word>();
    }

    EncodingWordHelper.SetOriginalWords(crawling.WordList, nodes[0]);
    EncodingWordHelper.SetEncodedWords(crawling.WordList);
    return crawling.WordList;
}
public static Crawling Deserialize(string fileName)
{
    Crawling crawling = null;
    using (FileStream fs = new FileStream(fileName, FileMode.Open))
    {
        BinaryFormatter formatter = new BinaryFormatter();
        try
        {
            crawling = formatter.Deserialize(fs) as Crawling;
        }
        catch (SerializationException e)
        {
            MessageBox.Show("Fail: " + e.Message);
        }
    }
    return crawling;
}
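A hedged round-trip sketch for the Serialize/Deserialize pair above, assuming Crawling is marked [Serializable] and has a parameterless constructor; the file name is illustrative. Note that BinaryFormatter is marked obsolete in .NET 5+ and is unsafe for untrusted input.

// Round-trip sketch (illustrative only).
public static void SerializationRoundTripExample()
{
    Crawling original = new Crawling { Name = "example" };
    Serialize(original, "example.cr");
    Crawling restored = Deserialize("example.cr");
}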
public static void ExecuteAction(
    BaseAction action,
    List<Word> words,
    Word indexWord,
    Crawling crawling,
    string option,
    List<List<string>> results,
    List<string> indexs)
{
    switch (action.ActionType)
    {
        case Enums.BaseActionType.NOTIFITY:
            NotificationHelper.CreateCollection(crawling.Name, option);
            break;
    }

    foreach (var result in results)
    {
        // Copy the crawled values of this result row into the word list.
        for (int i = 0; i < words.Count; i++)
        {
            words[i].Encoded = result[i];
        }

        // Only act on rows whose index value has not been seen before.
        if (indexWord != null && !indexs.Contains(indexWord.Encoded))
        {
            switch (action.ActionType)
            {
                case Enums.BaseActionType.NOTIFITY:
                    NotificationHelper.SendToastAsync(action as MyToast, crawling.Name, option);
                    break;
            }
            indexs.Add(indexWord.Encoded);
            FileHelper.SaveIndexFile(crawling, indexWord.Encoded);
        }
    }
}
public static void StartCrawlings(WhenCrawling when, Crawling crawling, List<string> indexs, bool isBoot)
{
    if (crawling.UrlOptionLIst.Count == 0)
    {
        crawling.UrlOptionLIst.Add(new UrlOption());
    }
    try
    {
        foreach (UrlOption urlOption in crawling.UrlOptionLIst)
        {
            if (IsPassedTime(when, urlOption.LastCrawling, isBoot))
            {
                bool success = StartCrawling(urlOption.Option, crawling, indexs);
                if (success)
                {
                    urlOption.LastCrawling = DateTime.Now;
                    FileHelper.SaveFile(crawling);
                }
            }
        }
    }
    catch
    {
        // Swallow crawl failures so one bad URL option does not stop the remaining crawlings.
    }
}
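An assumed top-level driver tying the file helpers to StartCrawlings; FileHelper.LoadFiles and FileHelper.LoadIndexFile as qualified names are assumptions based on the other FileHelper calls in this code.

// Hypothetical driver (sketch only): run every saved crawling that is due, reusing its persisted index file.
public static void RunAllCrawlings(WhenCrawling when, bool isBoot)
{
    foreach (Crawling crawling in FileHelper.LoadFiles())
    {
        List<string> indexs = FileHelper.LoadIndexFile(crawling);
        StartCrawlings(when, crawling, indexs, isBoot);
    }
}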
public Crawling SelectValue(Crawling crawl)
{
    string newValue;
    switch (this.Type)
    {
        case 0: // string property
            parent.PrintMenu();
            Console.Write("\nNew Value: ");
            Console.ForegroundColor = ConsoleColor.Green;
            newValue = Console.ReadLine();
            Console.ResetColor();
            crawl.GetType().GetProperty(PropertyName).SetValue(crawl, newValue);
            break;
        case 1: // bool property
            bool done = false;
            bool selected = bool.Parse(crawl.GetType().GetProperty(PropertyName).GetValue(crawl).ToString());
            while (!done)
            {
                parent.PrintMenu();
                Console.WriteLine("\n");
                if (selected)
                {
                    Console.BackgroundColor = ConsoleColor.White;
                    Console.ForegroundColor = ConsoleColor.Black;
                }
                Console.Write("True");
                Console.ResetColor();
                Console.Write(" ");
                if (!selected)
                {
                    Console.BackgroundColor = ConsoleColor.White;
                    Console.ForegroundColor = ConsoleColor.Black;
                }
                Console.WriteLine("False");
                Console.ResetColor();
                ConsoleKeyInfo key = Console.ReadKey();
                switch (key.Key)
                {
                    case ConsoleKey.LeftArrow:
                        if (!selected)
                        {
                            selected = true;
                        }
                        break;
                    case ConsoleKey.RightArrow:
                        if (selected)
                        {
                            selected = false;
                        }
                        break;
                    case ConsoleKey.Enter:
                        crawl.GetType().GetProperty(PropertyName).SetValue(crawl, selected);
                        done = true;
                        break;
                }
            }
            break;
        case 2: // int property
            parent.PrintMenu();
            Console.Write("\nNew Value: ");
            Console.ForegroundColor = ConsoleColor.Green;
            newValue = Console.ReadLine();
            Console.ResetColor();
            crawl.GetType().GetProperty(PropertyName).SetValue(crawl, int.Parse(newValue));
            break;
    }
    return crawl;
}
public CrawlingViewModel() { Crawling = new Crawling(); }
public CrawlingViewModel(string name, Crawling crawling) { Crawling = crawling; Crawling.Name = name; }
public CustomWordCollectionViewModel(Crawling nowCrawling, ObservableCollection<CrawlingViewModel> crawlingList) : this()
{
    this.nowCrawling = nowCrawling;
    this._crawlingList = crawlingList;
}