/// <summary>
/// Evaluates <paramref name="selector"/> against <paramref name="page"/> and returns the
/// resulting text after passing it through every formatter in <c>TotalPageFormatters</c>.
/// </summary>
/// <param name="page">Page the selector is evaluated against.</param>
/// <param name="selector">Selector to evaluate; its <c>Type</c> decides how the value is read.</param>
/// <returns>The selected, formatted text; never null or empty.</returns>
/// <exception cref="SpiderException">The selected (and formatted) text is null or empty.</exception>
private string GetSelectorValue(Page page, BaseSelector selector)
{
    string value = string.Empty;

    // Parse the selector that was passed in. Previously the TotalPageSelector member was
    // parsed here regardless of the argument, so every call returned the total-page value.
    if (selector.Type == SelectorType.Enviroment)
    {
        // If the parsed selector is not an EnviromentSelector the value stays empty
        // and the method throws below.
        if (SelectorUtils.Parse(selector) is EnviromentSelector enviromentSelector)
        {
            value = EntityExtractor.GetEnviromentValue(enviromentSelector.Field, page, 0);
        }
    }
    else
    {
        value = page.Selectable.Select(SelectorUtils.Parse(selector)).GetValue();
    }

    // NOTE(review): TotalPageFormatters is applied whatever selector was supplied;
    // confirm whether other selectors are expected to use their own formatter list.
    if (!string.IsNullOrEmpty(value) && TotalPageFormatters != null)
    {
        foreach (var formatter in TotalPageFormatters)
        {
            value = formatter.Formate(value);
        }
    }

    if (string.IsNullOrEmpty(value))
    {
        throw new SpiderException("The result of total selector is null.");
    }

    return value;
}
// Runs EntityExtractor<Product> over the saved "Jd.html" listing page and checks the
// number of extracted entities plus the fields of the first one.
public void Extract()
{
    var productExtractor = new EntityExtractor<Product>();

    // Request extras supply the values asserted below as CategoryName / CategoryId.
    var extras = new Dictionary<string, dynamic>
    {
        { "cat", "手机" },
        { "cat3", "110" }
    };
    var request = new Request("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", extras)
    {
        Site = new Site()
    };
    var page = new Page(request)
    {
        Content = File.ReadAllText(Path.Combine(Env.BaseDirectory, "Jd.html"))
    };

    var results = productExtractor.Extract(page);

    Assert.Equal(60, results.Count);

    var first = results[0];
    Assert.Equal("手机", first.CategoryName);
    Assert.Equal(110, first.CategoryId);
    Assert.Equal("http://item.jd.com/3031737.html", first.Url);
    Assert.Equal("3031737", first.Sku);
    Assert.Equal("荣耀官方旗舰店", first.ShopName);
    Assert.Equal("荣耀 NOTE 8 4GB+32GB 全网通版 冰河银", first.Name);
    Assert.Equal("1000000904", first.VenderId);
    Assert.Equal("1000000904", first.JdzyShopId);
    // RunId is compared at day granularity against the current local date.
    Assert.Equal(DateTime.Now.ToString("yyyy-MM-dd"), first.RunId.ToString("yyyy-MM-dd"));
}
/// <summary>
/// Reads the file list from <c>tbInputFiles</c>, runs the localization extractor over the
/// included files and over the reference file, diffs the two results and binds the three
/// resulting maps to the grids. Any failure is reported through <c>WindowLogger</c>.
/// </summary>
private void Run()
{
    List<String> includes = new List<string>();
    List<String> excludes = new List<string>();

    // One entry per line: a leading "-" marks an exclusion, a leading "#" marks a line
    // that is skipped, anything else is an inclusion.
    string[] entries = tbInputFiles.Text.Split(new String[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);
    foreach (string entry in entries)
    {
        if (entry.StartsWith("-"))
        {
            excludes.Add(entry.Substring(1));
            continue;
        }

        if (entry.StartsWith("#"))
        {
            continue;
        }

        includes.Add(entry);
    }

    // Rebuilds a map with its entries ordered by ShallowEntityPath.
    Func<Dictionary<String, LocalizableEntity>, Dictionary<String, LocalizableEntity>> sortByPath =
        map => map.OrderBy(pair => pair.Value.ShallowEntityPath)
                  .ToDictionary(pair => pair.Key, pair => pair.Value);

    try
    {
        EntityExtractor ext = LocalizationModule.Create(LocalizerType).Extractor;
        ext.Logger = WindowLogger;

        Dictionary<String, LocalizableEntity> extracted = new Dictionary<string, LocalizableEntity>();
        ext.ProcessFileList(includes.ToArray(), excludes.ToArray(), RootPathResolved, FileExtension, extracted);
        extracted = sortByPath(extracted);

        Dictionary<String, LocalizableEntity> reference = new Dictionary<string, LocalizableEntity>();
        try
        {
            ext.ProcessFile(ReferenceResolved, reference);
        }
        catch (Exception referenceError)
        {
            // A failure on the reference file is logged as a warning; processing continues
            // with whatever was collected in the reference map.
            WindowLogger.LogFormat("Warning: error parsing reference input ({0})", referenceError.Message);
        }
        reference = sortByPath(reference);

        Dictionary<String, LocalizableEntity> diffResult = new Dictionary<string, LocalizableEntity>();
        Diff(extracted, reference, diffResult);
        diffResult = sortByPath(diffResult);

        dgvNew.DataSource = GetLocalizationGridDataSource(extracted, false);
        dgvCurrent.DataSource = GetLocalizationGridDataSource(reference, true);
        dgvDeleted.DataSource = GetLocalizationGridDataSource(diffResult, false);
    }
    catch (Exception error)
    {
        WindowLogger.Log(error.Message);
    }
}
/// <summary>
/// Runs the query returned by <c>GetSelectQueryString()</c> and, for every returned row,
/// fills <c>FormateString</c> with the formatted column values and adds the resulting URL
/// to <paramref name="site"/> as a start URL, with the row attached.
/// </summary>
/// <param name="site">Site that receives the generated start URLs.</param>
public override void Build(Site site)
{
    // Build each column's formatter list once, up front, instead of once per row.
    Dictionary<Column, List<Formatter>> formattersByColumn = new Dictionary<Column, List<Formatter>>();
    foreach (var column in Columns)
    {
        formattersByColumn.Add(column, EntityExtractor.GenerateFormatter(column.Formatters));
    }

    using (var connection = new MySqlConnection(ConnectString))
    {
        var rows = connection.Query(GetSelectQueryString());
        var options = new ParallelOptions { MaxDegreeOfParallelism = 1 };

        Parallel.ForEach(rows, options, row =>
        {
            IDictionary<string, object> fields = (IDictionary<string, object>)row;

            // Format arguments are collected in Columns order.
            List<object> formatArgs = new List<object>();
            foreach (var column in Columns)
            {
                string text = fields[column.Name]?.ToString();
                foreach (var formatter in formattersByColumn[column])
                {
                    text = formatter.Formate(text);
                }
                formatArgs.Add(text);
            }

            string startUrl = string.Format(FormateString, formatArgs.ToArray());
            site.AddStartUrl(startUrl, fields);
        });
    }
}
// Runs an EntityExtractor built from the Product entity definition over the saved
// "Jd.html" listing page and checks the number of results plus the first result's
// values, which are read by key.
public void Extract()
{
    var entityMetadata = EntitySpider.GenerateEntityDefine(typeof(Product).GetTypeInfo());
    EntityExtractor extractor = new EntityExtractor("test", null, entityMetadata);

    // Request extras supply the values asserted below as CategoryName / CategoryId.
    var extras = new Dictionary<string, dynamic>
    {
        { "cat", "手机" },
        { "cat3", "110" }
    };
    var request = new Request("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", extras);
    var page = new Page(request, null)
    {
        Content = File.ReadAllText(Path.Combine(Core.Environment.BaseDirectory, "Jd.html"))
    };

    var results = extractor.Extract(page);

    Assert.Equal(60, results.Count);

    var first = results[0];
    Assert.Equal("手机", first["CategoryName"]);
    Assert.Equal("110", first["CategoryId"]);
    Assert.Equal("http://item.jd.com/3031737.html", first["Url"]);
    Assert.Equal("3031737", first["Sku"]);
    Assert.Equal("荣耀官方旗舰店", first["ShopName"]);
    Assert.Equal("荣耀 NOTE 8 4GB+32GB 全网通版 冰河银", first["Name"]);
    Assert.Equal("1000000904", first["VenderId"]);
    Assert.Equal("1000000904", first["JdzyShopId"]);
    // RunId is expected to equal the current local date formatted as yyyy-MM-dd.
    Assert.Equal(DateTime.Now.ToString("yyyy-MM-dd"), first["RunId"]);
}
/// <summary>
/// Reads the total page number and the current page number from <paramref name="page"/>
/// using <c>TotalPageSelector</c> and <c>CurrenctPageSelector</c> and reports whether they
/// are equal.
/// </summary>
/// <param name="page">Page the selectors are evaluated against.</param>
/// <param name="creator">Not used by this method.</param>
/// <returns><c>true</c> when the two page numbers are equal; otherwise <c>false</c>.</returns>
public bool NeedStop(Page page, BaseTargetUrlsCreator creator)
{
    // The two defaults differ, so when neither number can be read the comparison at the
    // end is false.
    int totalPage = -2000;
    int currentPage = -1000;

    if (TotalPageSelector != null)
    {
        string totalText = string.Empty;
        if (TotalPageSelector.Type != SelectorType.Enviroment)
        {
            totalText = page.Selectable.Select(SelectorUtil.Parse(TotalPageSelector)).GetValue();
        }
        else
        {
            var envSelector = SelectorUtil.Parse(TotalPageSelector) as EnviromentSelector;
            if (envSelector != null)
            {
                totalText = EntityExtractor.GetEnviromentValue(envSelector.Field, page, 0);
            }
        }

        // Formatters only run on a non-empty value.
        if (!string.IsNullOrEmpty(totalText) && TotalPageFormatters != null)
        {
            foreach (var formatter in TotalPageFormatters)
            {
                totalText = formatter.Formate(totalText);
            }
        }

        if (!string.IsNullOrEmpty(totalText))
        {
            totalPage = int.Parse(totalText);
        }
    }

    if (CurrenctPageSelector != null)
    {
        string currentText = string.Empty;
        if (CurrenctPageSelector.Type != SelectorType.Enviroment)
        {
            currentText = page.Selectable.Select(SelectorUtil.Parse(CurrenctPageSelector)).GetValue();
        }
        else
        {
            var envSelector = SelectorUtil.Parse(CurrenctPageSelector) as EnviromentSelector;
            if (envSelector != null)
            {
                currentText = EntityExtractor.GetEnviromentValue(envSelector.Field, page, 0);
            }
        }

        // Formatters only run on a non-empty value.
        if (!string.IsNullOrEmpty(currentText) && CurrnetPageFormatters != null)
        {
            foreach (var formatter in CurrnetPageFormatters)
            {
                currentText = formatter.Formate(currentText);
            }
        }

        if (!string.IsNullOrEmpty(currentText))
        {
            currentPage = int.Parse(currentText);
        }
    }

    return currentPage == totalPage;
}
/// <inheritdoc />
public Task<TEntity> DiscoverAsync(TSelector selector)
    // Pure pass-through: the task produced by the extractor is returned as-is.
    => EntityExtractor.ExtractAsync(selector);