Ejemplo n.º 1
0
        /// <summary>
        /// Evaluates <paramref name="selector"/> against <paramref name="page"/>, runs the result
        /// through <c>TotalPageFormatters</c> (when set) and returns the final text.
        /// </summary>
        /// <param name="page">Page the value is read from.</param>
        /// <param name="selector">
        /// Selector to evaluate. When its type is <c>SelectorType.Enviroment</c> the value comes from
        /// <c>EntityExtractor.GetEnviromentValue</c>; otherwise it is selected from the page content.
        /// </param>
        /// <returns>The non-empty selected (and formatted) value.</returns>
        /// <exception cref="SpiderException">The value is null or empty after formatting.</exception>
        private string GetSelectorValue(Page page, BaseSelector selector)
        {
            string value = string.Empty;

            if (selector.Type == SelectorType.Enviroment)
            {
                // Parse the selector that was passed in. Parsing TotalPageSelector here (as the
                // code previously did) ignored the argument and returned the total-page value
                // for every caller.
                if (SelectorUtils.Parse(selector) is EnviromentSelector enviromentSelector)
                {
                    value = EntityExtractor.GetEnviromentValue(enviromentSelector.Field, page, 0);
                }
            }
            else
            {
                value = page.Selectable.Select(SelectorUtils.Parse(selector)).GetValue();
            }

            // NOTE(review): the formatter list is still TotalPageFormatters regardless of which
            // selector was supplied; the signature offers no way to pass a different list —
            // confirm against the callers whether that is intended.
            if (!string.IsNullOrEmpty(value) && TotalPageFormatters != null)
            {
                foreach (var formatter in TotalPageFormatters)
                {
                    value = formatter.Formate(value);
                }
            }

            if (string.IsNullOrEmpty(value))
            {
                throw new SpiderException("The result of total selector is null.");
            }

            return value;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs <c>EntityExtractor&lt;Product&gt;</c> over the saved "Jd.html" listing page and checks
        /// the number of results and the fields of the first one.
        /// </summary>
        public void Extract()
        {
            var extractor = new EntityExtractor<Product>();

            // Extra request values; the assertions below expect them back as CategoryName / CategoryId.
            var extras = new Dictionary<string, dynamic>
            {
                { "cat", "手机" },
                { "cat3", "110" }
            };
            var request = new Request("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", extras)
            {
                Site = new Site()
            };
            var page = new Page(request)
            {
                Content = File.ReadAllText(Path.Combine(Env.BaseDirectory, "Jd.html"))
            };

            var results = extractor.Extract(page);

            Assert.Equal(60, results.Count);

            var first = results[0];
            Assert.Equal("手机", first.CategoryName);
            Assert.Equal(110, first.CategoryId);
            Assert.Equal("http://item.jd.com/3031737.html", first.Url);
            Assert.Equal("3031737", first.Sku);
            Assert.Equal("荣耀官方旗舰店", first.ShopName);
            Assert.Equal("荣耀 NOTE 8 4GB+32GB 全网通版 冰河银", first.Name);
            Assert.Equal("1000000904", first.VenderId);
            Assert.Equal("1000000904", first.JdzyShopId);
            // RunId is compared by calendar day against the current local date.
            Assert.Equal(DateTime.Now.ToString("yyyy-MM-dd"), first.RunId.ToString("yyyy-MM-dd"));
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Reads the file list from the input text box, extracts localizable entities from the
        /// included files and from the reference file, diffs the two sets and binds the three
        /// resulting maps to the "new", "current" and "deleted" grids.
        /// </summary>
        /// <remarks>
        /// Input lines starting with "-" are treated as exclusions (marker stripped), lines starting
        /// with "#" are skipped, and every other non-empty line is an include. Any exception raised
        /// during processing is logged via <c>WindowLogger</c> rather than propagated.
        /// </remarks>
        private void Run()
        {
            List <String> includes = new List <string>();
            List <String> excludes = new List <string>();

            string[] lines = tbInputFiles.Text.Split(new String[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);

            foreach (var fileName in lines)
            {
                // Ordinal comparison: "-" and "#" are fixed marker characters, so the check must
                // not depend on the current culture's collation rules.
                if (fileName.StartsWith("-", StringComparison.Ordinal))
                {
                    excludes.Add(fileName.Substring(1));
                }
                else if (!fileName.StartsWith("#", StringComparison.Ordinal))
                {
                    includes.Add(fileName);
                }
            }

            try
            {
                EntityExtractor ext = LocalizationModule.Create(LocalizerType).Extractor;
                ext.Logger = WindowLogger;

                // map1: entities found in the selected input files.
                Dictionary <String, LocalizableEntity> map1 = new Dictionary <string, LocalizableEntity>();
                ext.ProcessFileList(includes.ToArray(), excludes.ToArray(), RootPathResolved, FileExtension, map1);

                // NOTE(review): the re-sorted dictionaries below rely on Dictionary enumerating in
                // insertion order, which the framework does not document as guaranteed — confirm
                // the grids do not need a stricter ordering.
                map1 = map1.OrderBy(a => a.Value.ShallowEntityPath).ToDictionary(a => a.Key, a => a.Value);

                // map2: entities found in the reference file. A failure here is only a warning;
                // processing continues with whatever map2 holds at that point.
                Dictionary <String, LocalizableEntity> map2 = new Dictionary <string, LocalizableEntity>();
                try
                {
                    ext.ProcessFile(ReferenceResolved, map2);
                }
                catch (Exception ex)
                {
                    WindowLogger.LogFormat("Warning: error parsing reference input ({0})", ex.Message);
                }

                map2 = map2.OrderBy(a => a.Value.ShallowEntityPath).ToDictionary(a => a.Key, a => a.Value);

                // map3: filled by Diff from map1 and map2; bound to the "deleted" grid below.
                Dictionary <String, LocalizableEntity> map3 = new Dictionary <string, LocalizableEntity>();

                Diff(map1, map2, map3);

                map3 = map3.OrderBy(a => a.Value.ShallowEntityPath).ToDictionary(a => a.Key, a => a.Value);

                dgvNew.DataSource     = GetLocalizationGridDataSource(map1, false);
                dgvCurrent.DataSource = GetLocalizationGridDataSource(map2, true);
                dgvDeleted.DataSource = GetLocalizationGridDataSource(map3, false);
            }
            catch (Exception ex)
            {
                WindowLogger.Log(ex.Message);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Queries the database, formats the configured column values of every returned row into
        /// <c>FormateString</c> and registers each resulting URL on <paramref name="site"/> as a
        /// start URL, passing the row along with it.
        /// </summary>
        /// <param name="site">Site that receives the generated start URLs.</param>
        public override void Build(Site site)
        {
            // Build each column's formatter list once, before any row is processed.
            var formatterLookup = new Dictionary<Column, List<Formatter>>();
            foreach (var column in Columns)
            {
                formatterLookup.Add(column, EntityExtractor.GenerateFormatter(column.Formatters));
            }

            using (var connection = new MySqlConnection(ConnectString))
            {
                var rows = connection.Query(GetSelectQueryString());

                // Parallelism is capped at 1, so rows are handled one at a time.
                var options = new ParallelOptions
                {
                    MaxDegreeOfParallelism = 1
                };

                Parallel.ForEach(rows, options, row =>
                {
                    IDictionary<string, object> fields = (IDictionary<string, object>)row;
                    var formatArgs = new List<object>();

                    foreach (var column in Columns)
                    {
                        // A null cell stays null and is handed to the formatters as-is.
                        string text = fields[column.Name]?.ToString();
                        foreach (var formatter in formatterLookup[column])
                        {
                            text = formatter.Formate(text);
                        }

                        formatArgs.Add(text);
                    }

                    string startUrl = string.Format(FormateString, formatArgs.ToArray());
                    site.AddStartUrl(startUrl, fields);
                });
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Builds the entity definition for <c>Product</c>, runs a metadata-driven
        /// <c>EntityExtractor</c> over the saved "Jd.html" listing page and checks the number of
        /// results and the fields of the first one.
        /// </summary>
        public void Extract()
        {
            var entityMetadata = EntitySpider.GenerateEntityDefine(typeof(Product).GetTypeInfo());
            var extractor = new EntityExtractor("test", null, entityMetadata);

            // Extra request values; the assertions below expect them back as CategoryName / CategoryId.
            var extras = new Dictionary<string, dynamic>
            {
                { "cat", "手机" },
                { "cat3", "110" }
            };
            var request = new Request("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", extras);
            var page = new Page(request, null)
            {
                Content = File.ReadAllText(Path.Combine(Core.Environment.BaseDirectory, "Jd.html"))
            };

            var results = extractor.Extract(page);

            Assert.Equal(60, results.Count);

            var first = results[0];
            Assert.Equal("手机", first["CategoryName"]);
            Assert.Equal("110", first["CategoryId"]);
            Assert.Equal("http://item.jd.com/3031737.html", first["Url"]);
            Assert.Equal("3031737", first["Sku"]);
            Assert.Equal("荣耀官方旗舰店", first["ShopName"]);
            Assert.Equal("荣耀 NOTE 8 4GB+32GB 全网通版 冰河银", first["Name"]);
            Assert.Equal("1000000904", first["VenderId"]);
            Assert.Equal("1000000904", first["JdzyShopId"]);
            // RunId is expected to equal the current local date formatted as yyyy-MM-dd.
            Assert.Equal(DateTime.Now.ToString("yyyy-MM-dd"), first["RunId"]);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Decides whether paging should stop by comparing the current page number with the total
        /// page count, both read from <paramref name="page"/> through the configured selectors.
        /// </summary>
        /// <param name="page">Page the two numbers are read from.</param>
        /// <param name="creator">Not used by this implementation.</param>
        /// <returns><c>true</c> when the current page number equals the total page count.</returns>
        public bool NeedStop(Page page, BaseTargetUrlsCreator creator)
        {
            // The two defaults differ on purpose: when neither value can be resolved the
            // comparison at the end is false.
            int totalPage = -2000;
            int currentPage = -1000;

            if (TotalPageSelector != null)
            {
                string totalText = string.Empty;

                if (TotalPageSelector.Type != SelectorType.Enviroment)
                {
                    totalText = page.Selectable.Select(SelectorUtil.Parse(TotalPageSelector)).GetValue();
                }
                else
                {
                    var envSelector = SelectorUtil.Parse(TotalPageSelector) as EnviromentSelector;
                    if (envSelector != null)
                    {
                        totalText = EntityExtractor.GetEnviromentValue(envSelector.Field, page, 0);
                    }
                }

                // Formatters only run on a non-empty value; the result is re-checked before
                // parsing because a formatter may return an empty string.
                if (!string.IsNullOrEmpty(totalText) && TotalPageFormatters != null)
                {
                    foreach (var formatter in TotalPageFormatters)
                    {
                        totalText = formatter.Formate(totalText);
                    }
                }

                if (!string.IsNullOrEmpty(totalText))
                {
                    totalPage = int.Parse(totalText);
                }
            }

            if (CurrenctPageSelector != null)
            {
                string currentText = string.Empty;

                if (CurrenctPageSelector.Type != SelectorType.Enviroment)
                {
                    currentText = page.Selectable.Select(SelectorUtil.Parse(CurrenctPageSelector)).GetValue();
                }
                else
                {
                    var envSelector = SelectorUtil.Parse(CurrenctPageSelector) as EnviromentSelector;
                    if (envSelector != null)
                    {
                        currentText = EntityExtractor.GetEnviromentValue(envSelector.Field, page, 0);
                    }
                }

                if (!string.IsNullOrEmpty(currentText) && CurrnetPageFormatters != null)
                {
                    foreach (var formatter in CurrnetPageFormatters)
                    {
                        currentText = formatter.Formate(currentText);
                    }
                }

                if (!string.IsNullOrEmpty(currentText))
                {
                    currentPage = int.Parse(currentText);
                }
            }

            return currentPage == totalPage;
        }
Ejemplo n.º 7
0
 /// <inheritdoc />
 // Pure pass-through: the extractor's task is handed back to the caller without being awaited here.
 public Task<TEntity> DiscoverAsync(TSelector selector)
     => EntityExtractor.ExtractAsync(selector);