コード例 #1
0
        /// <summary>
        /// Resolves the value addressed by <paramref name="selectorAttribute"/> and passes it
        /// through every formatter in <c>TotalPageFormatters</c>.
        /// </summary>
        /// <param name="page">Page that is queried, either through its selectable content or through its environment values.</param>
        /// <param name="selectorAttribute">Selector description; its <c>Type</c> decides which lookup is used.</param>
        /// <returns>The resolved, formatted value; never null, empty or whitespace.</returns>
        /// <exception cref="SpiderException">Thrown when the final value is null, empty or whitespace.</exception>
        private string GetSelectorValue(Page page, Selector selectorAttribute)
        {
            var parsedSelector = selectorAttribute.ToSelector();
            string value;

            if (selectorAttribute.Type != SelectorType.Enviroment)
            {
                // Regular selector: evaluate it against the page content.
                value = page.Selectable.Select(parsedSelector).GetValue();
            }
            else if (parsedSelector is EnviromentSelector environment)
            {
                // Environment selector: read the named environment field for index 0.
                value = SelectorUtil.GetEnviromentValue(environment.Field, page, 0)?.ToString();
            }
            else
            {
                // Declared as an environment selector but parsed to something else.
                value = string.Empty;
            }

            if (!string.IsNullOrEmpty(value) && TotalPageFormatters != null)
            {
                foreach (var formatter in TotalPageFormatters)
                {
                    value = formatter.Formate(value)?.ToString();
                }
            }

            if (!string.IsNullOrWhiteSpace(value))
            {
                return value;
            }

            throw new SpiderException("The result of total selector is null");
        }
コード例 #2
0
        /// <summary>
        /// Extracts the entity, or the list of entities, described by <c>EntityMetadata</c>
        /// from <paramref name="page"/>.
        /// </summary>
        /// <remarks>
        /// Every entry of <c>_globalValues</c> is evaluated against the page first and stored on
        /// the request through <c>PutExtra</c>.
        /// </remarks>
        /// <param name="page">Page to extract from.</param>
        /// <returns>
        /// The extracted objects. Returns null when the multi selector matches no nodes, when the
        /// single selector yields no selectable, or when the single extraction itself yields null.
        /// </returns>
        public virtual List<JObject> Extract(Page page)
        {
            if (_globalValues != null && _globalValues.Count > 0)
            {
                foreach (var enviromentValue in _globalValues)
                {
                    string name = enviromentValue.Name;
                    var value = page.Selectable.Select(SelectorUtil.Parse(enviromentValue)).GetValue();
                    page.Request.PutExtra(name, value);
                }
            }

            ISelector selector = SelectorUtil.Parse(EntityMetadata.Entity.Selector);

            if (selector != null && EntityMetadata.Entity.Multi)
            {
                var list = page.Selectable.SelectList(selector).Nodes();
                if (list == null || list.Count == 0)
                {
                    return null;
                }

                var countToken = EntityMetadata.Limit;
                if (countToken != null)
                {
                    list = list.Take(countToken.Value).ToList();
                }

                List<JObject> result = new List<JObject>();
                int index = 0;
                foreach (var item in list)
                {
                    JObject obj = ExtractSingle(page, item, index);
                    if (obj != null)
                    {
                        result.Add(obj);
                    }
                    index++;
                }
                return result;
            }

            ISelectable select = selector == null ? page.Selectable : page.Selectable.Select(selector);
            if (select == null)
            {
                return null;
            }

            var singleResult = ExtractSingle(page, select, 0);

            // The multi branch drops null extractions; do the same here instead of
            // handing callers a list whose only element is null.
            if (singleResult == null)
            {
                return null;
            }

            return new List<JObject> { singleResult };
        }
コード例 #3
0
        /// <summary>
        /// Decides whether paging can stop by comparing the current page number with the
        /// total page number read from <paramref name="page"/>.
        /// </summary>
        /// <param name="page">Page whose content is queried with the two selectors.</param>
        /// <returns>True when both numbers were resolved and are equal; otherwise false.</returns>
        public override bool CanStop(Page page)
        {
            // The two defaults differ, so unresolved values never compare equal.
            int totalPage = -2000;
            int currentPage = -1000;

            if (TotalPageSelector != null)
            {
                string totalText = page.Selectable.Select(SelectorUtil.GetSelector(TotalPageSelector)).GetValue();
                if (!string.IsNullOrEmpty(totalText))
                {
                    totalPage = int.Parse(totalText);
                }
            }

            if (CurrenctPageSelector != null)
            {
                string currentText = page.Selectable.Select(SelectorUtil.GetSelector(CurrenctPageSelector)).GetValue();
                if (!string.IsNullOrEmpty(currentText))
                {
                    currentPage = int.Parse(currentText);
                }
            }

            return currentPage == totalPage;
        }
コード例 #4
0
        /// <summary>
        /// Parses the page into crawler entity objects.
        /// </summary>
        /// <remarks>
        /// Every entry of <c>EntityDefine.SharedValues</c> is evaluated against the page first
        /// and stored on the request through <c>PutExtra</c>.
        /// </remarks>
        /// <param name="page">Page data.</param>
        /// <returns>
        /// The crawler entity objects. Returns null when the multi selector matches no nodes,
        /// when the single selector yields no selectable, or when the single extraction is null.
        /// </returns>
        public List<T> Extract(Page page)
        {
            if (EntityDefine.SharedValues != null && EntityDefine.SharedValues.Count > 0)
            {
                foreach (var sharedValue in EntityDefine.SharedValues)
                {
                    string name = sharedValue.Name;
                    var extracted = page.Selectable.Select(SelectorUtil.Parse(sharedValue)).GetValue();
                    page.Request.PutExtra(name, extracted);
                }
            }

            ISelector selector = SelectorUtil.Parse(EntityDefine.Selector);

            // Single-entity path: no selector at all, or the entity is not flagged as multi.
            if (selector == null || !EntityDefine.Multi)
            {
                ISelectable scope = selector == null ? page.Selectable : page.Selectable.Select(selector);
                if (scope == null)
                {
                    return null;
                }

                var single = ExtractSingle(page, scope, 0);
                if (single == null)
                {
                    return null;
                }

                return new List<T> { single };
            }

            // Multi-entity path: one extraction per matched node.
            var nodes = page.Selectable.SelectList(selector).Nodes();
            if (nodes == null || nodes.Count == 0)
            {
                return null;
            }

            if (EntityDefine.Take > 0)
            {
                nodes = nodes.Take(EntityDefine.Take).ToList();
            }

            var entities = new List<T>();
            int index = 0;
            foreach (var node in nodes)
            {
                var entity = ExtractSingle(page, node, index);
                if (entity != null)
                {
                    entities.Add(entity);
                }
                index++;
            }

            return entities;
        }
コード例 #5
0
        /// <summary>
        /// Decides whether paging can stop by comparing the dates found on
        /// <paramref name="page"/> with the dates listed in <c>Stoppers</c>.
        /// </summary>
        /// <param name="page">Page whose content is queried with <c>CurrenctPageSelector</c>.</param>
        /// <returns>
        /// True when the selector yields null, or when any formatted date is earlier
        /// (<c>IsBefore</c>) or later (otherwise) than any stopper date; false otherwise.
        /// </returns>
        public override bool CanStop(Page page)
        {
            var values = page.Selectable.SelectList(SelectorUtil.GetSelector(CurrenctPageSelector)).GetValues();
            if (values == null)
            {
                return true;
            }

            // Phase 1: format every value before any of them is parsed.
            List<string> formatted = new List<string>();
            foreach (var raw in values)
            {
                var text = raw;
                if (CurrenctPageFormatters != null)
                {
                    foreach (var formatter in CurrenctPageFormatters)
                    {
                        text = formatter.Formate(text);
                    }
                }
                formatted.Add(text);
            }

            // Phase 2: compare each date against every stopper.
            foreach (var text in formatted)
            {
                var pageDate = DateTime.Parse(text.ToString());
                bool stopWhenEarlier = IsBefore;

                foreach (var stopper in Stoppers)
                {
                    var stopDate = DateTime.Parse(stopper);
                    bool reached = stopWhenEarlier ? pageDate < stopDate : pageDate > stopDate;
                    if (reached)
                    {
                        return true;
                    }
                }
            }

            return false;
        }
コード例 #6
0
        /// <summary>
        /// Decides whether crawling should stop by comparing the dates selected with
        /// <c>TimeSelector</c> against the dates listed in <c>Times</c>.
        /// </summary>
        /// <param name="page">Page whose content is queried.</param>
        /// <param name="creator">Not used by this implementation.</param>
        /// <returns>
        /// True when the selector yields null, or when any formatted date is earlier
        /// (<c>IsBefore</c>) or later (otherwise) than any configured time; false otherwise.
        /// </returns>
        public bool NeedStop(Page page, BaseTargetUrlsCreator creator)
        {
            var selected = page.Selectable.SelectList(SelectorUtil.Parse(TimeSelector)).GetValues();
            if (selected == null)
            {
                return true;
            }

            // Phase 1: run every selected value through the formatters.
            List<string> formatted = new List<string>();
            foreach (var raw in selected)
            {
                var text = raw;
                if (TimeFormatters != null)
                {
                    foreach (var formatter in TimeFormatters)
                    {
                        text = formatter.Formate(text);
                    }
                }
                formatted.Add(text);
            }

            // Phase 2: compare each parsed date with every configured time.
            foreach (var text in formatted)
            {
                var pageDate = DateTime.Parse(text);
                bool stopWhenEarlier = IsBefore;

                foreach (var stopper in Times)
                {
                    var stopDate = DateTime.Parse(stopper);
                    bool reached = stopWhenEarlier ? pageDate < stopDate : pageDate > stopDate;
                    if (reached)
                    {
                        return true;
                    }
                }
            }

            return false;
        }
コード例 #7
0
ファイル: ModelExtractor.cs プロジェクト: xyfy/DotnetSpider
        /// <summary>
        /// Extracts the value of one field from <paramref name="item"/> and applies the
        /// field's formatters.
        /// </summary>
        /// <param name="item">Selectable the field selector is evaluated against.</param>
        /// <param name="page">Page used to resolve environment selectors.</param>
        /// <param name="field">Field definition; a null field yields null.</param>
        /// <param name="index">Index handed to the environment lookup.</param>
        /// <returns>The formatted value as a string, or null when there is no field, selector or value.</returns>
        private string ExtractField(ISelectable item, Page page, Field field, int index)
        {
            var selector = field?.ToSelector();
            if (selector == null)
            {
                return null;
            }

            object value;
            var enviromentSelector = selector as EnviromentSelector;

            if (enviromentSelector != null)
            {
                value = SelectorUtil.GetEnviromentValue(enviromentSelector.Field, page, index);
            }
            else if (field.Option == FieldOptions.Count)
            {
                // Count option: the value is the number of matched nodes, as text.
                value = item.SelectList(selector).Nodes().Count().ToString();
            }
            else
            {
                value = item.Select(selector)?.GetValue(ConvertToValueOption(field.Option));
            }

            if (field.Formatters != null && field.Formatters.Any())
            {
                foreach (var formatter in field.Formatters)
                {
                    // In DEBUG builds a failing formatter is logged and skipped;
                    // in other builds the exception propagates.
#if DEBUG
                    try
                    {
#endif
                        value = formatter.Formate(value);
#if DEBUG
                    }
                    catch (Exception e)
                    {
                        Log.Logger.Error(e.ToString());
                    }
#endif
                }
            }

            return value?.ToString();
        }
コード例 #8
0
 /// <summary>
 /// Checks that <c>SelectorUtil.NotNullExpression</c> throws
 /// <see cref="ArgumentException"/> for an empty, a null and a whitespace-only expression.
 /// </summary>
 public void NotNullExpression()
 {
     // Empty expression.
     Assert.Throws<ArgumentException>(() => SelectorUtil.NotNullExpression(new Selector("")));

     // Null expression.
     Assert.Throws<ArgumentException>(() => SelectorUtil.NotNullExpression(new Selector(null)));

     // Whitespace-only expression.
     Assert.Throws<ArgumentException>(() => SelectorUtil.NotNullExpression(new Selector("  ")));
 }
コード例 #9
0
        /// <summary>
        /// Builds a <c>Model.TargetUrlExtractor</c> from this object's <c>Formatters</c>,
        /// <c>Region</c> and <c>Patterns</c>.
        /// </summary>
        /// <returns>
        /// The extractor, with one <see cref="Regex"/> added for every pattern that is not
        /// null, empty or whitespace-only.
        /// </returns>
        internal Model.TargetUrlExtractor GetTargetUrlExtractInfo()
        {
            var extractor = new Model.TargetUrlExtractor();
            extractor.Formatters = Formatters;
            extractor.Region = SelectorUtil.Parse(Region);

            foreach (var pattern in Patterns)
            {
                var trimmed = pattern?.Trim();
                if (string.IsNullOrEmpty(trimmed))
                {
                    continue;
                }

                // The regex is built from the untrimmed pattern text.
                extractor.Patterns.Add(new Regex(pattern));
            }

            return extractor;
        }
コード例 #10
0
        /// <summary>
        /// Decides whether crawling should stop by comparing the formatted current page
        /// number with the formatted total page number read from <paramref name="page"/>.
        /// </summary>
        /// <param name="page">Page whose content is queried with the two selectors.</param>
        /// <param name="creator">Not used by this implementation.</param>
        /// <returns>True when both numbers were resolved and are equal; otherwise false.</returns>
        public bool NeedStop(Page page, BaseTargetUrlsCreator creator)
        {
            // The two defaults differ, so unresolved values never compare equal.
            int totalPage = -2000;
            int currentPage = -1000;

            if (TotalPageSelector != null)
            {
                string totalText = page.Selectable.Select(SelectorUtil.Parse(TotalPageSelector)).GetValue();

                // Formatters run on the raw value, before the empty check.
                if (TotalPageFormatters != null)
                {
                    foreach (var formatter in TotalPageFormatters)
                    {
                        totalText = formatter.Formate(totalText);
                    }
                }

                if (!string.IsNullOrEmpty(totalText))
                {
                    totalPage = int.Parse(totalText);
                }
            }

            if (CurrenctPageSelector != null)
            {
                string currentText = page.Selectable.Select(SelectorUtil.Parse(CurrenctPageSelector)).GetValue();

                if (CurrnetPageFormatters != null)
                {
                    foreach (var formatter in CurrnetPageFormatters)
                    {
                        currentText = formatter.Formate(currentText);
                    }
                }

                if (!string.IsNullOrEmpty(currentText))
                {
                    currentPage = int.Parse(currentText);
                }
            }

            return currentPage == totalPage;
        }
コード例 #11
0
        /// <summary>
        /// Reads an interval from <paramref name="page"/> using <c>Selector</c> and
        /// <c>IntervalFormatters</c>.
        /// </summary>
        /// <param name="page">Page whose content is queried.</param>
        /// <returns>
        /// The parsed integer, or null when the selected value is null or empty either
        /// before or after formatting.
        /// </returns>
        public int? Interval(Page page)
        {
            var text = page.Selectable.Select(SelectorUtil.Parse(Selector)).GetValue();
            if (string.IsNullOrEmpty(text))
            {
                return null;
            }

            if (IntervalFormatters != null)
            {
                foreach (var formatter in IntervalFormatters)
                {
                    text = formatter.Formate(text);
                }
            }

            // A formatter may have reduced the text to nothing.
            if (string.IsNullOrEmpty(text))
            {
                return null;
            }

            return int.Parse(text);
        }
コード例 #12
0
        /// <summary>
        /// Decides whether paging can stop by comparing the dates found on
        /// <paramref name="page"/> with the dates listed in <c>Stoppers</c>.
        /// </summary>
        /// <param name="page">Page whose content is queried with <c>CurrenctPageSelector</c>.</param>
        /// <returns>
        /// True when the selector yields null, or when any selected date is earlier
        /// (<c>IsBefore</c>) or later (otherwise) than any stopper date; false otherwise.
        /// </returns>
        public override bool CanStop(Page page)
        {
            var selected = page.Selectable.SelectList(SelectorUtil.GetSelector(CurrenctPageSelector)).GetValues();
            if (selected == null)
            {
                return true;
            }

            foreach (var text in (List<string>)selected)
            {
                var pageDate = DateTime.Parse(text.ToString());
                bool stopWhenEarlier = IsBefore;

                foreach (var stopper in Stoppers)
                {
                    var stopDate = DateTime.Parse(stopper);
                    bool reached = stopWhenEarlier ? pageDate < stopDate : pageDate > stopDate;
                    if (reached)
                    {
                        return true;
                    }
                }
            }

            return false;
        }
コード例 #13
0
        /// <summary>
        /// Extracts the value of one data token from <paramref name="item"/>. The token is
        /// either a plain field or a nested entity, which is extracted recursively.
        /// </summary>
        /// <param name="item">Selectable the token's selector is evaluated against.</param>
        /// <param name="page">Page used to resolve environment selectors.</param>
        /// <param name="field">Field or entity definition to extract.</param>
        /// <param name="index">Index handed to the environment lookup.</param>
        /// <returns>
        /// Null when the token has no selector. For a plain field: a string, a count
        /// (text for multi fields, 0 or 1 for single fields) or a <c>JArray</c> of strings.
        /// For an entity: a <c>JObject</c>, or a <c>JArray</c> of them when the entity is multi.
        /// </returns>
        private dynamic ExtractField(ISelectable item, Page page, DataToken field, int index)
        {
            ISelector selector = SelectorUtil.GetSelector(field.Selector);

            if (selector == null)
            {
                return null;
            }

            var f = field as Field;
            List<Formatter.Formatter> formatters = GenerateFormatter(f?.Formatters);

            bool isEntity = field is Entity;

            if (!isEntity)
            {
                string tmpValue;
                var enviromentSelector = selector as EnviromentSelector;
                if (enviromentSelector != null)
                {
                    tmpValue = GetEnviromentValue(enviromentSelector.Field, page, index);
                    foreach (var formatter in formatters)
                    {
                        tmpValue = formatter.Formate(tmpValue);
                    }
                    return tmpValue;
                }

                if (field.Multi)
                {
                    var propertyValues = item.SelectList(selector).Nodes();
                    var option = ((Field)field).Option;

                    if (option == PropertyExtractBy.ValueOption.Count)
                    {
                        // "-1" is reported when the node list itself is null.
                        return propertyValues != null ? propertyValues.Count.ToString() : "-1";
                    }

                    List<string> results = new List<string>();

                    // The node list can be null (see the Count branch above);
                    // treat that as "no values" instead of dereferencing it.
                    if (propertyValues != null)
                    {
                        bool plainText = option == PropertyExtractBy.ValueOption.PlainText;
                        foreach (var propertyValue in propertyValues)
                        {
                            string tmp = propertyValue.GetValue(plainText);
                            foreach (var formatter in formatters)
                            {
                                tmp = formatter.Formate(tmp);
                            }
                            results.Add(tmp);
                        }
                    }
                    return new JArray(results);
                }

                tmpValue = item.Select(selector)?.GetValue(((Field)field).Option == PropertyExtractBy.ValueOption.PlainText);
                if (((Field)field).Option == PropertyExtractBy.ValueOption.Count)
                {
                    return tmpValue == null ? 0 : 1;
                }

                tmpValue = formatters.Aggregate(tmpValue, (acc, fmt) => fmt.Formate(acc));
                return tmpValue;
            }
            else
            {
                if (field.Multi)
                {
                    JArray objs = new JArray();
                    var selectables = item.SelectList(selector).Nodes();

                    // Same null guard as for plain multi fields.
                    if (selectables != null)
                    {
                        foreach (var selectable in selectables)
                        {
                            JObject obj = new JObject();

                            foreach (var child in ((Entity)field).Fields)
                            {
                                obj.Add(child.Name, ExtractField(selectable, page, child, 0));
                            }
                            objs.Add(obj);
                        }
                    }
                    return objs;
                }
                else
                {
                    JObject obj = new JObject();
                    var selectable = item.Select(selector);
                    foreach (var child in ((Entity)field).Fields)
                    {
                        // Extract the child, not the enclosing entity: passing the entity
                        // itself would re-enter this branch for every child.
                        obj.Add(child.Name, ExtractField(selectable, page, child, 0));
                    }
                    return obj;
                }
            }
        }
コード例 #14
0
        /// <summary>
        /// Extracts the value of one column from <paramref name="item"/>, applies the
        /// column's formatters and converts the result to the column's data type.
        /// </summary>
        /// <param name="item">Selectable the column selector is evaluated against.</param>
        /// <param name="page">Page used to resolve environment selectors.</param>
        /// <param name="field">Column definition; a null column yields null.</param>
        /// <param name="index">Index handed to the environment lookup.</param>
        /// <returns>
        /// Null when there is no column or selector; the matched node count for the Count
        /// option; otherwise the result of <c>TryConvert</c> on the formatted value.
        /// </returns>
        private object ExtractField(ISelectable item, Page page, Column field, int index)
        {
            if (field == null)
            {
                return null;
            }

            ISelector selector = SelectorUtil.Parse(field.Selector);
            if (selector == null)
            {
                return null;
            }

            var enviromentSelector = selector as EnviromentSelector;
            if (enviromentSelector != null)
            {
                var enviromentValue = SelectorUtil.GetEnviromentValue(enviromentSelector.Field, page, index);
                foreach (var formatter in field.Formatters)
                {
                    // In DEBUG builds a failing formatter is logged and skipped;
                    // in other builds the exception propagates.
#if DEBUG
                    try
                    {
#endif
                        enviromentValue = formatter.Formate(enviromentValue);
#if DEBUG
                    }
                    catch (Exception e)
                    {
                        Logger.NLog(e.ToString(), Level.Error);
                    }
#endif
                }
                return TryConvert(enviromentValue, field.DataType);
            }

            if (field.Option == PropertyDefine.Options.Count)
            {
                // Count option: formatters and conversion are skipped.
                return item.SelectList(selector).Nodes().Count;
            }

            object extracted = item.Select(selector)?.GetValue(field.Option == PropertyDefine.Options.PlainText);

            foreach (var formatter in field.Formatters)
            {
#if DEBUG
                try
                {
#endif
                    extracted = formatter.Formate(extracted);
#if DEBUG
                }
                catch (Exception e)
                {
                    Logger.NLog(e.ToString(), Level.Error);
                }
#endif
            }

            return TryConvert(extracted, field.DataType);
        }
コード例 #15
0
        /// <summary>
        /// Decides whether crawling should stop by comparing the current page number with
        /// the total page number. Each number comes either from the page content or, for
        /// environment selectors, from the page's environment values.
        /// </summary>
        /// <param name="page">Page that is queried.</param>
        /// <param name="creator">Not used by this implementation.</param>
        /// <returns>True when both numbers were resolved and are equal; otherwise false.</returns>
        public bool NeedStop(Page page, BaseTargetUrlsCreator creator)
        {
            // The two defaults differ, so unresolved values never compare equal.
            int totalPage = -2000;
            int currentPage = -1000;

            if (TotalPageSelector != null)
            {
                string totalText = string.Empty;
                if (TotalPageSelector.Type != SelectorType.Enviroment)
                {
                    totalText = page.Selectable.Select(SelectorUtil.Parse(TotalPageSelector)).GetValue();
                }
                else
                {
                    var enviromentSelector = SelectorUtil.Parse(TotalPageSelector) as EnviromentSelector;
                    if (enviromentSelector != null)
                    {
                        totalText = EntityExtractor.GetEnviromentValue(enviromentSelector.Field, page, 0);
                    }
                }

                // Formatters only run on a non-empty raw value.
                if (!string.IsNullOrEmpty(totalText) && TotalPageFormatters != null)
                {
                    foreach (var formatter in TotalPageFormatters)
                    {
                        totalText = formatter.Formate(totalText);
                    }
                }

                if (!string.IsNullOrEmpty(totalText))
                {
                    totalPage = int.Parse(totalText);
                }
            }

            if (CurrenctPageSelector != null)
            {
                string currentText = string.Empty;
                if (CurrenctPageSelector.Type != SelectorType.Enviroment)
                {
                    currentText = page.Selectable.Select(SelectorUtil.Parse(CurrenctPageSelector)).GetValue();
                }
                else
                {
                    var enviromentSelector = SelectorUtil.Parse(CurrenctPageSelector) as EnviromentSelector;
                    if (enviromentSelector != null)
                    {
                        currentText = EntityExtractor.GetEnviromentValue(enviromentSelector.Field, page, 0);
                    }
                }

                if (!string.IsNullOrEmpty(currentText) && CurrnetPageFormatters != null)
                {
                    foreach (var formatter in CurrnetPageFormatters)
                    {
                        currentText = formatter.Formate(currentText);
                    }
                }

                if (!string.IsNullOrEmpty(currentText))
                {
                    currentPage = int.Parse(currentText);
                }
            }

            return currentPage == totalPage;
        }
コード例 #16
0
        /// <summary>
        /// Extracts the entity described by <c>_entityDefine</c> from <paramref name="page"/>.
        /// </summary>
        /// <remarks>
        /// Every entry of <c>_enviromentValues</c> is evaluated against the page first and
        /// stored on the request through <c>PutExtra</c>.
        /// </remarks>
        /// <param name="page">Page to extract from.</param>
        /// <returns>
        /// For a multi entity: a list of the non-null extracted objects, or null when no node
        /// matches. Otherwise: the single extraction result, or null when the selector yields
        /// no selectable.
        /// </returns>
        /// <exception cref="SpiderExceptoin">The entity is multi but has no selector.</exception>
        public dynamic Process(Page page)
        {
            if (_enviromentValues != null && _enviromentValues.Count > 0)
            {
                foreach (var enviromentValue in _enviromentValues)
                {
                    string name = enviromentValue.Name;
                    var extracted = page.Selectable.Select(SelectorUtil.GetSelector(enviromentValue.Selector)).GetValue();
                    page.Request.PutExtra(name, extracted);
                }
            }

            bool isMulti = _entityDefine.SelectToken("$.Multi").ToObject<bool>();
            ISelector selector = SelectorUtil.GetSelector(_entityDefine.SelectToken("$.Selector").ToObject<Selector>());

            if (!isMulti)
            {
                // Without a selector the whole page is the extraction scope.
                if (selector == null)
                {
                    return ProcessSingle(page, page.Selectable, _entityDefine, 0);
                }

                ISelectable selected = page.Selectable.Select(selector);
                if (selected == null)
                {
                    return null;
                }

                return ProcessSingle(page, selected, _entityDefine, 0);
            }

            if (selector == null)
            {
                throw new SpiderExceptoin("Selector can't be null when set isMulti true.");
            }

            var nodes = page.Selectable.SelectList(selector).Nodes();
            if (nodes == null || nodes.Count == 0)
            {
                return null;
            }

            // An optional "$.Count" token limits how many nodes are processed.
            var countToken = _entityDefine.SelectToken("$.Count");
            if (countToken != null)
            {
                int count = countToken.ToObject<int>();
                nodes = nodes.Take(count).ToList();
            }

            List<JObject> entities = new List<JObject>();
            int index = 0;
            foreach (var node in nodes)
            {
                JObject entity = ProcessSingle(page, node, _entityDefine, index);
                if (entity != null)
                {
                    entities.Add(entity);
                }
                index++;
            }

            return entities;
        }
コード例 #17
0
        /// <summary>
        /// Extracts one data object from <paramref name="item"/> according to the field
        /// definitions under <c>$.Fields</c> of <paramref name="entityDefine"/>, then
        /// evaluates the optional <c>$.Stopping</c> rule.
        /// </summary>
        /// <param name="page">Page being processed; used for environment values and to flag <c>MissTargetUrls</c>.</param>
        /// <param name="item">Selectable the field selectors are evaluated against.</param>
        /// <param name="entityDefine">JSON definition of the entity to extract.</param>
        /// <param name="index">Index handed to the environment lookup.</param>
        /// <returns>
        /// The extracted object, or null when a nested single entity cannot be selected or
        /// itself extracts to null.
        /// </returns>
        /// <exception cref="SpiderExceptoin">
        /// The stopping rule names a property that is not defined in <c>$.Fields</c>, or one
        /// whose data type is an entity.
        /// </exception>
        private JObject ProcessSingle(Page page, ISelectable item, JToken entityDefine, int index)
        {
            JObject dataItem = new JObject();

            foreach (var field in entityDefine.SelectTokens("$.Fields[*]"))
            {
                ISelector selector = SelectorUtil.GetSelector(field.SelectToken("$.Selector").ToObject<Selector>());
                if (selector == null)
                {
                    continue;
                }

                var datatype = field.SelectToken("$.DataType");
                bool isEntity = VerifyIfEntity(datatype);

                var multiToken = field.SelectToken("$.Multi");
                bool isMulti = multiToken?.ToObject<bool>() ?? false;

                var optionToken = field.SelectToken("$.Option");
                var option = optionToken?.ToObject<PropertyExtractBy.ValueOption>() ?? PropertyExtractBy.ValueOption.None;

                string propertyName = field.SelectToken("$.Name").ToString();

                List<Formatter.Formatter> formatters = GenerateFormatter(field.SelectTokens("$.Formatters[*]"));

                if (!isEntity)
                {
                    string tmpValue;
                    if (selector is EnviromentSelector)
                    {
                        var enviromentSelector = selector as EnviromentSelector;
                        tmpValue = GetEnviromentValue(enviromentSelector.Field, page, index);
                        foreach (var formatter in formatters)
                        {
                            tmpValue = formatter.Formate(tmpValue);
                        }
                        dataItem.Add(propertyName, tmpValue);
                    }
                    else
                    {
                        if (isMulti)
                        {
                            var propertyValues = item.SelectList(selector).GetValue(option == PropertyExtractBy.ValueOption.PlainText);
                            if (option == PropertyExtractBy.ValueOption.Count)
                            {
                                // "ERROR" is stored when the value list itself is null.
                                var tempValue = propertyValues != null ? propertyValues.Count.ToString() : "ERROR";
                                dataItem.Add(propertyName, tempValue);
                            }
                            else
                            {
                                List<string> results = new List<string>();

                                // The value list can be null (see the Count branch above);
                                // treat that as "no values" instead of dereferencing it.
                                if (propertyValues != null)
                                {
                                    // NOTE(review): the limit is read from the _entityDefine field, not
                                    // from the entityDefine parameter - confirm this is intended for
                                    // nested entities.
                                    var countToken = _entityDefine.SelectToken("$.Count");
                                    if (countToken != null)
                                    {
                                        int count = countToken.ToObject<int>();
                                        propertyValues = propertyValues.Take(count).ToList();
                                    }

                                    foreach (var propertyValue in propertyValues)
                                    {
                                        string tmp = propertyValue;
                                        foreach (var formatter in formatters)
                                        {
                                            tmp = formatter.Formate(tmp);
                                        }
                                        results.Add(tmp);
                                    }
                                }
                                dataItem.Add(propertyName, new JArray(results));
                            }
                        }
                        else
                        {
                            tmpValue = item.Select(selector)?.GetValue(option == PropertyExtractBy.ValueOption.PlainText);
                            if (option == PropertyExtractBy.ValueOption.Count)
                            {
                                dataItem.Add(propertyName, tmpValue == null ? 0 : 1);
                            }
                            else
                            {
                                tmpValue = formatters.Aggregate(tmpValue, (current, formatter) => formatter.Formate(current));
                                dataItem.Add(propertyName, tmpValue);
                            }
                        }
                    }
                }
                else
                {
                    if (isMulti)
                    {
                        List<JObject> result = new List<JObject>();
                        var propertyValues = item.SelectList(selector).Nodes();

                        // Same null guard as for plain multi fields.
                        if (propertyValues != null)
                        {
                            var countToken = _entityDefine.SelectToken("$.Count");
                            if (countToken != null)
                            {
                                int count = countToken.ToObject<int>();
                                propertyValues = propertyValues.Take(count).ToList();
                            }

                            int index1 = 0;
                            foreach (var entity in propertyValues)
                            {
                                JObject obj = ProcessSingle(page, entity, datatype, index1);
                                if (obj != null)
                                {
                                    result.Add(obj);
                                }
                                index1++;
                            }
                        }
                        dataItem.Add(propertyName, new JArray(result));
                    }
                    else
                    {
                        var select = item.Select(selector);
                        if (select == null)
                        {
                            return null;
                        }

                        var propertyValue = ProcessSingle(page, select, datatype, 0);

                        // The nested call can return null; the JObject copy constructor
                        // rejects null, so give up on this item as for a missing selectable.
                        if (propertyValue == null)
                        {
                            return null;
                        }
                        dataItem.Add(propertyName, new JObject(propertyValue));
                    }
                }
            }

            var stoppingJobject = entityDefine.SelectToken("$.Stopping");
            var stopping = stoppingJobject?.ToObject<Stopping>();

            if (stopping != null)
            {
                var field = entityDefine.SelectToken($"$.Fields[?(@.Name == '{stopping.PropertyName}')]");
                if (field == null)
                {
                    // Fail with a descriptive error instead of a NullReferenceException.
                    throw new SpiderExceptoin($"Stopping property '{stopping.PropertyName}' is not defined in Fields.");
                }

                var datatype = field.SelectToken("$.DataType");
                bool isEntity = VerifyIfEntity(datatype);
                if (isEntity)
                {
                    throw new SpiderExceptoin("Can't compare with object.");
                }

                stopping.DataType = datatype.ToString().ToLower();
                string value = dataItem.SelectToken($"$.{stopping.PropertyName}")?.ToString();

                // A missing value and a value that meets the stopping rule both flag
                // the page as missing target urls.
                if (string.IsNullOrEmpty(value) || stopping.NeedStop(value))
                {
                    page.MissTargetUrls = true;
                }
            }

            return dataItem;
        }
コード例 #18
0
        /// <summary>
        /// Extracts the entity data described by <c>EntityMetadata</c> from a page and, unless
        /// <c>page.MissExtractTargetUrls</c> is set, extracts follow-up links via <c>ExtractLinks</c>.
        /// </summary>
        /// <param name="page">Page to extract from.</param>
        /// <returns>
        /// The extracted objects; <c>null</c> when the page URL matches none of the configured
        /// target regexes, when the entity selector selects nothing, or when the single entity
        /// could not be extracted.
        /// </returns>
        public List <JObject> Process(Page page)
        {
            // A page is a target when ANY configured regex matches its URL, or when no regex
            // is configured at all. The scan stops at the first match so that a later,
            // non-matching extractor can no longer overwrite an earlier positive result.
            bool hasRegex = false;
            bool matched  = false;
            foreach (var targetUrlExtractor in EntityMetadata.TargetUrlExtractors)
            {
                foreach (var regex in targetUrlExtractor.Regexes)
                {
                    hasRegex = true;
                    if (regex.IsMatch(page.Url))
                    {
                        matched = true;
                        break;
                    }
                }
                if (matched)
                {
                    break;
                }
            }
            if (hasRegex && !matched)
            {
                return(null);
            }

            // Evaluate every global value against the whole page and store it on the request
            // under the value's name.
            if (_globalValues != null && _globalValues.Count > 0)
            {
                foreach (var enviromentValue in _globalValues)
                {
                    string name  = enviromentValue.Name;
                    var    value = page.Selectable.Select(SelectorUtil.Parse(enviromentValue)).GetValue();
                    page.Request.PutExtra(name, value);
                }
            }

            List <JObject> result   = new List <JObject>();
            ISelector      selector = SelectorUtil.Parse(EntityMetadata.Entity.Selector);

            if (selector != null && EntityMetadata.Entity.Multi)
            {
                var list = page.Selectable.SelectList(selector).Nodes();
                if (list == null || list.Count == 0)
                {
                    result = null;
                }
                else
                {
                    var limit = EntityMetadata.Limit;
                    if (limit != null)
                    {
                        list = list.Take(limit.Value).ToList();
                    }

                    // The index is the node's position in the (possibly truncated) list and
                    // advances even when a node produces no object.
                    int index = 0;
                    foreach (var item in list)
                    {
                        JObject obj = ProcessSingle(page, item, index);
                        if (obj != null)
                        {
                            result.Add(obj);
                        }
                        index++;
                    }
                }
            }
            else
            {
                // Without an entity selector the whole page is the entity.
                ISelectable select = selector == null ? page.Selectable : page.Selectable.Select(selector);

                if (select != null)
                {
                    // Do not wrap a failed extraction into a list containing null; the multi
                    // path above filters null objects as well.
                    var singleResult = ProcessSingle(page, select, 0);
                    result = singleResult != null ? new List <JObject> { singleResult } : null;
                }
                else
                {
                    result = null;
                }
            }

            //if (EntityMetadata.TargetUrlsCreators != null && EntityMetadata.TargetUrlExtractors.Count > 0)
            //{
            //	foreach (var targetUrlsCreator in EntityMetadata.TargetUrlsCreators)
            //	{
            //		page.AddTargetRequests(targetUrlsCreator.Handle(page));
            //	}
            //}

            if (!page.MissExtractTargetUrls)
            {
                ExtractLinks(page, EntityMetadata.TargetUrlExtractors);
            }

            return(result);
        }
コード例 #19
0
        /// <summary>
        /// Extracts the value of one data token (a plain field or a nested entity) from a node.
        /// </summary>
        /// <param name="item">Node the token's selector is applied to.</param>
        /// <param name="page">Page being processed; used to resolve environment values.</param>
        /// <param name="field">Field or entity definition to extract.</param>
        /// <param name="index">Index forwarded to <c>GetEnviromentValue</c> for environment selectors.</param>
        /// <returns>
        /// <c>null</c> when the token has no selector; a string for a single field (the node
        /// count as a string, or "-1", for the Count option); a <c>JArray</c> for a multi
        /// field or multi entity; a <c>JObject</c> for a single entity.
        /// </returns>
        private dynamic ExtractField(ISelectable item, Page page, DataToken field, int index)
        {
            ISelector selector = SelectorUtil.Parse(field.Selector);

            if (selector == null)
            {
                return(null);
            }

            if (field is Entity entity)
            {
                if (field.Multi)
                {
                    // One object per selected node, each built from the entity's child fields.
                    JArray objs = new JArray();
                    foreach (var selectable in item.SelectList(selector).Nodes())
                    {
                        JObject obj = new JObject();
                        foreach (var child in entity.Fields)
                        {
                            obj.Add(child.Name, ExtractField(selectable, page, child, 0));
                        }
                        objs.Add(obj);
                    }
                    return(objs);
                }

                JObject single   = new JObject();
                var     selected = item.Select(selector);
                foreach (var child in entity.Fields)
                {
                    // Recurse into the CHILD. Passing the entity itself here re-entered this
                    // same branch on every call and never terminated.
                    single.Add(child.Name, ExtractField(selected, page, child, 0));
                }
                return(single);
            }

            // Not an entity: formatters and options only exist on Field.
            var f = field as Field;

            string tmpValue;
            if (selector is EnviromentSelector enviromentSelector)
            {
                tmpValue = GetEnviromentValue(enviromentSelector.Field, page, index);
                if (f != null)
                {
                    foreach (var formatter in f.Formatters)
                    {
                        tmpValue = formatter.Formate(tmpValue);
                    }
                }
                return(tmpValue);
            }

            // Read the option through the same null-guarded reference used for the formatters
            // instead of an unconditional cast.
            bool needPlainText = f != null && f.Option == PropertySelector.Options.PlainText;
            if (field.Multi)
            {
                var propertyValues = item.SelectList(selector).Nodes();

                List <string> results = new List <string>();
                foreach (var propertyValue in propertyValues)
                {
                    results.Add(propertyValue.GetValue(needPlainText));
                }
                if (f != null)
                {
                    // Multi-value formatters transform the whole list at once.
                    foreach (var formatter in f.Formatters)
                    {
                        results = formatter.Formate(results);
                    }
                }
                return(new JArray(results));
            }

            bool needCount = f != null && f.Option == PropertySelector.Options.Count;
            if (needCount)
            {
                var propertyValues = item.SelectList(selector).Nodes();
                return(propertyValues?.Count.ToString() ?? "-1");
            }

            tmpValue = item.Select(selector)?.GetValue(needPlainText);
            if (f != null)
            {
                foreach (var formatter in f.Formatters)
                {
                    tmpValue = formatter.Formate(tmpValue);
                }
            }
            return(tmpValue);
        }
コード例 #20
0
        /// <summary>
        /// Extracts the value of a field, or of a nested object when the field declares child
        /// fields, from a node.
        /// </summary>
        /// <param name="item">Node the field's selector is applied to.</param>
        /// <param name="page">Page being processed; used to resolve environment values.</param>
        /// <param name="field">Field definition (selector, option, formatters, child fields).</param>
        /// <param name="index">Index forwarded to <c>GetEnviromentValue</c> for environment selectors.</param>
        /// <returns>
        /// <c>null</c> when the field has no selector; otherwise a string, an int (Count option
        /// on a single field), a <c>JArray</c> (multi field) or a <c>JObject</c> (field with
        /// child fields).
        /// </returns>
        private dynamic ExtractField(ISelectable item, Page page, Field field, int index)
        {
            ISelector selector = SelectorUtil.GetSelector(field.Selector);

            if (selector == null)
            {
                return(null);
            }

            List <Formatter.Formatter> formatters = GenerateFormatter(field.Formatters);

            bool isEntity = field.Fields != null && field.Fields.Count > 0;

            if (!isEntity)
            {
                string tmpValue;
                if (selector is EnviromentSelector enviromentSelector)
                {
                    tmpValue = GetEnviromentValue(enviromentSelector.Field, page, index);
                    foreach (var formatter in formatters)
                    {
                        tmpValue = formatter.Formate(tmpValue);
                    }
                    return(tmpValue);
                }

                if (field.Multi)
                {
                    var propertyValues = item.SelectList(selector).Nodes();
                    if (field.Option == PropertyExtractBy.ValueOption.Count)
                    {
                        return(propertyValues != null ? propertyValues.Count.ToString() : "-1");
                    }

                    List <string> results = new List <string>();
                    // The Count branch above already allows a null node list; guard here too
                    // so a missing list yields an empty array instead of a NullReferenceException.
                    if (propertyValues != null)
                    {
                        bool plainText = field.Option == PropertyExtractBy.ValueOption.PlainText;
                        foreach (var propertyValue in propertyValues)
                        {
                            string tmp = propertyValue.GetValue(plainText);
                            foreach (var formatter in formatters)
                            {
                                tmp = formatter.Formate(tmp);
                            }
                            results.Add(tmp);
                        }
                    }
                    return(new JArray(results));
                }

                tmpValue = item.Select(selector)?.GetValue(field.Option == PropertyExtractBy.ValueOption.PlainText);
                if (field.Option == PropertyExtractBy.ValueOption.Count)
                {
                    // Single field with Count option: 1 when a value was selected, else 0.
                    return(tmpValue == null ? 0 : 1);
                }

                tmpValue = formatters.Aggregate(tmpValue, (current, formatter) => formatter.Formate(current));
                return(tmpValue);
            }

            JObject dataObject = new JObject();
            foreach (var child in field.Fields)
            {
                if (child.Multi)
                {
                    var childItems = item.SelectList(SelectorUtil.GetSelector(child.Selector)).Nodes();

                    // Collect the values of all nodes into ONE array and add the property once.
                    // Adding the property once per node threw on the second node, because a
                    // JObject cannot contain the same property name twice.
                    JArray childValues = new JArray();
                    if (childItems != null)
                    {
                        // Running counter instead of IndexOf: same index, no rescan per node.
                        int childIndex = 0;
                        foreach (var childItem in childItems)
                        {
                            JToken extracted = ExtractField(childItem, page, child, childIndex);
                            if (extracted is JArray nested)
                            {
                                // A multi leaf child already returns an array per node;
                                // flatten it so the result is a flat list of values.
                                foreach (var token in nested)
                                {
                                    childValues.Add(token);
                                }
                            }
                            else
                            {
                                childValues.Add(extracted ?? JValue.CreateNull());
                            }
                            childIndex++;
                        }
                    }
                    dataObject.Add(child.Name, childValues);
                }
                else
                {
                    var childItem = item.Select(SelectorUtil.GetSelector(child.Selector));
                    dataObject.Add(child.Name, ExtractField(childItem, page, child, 0));
                }
            }
            return(dataObject);
        }
コード例 #21
0
        /// <summary>
        /// Extracts the value of a single field from a node.
        /// </summary>
        /// <param name="item">Node the field's selector is applied to.</param>
        /// <param name="page">Page being processed; used to resolve environment values.</param>
        /// <param name="field">Field definition; may be <c>null</c>.</param>
        /// <param name="index">Index forwarded to <c>GetEnviromentValue</c> for environment selectors.</param>
        /// <returns>
        /// <c>null</c> when the field or its selector is missing; a <c>JArray</c> of formatted
        /// values for a multi field; the node count as a string ("-1" when the node list is
        /// null) for the Count option; otherwise the formatted string value.
        /// </returns>
        private dynamic ExtractField(ISelectable item, Page page, Field field, int index)
        {
            if (field == null)
            {
                return null;
            }

            ISelector fieldSelector = SelectorUtil.Parse(field.Selector);
            if (fieldSelector == null)
            {
                return null;
            }

            // Environment selectors read from the page/request context, not from the node.
            if (fieldSelector is EnviromentSelector envSelector)
            {
                string envValue = GetEnviromentValue(envSelector.Field, page, index);
                foreach (var fmt in field.Formatters)
                {
                    envValue = fmt.Formate(envValue);
                }
                return envValue;
            }

            bool plainText = field.Option == PropertyDefine.Options.PlainText;

            if (field.Multi)
            {
                var nodes = item.SelectList(fieldSelector).Nodes();

                List <string> values = new List <string>();
                foreach (var node in nodes)
                {
                    values.Add(node.GetValue(plainText));
                }

                // Formatters for a multi field receive and return the whole list.
                foreach (var fmt in field.Formatters)
                {
                    values = fmt.Formate(values);
                }
                return new JArray(values);
            }

            if (field.Option == PropertyDefine.Options.Count)
            {
                var nodes = item.SelectList(fieldSelector).Nodes();
                // A null node list is reported as "-1".
                return nodes?.Count.ToString() ?? "-1";
            }

            string single = item.Select(fieldSelector)?.GetValue(plainText);
            foreach (var fmt in field.Formatters)
            {
                single = fmt.Formate(single);
            }
            return single;
        }
コード例 #22
0
        /// <summary>
        /// Extracts the entity described by <c>_entityDefine</c> from a page.
        /// </summary>
        /// <param name="page">Page to extract from.</param>
        /// <returns>
        /// For a multi entity, a <c>List&lt;JObject&gt;</c> with the objects that could be
        /// extracted (items that fail are skipped), or <c>null</c> when no node is selected.
        /// For a single entity, the result of <c>ProcessSingle</c>, or <c>null</c> when the
        /// entity selector selects nothing.
        /// </returns>
        public dynamic Process(Page page)
        {
            // Evaluate every environment value against the whole page and store it on the
            // request under the value's name.
            if (_enviromentValues != null && _enviromentValues.Count > 0)
            {
                foreach (var enviromentValue in _enviromentValues)
                {
                    string name  = enviromentValue.Name;
                    var    value = page.Selectable.Select(SelectorUtil.GetSelector(enviromentValue.Selector)).GetValue();
                    page.Request.PutExtra(name, value);
                }
            }

            ISelector selector = SelectorUtil.GetSelector(_entityDefine.Selector);

            // Without a selector there is nothing to enumerate, so the entity is single.
            bool isMulti = selector != null && _entityDefine.Multi;

            if (isMulti)
            {
                var list = page.Selectable.SelectList(selector).Nodes();
                if (list == null || list.Count == 0)
                {
                    return(null);
                }
                var limit = _entityDefine.Limit;
                if (limit != null)
                {
                    list = list.Take(limit.Value).ToList();
                }

                List <JObject> result = new List <JObject>();
                int            index  = 0;
                foreach (var item in list)
                {
                    try
                    {
                        JObject obj = ProcessSingle(page, item, _entityDefine, index);
                        if (obj != null)
                        {
                            result.Add(obj);
                        }
                    }
                    catch (Exception e)
                    {
                        // Extraction stays best-effort: one bad item must not lose the rest
                        // of the page. The failure is traced rather than silently dropped.
                        System.Diagnostics.Trace.TraceWarning($"ProcessSingle failed for item {index}: {e}");
                    }

                    // Advance outside the try block so a failing item does not shift the
                    // index passed for every item after it.
                    index++;
                }
                return(result);
            }

            ISelectable select;
            if (selector == null)
            {
                // No entity selector: the whole page is the entity.
                select = page.Selectable;
            }
            else
            {
                select = page.Selectable.Select(selector);
                if (select == null)
                {
                    return(null);
                }
            }

            return(ProcessSingle(page, select, _entityDefine, 0));
        }