Exemple #1
0
        /// <summary>
        /// Decides whether paging can stop by comparing the current page number with the total page
        /// count, both read from <paramref name="page"/> through the configured selectors.
        /// </summary>
        /// <param name="page">Page whose selectable content is queried.</param>
        /// <returns><c>true</c> when the current page number equals the total page count; otherwise <c>false</c>.</returns>
        public override bool CanStop(Page page)
        {
            // Distinct sentinels: when neither value can be read the two numbers never compare equal.
            int totalPage   = -2000;
            int currentPage = -1000;
            int parsed;

            if (TotalPageSelector != null)
            {
                // A selection that matches nothing is treated like an empty value instead of crashing.
                string totalStr = page.Selectable.Select(SelectorUtil.GetSelector(TotalPageSelector))?.GetValue();

                // TryParse writes 0 on failure, so parse into a scratch variable to keep the sentinel intact.
                if (!string.IsNullOrEmpty(totalStr) && int.TryParse(totalStr, out parsed))
                {
                    totalPage = parsed;
                }
            }

            if (CurrenctPageSelector != null)
            {
                string currentStr = page.Selectable.Select(SelectorUtil.GetSelector(CurrenctPageSelector))?.GetValue();
                if (!string.IsNullOrEmpty(currentStr) && int.TryParse(currentStr, out parsed))
                {
                    currentPage = parsed;
                }
            }

            return currentPage == totalPage;
        }
        /// <summary>
        /// Extracts date strings from <paramref name="page"/>, runs them through the configured
        /// formatters and checks each resulting date against every entry of <c>Stoppers</c>.
        /// </summary>
        /// <param name="page">Page whose selectable content is queried.</param>
        /// <returns>
        /// <c>true</c> when the extraction yields <c>null</c>, or when any extracted date lies before
        /// (if <c>IsBefore</c> is set) or after (otherwise) any stopper date; otherwise <c>false</c>.
        /// </returns>
        public override bool CanStop(Page page)
        {
            var extracted = page.Selectable.SelectList(SelectorUtil.GetSelector(CurrenctPageSelector)).GetValues();
            if (extracted == null)
            {
                return true;
            }

            // Phase 1: apply every formatter, in order, to every extracted value.
            List<string> formattedValues = new List<string>();
            foreach (var raw in extracted)
            {
                var text = raw;
                if (CurrenctPageFormatters != null)
                {
                    foreach (var formatter in CurrenctPageFormatters)
                    {
                        text = formatter.Formate(text);
                    }
                }
                formattedValues.Add(text);
            }

            // Phase 2: compare each formatted date with each stopper date.
            foreach (var formatted in formattedValues)
            {
                DateTime candidate  = DateTime.Parse(formatted.ToString());
                bool     wantBefore = IsBefore;

                foreach (var stopper in Stoppers)
                {
                    DateTime limit = DateTime.Parse(stopper);
                    bool     hit   = wantBefore ? candidate < limit : candidate > limit;
                    if (hit)
                    {
                        return true;
                    }
                }
            }

            return false;
        }
Exemple #3
0
        /// <summary>
        /// Builds a <c>Model.TargetUrlExtractor</c> from this instance's <c>Formatters</c>,
        /// <c>Region</c> and <c>Patterns</c>.
        /// </summary>
        /// <returns>
        /// The extractor, with one <see cref="Regex"/> added for every pattern that is not null,
        /// empty or whitespace-only.
        /// </returns>
        internal Model.TargetUrlExtractor GetTargetUrlExtractInfo()
        {
            var extractor = new Model.TargetUrlExtractor();
            extractor.Formatters = Formatters;
            extractor.Region     = SelectorUtil.GetSelector(Region);

            foreach (var pattern in Patterns)
            {
                // Blank patterns are skipped; the regex itself is built from the untrimmed text.
                if (string.IsNullOrEmpty(pattern?.Trim()))
                {
                    continue;
                }

                extractor.Patterns.Add(new Regex(pattern));
            }

            return extractor;
        }
Exemple #4
0
        /// <summary>
        /// Extracts date strings from <paramref name="page"/> and checks each one against every
        /// entry of <c>Stoppers</c>.
        /// </summary>
        /// <param name="page">Page whose selectable content is queried.</param>
        /// <returns>
        /// <c>true</c> when the extraction yields <c>null</c>, or when any extracted date lies before
        /// (if <c>IsBefore</c> is set) or after (otherwise) any stopper date; otherwise <c>false</c>.
        /// </returns>
        public override bool CanStop(Page page)
        {
            var extracted = page.Selectable.SelectList(SelectorUtil.GetSelector(CurrenctPageSelector)).GetValues();
            if (extracted == null)
            {
                return true;
            }

            foreach (var text in (List<string>)extracted)
            {
                DateTime candidate  = DateTime.Parse(text.ToString());
                bool     wantBefore = IsBefore;

                foreach (var stopper in Stoppers)
                {
                    DateTime limit = DateTime.Parse(stopper);
                    bool     hit   = wantBefore ? candidate < limit : candidate > limit;
                    if (hit)
                    {
                        return true;
                    }
                }
            }

            return false;
        }
Exemple #5
0
        /// <summary>
        /// Extracts the entity described by <c>_entityDefine</c> from <paramref name="page"/>.
        /// Configured environment values are first copied from the page into the request extras.
        /// </summary>
        /// <param name="page">Page to extract from.</param>
        /// <returns>
        /// For a multi entity with a selector: a <c>List&lt;JObject&gt;</c> of the extracted items, or
        /// <c>null</c> when no node matches. Otherwise the result of <c>ProcessSingle</c> for the page
        /// (or for the selected node), or <c>null</c> when the selector matches nothing.
        /// </returns>
        public dynamic Process(Page page)
        {
            if (_enviromentValues != null && _enviromentValues.Count > 0)
            {
                foreach (var enviromentValue in _enviromentValues)
                {
                    string name  = enviromentValue.Name;
                    var    value = page.Selectable.Select(SelectorUtil.GetSelector(enviromentValue.Selector)).GetValue();
                    page.Request.PutExtra(name, value);
                }
            }

            ISelector selector = SelectorUtil.GetSelector(_entityDefine.Selector);

            // Without a selector there is nothing to enumerate, so the entity is handled as a single item.
            bool isMulti = selector != null && _entityDefine.Multi;

            if (isMulti)
            {
                var list = page.Selectable.SelectList(selector).Nodes();
                if (list == null || list.Count == 0)
                {
                    return null;
                }

                var limit = _entityDefine.Limit;
                if (limit != null)
                {
                    list = list.Take(limit.Value).ToList();
                }

                List<JObject> result = new List<JObject>();
                int           index  = 0;
                foreach (var item in list)
                {
                    try
                    {
                        JObject obj = ProcessSingle(page, item, _entityDefine, index);
                        if (obj != null)
                        {
                            result.Add(obj);
                        }
                    }
                    catch (Exception)
                    {
                        // Best effort: an item that fails to extract is dropped and the remaining items
                        // are still processed.
                        // NOTE(review): the failure is not reported anywhere; consider logging it.
                    }

                    // Advance even when the item failed, so later items keep their position in the list.
                    index++;
                }
                return result;
            }

            ISelectable select;
            if (selector == null)
            {
                select = page.Selectable;
            }
            else
            {
                select = page.Selectable.Select(selector);
                if (select == null)
                {
                    return null;
                }
            }

            return ProcessSingle(page, select, _entityDefine, 0);
        }
Exemple #6
0
        /// <summary>
        /// Extracts the value of <paramref name="field"/> from <paramref name="item"/>.
        /// </summary>
        /// <param name="item">Selectable the field's selector is applied to.</param>
        /// <param name="page">Page being processed; passed to <c>GetEnviromentValue</c> for environment selectors.</param>
        /// <param name="field">Definition of the field; either a plain <c>Field</c> or an <c>Entity</c> with child fields.</param>
        /// <param name="index">Index passed to <c>GetEnviromentValue</c> for environment selectors.</param>
        /// <returns>
        /// <c>null</c> when the field has no selector. For a plain field: the formatted string, a
        /// <c>JArray</c> of formatted strings (multi), or a count when the option is <c>Count</c>.
        /// For an entity: a <c>JObject</c>, or a <c>JArray</c> of <c>JObject</c> (multi).
        /// </returns>
        private dynamic ExtractField(ISelectable item, Page page, DataToken field, int index)
        {
            ISelector selector = SelectorUtil.GetSelector(field.Selector);
            if (selector == null)
            {
                return null;
            }

            var entity = field as Entity;
            if (entity != null)
            {
                if (field.Multi)
                {
                    JArray objs        = new JArray();
                    var    selectables = item.SelectList(selector).Nodes();
                    foreach (var selectable in selectables)
                    {
                        JObject obj = new JObject();
                        foreach (var child in entity.Fields)
                        {
                            obj.Add(child.Name, ExtractField(selectable, page, child, 0));
                        }
                        objs.Add(obj);
                    }
                    return objs;
                }

                JObject single       = new JObject();
                var     singleTarget = item.Select(selector);
                foreach (var child in entity.Fields)
                {
                    // Recurse into the child definition; recursing with the entity itself never terminates.
                    single.Add(child.Name, ExtractField(singleTarget, page, child, 0));
                }
                return single;
            }

            // Entities carry no formatters of their own, so formatters are only resolved for plain fields.
            List<Formatter.Formatter> formatters = GenerateFormatter((field as Field)?.Formatters);

            var enviromentSelector = selector as EnviromentSelector;
            if (enviromentSelector != null)
            {
                string enviromentValue = GetEnviromentValue(enviromentSelector.Field, page, index);
                foreach (var formatter in formatters)
                {
                    enviromentValue = formatter.Formate(enviromentValue);
                }
                return enviromentValue;
            }

            Field plainField = (Field)field;
            bool  plainText  = plainField.Option == PropertyExtractBy.ValueOption.PlainText;
            bool  countOnly  = plainField.Option == PropertyExtractBy.ValueOption.Count;

            if (field.Multi)
            {
                var propertyValues = item.SelectList(selector).Nodes();
                if (countOnly)
                {
                    return propertyValues != null ? propertyValues.Count.ToString() : "-1";
                }

                List<string> results = new List<string>();

                // A null node list is treated as "no matches", mirroring the null check of the count branch.
                if (propertyValues != null)
                {
                    foreach (var propertyValue in propertyValues)
                    {
                        string tmp = propertyValue.GetValue(plainText);
                        foreach (var formatter in formatters)
                        {
                            tmp = formatter.Formate(tmp);
                        }
                        results.Add(tmp);
                    }
                }
                return new JArray(results);
            }

            string tmpValue = item.Select(selector)?.GetValue(plainText);
            if (countOnly)
            {
                return tmpValue == null ? 0 : 1;
            }

            return formatters.Aggregate(tmpValue, (current, formatter) => formatter.Formate(current));
        }
Exemple #7
0
        /// <summary>
        /// Extracts the value of <paramref name="field"/> from <paramref name="item"/>. A field that
        /// has child <c>Fields</c> is treated as an entity and produces a <c>JObject</c>.
        /// </summary>
        /// <param name="item">Selectable the field's selector is applied to.</param>
        /// <param name="page">Page being processed; passed to <c>GetEnviromentValue</c> for environment selectors.</param>
        /// <param name="field">Definition of the field to extract.</param>
        /// <param name="index">Index passed to <c>GetEnviromentValue</c> for environment selectors.</param>
        /// <returns>
        /// <c>null</c> when the field has no selector. For a plain field: the formatted string, a
        /// <c>JArray</c> of formatted strings (multi), or a count when the option is <c>Count</c>.
        /// For an entity: a <c>JObject</c> with one property per child field; a multi child is stored
        /// as a <c>JArray</c> holding one entry per matched node.
        /// </returns>
        private dynamic ExtractField(ISelectable item, Page page, Field field, int index)
        {
            ISelector selector = SelectorUtil.GetSelector(field.Selector);
            if (selector == null)
            {
                return null;
            }

            List<Formatter.Formatter> formatters = GenerateFormatter(field.Formatters);

            bool isEntity = field.Fields != null && field.Fields.Count > 0;
            if (isEntity)
            {
                JObject dataObject = new JObject();
                foreach (var child in field.Fields)
                {
                    if (child.Multi)
                    {
                        var childItems = item.SelectList(SelectorUtil.GetSelector(child.Selector)).Nodes();

                        // Collect every node's result under ONE property: adding the same property name
                        // to a JObject a second time throws.
                        // NOTE(review): a multi child with a single node now yields a one-element array
                        // instead of a bare value - confirm consumers expect an array here.
                        JArray childValues = new JArray();
                        if (childItems != null)
                        {
                            int childIndex = 0;
                            foreach (var childItem in childItems)
                            {
                                childValues.Add(ExtractField(childItem, page, child, childIndex));
                                childIndex++;
                            }
                        }
                        dataObject.Add(child.Name, childValues);
                    }
                    else
                    {
                        var childItem = item.Select(SelectorUtil.GetSelector(child.Selector));
                        dataObject.Add(child.Name, ExtractField(childItem, page, child, 0));
                    }
                }
                return dataObject;
            }

            var enviromentSelector = selector as EnviromentSelector;
            if (enviromentSelector != null)
            {
                string enviromentValue = GetEnviromentValue(enviromentSelector.Field, page, index);
                foreach (var formatter in formatters)
                {
                    enviromentValue = formatter.Formate(enviromentValue);
                }
                return enviromentValue;
            }

            bool plainText = field.Option == PropertyExtractBy.ValueOption.PlainText;
            bool countOnly = field.Option == PropertyExtractBy.ValueOption.Count;

            if (field.Multi)
            {
                var propertyValues = item.SelectList(selector).Nodes();
                if (countOnly)
                {
                    return propertyValues != null ? propertyValues.Count.ToString() : "-1";
                }

                List<string> results = new List<string>();

                // A null node list is treated as "no matches", mirroring the null check of the count branch.
                if (propertyValues != null)
                {
                    foreach (var propertyValue in propertyValues)
                    {
                        string tmp = propertyValue.GetValue(plainText);
                        foreach (var formatter in formatters)
                        {
                            tmp = formatter.Formate(tmp);
                        }
                        results.Add(tmp);
                    }
                }
                return new JArray(results);
            }

            string tmpValue = item.Select(selector)?.GetValue(plainText);
            if (countOnly)
            {
                return tmpValue == null ? 0 : 1;
            }

            return formatters.Aggregate(tmpValue, (current, formatter) => formatter.Formate(current));
        }
        /// <summary>
        /// Extracts the entity described by the JSON definition <c>_entityDefine</c> from
        /// <paramref name="page"/>. Configured environment values are first copied from the page into
        /// the request extras.
        /// </summary>
        /// <param name="page">Page to extract from.</param>
        /// <returns>
        /// For a multi entity: a <c>List&lt;JObject&gt;</c> of the extracted items, or <c>null</c> when
        /// no node matches. Otherwise the result of <c>ProcessSingle</c> for the page (or for the
        /// selected node), or <c>null</c> when the selector matches nothing.
        /// </returns>
        /// <exception cref="SpiderExceptoin">The entity is marked multi but has no selector.</exception>
        public dynamic Process(Page page)
        {
            if (_enviromentValues != null && _enviromentValues.Count > 0)
            {
                foreach (var enviromentValue in _enviromentValues)
                {
                    string name  = enviromentValue.Name;
                    var    value = page.Selectable.Select(SelectorUtil.GetSelector(enviromentValue.Selector)).GetValue();
                    page.Request.PutExtra(name, value);
                }
            }

            // A definition without "Multi" is treated as single, the same default ProcessSingle uses for fields.
            bool isMulti = _entityDefine.SelectToken("$.Multi")?.ToObject<bool>() ?? false;

            // The selector is optional for single entities (see the null branch below), so a missing
            // "Selector" token must not be dereferenced.
            // NOTE(review): relies on SelectorUtil.GetSelector accepting null - confirm.
            ISelector selector = SelectorUtil.GetSelector(_entityDefine.SelectToken("$.Selector")?.ToObject<Selector>());

            if (isMulti)
            {
                if (selector == null)
                {
                    throw new SpiderExceptoin("Selector can't be null when set isMulti true.");
                }

                var list = page.Selectable.SelectList(selector).Nodes();
                if (list == null || list.Count == 0)
                {
                    return null;
                }

                var countToken = _entityDefine.SelectToken("$.Count");
                if (countToken != null)
                {
                    int count = countToken.ToObject<int>();
                    list = list.Take(count).ToList();
                }

                List<JObject> result = new List<JObject>();
                int           index  = 0;
                foreach (var item in list)
                {
                    JObject obj = ProcessSingle(page, item, _entityDefine, index);
                    if (obj != null)
                    {
                        result.Add(obj);
                    }
                    index++;
                }
                return result;
            }

            ISelectable select;
            if (selector == null)
            {
                select = page.Selectable;
            }
            else
            {
                select = page.Selectable.Select(selector);
                if (select == null)
                {
                    return null;
                }
            }

            return ProcessSingle(page, select, _entityDefine, 0);
        }
        /// <summary>
        /// Builds one JSON data item from <paramref name="item"/> according to the field definitions
        /// under <c>$.Fields</c> of <paramref name="entityDefine"/>, then evaluates the optional
        /// <c>$.Stopping</c> rule and flags the page when it applies.
        /// </summary>
        /// <param name="page">Page being processed; passed to <c>GetEnviromentValue</c> and receives the <c>MissTargetUrls</c> flag.</param>
        /// <param name="item">Selectable the field selectors are applied to.</param>
        /// <param name="entityDefine">JSON definition of the entity to extract.</param>
        /// <param name="index">Index passed to <c>GetEnviromentValue</c> for environment selectors.</param>
        /// <returns>
        /// The populated object, or <c>null</c> when the selector of a single (non-multi) nested entity
        /// matches nothing.
        /// </returns>
        /// <exception cref="SpiderExceptoin">The stopping rule refers to a field whose data type is an entity.</exception>
        private JObject ProcessSingle(Page page, ISelectable item, JToken entityDefine, int index)
        {
            JObject dataItem = new JObject();

            foreach (var field in entityDefine.SelectTokens("$.Fields[*]"))
            {
                ISelector selector = SelectorUtil.GetSelector(field.SelectToken("$.Selector").ToObject<Selector>());
                if (selector == null)
                {
                    // Fields without a usable selector are left out of the result.
                    continue;
                }

                var  datatype = field.SelectToken("$.DataType");
                bool isEntity = VerifyIfEntity(datatype);

                var  multiToken = field.SelectToken("$.Multi");
                bool isMulti    = multiToken?.ToObject<bool>() ?? false;

                var optionToken = field.SelectToken("$.Option");
                var option      = optionToken?.ToObject<PropertyExtractBy.ValueOption>() ?? PropertyExtractBy.ValueOption.None;

                string propertyName = field.SelectToken("$.Name").ToString();

                List<Formatter.Formatter> formatters = GenerateFormatter(field.SelectTokens("$.Formatters[*]"));

                if (!isEntity)
                {
                    var enviromentSelector = selector as EnviromentSelector;
                    if (enviromentSelector != null)
                    {
                        string enviromentValue = GetEnviromentValue(enviromentSelector.Field, page, index);
                        foreach (var formatter in formatters)
                        {
                            enviromentValue = formatter.Formate(enviromentValue);
                        }
                        dataItem.Add(propertyName, enviromentValue);
                    }
                    else if (isMulti)
                    {
                        var propertyValues = item.SelectList(selector).GetValue(option == PropertyExtractBy.ValueOption.PlainText);
                        if (option == PropertyExtractBy.ValueOption.Count)
                        {
                            // "ERROR" marks a selection that produced no list at all.
                            var tempValue = propertyValues != null ? propertyValues.Count.ToString() : "ERROR";
                            dataItem.Add(propertyName, tempValue);
                        }
                        else
                        {
                            List<string> results = new List<string>();

                            // A null list is treated as "no matches", mirroring the null check of the count branch.
                            if (propertyValues != null)
                            {
                                // NOTE(review): the limit is read from the root definition (_entityDefine),
                                // not from this field - confirm that is intended.
                                var countToken = _entityDefine.SelectToken("$.Count");
                                if (countToken != null)
                                {
                                    int count = countToken.ToObject<int>();
                                    propertyValues = propertyValues.Take(count).ToList();
                                }

                                foreach (var propertyValue in propertyValues)
                                {
                                    string tmp = propertyValue;
                                    foreach (var formatter in formatters)
                                    {
                                        tmp = formatter.Formate(tmp);
                                    }
                                    results.Add(tmp);
                                }
                            }
                            dataItem.Add(propertyName, new JArray(results));
                        }
                    }
                    else
                    {
                        string tmpValue = item.Select(selector)?.GetValue(option == PropertyExtractBy.ValueOption.PlainText);
                        if (option == PropertyExtractBy.ValueOption.Count)
                        {
                            dataItem.Add(propertyName, tmpValue == null ? 0 : 1);
                        }
                        else
                        {
                            tmpValue = formatters.Aggregate(tmpValue, (current, formatter) => formatter.Formate(current));
                            dataItem.Add(propertyName, tmpValue);
                        }
                    }
                }
                else
                {
                    if (isMulti)
                    {
                        var propertyValues = item.SelectList(selector).Nodes();
                        var countToken     = _entityDefine.SelectToken("$.Count");
                        if (countToken != null)
                        {
                            int count = countToken.ToObject<int>();
                            propertyValues = propertyValues.Take(count).ToList();
                        }

                        List<JObject> result = new List<JObject>();
                        int           index1 = 0;
                        foreach (var entity in propertyValues)
                        {
                            JObject obj = ProcessSingle(page, entity, datatype, index1);
                            if (obj != null)
                            {
                                result.Add(obj);
                            }
                            index1++;
                        }
                        dataItem.Add(propertyName, new JArray(result));
                    }
                    else
                    {
                        var select = item.Select(selector);
                        if (select == null)
                        {
                            return null;
                        }
                        var propertyValue = ProcessSingle(page, select, datatype, 0);
                        dataItem.Add(propertyName, new JObject(propertyValue));
                    }
                }
            }

            var stoppingJobject = entityDefine.SelectToken("$.Stopping");
            var stopping        = stoppingJobject?.ToObject<Stopping>();

            if (stopping != null)
            {
                var  stoppingField    = entityDefine.SelectToken($"$.Fields[?(@.Name == '{stopping.PropertyName}')]");
                var  stoppingDataType = stoppingField.SelectToken("$.DataType");
                bool stoppingIsEntity = VerifyIfEntity(stoppingDataType);
                if (stoppingIsEntity)
                {
                    throw new SpiderExceptoin("Can't compare with object.");
                }

                // Invariant lower-casing: the type name is an identifier and must not depend on the
                // current culture (e.g. "INT" would become "ınt" under a Turkish culture).
                stopping.DataType = stoppingDataType.ToString().ToLowerInvariant();

                string value = dataItem.SelectToken($"$.{stopping.PropertyName}")?.ToString();

                // A missing value and a value that satisfies the stop rule both flag the page.
                if (string.IsNullOrEmpty(value) || stopping.NeedStop(value))
                {
                    page.MissTargetUrls = true;
                }
            }

            return dataItem;
        }