Beispiel #1
0
        /// <summary>
        /// Creates the selector that matches the extraction type declared on
        /// <paramref name="extractBy"/>.
        /// </summary>
        /// <param name="extractBy">
        /// Extraction descriptor. Its <c>Type</c> chooses the selector kind and its
        /// <c>Value</c> is handed to the selector as the expression.
        /// </param>
        /// <returns>
        /// A CSS, regex, JSON-path or environment selector for the matching type;
        /// for <c>XPath</c> and for every other type, the result of
        /// <c>GetXpathSelector</c>.
        /// </returns>
        public static ISelector GetSelector(ExtractBy extractBy)
        {
            string expression = extractBy.Value;

            switch (extractBy.Type)
            {
                case ExtractBy.ExtracType.Css:
                    return new CssSelector(expression);

                case ExtractBy.ExtracType.Regex:
                    return new RegexSelector(expression);

                case ExtractBy.ExtracType.JsonPath:
                    return new JsonPathSelector(expression);

                case ExtractBy.ExtracType.Enviroment:
                    return new EnviromentSelector(expression);

                // XPath is both an explicit choice and the fallback for any other type.
                case ExtractBy.ExtracType.XPath:
                default:
                    return GetXpathSelector(expression);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Builds one instance of the model type and fills each configured field
        /// with the value(s) extracted from the page.
        /// </summary>
        /// <param name="page">Page the values are extracted from.</param>
        /// <param name="html">HTML fragment used by selectors when <paramref name="isRaw"/> is false.</param>
        /// <param name="isRaw">
        /// When true, fields whose source is <c>Html</c> are selected from the page
        /// document instead of from <paramref name="html"/>.
        /// </param>
        /// <returns>
        /// The populated model instance; <c>null</c> as soon as a field marked
        /// <c>NotNull</c> yields no value. If an exception occurs it is logged and
        /// the object built so far is returned (<c>null</c> if the instance could
        /// not even be created).
        /// </returns>
        private object ProcessSingle(Page page, string html, bool isRaw)
        {
            object o = null;

            try
            {
                o = Activator.CreateInstance(_modelType);
                foreach (FieldExtractor fieldExtractor in _fieldExtractors)
                {
                    if (fieldExtractor.Multi)
                    {
                        IList <string> value = ExtractValueList(fieldExtractor, page, html, isRaw);
                        if ((value == null || value.Count == 0) && fieldExtractor.NotNull)
                        {
                            return(null);
                        }

                        if (fieldExtractor.ObjectFormatter != null)
                        {
                            // An optional field may legitimately have no values; only run
                            // the expression when there is a list to iterate over.
                            if (value != null && !string.IsNullOrEmpty(fieldExtractor.Expresion))
                            {
                                value = ApplyExpressionToEach(fieldExtractor.Expresion, value);
                            }

                            IList <dynamic> converted = Convert(value, fieldExtractor.ObjectFormatter);

                            // Reuse the collection already held by the property, or create one
                            // of the property's declared type.
                            dynamic field = fieldExtractor.Field.GetValue(o) ?? Activator.CreateInstance(fieldExtractor.Field.PropertyType);

                            // Look up Add(T) on the property's type so converted items can be
                            // appended through reflection.
                            Type[]     genericType = fieldExtractor.Field.PropertyType.GetGenericArguments();
                            MethodInfo method      = fieldExtractor.Field.PropertyType.GetMethod("Add", genericType);
                            foreach (var v in converted)
                            {
                                method.Invoke(field, new object[] { v });
                            }

                            fieldExtractor.Field.SetValue(o, field);
                        }
                        else
                        {
                            fieldExtractor.Field.SetValue(o, value);
                        }
                    }
                    else
                    {
                        string value = ExtractSingleValue(fieldExtractor, page, html, isRaw);
                        if (value == null && fieldExtractor.NotNull)
                        {
                            return(null);
                        }

                        if (fieldExtractor.ObjectFormatter != null)
                        {
                            if (!string.IsNullOrEmpty(fieldExtractor.Expresion))
                            {
                                value = ApplyExpression(fieldExtractor.Expresion, value);
                            }

                            dynamic converted = Convert(value, fieldExtractor.ObjectFormatter);

                            if (converted == null && fieldExtractor.NotNull)
                            {
                                return(null);
                            }
                            fieldExtractor.Field.SetValue(o, converted);
                        }
                        else
                        {
                            fieldExtractor.Field.SetValue(o, value);
                        }
                    }
                }

                // Give the model a chance to post-process itself once all fields are set.
                IAfterExtractor afterExtractor = o as IAfterExtractor;
                afterExtractor?.AfterProcess(page);
            }
            catch (Exception e)
            {
                // NOTE(review): the partially populated object is still returned after a
                // failure; kept as-is because callers may rely on this best-effort result.
                Logger.Error("extract fail", e);
            }
            return(o);
        }

        /// <summary>
        /// Selects the list of raw string values for a multi-valued field according to its source.
        /// </summary>
        /// <returns>
        /// The selected values; <c>null</c> when the source is <c>Enviroment</c> but the
        /// field's selector is not an <c>EnviromentSelector</c>.
        /// </returns>
        private static IList <string> ExtractValueList(FieldExtractor fieldExtractor, Page page, string html, bool isRaw)
        {
            switch (fieldExtractor.Source)
            {
            case ExtractSource.RawHtml:
                return(page.GetHtml().SelectDocumentForList(fieldExtractor.Selector));

            case ExtractSource.Html:
                return(isRaw ? page.GetHtml().SelectDocumentForList(fieldExtractor.Selector) : fieldExtractor.Selector.SelectList(html));

            case ExtractSource.Url:
                return(fieldExtractor.Selector.SelectList(page.GetUrl().ToString()));

            case ExtractSource.Enviroment:
            {
                EnviromentSelector selector = fieldExtractor.Selector as EnviromentSelector;
                return(selector?.GetValueList(page));
            }

            default:
                return(fieldExtractor.Selector.SelectList(html));
            }
        }

        /// <summary>
        /// Selects the raw string value for a single-valued field according to its source.
        /// </summary>
        /// <returns>
        /// The selected value; <c>null</c> when the source is <c>Enviroment</c> but the
        /// field's selector is not an <c>EnviromentSelector</c>, or when that selector
        /// yields <c>null</c>.
        /// </returns>
        private static string ExtractSingleValue(FieldExtractor fieldExtractor, Page page, string html, bool isRaw)
        {
            switch (fieldExtractor.Source)
            {
            case ExtractSource.RawHtml:
                return(page.GetHtml().SelectDocument(fieldExtractor.Selector));

            case ExtractSource.Html:
                return(isRaw ? page.GetHtml().SelectDocument(fieldExtractor.Selector) : fieldExtractor.Selector.Select(html));

            case ExtractSource.Url:
                return(fieldExtractor.Selector.Select(page.GetUrl().ToString()));

            case ExtractSource.Enviroment:
            {
                EnviromentSelector selector = fieldExtractor.Selector as EnviromentSelector;
                return(selector?.GetValue(page)?.ToString());
            }

            default:
                return(fieldExtractor.Selector.Select(html));
            }
        }

        /// <summary>
        /// Parses <paramref name="expression"/> and evaluates it with a
        /// <c>ModifyScriptVisitor</c> seeded with <paramref name="value"/>.
        /// </summary>
        /// <returns>The visitor's <c>Value</c> after visiting the parsed expression.</returns>
        private static string ApplyExpression(string expression, string value)
        {
            // The expression is fed to ANTLR directly as a string; no intermediate
            // stream is needed, so nothing is left undisposed.
            AntlrInputStream  input  = new AntlrInputStream(expression);
            ModifyScriptLexer lexer  = new ModifyScriptLexer(input);
            CommonTokenStream tokens = new CommonTokenStream(lexer);

            ModifyScriptVisitor modifyScriptVisitor = new ModifyScriptVisitor(value);
            ModifyScriptParser  parser = new ModifyScriptParser(tokens);

            modifyScriptVisitor.Visit(parser.expr());
            return(modifyScriptVisitor.Value);
        }

        /// <summary>
        /// Evaluates <paramref name="expression"/> once per entry of
        /// <paramref name="values"/> and collects the non-empty results.
        /// </summary>
        /// <param name="expression">Modify-script expression text.</param>
        /// <param name="values">Values to transform; must not be null.</param>
        /// <returns>A new list holding every result that is neither null nor empty, in input order.</returns>
        private static IList <string> ApplyExpressionToEach(string expression, IList <string> values)
        {
            // Lexer and token stream are built once and rewound for every value.
            AntlrInputStream  input  = new AntlrInputStream(expression);
            ModifyScriptLexer lexer  = new ModifyScriptLexer(input);
            CommonTokenStream tokens = new CommonTokenStream(lexer);

            IList <string> results = new List <string>();
            foreach (string item in values)
            {
                lexer.Reset();
                tokens.Reset();

                ModifyScriptVisitor modifyScriptVisitor = new ModifyScriptVisitor(item);
                ModifyScriptParser  parser = new ModifyScriptParser(tokens);
                modifyScriptVisitor.Visit(parser.expr());
                if (!string.IsNullOrEmpty(modifyScriptVisitor.Value))
                {
                    results.Add(modifyScriptVisitor.Value);
                }
            }
            return(results);
        }