/// <summary>
/// Creates the selector that corresponds to the extraction type declared on
/// <paramref name="extractBy"/>, using its <c>Value</c> as the selector expression.
/// </summary>
/// <param name="extractBy">Extraction description providing the selector type and expression.</param>
/// <returns>
/// A CSS, regex, JSON-path or environment selector for the matching types; for the XPath type
/// and for any type not listed here, the result of <c>GetXpathSelector</c>.
/// </returns>
public static ISelector GetSelector(ExtractBy extractBy)
{
    string expression = extractBy.Value;

    switch (extractBy.Type)
    {
        case ExtractBy.ExtracType.Css:
            return new CssSelector(expression);

        case ExtractBy.ExtracType.Regex:
            return new RegexSelector(expression);

        case ExtractBy.ExtracType.JsonPath:
            return new JsonPathSelector(expression);

        case ExtractBy.ExtracType.Enviroment:
            return new EnviromentSelector(expression);

        // XPath is both an explicit option and the fallback for unlisted types.
        case ExtractBy.ExtracType.XPath:
        default:
            return GetXpathSelector(expression);
    }
}
/// <summary>
/// Creates one instance of the model type and fills each configured field from the page.
/// </summary>
/// <param name="page">Page supplying the document, the URL and the environment values.</param>
/// <param name="html">HTML fragment that selectors are applied to when <paramref name="isRaw"/> is false.</param>
/// <param name="isRaw">When true, fields with the Html source are selected from the page document instead of <paramref name="html"/>.</param>
/// <returns>
/// The populated model; <c>null</c> when a field flagged <c>NotNull</c> produced no value.
/// If an exception is thrown while extracting, it is logged and whatever was built up to
/// that point is returned (this can be <c>null</c> when the model could not be instantiated).
/// </returns>
private object ProcessSingle(Page page, string html, bool isRaw)
{
    object o = null;
    try
    {
        o = Activator.CreateInstance(_modelType);

        foreach (FieldExtractor fieldExtractor in _fieldExtractors)
        {
            bool populated = fieldExtractor.Multi
                ? TryPopulateMultiField(o, fieldExtractor, page, html, isRaw)
                : TryPopulateSingleField(o, fieldExtractor, page, html, isRaw);

            // A required field without a value discards the whole model.
            if (!populated)
            {
                return null;
            }
        }

        IAfterExtractor afterExtractor = o as IAfterExtractor;
        afterExtractor?.AfterProcess(page);
    }
    catch (Exception e)
    {
        // Best-effort extraction: log and fall through to return the (possibly partial) model.
        Logger.Error("extract fail", e);
    }

    return o;
}

/// <summary>
/// Builds a lexer over the given modify-script expression. The expression is read completely
/// while the input stream is constructed, so the temporary stream and writer are released
/// before returning.
/// </summary>
private static ModifyScriptLexer CreateExpressionLexer(string expression)
{
    using (MemoryStream stream = new MemoryStream())
    using (StreamWriter writer = new StreamWriter(stream))
    {
        writer.Write(expression);
        writer.Flush();

        // Rewind so the lexer input starts at the beginning of the expression.
        stream.Position = 0;
        return new ModifyScriptLexer(new AntlrInputStream(stream));
    }
}

/// <summary>
/// Selects the list of values for a multi-valued field, optionally rewrites each value with
/// the field's expression, converts them and stores them on <paramref name="model"/>.
/// </summary>
/// <returns><c>false</c> when the field is flagged <c>NotNull</c> and nothing was selected; otherwise <c>true</c>.</returns>
private bool TryPopulateMultiField(object model, FieldExtractor fieldExtractor, Page page, string html, bool isRaw)
{
    IList<string> values = null;
    switch (fieldExtractor.Source)
    {
        case ExtractSource.RawHtml:
            values = page.GetHtml().SelectDocumentForList(fieldExtractor.Selector);
            break;

        case ExtractSource.Html:
            values = isRaw ? page.GetHtml().SelectDocumentForList(fieldExtractor.Selector) : fieldExtractor.Selector.SelectList(html);
            break;

        case ExtractSource.Url:
            values = fieldExtractor.Selector.SelectList(page.GetUrl().ToString());
            break;

        case ExtractSource.Enviroment:
        {
            // Only an EnviromentSelector can read from the page; any other selector leaves the list null.
            EnviromentSelector enviromentSelector = fieldExtractor.Selector as EnviromentSelector;
            if (enviromentSelector != null)
            {
                values = enviromentSelector.GetValueList(page);
            }
            break;
        }

        default:
            values = fieldExtractor.Selector.SelectList(html);
            break;
    }

    if ((values == null || values.Count == 0) && fieldExtractor.NotNull)
    {
        return false;
    }

    if (fieldExtractor.ObjectFormatter == null)
    {
        fieldExtractor.Field.SetValue(model, values);
        return true;
    }

    if (!string.IsNullOrEmpty(fieldExtractor.Expresion))
    {
        ModifyScriptLexer lexer = CreateExpressionLexer(fieldExtractor.Expresion);
        CommonTokenStream tokens = new CommonTokenStream(lexer);

        IList<string> modified = new List<string>();

        // An optional field may have selected nothing (null); iterating it directly would
        // throw and abort the whole model, so a missing list is treated as empty here.
        if (values != null)
        {
            foreach (string item in values)
            {
                // The lexer and token stream are shared, so rewind them for every item.
                lexer.Reset();
                tokens.Reset();

                ModifyScriptVisitor modifyScriptVisitor = new ModifyScriptVisitor(item);
                ModifyScriptParser parser = new ModifyScriptParser(tokens);
                modifyScriptVisitor.Visit(parser.expr());

                // Items whose rewritten value is null or empty are dropped.
                if (!string.IsNullOrEmpty(modifyScriptVisitor.Value))
                {
                    modified.Add(modifyScriptVisitor.Value);
                }
            }
        }

        values = modified;
    }

    IList<dynamic> converted = Convert(values, fieldExtractor.ObjectFormatter);

    // Reuse the collection already present on the model, or create one of the property's type,
    // then append through its Add method located by reflection.
    dynamic field = fieldExtractor.Field.GetValue(model) ?? Activator.CreateInstance(fieldExtractor.Field.PropertyType);
    Type[] genericType = fieldExtractor.Field.PropertyType.GetGenericArguments();
    MethodInfo method = fieldExtractor.Field.PropertyType.GetMethod("Add", genericType);
    foreach (var v in converted)
    {
        method.Invoke(field, new object[] { v });
    }

    fieldExtractor.Field.SetValue(model, field);
    return true;
}

/// <summary>
/// Selects the value for a single-valued field, optionally rewrites it with the field's
/// expression, converts it and stores it on <paramref name="model"/>.
/// </summary>
/// <returns>
/// <c>false</c> when the field is flagged <c>NotNull</c> and either the selected or the
/// converted value is null; otherwise <c>true</c>.
/// </returns>
private bool TryPopulateSingleField(object model, FieldExtractor fieldExtractor, Page page, string html, bool isRaw)
{
    string value = null;
    switch (fieldExtractor.Source)
    {
        case ExtractSource.RawHtml:
            value = page.GetHtml().SelectDocument(fieldExtractor.Selector);
            break;

        case ExtractSource.Html:
            value = isRaw ? page.GetHtml().SelectDocument(fieldExtractor.Selector) : fieldExtractor.Selector.Select(html);
            break;

        case ExtractSource.Url:
            value = fieldExtractor.Selector.Select(page.GetUrl().ToString());
            break;

        case ExtractSource.Enviroment:
        {
            // Only an EnviromentSelector can read from the page; any other selector leaves the value null.
            EnviromentSelector enviromentSelector = fieldExtractor.Selector as EnviromentSelector;
            if (enviromentSelector != null)
            {
                value = enviromentSelector.GetValue(page)?.ToString();
            }
            break;
        }

        default:
            value = fieldExtractor.Selector.Select(html);
            break;
    }

    if (value == null && fieldExtractor.NotNull)
    {
        return false;
    }

    if (fieldExtractor.ObjectFormatter == null)
    {
        fieldExtractor.Field.SetValue(model, value);
        return true;
    }

    if (!string.IsNullOrEmpty(fieldExtractor.Expresion))
    {
        ModifyScriptLexer lexer = CreateExpressionLexer(fieldExtractor.Expresion);
        CommonTokenStream tokens = new CommonTokenStream(lexer);
        ModifyScriptVisitor modifyScriptVisitor = new ModifyScriptVisitor(value);
        ModifyScriptParser parser = new ModifyScriptParser(tokens);
        modifyScriptVisitor.Visit(parser.expr());
        value = modifyScriptVisitor.Value;
    }

    dynamic converted = Convert(value, fieldExtractor.ObjectFormatter);
    if (converted == null && fieldExtractor.NotNull)
    {
        return false;
    }

    fieldExtractor.Field.SetValue(model, converted);
    return true;
}