/// <summary>
/// Parses <paramref name="html"/>, passes every script with a <c>src</c> attribute, every
/// <c>link[href]</c> element and every <c>iframe[src]</c> element through <c>ResolveUrls</c>,
/// and then runs the configured script engine over the re-serialized markup.
/// </summary>
/// <param name="html">Markup to process.</param>
/// <param name="parser">Not used by this method.</param>
/// <param name="page">Page address handed to <c>ResolveUrls</c> for each collected element.</param>
/// <param name="cookies">Cookie container forwarded to the script engine.</param>
/// <returns>
/// The script engine's result, or <paramref name="html"/> unchanged when
/// <c>GhostConfiguration.ScriptEngine</c> is <c>null</c>.
/// </returns>
public static string Execute(string html, IParserProvider parser, Uri page, CookieContainer cookies)
{
    // No engine configured: nothing to run, return the input as-is.
    if (GhostConfiguration.ScriptEngine == null)
    {
        return html;
    }

    var builder = new DocumentBuilder(new Configuration());
    var document = builder.FromHtml(html);

    // Disabled: injecting a script that sets document.location.href to the page address.
    //var scriptInitialize = document.CreateElement("script");
    //scriptInitialize.TextContent = string.Format(@"document.location.href='{0}';", page.OriginalString);
    //document.Head.Append(scriptInitialize);

    // All three element sets are collected before any of them is handed to ResolveUrls.
    var externalScripts = document.Scripts
        .Where(x => x.Attributes.Any(i => i.Name == "src"))
        .ToList();
    var linkElements = document.QuerySelectorAll("link[href]").Cast<IElement>().ToList();
    var frameElements = document.QuerySelectorAll("iframe[src]").Cast<IElement>().ToList();

    foreach (var element in externalScripts)
    {
        ResolveUrls(page, element, "src");
    }

    foreach (var element in linkElements)
    {
        ResolveUrls(page, element, "href");
    }

    foreach (var element in frameElements)
    {
        ResolveUrls(page, element, "src");
    }

    var serialized = document.ToHtml();
    return GhostConfiguration.ScriptEngine.Run(serialized, cookies);
}
/// <summary>
/// Creates substitute parsers for the humidity, rainfall and temperature sensor types and a
/// substitute <see cref="IParserProvider"/> that returns them, storing the provider in
/// <c>_parserProvider</c>.
/// </summary>
/// <remarks>
/// Each parser is configured twice: once for the exact fixture stream, and once for any stream
/// whose length equals that of the matching historical-entry stream.
/// </remarks>
private void SetUpParsers()
{
    // Humidity
    var humidity = Substitute.For<IParser>();
    humidity.Type.Returns("humidity");
    humidity.ParseStream(_humidityStream).Returns(_humidityParsedData);
    humidity
        .ParseStream(Arg.Is<Stream>(x => x.Length.Equals(_humidityHistoricalEntryStream.Length)))
        .Returns(_humidityHistoricalParsedData);

    // Rainfall
    var rainfall = Substitute.For<IParser>();
    rainfall.Type.Returns("rainfall");
    rainfall.ParseStream(_rainfallStream).Returns(_rainfallParsedData);
    rainfall
        .ParseStream(Arg.Is<Stream>(x => x.Length.Equals(_rainfallHistoricalEntryStream.Length)))
        .Returns(_rainfallHistoricalParsedData);

    // Temperature
    var temperature = Substitute.For<IParser>();
    temperature.Type.Returns("temperature");
    temperature.ParseStream(_temperatureStream).Returns(_temperatureParsedData);
    temperature
        .ParseStream(Arg.Is<Stream>(x => x.Length.Equals(_temperatureHistoricalEntryStream.Length)))
        .Returns(_temperatureHistoricalParsedData);

    // Provider that maps each sensor type to its parser.
    var provider = Substitute.For<IParserProvider>();
    provider.GetParser(_humiditySensorType).Returns(humidity);
    provider.GetParser(_rainfallSensorType).Returns(rainfall);
    provider.GetParser(_temperatureSensorType).Returns(temperature);
    _parserProvider = provider;
}
/// <summary>
/// Gets rows as enumeration of dictionaries.
/// The first row supplies the headers; every following non-empty row becomes one dictionary
/// keyed by header name (or by column reference when the header has no name).
/// </summary>
/// <param name="rows">Source rows.</param>
/// <param name="parserProvider">Parser provider passed to <c>GetRowValues</c> for each data row.</param>
/// <returns>
/// A lazily evaluated sequence with one dictionary per data row whose values are not all
/// null or whitespace.
/// </returns>
/// <remarks>
/// Arguments are validated when the method is called, not when the result is first enumerated.
/// </remarks>
public static IEnumerable<IReadOnlyDictionary<string, string>> AsDictionaryList(
    this IEnumerable<ExcelElement<Row>> rows,
    IParserProvider parserProvider)
{
    // Validate eagerly: inside an iterator body these checks would be deferred until the
    // caller starts enumerating, far away from the faulty call site.
    rows.AssertArgumentNotNull(nameof(rows));
    parserProvider.AssertArgumentNotNull(nameof(parserProvider));

    return Iterate();

    IEnumerable<IReadOnlyDictionary<string, string>> Iterate()
    {
        ExcelElement<HeaderCell>[]? headers = null;

        foreach (var row in rows)
        {
            if (headers == null)
            {
                // Use first row as headers
                headers = row.GetHeaders();
                continue;
            }

            //TODO: Use cached parserProvider
            var rowValues = row.GetRowValues(headers: headers, parserProvider: parserProvider);

            // skip empty line
            if (rowValues.All(string.IsNullOrWhiteSpace))
            {
                continue;
            }

            // Fall back to the column reference for headers without a name.
            var headerNames = headers.Select(header => header.Data.Name ?? header.Data.ColumnReference).ToArrayDebug();

            var expandoObject = headerNames
                .Zip(rowValues, (header, value) => (header, value))
                .ToDictionary(tuple => tuple.header, tuple => tuple.value);

            yield return expandoObject;
        }
    }
}
/// <summary>
/// Converts rows into <see cref="TableDataRow"/> items. The first row supplies the headers;
/// each following row whose values are not all null or whitespace yields one item built from
/// a header-name-to-value dictionary and the source row.
/// </summary>
/// <param name="rows">Source rows.</param>
/// <param name="parserProvider">Parser provider passed through to <c>GetRowValues</c>.</param>
/// <returns>A lazily evaluated sequence of <see cref="TableDataRow"/>.</returns>
public static IEnumerable<TableDataRow> AsParsedRows(
    this IEnumerable<ExcelElement<Row>> rows,
    IParserProvider parserProvider = null)
{
    ExcelElement<HeaderCell>[] headers = null;

    foreach (var currentRow in rows)
    {
        if (headers == null)
        {
            // Nothing is emitted for the row that supplies the headers.
            headers = currentRow.GetHeaders();
            continue;
        }

        var values = currentRow.GetRowValues(headers: headers, parserProvider: parserProvider);

        // Only rows with at least one non-blank value are emitted.
        if (values.Any(value => !string.IsNullOrWhiteSpace(value)))
        {
            // A header without a name is keyed by its column reference instead.
            var names = headers
                .Select(header => header.Data.Name ?? header.Data.ColumnReference)
                .ToArrayDebug();

            var valuesByName = names
                .Zip(values, (name, value) => (name, value))
                .ToDictionary(pair => pair.name, pair => pair.value);

            yield return new TableDataRow(valuesByName, currentRow);
        }
    }
}
/// <summary>
/// Creates a crawl job and stores the supplied collaborators in its fields.
/// </summary>
/// <param name="outgoingNotifications">Stored in <c>_outgoingNotifications</c>.</param>
/// <param name="parserProvider">Stored in <c>_parserProvider</c>.</param>
/// <param name="logger">Stored in <c>_logger</c>.</param>
/// <param name="queryExecutor">Stored in <c>_queryExecutor</c>.</param>
public CrawlJob(
    IOutgoingNotifications outgoingNotifications,
    IParserProvider parserProvider,
    ILogger logger,
    QueryExecutor queryExecutor)
{
    _logger = logger;
    _queryExecutor = queryExecutor;
    _parserProvider = parserProvider;
    _outgoingNotifications = outgoingNotifications;
}
/// <summary>
/// Hands scripts with a <c>src</c> attribute, <c>link[href]</c> elements and
/// <c>iframe[src]</c> elements of the parsed markup to <c>ResolveUrls</c>, then executes the
/// configured script engine on the resulting HTML.
/// </summary>
/// <param name="html">Markup to process.</param>
/// <param name="parser">Not used by this method.</param>
/// <param name="page">Page address passed to <c>ResolveUrls</c>.</param>
/// <param name="cookies">Cookie container passed to the script engine.</param>
/// <returns>
/// Output of <c>GhostConfiguration.ScriptEngine.Run</c>, or the original
/// <paramref name="html"/> when no script engine is configured.
/// </returns>
public static string Execute(string html, IParserProvider parser, Uri page, CookieContainer cookies)
{
    if (GhostConfiguration.ScriptEngine == null)
    {
        // Nothing to execute without an engine.
        return html;
    }

    var document = new DocumentBuilder(new Configuration()).FromHtml(html);

    // Disabled: injecting a script that sets document.location.href to the page address.
    //var scriptInitialize = document.CreateElement("script");
    //scriptInitialize.TextContent = string.Format(@"document.location.href='{0}';", page.OriginalString);
    //document.Head.Append(scriptInitialize);

    // Snapshot the elements first; ResolveUrls is only invoked once all three lists exist.
    var scriptsWithSource = document.Scripts.Where(x => x.Attributes.Any(i => i.Name == "src")).ToList();
    var linksWithHref = document.QuerySelectorAll("link[href]").Cast<IElement>().ToList();
    var framesWithSource = document.QuerySelectorAll("iframe[src]").Cast<IElement>().ToList();

    scriptsWithSource.ForEach(element => ResolveUrls(page, element, "src"));
    linksWithHref.ForEach(element => ResolveUrls(page, element, "href"));
    framesWithSource.ForEach(element => ResolveUrls(page, element, "src"));

    return GhostConfiguration.ScriptEngine.Run(document.ToHtml(), cookies);
}
/// <summary>
/// Public constructor used for dependency injection.
/// </summary>
/// <param name="rssSourcesProvider">RSS sources provider instance.</param>
/// <param name="newsProvider">News provider instance.</param>
/// <param name="storage">Content storage instance.</param>
/// <param name="parser">Parser provider instance.</param>
public SourcesController(
    IRssSourcesProvider rssSourcesProvider,
    INewsProvider newsProvider,
    IContentStorage storage,
    IParserProvider parser)
{
    _newsParser = parser;
    _contentStorage = storage;
    _newsProvider = newsProvider;
    _rssSourcesProvider = rssSourcesProvider;
}
/// <summary>
/// Runs every parser of <paramref name="parserProvider"/> against <paramref name="sourceRow"/>
/// and flattens the per-parser results into a single <see cref="IPropertyValue"/> list.
/// </summary>
/// <param name="parserProvider"><see cref="IParserProvider"/> supplying the parsers.</param>
/// <param name="sourceRow">Source data.</param>
/// <returns><see cref="IPropertyValue"/> list.</returns>
public static IReadOnlyList<IPropertyValue> ParseProperties(
    this IParserProvider parserProvider,
    IReadOnlyDictionary<string, string> sourceRow)
{
    // Each parser contributes a sequence of values; SelectMany concatenates them in parser order.
    return parserProvider
        .GetParsers()
        .SelectMany(parser => ParseRowOrGetDefault(parser, sourceRow))
        .ToList();
}
/// <summary>
/// Creates a parser from its four collaborators, all of which are required.
/// </summary>
/// <param name="dataProvider">Data provider; must not be <c>null</c>.</param>
/// <param name="parserProvider">Parser provider; must not be <c>null</c>.</param>
/// <param name="senderProvider">Sender provider; must not be <c>null</c>.</param>
/// <param name="validator">Validator; must not be <c>null</c>.</param>
/// <exception cref="ArgumentNullException">
/// Any argument is <c>null</c>. Arguments are checked in declaration order, so the first
/// <c>null</c> one is reported.
/// </exception>
public Parser(
    IDataProvider<string> dataProvider,
    IParserProvider<string, Uri> parserProvider,
    ISenderProvider<Uri> senderProvider,
    IValidator<string> validator)
{
    if (dataProvider == null)
    {
        throw new ArgumentNullException(nameof(dataProvider));
    }

    if (parserProvider == null)
    {
        throw new ArgumentNullException(nameof(parserProvider));
    }

    if (senderProvider == null)
    {
        throw new ArgumentNullException(nameof(senderProvider));
    }

    if (validator == null)
    {
        throw new ArgumentNullException(nameof(validator));
    }

    this.dataProvider = dataProvider;
    this.parserProvider = parserProvider;
    this.senderProvider = senderProvider;
    this.validator = validator;
}
/// <summary>
/// Creates a devices handler and stores the supplied collaborators in its fields.
/// </summary>
/// <param name="parserProvider">Stored in <c>_parserProvider</c>.</param>
/// <param name="filePathBuilder">Stored in <c>_filePathBuilder</c>.</param>
/// <param name="blobClientProvider">Stored in <c>_blobClientProvider</c>.</param>
/// <param name="zipArchiveProvider">
/// Factory turning a <see cref="Stream"/> into a <see cref="ZipArchive"/>; stored in
/// <c>_zipArchiveProvider</c>.
/// </param>
/// <param name="deviceDataBuilder">Stored in <c>_deviceDataBuilder</c>.</param>
public DevicesHandler(
    IParserProvider parserProvider,
    IFilePathBuilder filePathBuilder,
    IBlobClientProvider blobClientProvider,
    Func<Stream, ZipArchive> zipArchiveProvider,
    IDeviceDataBuilder deviceDataBuilder)
{
    _deviceDataBuilder = deviceDataBuilder;
    _zipArchiveProvider = zipArchiveProvider;
    _blobClientProvider = blobClientProvider;
    _filePathBuilder = filePathBuilder;
    _parserProvider = parserProvider;
}
/// <summary>
/// Applies every parser of <paramref name="parserProvider"/> to <paramref name="sourceRow"/>
/// and collects the values of the successful results into an <see cref="IPropertyValue"/> list.
/// </summary>
/// <param name="parserProvider"><see cref="IParserProvider"/> supplying the parsers.</param>
/// <param name="sourceRow">Source data.</param>
/// <returns><see cref="IPropertyValue"/> list; results whose <c>IsSuccess</c> is false are omitted.</returns>
public static IReadOnlyList<IPropertyValue> ParseProperties(
    this IParserProvider parserProvider,
    IReadOnlyDictionary<string, string> sourceRow)
{
    parserProvider.AssertArgumentNotNull(nameof(parserProvider));
    sourceRow.AssertArgumentNotNull(nameof(sourceRow));

    // One parse attempt per parser; only successful attempts contribute a value.
    var successfulValues =
        from parser in parserProvider.GetParsers()
        let parseResult = parser.ParseRowUntyped(sourceRow)
        where parseResult.IsSuccess
        select parseResult.Value;

    return successfulValues.ToList();
}
/// <summary>
/// Maps rows to entities: rows are converted to dictionaries, each dictionary is parsed into a
/// property-value list with <paramref name="parserProvider"/>, and each list is turned into a
/// <typeparamref name="T"/> by <paramref name="factory"/>.
/// </summary>
/// <typeparam name="T">Entity type.</typeparam>
/// <param name="rows">Source rows.</param>
/// <param name="parserProvider"><see cref="IParserProvider"/> used for both conversion steps.</param>
/// <param name="factory">
/// Entity factory. When <c>null</c>, <see cref="Activator.CreateInstance(Type, object[])"/> is
/// called on <typeparamref name="T"/> with the property-value list as the only argument.
/// </param>
/// <returns>Lazily evaluated enumeration of <typeparamref name="T"/>.</returns>
public static IEnumerable<T> MapRows<T>(
    this IEnumerable<ExcelElement<Row>> rows,
    IParserProvider parserProvider,
    Func<IReadOnlyList<IPropertyValue>, T>? factory = null)
{
    rows.AssertArgumentNotNull(nameof(rows));
    parserProvider.AssertArgumentNotNull(nameof(parserProvider));

    // Reflection-based fallback when the caller supplies no factory.
    var createEntity = factory ?? (values => (T)Activator.CreateInstance(typeof(T), values));

    var dictionaries = rows.AsDictionaryList(parserProvider);
    var propertyLists = dictionaries.Select(dictionary => parserProvider.ParseProperties(dictionary));

    return propertyLists.Select(createEntity);
}
/// <summary>
/// Finds a parser provider by identifier. Types returned by
/// <c>ReflectionUtilities.IterateImplementingTypes</c> for <see cref="IParserProvider"/> in the
/// executing assembly are instantiated one at a time until one reports the requested identifier.
/// </summary>
/// <param name="identifier">Identifier to compare against each candidate's <c>Identifier</c>.</param>
/// <returns>The first matching provider instance, or <c>null</c> when none matches.</returns>
private IParserProvider GetProvider(string identifier)
{
    IEnumerable<Type> implementations = ReflectionUtilities.IterateImplementingTypes(
        typeof(IParserProvider),
        Assembly.GetExecutingAssembly()
    );

    foreach (Type implementation in implementations)
    {
        // Each candidate is created through Activator.CreateInstance(Type) with no arguments.
        var instance = (IParserProvider)Activator.CreateInstance(implementation);
        if (instance.Identifier != identifier)
        {
            continue;
        }

        return instance;
    }

    return null;
}
/// <summary>
/// Downloads <c>source.Feed.Url</c>, writes the output feed header (generator, id, self link
/// and updated timestamp), and passes the response to the parser provider selected by
/// <c>source.Feed.SourceType</c>.
/// </summary>
/// <param name="source">Feed source to download and parse.</param>
/// <returns>A task that completes when the provider has finished parsing the response.</returns>
/// <exception cref="ApplicationException">
/// No provider is found for <c>source.Feed.SourceType</c>.
/// </exception>
public async Task AddSource(FeedSource source)
{
    await Console.Error.WriteLineAsync("[Builder] Downloading content");
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(source.Feed.Url);
    request.UserAgent = UserAgentHelper.UserAgent;

    // Dispose the response on every exit path so the underlying connection is released.
    using (WebResponse response = await request.GetResponseAsync())
    {
        await Console.Error.WriteLineAsync("[Builder] Generating feed header");

        // Write the header
        await feed.WriteGenerator("Polyfeed", "https://github.com/sbrl/PolyFeed.git", Program.GetProgramVersion());
        await feed.WriteId(source.Feed.Url);
        await feed.Write(new SyndicationLink(new Uri(source.Feed.Url), AtomLinkTypes.Self));

        string lastModified = response.Headers.Get("last-modified");
        if (string.IsNullOrWhiteSpace(lastModified))
        {
            // The server sent no modification date; fall back to the current time.
            await feed.WriteUpdated(DateTimeOffset.Now);
        }
        else
        {
            // HTTP dates are machine-readable, so parse them independently of the current culture.
            await feed.WriteUpdated(
                DateTimeOffset.Parse(lastModified, System.Globalization.CultureInfo.InvariantCulture));
        }

        IParserProvider provider = GetProvider(source.Feed.SourceType);
        if (provider == null)
        {
            throw new ApplicationException($"Error: A provider for the source type {source.Feed.SourceType} wasn't found.");
        }

        provider.SetOutputFeed(feed, xml);
        await provider.ParseWebResponse(source, response);
    }

    await Console.Error.WriteLineAsync("[Builder] Done!");
}
/// <summary>
/// Maps the rows of <paramref name="sheet"/> to entities with the specified
/// <paramref name="parserProvider"/>.
/// </summary>
/// <typeparam name="T">Entity type.</typeparam>
/// <param name="sheet">Sheet.</param>
/// <param name="parserProvider"><see cref="IParserProvider"/>.</param>
/// <param name="factory">
/// Entity factory. When <c>null</c>, <see cref="Activator.CreateInstance(Type, object[])"/> is
/// called on <typeparamref name="T"/> with the property-value list as the only argument.
/// </param>
/// <returns>
/// Enumeration of <typeparamref name="T"/>; an empty array when <c>sheet.IsEmpty()</c> is true.
/// </returns>
public static IEnumerable<T> GetRowsAs<T>(
    this ExcelElement<Sheet> sheet,
    IParserProvider parserProvider,
    Func<IReadOnlyList<IPropertyValue>, T>? factory = null)
{
    // An empty sheet has nothing to map.
    if (sheet.IsEmpty())
    {
        return Array.Empty<T>();
    }

    // Reflection-based fallback when the caller supplies no factory.
    var createEntity = factory ?? (values => (T)Activator.CreateInstance(typeof(T), values));

    var sheetRows = sheet.GetRows();
    return sheetRows.MapRows(parserProvider, createEntity);
}
/// <summary>
/// Gets one value per header for the given row.
/// <see cref="IPropertyParser"/> will be attached to cells whose header name equals
/// <see cref="IPropertyParser.SourceName"/>.
/// </summary>
/// <param name="row">Row to read.</param>
/// <param name="headers">Headers that define the columns and the order of the result.</param>
/// <param name="parserProvider">Source of the property parsers looked up by header name.</param>
/// <param name="nullValue">
/// Value used for headers without a matching cell; also passed to <c>GetCellValue</c>.
/// </param>
/// <returns>
/// An array with one entry per header, or an empty array when <c>row.IsEmpty()</c> is true.
/// </returns>
public static string[] GetRowValues(
    this ExcelElement<Row> row,
    ExcelElement<HeaderCell>[] headers,
    IParserProvider parserProvider,
    string? nullValue = null)
{
    if (row.IsEmpty())
    {
        return Array.Empty<string>();
    }

    var rowCells = row.GetRowCells();
    var values = new string[headers.Length];

    for (int column = 0; column < headers.Length; column++)
    {
        var header = headers[column];

        // Find cell for the same column.
        var matchingCell = rowCells.FirstOrDefault(
            candidate => candidate.Data.CellReference.GetColumnReference() == header.Data.ColumnReference);

        if (matchingCell == null)
        {
            // No cell in this column for the current row.
            values[column] = nullValue;
            continue;
        }

        // Attach the parser whose source name equals the header name, if there is one,
        // before the cell value is read.
        var columnParser = parserProvider
            .GetParsers()
            .FirstOrDefault(parser => parser.SourceName == header.Data.Name);
        if (columnParser != null)
        {
            matchingCell.SetMetadata(columnParser);
        }

        values[column] = matchingCell.GetCellValue(nullValue);
    }

    return values;
}
/// <summary>
/// Wraps <paramref name="parserProvider"/> in a new <see cref="CachedParserProvider"/>.
/// </summary>
/// <param name="parserProvider">Parser provider to cache.</param>
/// <returns>Cached parser provider.</returns>
public static IParserProvider Cached(this IParserProvider parserProvider)
    => new CachedParserProvider(parserProvider);
/// <summary>
/// Initializes a new instance of the <see cref="CachedParserProvider"/> class by enumerating
/// the parsers of <paramref name="parserProvider"/> once and storing them as an array.
/// </summary>
/// <param name="parserProvider">Parser provider to cache.</param>
public CachedParserProvider(IParserProvider parserProvider)
{
    parserProvider.AssertArgumentNotNull(nameof(parserProvider));

    // The source provider is enumerated here, at construction time.
    var sourceParsers = parserProvider.GetParsers();
    _parsers = sourceParsers.ToArray();
}
/// <summary>
/// Creates a parser manager from a parser provider and a game grouper.
/// </summary>
/// <param name="provider">Assigned to <c>Provider</c>.</param>
/// <param name="grouper">Assigned to the <c>grouper</c> member.</param>
public ParserManager(IParserProvider provider, IGameGrouper grouper)
{
    // The grouper is assigned before Provider, as in the original ordering.
    this.grouper = grouper;
    Provider = provider;
}