コード例 #1
0
        /// <summary>
        /// Parses <paramref name="html"/>, passes every script, link and iframe element that carries a URL
        /// attribute to <c>ResolveUrls</c>, and then runs the re-serialized markup through the configured script engine.
        /// </summary>
        /// <param name="html">HTML markup of the page.</param>
        /// <param name="parser">Not used by this method.</param>
        /// <param name="page">Address of the page; handed to <c>ResolveUrls</c> for every processed element.</param>
        /// <param name="cookies">Cookie container handed to the script engine.</param>
        /// <returns>
        /// The script engine result, or <paramref name="html"/> unchanged when
        /// <c>GhostConfiguration.ScriptEngine</c> is <c>null</c>.
        /// </returns>
        public static string Execute(string html, IParserProvider parser, Uri page, CookieContainer cookies)
        {
            // No script engine configured: hand the markup back untouched.
            if (GhostConfiguration.ScriptEngine == null)
            {
                return html;
            }

            var document = new DocumentBuilder(new Configuration()).FromHtml(html);

            // Collect all three element groups before any of them is processed.
            var externalScripts = document.Scripts.Where(s => s.Attributes.Any(a => a.Name == "src")).ToList();
            var linkElements = document.QuerySelectorAll("link[href]").Cast<IElement>().ToList();
            var frameElements = document.QuerySelectorAll("iframe[src]").Cast<IElement>().ToList();

            foreach (var externalScript in externalScripts)
            {
                ResolveUrls(page, externalScript, "src");
            }

            foreach (var linkElement in linkElements)
            {
                ResolveUrls(page, linkElement, "href");
            }

            foreach (var frameElement in frameElements)
            {
                ResolveUrls(page, frameElement, "src");
            }

            return GhostConfiguration.ScriptEngine.Run(document.ToHtml(), cookies);
        }
コード例 #2
0
        /// <summary>
        /// Creates substitute parsers for the humidity, rainfall and temperature sensors and a substitute
        /// parser provider that returns them for the matching sensor type.
        /// </summary>
        private void SetUpParsers()
        {
            var humidity = Substitute.For<IParser>();
            var rainfall = Substitute.For<IParser>();
            var temperature = Substitute.For<IParser>();

            // For each parser the exact-stream setup is registered first,
            // followed by the matcher that compares stream lengths.
            humidity.Type.Returns("humidity");
            humidity.ParseStream(_humidityStream).Returns(_humidityParsedData);
            humidity
                .ParseStream(Arg.Is<Stream>(s => s.Length.Equals(_humidityHistoricalEntryStream.Length)))
                .Returns(_humidityHistoricalParsedData);

            rainfall.Type.Returns("rainfall");
            rainfall.ParseStream(_rainfallStream).Returns(_rainfallParsedData);
            rainfall
                .ParseStream(Arg.Is<Stream>(s => s.Length.Equals(_rainfallHistoricalEntryStream.Length)))
                .Returns(_rainfallHistoricalParsedData);

            temperature.Type.Returns("temperature");
            temperature.ParseStream(_temperatureStream).Returns(_temperatureParsedData);
            temperature
                .ParseStream(Arg.Is<Stream>(s => s.Length.Equals(_temperatureHistoricalEntryStream.Length)))
                .Returns(_temperatureHistoricalParsedData);

            // Provider hands out the parser registered for each sensor type.
            _parserProvider = Substitute.For<IParserProvider>();
            _parserProvider.GetParser(_humiditySensorType).Returns(humidity);
            _parserProvider.GetParser(_rainfallSensorType).Returns(rainfall);
            _parserProvider.GetParser(_temperatureSensorType).Returns(temperature);
        }
コード例 #3
0
        /// <summary>
        /// Gets rows as enumeration of dictionaries.
        /// The first row is used as the header row; every following non-empty row is returned as a dictionary
        /// that maps header name (or column reference when the header has no name) to the cell value.
        /// </summary>
        /// <param name="rows">Source rows.</param>
        /// <param name="parserProvider">Parser provider passed to <c>GetRowValues</c> for every data row.</param>
        /// <returns>Lazily evaluated sequence with one dictionary per non-empty data row.</returns>
        public static IEnumerable<IReadOnlyDictionary<string, string>> AsDictionaryList(
            this IEnumerable<ExcelElement<Row>> rows,
            IParserProvider parserProvider)
        {
            // Argument checks live outside the iterator so they run at call time.
            // Inside an iterator body they would be deferred until the first MoveNext().
            rows.AssertArgumentNotNull(nameof(rows));
            parserProvider.AssertArgumentNotNull(nameof(parserProvider));

            return Iterate();

            IEnumerable<IReadOnlyDictionary<string, string>> Iterate()
            {
                ExcelElement<HeaderCell>[]? headers = null;
                foreach (var row in rows)
                {
                    if (headers == null)
                    {
                        // Use first row as headers
                        headers = row.GetHeaders();
                        continue;
                    }

                    //TODO: Use cached parserProvider
                    var rowValues = row.GetRowValues(headers: headers, parserProvider: parserProvider);

                    // skip empty line
                    if (rowValues.All(string.IsNullOrWhiteSpace))
                    {
                        continue;
                    }

                    var headerNames = headers.Select(header => header.Data.Name ?? header.Data.ColumnReference).ToArrayDebug();
                    var expandoObject = headerNames
                                        .Zip(rowValues, (header, value) => (header, value))
                                        .ToDictionary(tuple => tuple.header, tuple => tuple.value);

                    yield return expandoObject;
                }
            }
        }
コード例 #4
0
        /// <summary>
        /// Lazily converts rows into <see cref="TableDataRow"/> items.
        /// The first row supplies the headers; each later row that contains at least one non-blank value is
        /// returned together with a dictionary keyed by header name (or column reference when the name is null).
        /// </summary>
        /// <param name="rows">Source rows.</param>
        /// <param name="parserProvider">Parser provider forwarded to <c>GetRowValues</c>.</param>
        /// <returns>Sequence of parsed data rows.</returns>
        public static IEnumerable<TableDataRow> AsParsedRows(
            this IEnumerable<ExcelElement<Row>> rows,
            IParserProvider parserProvider = null)
        {
            ExcelElement<HeaderCell>[] columns = null;

            foreach (var current in rows)
            {
                if (columns == null)
                {
                    // Header row: remember it and move on to the data rows.
                    columns = current.GetHeaders();
                }
                else
                {
                    var values = current.GetRowValues(headers: columns, parserProvider: parserProvider);

                    // Rows without any non-blank value are skipped.
                    bool hasContent = values.Any(text => !string.IsNullOrWhiteSpace(text));
                    if (hasContent)
                    {
                        var names = columns.Select(column => column.Data.Name ?? column.Data.ColumnReference).ToArrayDebug();
                        var valuesByName = names
                            .Zip(values, (name, text) => (name, text))
                            .ToDictionary(pair => pair.name, pair => pair.text);

                        yield return new TableDataRow(valuesByName, current);
                    }
                }
            }
        }
コード例 #5
0
 /// <summary>
 /// Creates the job and stores the supplied collaborators in the instance fields.
 /// </summary>
 /// <param name="outgoingNotifications">Stored in <c>_outgoingNotifications</c>.</param>
 /// <param name="parserProvider">Stored in <c>_parserProvider</c>.</param>
 /// <param name="logger">Stored in <c>_logger</c>.</param>
 /// <param name="queryExecutor">Stored in <c>_queryExecutor</c>.</param>
 public CrawlJob(IOutgoingNotifications outgoingNotifications, IParserProvider parserProvider, ILogger logger, QueryExecutor queryExecutor)
 {
     _logger = logger;
     _queryExecutor = queryExecutor;
     _parserProvider = parserProvider;
     _outgoingNotifications = outgoingNotifications;
 }
コード例 #6
0
        /// <summary>
        /// Builds a document from <paramref name="html"/>, lets <c>ResolveUrls</c> process the URL attribute of
        /// every matching script, link and iframe element, and executes the resulting markup with the script engine.
        /// </summary>
        /// <param name="html">HTML markup of the page.</param>
        /// <param name="parser">Not used by this method.</param>
        /// <param name="page">Address of the page; forwarded to <c>ResolveUrls</c>.</param>
        /// <param name="cookies">Cookie container passed to the script engine.</param>
        /// <returns>
        /// Output of <c>GhostConfiguration.ScriptEngine.Run</c>, or the original <paramref name="html"/> when no
        /// script engine is configured.
        /// </returns>
        public static string Execute(string html, IParserProvider parser, Uri page, CookieContainer cookies)
        {
            if (GhostConfiguration.ScriptEngine == null)
            {
                // Nothing can execute the page, so it is returned as received.
                return html;
            }

            var builder = new DocumentBuilder(new Configuration());
            var dom = builder.FromHtml(html);

            // All element lists are materialized first, then processed group by group.
            var scriptsWithSource = dom.Scripts.Where(s => s.Attributes.Any(attr => attr.Name == "src")).ToList();
            var links = dom.QuerySelectorAll("link[href]").Cast<IElement>().ToList();
            var frames = dom.QuerySelectorAll("iframe[src]").Cast<IElement>().ToList();

            scriptsWithSource.ForEach(element => ResolveUrls(page, element, "src"));
            links.ForEach(element => ResolveUrls(page, element, "href"));
            frames.ForEach(element => ResolveUrls(page, element, "src"));

            var rewrittenHtml = dom.ToHtml();
            return GhostConfiguration.ScriptEngine.Run(rewrittenHtml, cookies);
        }
コード例 #7
0
 /// <summary>
 /// Public constructor used for dependency injection.
 /// </summary>
 /// <param name="rssSourcesProvider">RSS sources provider instance.</param>
 /// <param name="newsProvider">News provider instance.</param>
 /// <param name="storage">Content storage instance.</param>
 /// <param name="parser">Parser provider instance.</param>
 public SourcesController(
     IRssSourcesProvider rssSourcesProvider,
     INewsProvider newsProvider,
     IContentStorage storage,
     IParserProvider parser)
 {
     _newsParser = parser;
     _contentStorage = storage;
     _newsProvider = newsProvider;
     _rssSourcesProvider = rssSourcesProvider;
 }
コード例 #8
0
        /// <summary>
        /// Runs every parser of <paramref name="parserProvider"/> against <paramref name="sourceRow"/> through
        /// <c>ParseRowOrGetDefault</c> and flattens the results into one list.
        /// </summary>
        /// <param name="parserProvider"><see cref="IParserProvider"/> that supplies the parsers.</param>
        /// <param name="sourceRow">Source data.</param>
        /// <returns><see cref="IPropertyValue"/> list.</returns>
        public static IReadOnlyList<IPropertyValue> ParseProperties(this IParserProvider parserProvider, IReadOnlyDictionary<string, string> sourceRow)
        {
            // One flattening projection instead of a projection followed by a flatten step.
            return parserProvider
                .GetParsers()
                .SelectMany(parser => ParseRowOrGetDefault(parser, sourceRow))
                .ToList();
        }
コード例 #9
0
 /// <summary>
 /// Creates the parser from its collaborators.
 /// </summary>
 /// <param name="dataProvider">Data provider; must not be null.</param>
 /// <param name="parserProvider">Parser provider; must not be null.</param>
 /// <param name="senderProvider">Sender provider; must not be null.</param>
 /// <param name="validator">Validator; must not be null.</param>
 /// <exception cref="ArgumentNullException">Any argument is null; the first null argument in declaration order is reported.</exception>
 public Parser(
     IDataProvider<string> dataProvider,
     IParserProvider<string, Uri> parserProvider,
     ISenderProvider<Uri> senderProvider,
     IValidator<string> validator)
 {
     // Guards are checked in declaration order.
     if (dataProvider is null)
     {
         throw new ArgumentNullException(nameof(dataProvider));
     }

     if (parserProvider is null)
     {
         throw new ArgumentNullException(nameof(parserProvider));
     }

     if (senderProvider is null)
     {
         throw new ArgumentNullException(nameof(senderProvider));
     }

     if (validator is null)
     {
         throw new ArgumentNullException(nameof(validator));
     }

     this.dataProvider = dataProvider;
     this.parserProvider = parserProvider;
     this.senderProvider = senderProvider;
     this.validator = validator;
 }
コード例 #10
0
 /// <summary>
 /// Creates the handler and stores the supplied collaborators in the instance fields.
 /// </summary>
 /// <param name="parserProvider">Stored in <c>_parserProvider</c>.</param>
 /// <param name="filePathBuilder">Stored in <c>_filePathBuilder</c>.</param>
 /// <param name="blobClientProvider">Stored in <c>_blobClientProvider</c>.</param>
 /// <param name="zipArchiveProvider">Delegate that takes a <see cref="Stream"/> and returns a <see cref="ZipArchive"/>; stored in <c>_zipArchiveProvider</c>.</param>
 /// <param name="deviceDataBuilder">Stored in <c>_deviceDataBuilder</c>.</param>
 public DevicesHandler(
     IParserProvider parserProvider,
     IFilePathBuilder filePathBuilder,
     IBlobClientProvider blobClientProvider,
     Func<Stream, ZipArchive> zipArchiveProvider,
     IDeviceDataBuilder deviceDataBuilder)
 {
     _deviceDataBuilder = deviceDataBuilder;
     _zipArchiveProvider = zipArchiveProvider;
     _blobClientProvider = blobClientProvider;
     _filePathBuilder = filePathBuilder;
     _parserProvider = parserProvider;
 }
コード例 #11
0
        /// <summary>
        /// Applies every parser of <paramref name="parserProvider"/> to <paramref name="sourceRow"/> and collects
        /// the values of the successful parse results.
        /// </summary>
        /// <param name="parserProvider"><see cref="IParserProvider"/> that supplies the parsers.</param>
        /// <param name="sourceRow">Source data.</param>
        /// <returns><see cref="IPropertyValue"/> list; results that are not successful are left out.</returns>
        public static IReadOnlyList<IPropertyValue> ParseProperties(this IParserProvider parserProvider, IReadOnlyDictionary<string, string> sourceRow)
        {
            parserProvider.AssertArgumentNotNull(nameof(parserProvider));
            sourceRow.AssertArgumentNotNull(nameof(sourceRow));

            var successfulValues =
                from parser in parserProvider.GetParsers()
                let parseResult = parser.ParseRowUntyped(sourceRow)
                where parseResult.IsSuccess
                select parseResult.Value;

            return successfulValues.ToList();
        }
コード例 #12
0
        /// <summary>
        /// Maps rows to entities: rows are turned into dictionaries, parsed into property values by
        /// <paramref name="parserProvider"/> and passed to <paramref name="factory"/>.
        /// </summary>
        /// <typeparam name="T">Entity type.</typeparam>
        /// <param name="rows">Source rows.</param>
        /// <param name="parserProvider"><see cref="IParserProvider"/> used for reading and parsing the rows.</param>
        /// <param name="factory">
        /// Creates an entity from the parsed property values. When null, the entity is created with
        /// <see cref="Activator.CreateInstance(Type, object[])"/>, passing the property value list as the constructor argument.
        /// </param>
        /// <returns>Enumeration of <typeparamref name="T"/>.</returns>
        public static IEnumerable<T> MapRows<T>(
            this IEnumerable<ExcelElement<Row>> rows,
            IParserProvider parserProvider,
            Func<IReadOnlyList<IPropertyValue>, T>? factory = null)
        {
            rows.AssertArgumentNotNull(nameof(rows));
            parserProvider.AssertArgumentNotNull(nameof(parserProvider));

            // Fall back to reflection-based construction when the caller supplied no factory.
            factory ??= list => (T)Activator.CreateInstance(typeof(T), list);

            var dictionaries = rows.AsDictionaryList(parserProvider);
            var parsedRows = dictionaries.Select(parserProvider.ParseProperties);
            return parsedRows.Select(factory);
        }
コード例 #13
0
ファイル: FeedBuilder.cs プロジェクト: sbrl/PolyFeed
        /// <summary>
        /// Instantiates the <see cref="IParserProvider"/> implementations that
        /// <c>ReflectionUtilities.IterateImplementingTypes</c> reports for the executing assembly, one by one,
        /// and returns the first instance whose <c>Identifier</c> equals <paramref name="identifier"/>.
        /// </summary>
        /// <param name="identifier">Identifier to look for.</param>
        /// <returns>The matching provider instance, or <c>null</c> when none matches.</returns>
        private IParserProvider GetProvider(string identifier)
        {
            IParserProvider match = null;

            IEnumerable<Type> implementations = ReflectionUtilities.IterateImplementingTypes(
                typeof(IParserProvider),
                Assembly.GetExecutingAssembly());

            foreach (Type implementation in implementations)
            {
                IParserProvider instance = (IParserProvider)Activator.CreateInstance(implementation);
                if (instance.Identifier == identifier)
                {
                    // Stop at the first hit; later types are not instantiated.
                    match = instance;
                    break;
                }
            }

            return match;
        }
コード例 #14
0
ファイル: FeedBuilder.cs プロジェクト: sbrl/PolyFeed
        /// <summary>
        /// Requests <c>source.Feed.Url</c>, writes the feed header (generator, id, self link, updated time) to
        /// <c>feed</c> and then hands the response to the parser provider selected by <c>source.Feed.SourceType</c>.
        /// </summary>
        /// <param name="source">Feed source describing the URL and the source type.</param>
        /// <returns>A task that completes once the provider has finished parsing the response.</returns>
        /// <exception cref="ApplicationException">
        /// No provider with an identifier equal to <c>source.Feed.SourceType</c> was found.
        /// </exception>
        public async Task AddSource(FeedSource source)
        {
            await Console.Error.WriteLineAsync("[Builder] Downloading content");

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(source.Feed.Url);
            request.UserAgent = UserAgentHelper.UserAgent;

            // WebResponse is IDisposable; the using block disposes it on every exit path,
            // including the exceptions thrown below.
            using (WebResponse response = await request.GetResponseAsync())
            {
                await Console.Error.WriteLineAsync("[Builder] Generating feed header");

                // Write the header
                await feed.WriteGenerator("Polyfeed", "https://github.com/sbrl/PolyFeed.git", Program.GetProgramVersion());
                await feed.WriteId(source.Feed.Url);
                await feed.Write(new SyndicationLink(new Uri(source.Feed.Url), AtomLinkTypes.Self));

                string lastModified = response.Headers.Get("last-modified");
                if (string.IsNullOrWhiteSpace(lastModified))
                {
                    // No usable header: fall back to the current time.
                    await feed.WriteUpdated(DateTimeOffset.Now);
                }
                else
                {
                    // The header is machine-readable text, so parse it independently of the current culture.
                    await feed.WriteUpdated(DateTimeOffset.Parse(lastModified, System.Globalization.CultureInfo.InvariantCulture));
                }

                IParserProvider provider = GetProvider(source.Feed.SourceType);
                if (provider == null)
                {
                    throw new ApplicationException($"Error: A provider for the source type {source.Feed.SourceType} wasn't found.");
                }

                provider.SetOutputFeed(feed, xml);
                await provider.ParseWebResponse(source, response);
            }

            await Console.Error.WriteLineAsync("[Builder] Done!");
        }
コード例 #15
0
        /// <summary>
        /// Maps the rows of <paramref name="sheet"/> to entities with the specified <paramref name="parserProvider"/>.
        /// </summary>
        /// <typeparam name="T">Entity type.</typeparam>
        /// <param name="sheet">Sheet to read the rows from.</param>
        /// <param name="parserProvider"><see cref="IParserProvider"/> forwarded to <c>MapRows</c>.</param>
        /// <param name="factory">
        /// Creates an entity from the parsed property values. When null, the entity is created with
        /// <see cref="Activator.CreateInstance(Type, object[])"/>, passing the property value list as the constructor argument.
        /// </param>
        /// <returns>Enumeration of <typeparamref name="T"/>; an empty array when the sheet is empty.</returns>
        public static IEnumerable<T> GetRowsAs<T>(
            this ExcelElement<Sheet> sheet,
            IParserProvider parserProvider,
            Func<IReadOnlyList<IPropertyValue>, T>? factory = null)
        {
            // An empty sheet has nothing to map.
            if (sheet.IsEmpty())
            {
                return Array.Empty<T>();
            }

            factory ??= list => (T)Activator.CreateInstance(typeof(T), list);

            var sheetRows = sheet.GetRows();
            return sheetRows.MapRows(parserProvider, factory);
        }
コード例 #16
0
        /// <summary>
        /// Gets value for each header.
        /// <see cref="IPropertyParser"/> will be attached to cells where cell header name same as <see cref="IPropertyParser.SourceName"/>.
        /// </summary>
        /// <param name="row">Row to read.</param>
        /// <param name="headers">Headers; the result has one entry per header, in the same order.</param>
        /// <param name="parserProvider">Supplies the parsers that are matched against the header names.</param>
        /// <param name="nullValue">Value used when the row has no cell for a header; also passed to <c>GetCellValue</c>.</param>
        /// <returns>Cell values aligned with <paramref name="headers"/>; an empty array when the row is empty.</returns>
        public static string[] GetRowValues(
            this ExcelElement<Row> row,
            ExcelElement<HeaderCell>[] headers,
            IParserProvider parserProvider,
            string? nullValue = null)
        {
            if (row.IsEmpty())
            {
                return Array.Empty<string>();
            }

            var rowCells = row.GetRowCells();
            var values = new string[headers.Length];

            for (int column = 0; column < values.Length; column++)
            {
                var currentHeader = headers[column];

                // Locate the cell that sits in the header's column.
                var matchingCell = rowCells.FirstOrDefault(
                    candidate => candidate.Data.CellReference.GetColumnReference() == currentHeader.Data.ColumnReference);

                if (matchingCell == null)
                {
                    // The row has no cell in this column.
                    values[column] = nullValue;
                    continue;
                }

                // Attach the parser whose source name equals the header name, if there is one.
                var columnParser = parserProvider.GetParsers().FirstOrDefault(candidate => candidate.SourceName == currentHeader.Data.Name);
                if (columnParser != null)
                {
                    matchingCell.SetMetadata(columnParser);
                }

                values[column] = matchingCell.GetCellValue(nullValue);
            }

            return values;
        }
コード例 #17
0
 /// <summary>
 /// Wraps <paramref name="parserProvider"/> in a new <see cref="CachedParserProvider"/>.
 /// </summary>
 /// <param name="parserProvider">Parser provider to cache.</param>
 /// <returns>Cached parser provider.</returns>
 public static IParserProvider Cached(this IParserProvider parserProvider)
     => new CachedParserProvider(parserProvider);
コード例 #18
0
        /// <summary>
        /// Initializes a new instance of the <see cref="CachedParserProvider"/> class.
        /// The parsers of <paramref name="parserProvider"/> are enumerated once here and kept as an array.
        /// </summary>
        /// <param name="parserProvider">Parser provider to cache.</param>
        public CachedParserProvider(IParserProvider parserProvider)
        {
            parserProvider.AssertArgumentNotNull(nameof(parserProvider));

            var sourceParsers = parserProvider.GetParsers();
            _parsers = sourceParsers.ToArray();
        }
コード例 #19
0
ファイル: ParserManager.cs プロジェクト: ForkerTeam/Forker
 /// <summary>
 /// Creates the manager from a parser provider and a game grouper.
 /// </summary>
 /// <param name="provider">Assigned to <c>Provider</c>.</param>
 /// <param name="grouper">Assigned to <c>grouper</c>.</param>
 public ParserManager(IParserProvider provider, IGameGrouper grouper)
 {
     // The grouper is assigned before Provider, as Provider may be a property with a setter.
     this.grouper = grouper;
     Provider = provider;
 }