/// <summary>
/// Concatenates the output of two count modules, orders the documents by the
/// integer metadata value "A" in descending order, and checks the resulting
/// content sequence.
/// </summary>
public async Task OrdersInDescendingOrder()
{
    // Given
    List<string> gathered = new List<string>();
    CountModule firstCounter = new CountModule("A")
    {
        AdditionalOutputs = 4,
        EnsureInputDocument = true
    };
    CountModule secondCounter = new CountModule("A")
    {
        AdditionalOutputs = 2,
        EnsureInputDocument = true
    };
    ConcatDocuments concat = new ConcatDocuments(secondCounter);
    OrderDocuments descendingByA = new OrderDocuments(Config.FromDocument<int>("A")).Descending();
    ForEachDocument collector = new ExecuteConfig(
        Config.FromDocument(async doc =>
        {
            // Record each document's content in the order the module sees it.
            gathered.Add(await doc.GetContentStringAsync());
            return (IDocument)null;
        })).ForEachDocument();

    // When
    await ExecuteAsync(firstCounter, concat, descendingByA, collector);

    // Then
    gathered.Count.ShouldBe(20);
    gathered.ShouldBe(new[]
    {
        "515", "514", "513", "412", "411", "410", "39", "38", "37", "26",
        "5", "25", "4", "24", "3", "13", "2", "12", "1", "11"
    });
}
/// <summary>
/// Fetches a document through <c>_scrape</c> and returns it under a freshly
/// generated GUID key.
/// </summary>
/// <param name="executeConfig">
/// Step configuration; it is cast to <c>NavigateExecuteConfig</c>.
/// </param>
/// <returns>
/// A dictionary with a single entry on success, or an empty dictionary when
/// anything inside the step failed (the failure is traced, not rethrown).
/// </returns>
public Dictionary<string, string> ScrapeData(ExecuteConfig executeConfig)
{
    var result = new Dictionary<string, string>();
    try
    {
        var config = (NavigateExecuteConfig)executeConfig;

        // Either navigate to the current document text or to the configured pattern.
        string url = config.CanNavigateByDoc ? _document : executeConfig.Pattern;

        string docResult;
        if (config.HeaderConfigs != null && config.HeaderConfigs.Any())
        {
            docResult = _scrape.GetDocumentWithHeader(url, config.HeaderConfigs, executeConfig.Interval);
        }
        else
        {
            docResult = _scrape.GetDocument(url, executeConfig.Interval);
        }

        result.Add(Guid.NewGuid().ToString(), docResult);
    }
    catch (Exception ex)
    {
        // Best effort: callers still receive an empty result, but the failure
        // is no longer silently discarded.
        System.Diagnostics.Trace.TraceError("ScrapeData (navigate) failed: {0}", ex);
    }

    return result;
}
/// <summary>
/// Orders documents by the integer metadata value "A" and then, via
/// <c>ThenBy</c>, by "B", and checks the resulting content sequence.
/// </summary>
public async Task OrdersThenByInAscendingOrder()
{
    // Given
    List<string> gathered = new List<string>();
    CountModule counterA = new CountModule("A")
    {
        AdditionalOutputs = 4,
        EnsureInputDocument = true
    };
    CountModule counterB = new CountModule("B")
    {
        AdditionalOutputs = 1
    };
    OrderDocuments ordering = new OrderDocuments(Config.FromDocument<int>("A"))
        .ThenBy(Config.FromDocument(d => d.GetInt("B")));
    ForEachDocument collector = new ExecuteConfig(
        Config.FromDocument(async doc =>
        {
            gathered.Add(await doc.GetContentStringAsync());
            return (IDocument)null;
        })).ForEachDocument();

    // When
    await ExecuteAsync(counterA, counterB, ordering, collector);

    // Then
    // (4+1) outputs from the first counter * (1+1) outputs from the second.
    Assert.AreEqual(10, gathered.Count);
    CollectionAssert.AreEqual(
        new[] { "11", "12", "23", "24", "35", "36", "47", "48", "59", "510" },
        gathered);
}
/// <summary>
/// Paginates eight counted documents with a page size of three and checks the
/// content of the children attached to each page document.
/// </summary>
public async Task PaginateSetsDocumentsInMetadata()
{
    // Given
    List<IList<string>> pages = new List<IList<string>>();
    CountModule counter = new CountModule("A")
    {
        AdditionalOutputs = 7,
        EnsureInputDocument = true
    };
    PaginateDocuments paginate = new PaginateDocuments(3);
    ForEachDocument collector = new ExecuteConfig(
        Config.FromDocument(async page =>
        {
            // Read the content of every child document on this page.
            List<string> childContent = await page.GetChildren()
                .ToAsyncEnumerable()
                .SelectAwait(async child => await child.GetContentStringAsync())
                .ToListAsync();
            pages.Add(childContent);
        })).ForEachDocument();

    // When
    await ExecuteAsync(counter, paginate, collector);

    // Then
    Assert.AreEqual(3, pages.Count);
    CollectionAssert.AreEqual(new[] { "1", "2", "3" }, pages[0]);
    CollectionAssert.AreEqual(new[] { "4", "5", "6" }, pages[1]);
    CollectionAssert.AreEqual(new[] { "7", "8" }, pages[2]);
}
/// <summary>
/// Creates three documents from (content, metadata) tuples and checks that both
/// the content and the "Foo" metadata value of each document are observed in order.
/// </summary>
public async Task ContentAndMetadataReturnsCorrectDocuments()
{
    // Given
    List<string> content = new List<string>();
    List<object> values = new List<object>();
    CreateDocuments documents = new CreateDocuments(
        Tuple.Create("A", new Dictionary<string, object> { { "Foo", "a" } }.AsEnumerable()),
        Tuple.Create("B", new Dictionary<string, object> { { "Foo", "b" } }.AsEnumerable()),
        Tuple.Create("C", new Dictionary<string, object> { { "Foo", "c" } }.AsEnumerable()));
    ForEachDocument gatherData = new ExecuteConfig(
        Config.FromDocument(async d =>
        {
            content.Add(await d.GetStringAsync());
            values.Add(d["Foo"]);
            return (object)null;
        })).ForEachDocument();

    // When
    // The returned documents are not inspected; the assertions use the gathered lists.
    await ExecuteAsync(documents, gatherData);

    // Then
    Assert.AreEqual(3, content.Count);
    Assert.AreEqual(3, values.Count);
    CollectionAssert.AreEqual(new[] { "A", "B", "C" }, content);
    CollectionAssert.AreEqual(new[] { "a", "b", "c" }, values);
}
/// <summary>
/// Loads <c>_document</c> as HTML, selects nodes with the configured pattern
/// and returns the inner HTML of every match, each under a new GUID key.
/// </summary>
/// <param name="executeConfig">
/// Step configuration; <c>Pattern</c> is passed to <c>SelectNodes</c>.
/// </param>
/// <returns>
/// One entry per matched node; empty when nothing matched or when the step
/// failed (the failure is traced, not rethrown).
/// </returns>
public Dictionary<string, string> ScrapeData(ExecuteConfig executeConfig)
{
    var result = new Dictionary<string, string>();
    try
    {
        var htmlDoc = new HtmlDocument();
        htmlDoc.LoadHtml(_document);

        // A null selection is treated as "no matches".
        var scrapeDatas = htmlDoc.DocumentNode.SelectNodes(executeConfig.Pattern);
        if (scrapeDatas != null)
        {
            foreach (var scrapeData in scrapeDatas)
            {
                result.Add(Guid.NewGuid().ToString(), scrapeData.InnerHtml);
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: keep whatever was collected, but do not hide the failure.
        System.Diagnostics.Trace.TraceError("ScrapeData (HTML value) failed: {0}", ex);
    }

    return result;
}
/// <summary>
/// Builds a schema from the configured definitions, runs the configured query
/// against it and asserts the outcome against the configured expected result.
/// </summary>
/// <param name="configure">Callback that fills in a new <c>ExecuteConfig</c>.</param>
/// <returns>The result of the inner <c>AssertQuery</c> call.</returns>
public ExecutionResult AssertQuery(Action<ExecuteConfig> configure)
{
    var settings = new ExecuteConfig();
    configure(settings);

    // Build the schema, let the caller adjust it, then initialize it.
    var builtSchema = Builder.Build(settings.Definitions);
    settings.ConfigureBuildedSchema?.Invoke(builtSchema);
    builtSchema.Initialize();

    var expected = CreateQueryResult(settings.ExpectedResult);

    return AssertQuery(
        options =>
        {
            options.Schema = builtSchema;
            options.Query = settings.Query;
            options.Variables = settings.Variables.ToInputs();
            options.Root = settings.Root;
            options.ThrowOnUnhandledException = settings.ThrowOnUnhandledException;
            options.Listeners.AddRange(settings.Listeners);
        },
        expected);
}
/// <summary>
/// Creates three documents from metadata dictionaries and checks that the
/// "Foo" value of each document is observed in order.
/// </summary>
public async Task MetadataReturnsCorrectDocuments()
{
    // Given
    List<object> values = new List<object>();
    CreateDocuments documents = new CreateDocuments(
        new Dictionary<string, object> { { "Foo", "a" } },
        new Dictionary<string, object> { { "Foo", "b" } },
        new Dictionary<string, object> { { "Foo", "c" } });
    ForEachDocument gatherData = new ExecuteConfig(
        Config.FromDocument(d =>
        {
            values.Add(d["Foo"]);
            return (object)null;
        })).ForEachDocument();

    // When
    // The returned documents are not inspected; the assertions use the gathered list.
    await ExecuteAsync(documents, gatherData);

    // Then
    Assert.AreEqual(3, values.Count);
    CollectionAssert.AreEqual(new[] { "a", "b", "c" }, values);
}
/// <summary>
/// Groups eight counted documents by two keys each ("A" modulo 3, and the
/// constant 3), orders the groups by group key, and checks the children of
/// every group document.
/// </summary>
public async Task SetsDocumentsInMetadata()
{
    // Given
    List<IList<string>> content = new List<IList<string>>();
    CountModule count = new CountModule("A")
    {
        AdditionalOutputs = 7,
        EnsureInputDocument = true
    };
    GroupDocuments groupByMany = new GroupDocuments(Config.FromDocument(d => new[] { d.GetInt("A") % 3, 3 }));
    OrderDocuments orderBy = new OrderDocuments(Config.FromDocument<int>(Keys.GroupKey));
    ForEachDocument gatherData = new ExecuteConfig(
        Config.FromDocument(async d =>
        {
            // ToListAsync already yields a fresh list, so no extra copy is needed.
            List<string> groupContent = await d.GetChildren()
                .ToAsyncEnumerable()
                .SelectAwait(async x => await x.GetContentStringAsync())
                .ToListAsync();
            content.Add(groupContent);
            return (object)null;
        })).ForEachDocument();

    // When
    // The returned documents are not inspected; the assertions use the gathered list.
    await ExecuteAsync(count, groupByMany, orderBy, gatherData);

    // Then
    Assert.AreEqual(4, content.Count);
    CollectionAssert.AreEquivalent(new[] { "3", "6" }, content[0]);
    CollectionAssert.AreEquivalent(new[] { "1", "4", "7" }, content[1]);
    CollectionAssert.AreEquivalent(new[] { "2", "5", "8" }, content[2]);
    CollectionAssert.AreEquivalent(new[] { "1", "2", "3", "4", "5", "6", "7", "8" }, content[3]);
}
/// <summary>
/// Loads <c>_document</c> as HTML, selects nodes with the configured pattern
/// and returns the value of the configured attribute of every match, each
/// under a new GUID key. When <c>EnclosePrevious</c> is set it is prepended to
/// every value.
/// </summary>
/// <param name="executeConfig">
/// Step configuration; it is cast to <c>HTMLAttributeExecuteConfig</c>.
/// </param>
/// <returns>
/// One entry per matched node that carries the attribute; empty when nothing
/// matched or when the step failed (the failure is traced, not rethrown).
/// </returns>
public Dictionary<string, string> ScrapeData(ExecuteConfig executeConfig)
{
    var result = new Dictionary<string, string>();
    try
    {
        var config = (HTMLAttributeExecuteConfig)executeConfig;
        var htmlDoc = new HtmlDocument();
        htmlDoc.LoadHtml(_document);

        var scrapeDatas = htmlDoc.DocumentNode.SelectNodes(config.Pattern);
        if (scrapeDatas != null)
        {
            string attr = config.HtmlAttribute;
            string prefix = config.EnclosePrevious;

            foreach (var scrapeData in scrapeDatas)
            {
                // Skip nodes without the attribute. Previously the null lookup
                // threw, the empty catch swallowed it, and every remaining
                // node was silently dropped.
                var attribute = scrapeData.Attributes[attr];
                if (attribute == null)
                {
                    continue;
                }

                string dicValue = attribute.Value;
                if (!string.IsNullOrEmpty(prefix))
                {
                    dicValue = prefix + dicValue;
                }

                result.Add(Guid.NewGuid().ToString(), dicValue);
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: keep whatever was collected, but do not hide the failure.
        System.Diagnostics.Trace.TraceError("ScrapeData (HTML attribute) failed: {0}", ex);
    }

    return result;
}
/// <summary>
/// Runs an engine with three source pipelines and a fourth pipeline that
/// replaces its documents with those of "Baz" and "Foo", then checks that the
/// six documents from those two pipelines were gathered.
/// </summary>
public async Task SpecifiedPipelineDocumentsAreReturnedInCorrectOrder()
{
    // Given
    List<string> content = new List<string>();
    IServiceCollection serviceCollection = new ServiceCollection()
        .AddSingleton<ILoggerProvider>(new TestLoggerProvider());
    Engine engine = new Engine(serviceCollection);
    ForEachDocument gatherData = new ExecuteConfig(
        Config.FromDocument(async d => content.Add(await d.GetContentStringAsync()))).ForEachDocument();
    engine.Pipelines.Add("Foo", new TestPipeline(new CreateDocuments("A", "B", "C", "D")));
    engine.Pipelines.Add("Bar", new TestPipeline(new CreateDocuments("E", "F")));
    engine.Pipelines.Add("Baz", new TestPipeline(new CreateDocuments("G", "H")));
    engine.Pipelines.Add(
        new TestPipeline(new ReplaceDocuments("Baz", "Foo"), gatherData)
            .WithDependencies("Foo", "Bar", "Baz"));

    // When
    // CancellationTokenSource is IDisposable; release it once execution finishes.
    using (CancellationTokenSource cancellationTokenSource = new CancellationTokenSource())
    {
        await engine.ExecuteAsync(cancellationTokenSource.Token);
    }

    // Then
    Assert.AreEqual(6, content.Count);
    CollectionAssert.AreEquivalent(new[] { "G", "H", "A", "B", "C", "D" }, content);
}
/// <summary>
/// Groups by the "GroupMetadata" key when some documents (those whose "A"
/// value is divisible by 3) do not carry that key, and checks the group keys
/// that are produced.
/// </summary>
public async Task GroupByMetadataKeyWithMissingMetadata()
{
    // Given
    List<int> groupKey = new List<int>();
    CountModule count = new CountModule("A")
    {
        AdditionalOutputs = 7,
        EnsureInputDocument = true
    };
    ForEachDocument meta = new ExecuteConfig(
        Config.FromDocument(d =>
        {
            // Documents with a remainder of 0 are passed through without the key.
            int groupMetadata = d.GetInt("A") % 3;
            return groupMetadata == 0
                ? d
                : d.Clone(new MetadataItems { { "GroupMetadata", new object[] { groupMetadata, 3 } } });
        })).ForEachDocument();
    GroupDocuments groupByMany = new GroupDocuments("GroupMetadata");
    ForEachDocument gatherData = new ExecuteConfig(
        Config.FromDocument(d =>
        {
            groupKey.Add(d.GetInt(Keys.GroupKey));
            return (object)null;
        })).ForEachDocument();

    // When
    // The returned documents are not inspected; the assertion uses the gathered keys.
    await ExecuteAsync(count, meta, groupByMany, gatherData);

    // Then
    CollectionAssert.AreEquivalent(new[] { 1, 2, 3 }, groupKey);
}
/// <summary>
/// A context-level config that yields <c>null</c> must not make execution
/// throw when there are no input documents.
/// </summary>
public void ContextConfigDoesNotThrowForNullResult()
{
    // Given
    ExecuteConfig nullReturningModule = new ExecuteConfig(Config.FromContext(ctx => (object)null));
    TestDocument[] noInputs = Array.Empty<TestDocument>();

    // When, Then
    Should.NotThrow(() => ExecuteAsync(noInputs, nullReturningModule).GetAwaiter().GetResult());
}
/// <summary>
/// A document-level config that yields <c>null</c> must not make execution
/// throw; the test passes when the awaited call completes.
/// </summary>
public async Task DocumentConfigDoesNotThrowForNullResult()
{
    // Given
    ExecuteConfig nullReturningModule = new ExecuteConfig(
        Config.FromDocument((doc, ctx) => (object)null));

    // When
    await ExecuteAsync(nullReturningModule);

    // Then
    // No explicit assertion: reaching this point means nothing was thrown.
}
/// <summary>
/// A module constructed from a single string value produces exactly one
/// document whose content is that string.
/// </summary>
public async Task ValueConfigReturnsNewDocumentWithStringContent()
{
    // Given
    ExecuteConfig module = new ExecuteConfig("Foo");

    // When
    IReadOnlyList<TestDocument> outputs = await ExecuteAsync(module);

    // Then
    TestDocument only = outputs.Single();
    only.Content.ShouldBe("Foo");
}
/// <summary>
/// A module constructed from a string array produces one document per string,
/// in the same order.
/// </summary>
public async Task ValueConfigReturnsNewDocumentsWithStringContent()
{
    // Given
    ExecuteConfig module = new ExecuteConfig(new[] { "Foo", "Bar" });

    // When
    IReadOnlyList<TestDocument> outputs = await ExecuteAsync(module);

    // Then
    IEnumerable<string> actualContent = outputs.Select(doc => doc.Content);
    actualContent.ShouldBe(new[] { "Foo", "Bar" });
}
/// <summary>
/// A context-level config that returns a single document yields that document
/// as the only output.
/// </summary>
public async Task ContextConfigReturnsDocumentForSingleResultDocument()
{
    // Given
    TestDocument expected = new TestDocument();
    ExecuteConfig module = new ExecuteConfig(Config.FromContext(ctx => expected));
    TestDocument[] noInputs = Array.Empty<TestDocument>();

    // When
    TestDocument actual = await ExecuteAsync(noInputs, module).SingleAsync();

    // Then
    actual.ShouldBe(expected);
}
/// <summary>
/// Parses <c>_document</c> as JSON, selects tokens with the configured pattern
/// and returns the string value of every match, each under a new GUID key.
/// </summary>
/// <param name="executeConfig">
/// Step configuration; <c>Pattern</c> is passed to <c>SelectTokens</c>.
/// </param>
/// <returns>
/// One entry per selected token; empty when the document is null, empty or
/// whitespace, or when parsing yields no token.
/// </returns>
public Dictionary<string, string> ScrapeData(ExecuteConfig executeConfig)
{
    var result = new Dictionary<string, string>();

    // Nothing to parse: return an empty result instead of failing in the
    // deserializer or on the SelectTokens call below.
    if (string.IsNullOrWhiteSpace(_document))
    {
        return result;
    }

    var jDocument = JsonConvert.DeserializeObject<JToken>(_document);
    if (jDocument == null)
    {
        // Defensive guard in case the deserializer returns no token.
        return result;
    }

    foreach (var scrapeData in jDocument.SelectTokens(executeConfig.Pattern))
    {
        string dicValue = scrapeData.Value<string>();
        result.Add(Guid.NewGuid().ToString(), dicValue);
    }

    return result;
}
/// <summary>
/// Creates the <c>ExecuteConfig</c> subtype that matches the "ScrapeType"
/// value of the given JSON step definition.
/// </summary>
/// <param name="config">JSON token holding a "ScrapeType" value plus step settings.</param>
/// <returns>
/// The deserialized configuration, or <c>null</c> for scrape types that have
/// no configuration built here (WebDriver, Header and any unlisted value).
/// </returns>
protected ExecuteConfig BuildExecuteConfig(JToken config)
{
    var typeName = config.SelectToken("ScrapeType").Value<string>();
    var scrapeType = (ScrapingType)Enum.Parse(typeof(ScrapingType), typeName);

    switch (scrapeType)
    {
        case ScrapingType.HTMLValue:
        case ScrapingType.HTMLDoc:
        case ScrapingType.JsonDoc:
        case ScrapingType.JsonVlue:
            return JsonConvert.DeserializeObject<ExecuteConfig>(config.ToString());

        case ScrapingType.ApiUrl:
        case ScrapingType.WebUrl:
            return JsonConvert.DeserializeObject<NavigateExecuteConfig>(config.ToString());

        case ScrapingType.Text:
            // Text steps populate themselves from the raw token instead of being deserialized.
            ExecuteConfig textConfig = new TextExecutorConfig();
            textConfig.SetData(config);
            return textConfig;

        case ScrapingType.HTMLAttribute:
            return JsonConvert.DeserializeObject<HTMLAttributeExecuteConfig>(config.ToString());

        case ScrapingType.ReplaceTemp:
            return JsonConvert.DeserializeObject<ReplaceTempExecuteConfig>(config.ToString());

        case ScrapingType.Paging:
            return JsonConvert.DeserializeObject<PagingExecuteConfig>(config.ToString());

        case ScrapingType.WebDriver:
        case ScrapingType.Header:
        default:
            return null;
    }
}
/// <summary>
/// A document-level config that returns a single document yields one output
/// that is equivalent to that document.
/// </summary>
public async Task DocumentConfigReturnsDocumentForSingleResultDocument()
{
    // Given
    TestDocument expected = new TestDocument();
    CountModule counter = new CountModule("A")
    {
        EnsureInputDocument = true
    };
    ExecuteConfig module = new ExecuteConfig(Config.FromDocument((doc, ctx) => expected));

    // When
    IReadOnlyList<TestDocument> outputs = await ExecuteAsync(counter, module);

    // Then
    CollectionAssert.AreEquivalent(expected, outputs.Single());
}
/// <summary>
/// A context-level config that yields <c>null</c> passes the input documents
/// through unchanged.
/// </summary>
public async Task ContextConfigReturnsInputsForNullResult()
{
    // Given
    TestDocument[] inputs =
    {
        new TestDocument(),
        new TestDocument()
    };
    ExecuteConfig nullReturningModule = new ExecuteConfig(Config.FromContext(ctx => (object)null));

    // When
    IReadOnlyList<TestDocument> outputs = await ExecuteAsync(inputs, nullReturningModule);

    // Then
    CollectionAssert.AreEqual(inputs, outputs);
}
/// <summary>
/// A document-level config built from an action (no return value) passes the
/// input documents through with their content intact.
/// </summary>
public async Task DocumentConfigReturnsInputDocumentsForAction()
{
    // Given
    TestDocument[] inputs =
    {
        new TestDocument("Foo"),
        new TestDocument("Bar")
    };
    ExecuteConfig actionModule = new ExecuteConfig(Config.FromDocument((doc, ctx) =>
    {
        // Arbitrary statement so the lambda is an action rather than a function.
        _ = 1 + 1;
    }));

    // When
    IReadOnlyList<TestDocument> outputs = await ExecuteAsync(inputs, actionModule);

    // Then
    IEnumerable<string> actualContent = outputs.Select(x => x.Content);
    actualContent.ShouldBe(new[] { "Foo", "Bar" });
}
/// <summary>
/// A document-level config that returns a non-document value produces output
/// documents whose content is that value; the two outputs are compared
/// ignoring order.
/// </summary>
public async Task DocumentConfigSetsNewContent()
{
    // Given
    TestDocument[] inputs =
    {
        new TestDocument(),
        new TestDocument()
    };
    int counter = 0;
    ExecuteConfig module = new ExecuteConfig(Config.FromDocument((doc, ctx) => (object)counter++));

    // When
    IReadOnlyList<TestDocument> outputs = await ExecuteAsync(inputs, module);

    // Then
    IEnumerable<string> actualContent = outputs.Select(x => x.Content);
    actualContent.ShouldBe(new[] { "0", "1" }, true);
}
/// <summary>
/// Creates four documents from content strings and checks that the content of
/// each document is observed in order.
/// </summary>
public async Task ContentReturnsCorrectDocuments()
{
    // Given
    List<string> content = new List<string>();
    CreateDocuments documents = new CreateDocuments("A", "B", "C", "D");
    ForEachDocument gatherData = new ExecuteConfig(
        Config.FromDocument(async d =>
        {
            content.Add(await d.GetStringAsync());
            return (object)null;
        })).ForEachDocument();

    // When
    // The returned documents are not inspected; the assertions use the gathered list.
    await ExecuteAsync(documents, gatherData);

    // Then
    Assert.AreEqual(4, content.Count);
    CollectionAssert.AreEqual(new[] { "A", "B", "C", "D" }, content);
}
/// <summary>
/// Applies the configured text operation (split, regex or replace) to
/// <c>_document</c> and returns the outcome under a new GUID key.
/// </summary>
/// <param name="ExecuteConfig">
/// Step configuration; it is cast to <c>TextExecutorConfig</c>.
/// </param>
/// <returns>
/// A dictionary with exactly one entry. The value is an empty string when the
/// document is null or empty, or when the handle type is Join or unrecognized.
/// </returns>
public Dictionary<string, string> ScrapeData(ExecuteConfig ExecuteConfig)
{
    var output = new Dictionary<string, string>();
    var textConfig = (TextExecutorConfig)ExecuteConfig;
    string entryKey = Guid.NewGuid().ToString();

    // Settings describing which text operation to run.
    var handleConfig = textConfig.TextHandleConfig;

    string processed = "";
    if (!string.IsNullOrEmpty(_document))
    {
        switch (handleConfig.TextHanldeType)
        {
            case TextHanldeType.Split:
                processed = SlipHandler((TextSplitingHandleConfig)handleConfig);
                break;

            case TextHanldeType.Regex:
                processed = RegexHandler((TextRegexHandleConfig)handleConfig);
                break;

            case TextHanldeType.Replace:
                processed = ReplaceText((TextReplaceHandleConfig)handleConfig);
                break;

            case TextHanldeType.Join:
            default:
                // Join is not implemented here; it and unknown types yield an empty string.
                break;
        }
    }

    output.Add(entryKey, processed);
    return output;
}
/// <summary>
/// A context-level config is invoked once for the whole set of inputs, not
/// once per input document.
/// </summary>
public async Task ContextConfigRunsModuleAgainstInputDocuments()
{
    // Given
    TestDocument[] inputs =
    {
        new TestDocument(),
        new TestDocument()
    };
    int invocations = 0;
    ExecuteConfig module = new ExecuteConfig(Config.FromContext(ctx =>
    {
        invocations++;
        return (object)null;
    }));

    // When
    await ExecuteAsync(inputs, module);

    // Then
    invocations.ShouldBe(1);
}
/// <summary>
/// Performs the web-driver action selected by the configuration's event type
/// and then, if a driver is present, reads data back from it.
/// </summary>
/// <param name="config">
/// Step configuration; it is cast to <c>WebDriveExecuteConfig</c>.
/// </param>
/// <returns>
/// The value of <c>GetDataFromDriver</c> for the configured response type, or
/// an empty string when <c>_driver</c> is null.
/// </returns>
public string Execute(ExecuteConfig config)
{
    var driverConfig = (WebDriveExecuteConfig)config;

    switch (driverConfig.EventType)
    {
        case EventType.Click:
            Click();
            break;

        case EventType.Navigate:
            Navigate(driverConfig.Pattern);
            break;

        case EventType.Find:
            FindElement(driverConfig.FindType, driverConfig.Pattern);
            break;

        case EventType.Get:
            GetElement(driverConfig.FindType, driverConfig.Pattern);
            break;

        case EventType.FindFromElements:
            FindFromElements(driverConfig.FindType, driverConfig.Pattern);
            break;

        default:
            // Unrecognized event types perform no action.
            break;
    }

    var requestedResponse = driverConfig.ResponseType;

    // Data can only be read back when a driver exists.
    if (_driver == null)
    {
        return "";
    }

    return GetDataFromDriver(requestedResponse);
}
/// <summary>
/// Groups documents by their "Tag" values using
/// <c>StringComparer.OrdinalIgnoreCase</c> and checks the resulting group keys.
/// </summary>
public async Task CaseInsensitiveStringComparer()
{
    // Given
    List<object> groupKey = new List<object>();
    ExecuteConfig meta = new ExecuteConfig(Config.FromContext(
        c => new IDocument[]
        {
            c.CreateDocument(new MetadataItems { { "Tag", new object[] { "A", "b" } } }),
            c.CreateDocument(new MetadataItems { { "Tag", new object[] { "B" } } }),
            c.CreateDocument(new MetadataItems { { "Tag", new object[] { "C" } } }),
            c.CreateDocument(new MetadataItems { { "Tag", new object[] { "c" } } }),
            c.CreateDocument(new MetadataItems { { "Tag", new object[] { 1 } } }),
            c.CreateDocument(new MetadataItems { { "Tag", new object[] { "1" } } })
        }));
    GroupDocuments groupByMany = new GroupDocuments("Tag").WithComparer(StringComparer.OrdinalIgnoreCase);
    ForEachDocument gatherData = new ExecuteConfig(
        Config.FromDocument(d =>
        {
            groupKey.Add(d.Get(Keys.GroupKey));
            return (object)null;
        })).ForEachDocument();

    // When
    // The returned documents are not inspected; the assertion uses the gathered keys.
    await ExecuteAsync(meta, groupByMany, gatherData);

    // Then
    CollectionAssert.AreEquivalent(new object[] { "A", "b", "C", 1 }, groupKey);
}
/// <summary>
/// Placeholder scraper step: currently performs no work and always returns an
/// empty dictionary.
/// </summary>
/// <param name="executeConfig">Step configuration; currently unused.</param>
/// <returns>An empty dictionary.</returns>
public Dictionary<string, string> ScrapeData(ExecuteConfig executeConfig)
{
    // NOTE: the header-aware implementation is disabled. The disabled code
    // fetched executeConfig.Pattern via _scrape.GetDocumentWithHeader (passing
    // HeaderConfigs and Interval) and stored the result under
    // executeConfig.AttributeName, or under a new GUID when that was empty.
    // The try/catch that wrapped it guarded nothing and has been removed.
    return new Dictionary<string, string>();
}
/// <summary>
/// Groups eight counted documents by two keys each ("A" modulo 3, and the
/// constant 3) and checks the group key carried by every group document.
/// </summary>
public async Task SetsCorrectMetadata()
{
    // Given
    List<int> groupKey = new List<int>();
    CountModule count = new CountModule("A")
    {
        AdditionalOutputs = 7,
        EnsureInputDocument = true
    };
    GroupDocuments groupByMany = new GroupDocuments(Config.FromDocument(d => new[] { d.GetInt("A") % 3, 3 }));
    ForEachDocument gatherData = new ExecuteConfig(
        Config.FromDocument(d =>
        {
            groupKey.Add(d.GetInt(Keys.GroupKey));
            return d;
        })).ForEachDocument();

    // When
    // The returned documents are not inspected; the assertion uses the gathered keys.
    await ExecuteAsync(count, groupByMany, gatherData);

    // Then
    CollectionAssert.AreEquivalent(new[] { 0, 1, 2, 3 }, groupKey);
}