Пример #1
0
            /// <summary>
            /// Runs a count module, concatenates the output of a second count module,
            /// orders everything descending by the integer metadata value "A", and checks
            /// the content strings gathered from the ordered documents.
            /// </summary>
            public async Task OrdersInDescendingOrder()
            {
                // Given
                var gathered = new List<string>();
                var firstCount = new CountModule("A")
                {
                    AdditionalOutputs = 4,
                    EnsureInputDocument = true
                };
                var secondCount = new CountModule("A")
                {
                    AdditionalOutputs = 2,
                    EnsureInputDocument = true
                };
                ConcatDocuments concat = new ConcatDocuments(secondCount);
                OrderDocuments orderBy = new OrderDocuments(Config.FromDocument<int>("A")).Descending();

                // Records each document's content in the order the module visits it.
                ForEachDocument gatherData = new ExecuteConfig(
                    Config.FromDocument(async d =>
                    {
                        gathered.Add(await d.GetContentStringAsync());
                        return (IDocument)null;
                    })).ForEachDocument();
                string[] expected =
                {
                    "515", "514", "513", "412", "411", "410", "39", "38", "37", "26",
                    "5", "25", "4", "24", "3", "13", "2", "12", "1", "11"
                };

                // When
                await ExecuteAsync(firstCount, concat, orderBy, gatherData);

                // Then
                gathered.Count.ShouldBe(20);
                gathered.ShouldBe(expected);
            }
Пример #2
0
        /// <summary>
        /// Fetches a document for a navigation step and returns it under a freshly
        /// generated GUID key.
        /// </summary>
        /// <param name="executeConfig">
        /// Step configuration; it is cast to <c>NavigateExecuteConfig</c>.
        /// </param>
        /// <returns>
        /// A dictionary with a single GUID-keyed entry holding the fetched document,
        /// or an empty dictionary when anything fails (best-effort behavior).
        /// </returns>
        public Dictionary<string, string> ScrapeData(ExecuteConfig executeConfig)
        {
            var result = new Dictionary<string, string>();

            try
            {
                var config = (NavigateExecuteConfig)executeConfig;

                // The URL comes from the current document when the step navigates by
                // document; otherwise it comes from the configured pattern.
                string url = config.CanNavigateByDoc ? _document : executeConfig.Pattern;

                string docResult;
                if (config.HeaderConfigs != null && config.HeaderConfigs.Any())
                {
                    docResult = _scrape.GetDocumentWithHeader(url, config.HeaderConfigs, executeConfig.Interval);
                }
                else
                {
                    docResult = _scrape.GetDocument(url, executeConfig.Interval);
                }

                result.Add(Guid.NewGuid().ToString(), docResult);
            }
            catch (Exception ex)
            {
                // Still best-effort (callers get an empty result), but the failure is
                // no longer swallowed without a trace.
                System.Diagnostics.Trace.TraceError("Navigate ScrapeData failed: {0}", ex);
            }

            return result;
        }
Пример #3
0
            /// <summary>
            /// Orders documents by metadata "A" and then by metadata "B" (both ascending)
            /// and checks the content strings gathered from the ordered documents.
            /// </summary>
            public async Task OrdersThenByInAscendingOrder()
            {
                // Given
                var gathered = new List<string>();
                var countA = new CountModule("A")
                {
                    AdditionalOutputs = 4,
                    EnsureInputDocument = true
                };
                var countB = new CountModule("B")
                {
                    AdditionalOutputs = 1
                };
                OrderDocuments orderBy = new OrderDocuments(Config.FromDocument<int>("A"))
                    .ThenBy(Config.FromDocument(d => d.GetInt("B")));

                // Records each document's content in the order the module visits it.
                ForEachDocument gatherData = new ExecuteConfig(
                    Config.FromDocument(async d =>
                    {
                        gathered.Add(await d.GetContentStringAsync());
                        return (IDocument)null;
                    })).ForEachDocument();
                string[] expected = { "11", "12", "23", "24", "35", "36", "47", "48", "59", "510" };

                // When
                await ExecuteAsync(countA, countB, orderBy, gatherData);

                // Then
                // Presumably (4+1) * (1+1) documents -- confirm against CountModule.
                Assert.AreEqual(10, gathered.Count);
                CollectionAssert.AreEqual(expected, gathered);
            }
Пример #4
0
            /// <summary>
            /// Paginates the output of a count module with a page size of 3 and checks
            /// the content of the child documents attached to each page.
            /// </summary>
            public async Task PaginateSetsDocumentsInMetadata()
            {
                // Given
                var pages = new List<IList<string>>();
                var count = new CountModule("A")
                {
                    AdditionalOutputs = 7,
                    EnsureInputDocument = true
                };
                PaginateDocuments paginate = new PaginateDocuments(3);

                // Collects the content of every child document of each page document.
                ForEachDocument gatherData = new ExecuteConfig(
                    Config.FromDocument(async d =>
                    {
                        List<string> pageContent = await d.GetChildren()
                            .ToAsyncEnumerable()
                            .SelectAwait(async x => await x.GetContentStringAsync())
                            .ToListAsync();
                        pages.Add(pageContent);
                    })).ForEachDocument();
                string[][] expectedPages =
                {
                    new[] { "1", "2", "3" },
                    new[] { "4", "5", "6" },
                    new[] { "7", "8" }
                };

                // When
                await ExecuteAsync(count, paginate, gatherData);

                // Then
                Assert.AreEqual(expectedPages.Length, pages.Count);
                for (int i = 0; i < expectedPages.Length; i++)
                {
                    CollectionAssert.AreEqual(expectedPages[i], pages[i]);
                }
            }
Пример #5
0
            /// <summary>
            /// Creates three documents from (content, metadata) tuples and checks that
            /// both the content and the "Foo" metadata value of each one can be read back.
            /// </summary>
            public async Task ContentAndMetadataReturnsCorrectDocuments()
            {
                // Given
                var contents = new List<string>();
                var fooValues = new List<object>();
                CreateDocuments documents = new CreateDocuments(
                    Tuple.Create("A", new Dictionary<string, object> { { "Foo", "a" } }.AsEnumerable()),
                    Tuple.Create("B", new Dictionary<string, object> { { "Foo", "b" } }.AsEnumerable()),
                    Tuple.Create("C", new Dictionary<string, object> { { "Foo", "c" } }.AsEnumerable()));

                // Captures the content and the "Foo" value of every document.
                ForEachDocument gatherData = new ExecuteConfig(
                    Config.FromDocument(async d =>
                    {
                        contents.Add(await d.GetStringAsync());
                        fooValues.Add(d["Foo"]);
                        return (object)null;
                    })).ForEachDocument();

                // When
                await ExecuteAsync(documents, gatherData);

                // Then
                Assert.AreEqual(3, contents.Count);
                Assert.AreEqual(3, fooValues.Count);
                CollectionAssert.AreEqual(new[] { "A", "B", "C" }, contents);
                CollectionAssert.AreEqual(new[] { "a", "b", "c" }, fooValues);
            }
Пример #6
0
        /// <summary>
        /// Parses the current document as HTML, selects the nodes matching the
        /// configured pattern, and returns the inner HTML of each match.
        /// </summary>
        /// <param name="executeConfig">Step configuration; its <c>Pattern</c> is passed to <c>SelectNodes</c>.</param>
        /// <returns>
        /// One GUID-keyed entry per matched node, or an empty or partial dictionary
        /// when nothing matches or a failure occurs (best-effort behavior).
        /// </returns>
        public Dictionary<string, string> ScrapeData(ExecuteConfig executeConfig)
        {
            var result = new Dictionary<string, string>();

            try
            {
                var htmlDoc = new HtmlDocument();
                htmlDoc.LoadHtml(_document);

                // A null node list is treated as "nothing to scrape".
                var scrapeDatas = htmlDoc.DocumentNode.SelectNodes(executeConfig.Pattern);
                if (scrapeDatas != null)
                {
                    foreach (var scrapeData in scrapeDatas)
                    {
                        result.Add(Guid.NewGuid().ToString(), scrapeData.InnerHtml);
                    }
                }
            }
            catch (Exception ex)
            {
                // Still best-effort (callers get whatever was collected so far), but
                // the failure is no longer swallowed without a trace.
                System.Diagnostics.Trace.TraceError("HTML value ScrapeData failed: {0}", ex);
            }

            return result;
        }
Пример #7
0
    /// <summary>
    /// Builds and initializes a schema from a caller-configured <c>ExecuteConfig</c>,
    /// then delegates to the options-based <c>AssertQuery</c> overload with the
    /// expected result created from that configuration.
    /// </summary>
    /// <param name="configure">Callback that fills in the freshly created configuration.</param>
    /// <returns>The result returned by the delegated <c>AssertQuery</c> call.</returns>
    public ExecutionResult AssertQuery(Action<ExecuteConfig> configure)
    {
        var config = new ExecuteConfig();
        configure(config);

        var schema = Builder.Build(config.Definitions);

        // Optional hook that lets the caller adjust the schema before it is initialized.
        var schemaHook = config.ConfigureBuildedSchema;
        if (schemaHook != null)
        {
            schemaHook(schema);
        }

        schema.Initialize();

        var expected = CreateQueryResult(config.ExpectedResult);

        return AssertQuery(
            options =>
            {
                options.Schema = schema;
                options.Query = config.Query;
                options.Variables = config.Variables.ToInputs();
                options.Root = config.Root;
                options.ThrowOnUnhandledException = config.ThrowOnUnhandledException;
                options.Listeners.AddRange(config.Listeners);
            },
            expected);
    }
Пример #8
0
            /// <summary>
            /// Creates three documents from metadata dictionaries and checks that the
            /// "Foo" value of each one can be read back in order.
            /// </summary>
            public async Task MetadataReturnsCorrectDocuments()
            {
                // Given
                var fooValues = new List<object>();
                CreateDocuments documents = new CreateDocuments(
                    new Dictionary<string, object> { { "Foo", "a" } },
                    new Dictionary<string, object> { { "Foo", "b" } },
                    new Dictionary<string, object> { { "Foo", "c" } });

                // Captures the "Foo" value of every document.
                ForEachDocument gatherData = new ExecuteConfig(
                    Config.FromDocument(d =>
                    {
                        fooValues.Add(d["Foo"]);
                        return (object)null;
                    })).ForEachDocument();

                // When
                await ExecuteAsync(documents, gatherData);

                // Then
                Assert.AreEqual(3, fooValues.Count);
                CollectionAssert.AreEqual(new[] { "a", "b", "c" }, fooValues);
            }
            /// <summary>
            /// Groups counted documents by two keys each ("A" modulo 3, and the constant 3),
            /// orders the groups by group key, and checks the children of every group.
            /// </summary>
            public async Task SetsDocumentsInMetadata()
            {
                // Given
                var groups = new List<IList<string>>();
                var count = new CountModule("A")
                {
                    AdditionalOutputs = 7,
                    EnsureInputDocument = true
                };
                GroupDocuments groupByMany = new GroupDocuments(
                    Config.FromDocument(d => new[] { d.GetInt("A") % 3, 3 }));
                OrderDocuments orderBy = new OrderDocuments(Config.FromDocument<int>(Keys.GroupKey));

                // Collects the content of every child document of each group document.
                ForEachDocument gatherData = new ExecuteConfig(
                    Config.FromDocument(async d =>
                    {
                        List<string> groupContent = await d.GetChildren()
                            .ToAsyncEnumerable()
                            .SelectAwait(async x => await x.GetContentStringAsync())
                            .ToListAsync();
                        groups.Add(groupContent);
                        return (object)null;
                    })).ForEachDocument();

                // When
                await ExecuteAsync(count, groupByMany, orderBy, gatherData);

                // Then
                Assert.AreEqual(4, groups.Count);
                CollectionAssert.AreEquivalent(new[] { "3", "6" }, groups[0]);
                CollectionAssert.AreEquivalent(new[] { "1", "4", "7" }, groups[1]);
                CollectionAssert.AreEquivalent(new[] { "2", "5", "8" }, groups[2]);
                CollectionAssert.AreEquivalent(new[] { "1", "2", "3", "4", "5", "6", "7", "8" }, groups[3]);
            }
Пример #10
0
        /// <summary>
        /// Parses the current document as HTML, selects the nodes matching the
        /// configured pattern, and returns the value of the configured attribute for
        /// each match, optionally prefixed with <c>EnclosePrevious</c>.
        /// </summary>
        /// <param name="executeConfig">
        /// Step configuration; it is cast to <c>HTMLAttributeExecuteConfig</c>.
        /// </param>
        /// <returns>
        /// One GUID-keyed entry per matched node that carries the attribute, or an
        /// empty or partial dictionary when a failure occurs (best-effort behavior).
        /// </returns>
        public Dictionary<string, string> ScrapeData(ExecuteConfig executeConfig)
        {
            var result = new Dictionary<string, string>();

            try
            {
                var config = (HTMLAttributeExecuteConfig)executeConfig;
                var htmlDoc = new HtmlDocument();
                htmlDoc.LoadHtml(_document);

                // A null node list is treated as "nothing to scrape".
                var scrapeDatas = htmlDoc.DocumentNode.SelectNodes(config.Pattern);
                if (scrapeDatas != null)
                {
                    foreach (var scrapeData in scrapeDatas)
                    {
                        // The attribute indexer yields null when the node does not have
                        // the attribute. Skip such nodes instead of letting a
                        // NullReferenceException abort the loop and drop later matches.
                        var attribute = scrapeData.Attributes[config.HtmlAttribute];
                        if (attribute == null)
                        {
                            continue;
                        }

                        string dicValue = attribute.Value;
                        if (!string.IsNullOrEmpty(config.EnclosePrevious))
                        {
                            dicValue = config.EnclosePrevious + dicValue;
                        }

                        result.Add(Guid.NewGuid().ToString(), dicValue);
                    }
                }
            }
            catch (Exception ex)
            {
                // Still best-effort (callers get whatever was collected so far), but
                // the failure is no longer swallowed without a trace.
                System.Diagnostics.Trace.TraceError("HTML attribute ScrapeData failed: {0}", ex);
            }

            return result;
        }
Пример #11
0
            /// <summary>
            /// Runs an engine with three source pipelines and a dependent pipeline that
            /// replaces its documents with those of "Baz" and "Foo", then checks which
            /// document contents were gathered.
            /// </summary>
            public async Task SpecifiedPipelineDocumentsAreReturnedInCorrectOrder()
            {
                // Given
                var gathered = new List<string>();
                IServiceCollection services = new ServiceCollection()
                    .AddSingleton<ILoggerProvider>(new TestLoggerProvider());
                Engine engine = new Engine(services);
                var cancellation = new CancellationTokenSource();

                // Records the content of every document that reaches the final pipeline.
                ForEachDocument gatherData = new ExecuteConfig(
                    Config.FromDocument(async d => gathered.Add(await d.GetContentStringAsync()))).ForEachDocument();

                engine.Pipelines.Add("Foo", new TestPipeline(new CreateDocuments("A", "B", "C", "D")));
                engine.Pipelines.Add("Bar", new TestPipeline(new CreateDocuments("E", "F")));
                engine.Pipelines.Add("Baz", new TestPipeline(new CreateDocuments("G", "H")));
                engine.Pipelines.Add(
                    new TestPipeline(new ReplaceDocuments("Baz", "Foo"), gatherData)
                        .WithDependencies("Foo", "Bar", "Baz"));

                // When
                await engine.ExecuteAsync(cancellation.Token);

                // Then
                // AreEquivalent ignores element order, so only membership is verified here.
                Assert.AreEqual(6, gathered.Count);
                CollectionAssert.AreEquivalent(new[] { "G", "H", "A", "B", "C", "D" }, gathered);
            }
            /// <summary>
            /// Groups documents by the "GroupMetadata" key when some documents do not
            /// carry that key, and checks the resulting group keys.
            /// </summary>
            public async Task GroupByMetadataKeyWithMissingMetadata()
            {
                // Given
                var groupKeys = new List<int>();
                var count = new CountModule("A")
                {
                    AdditionalOutputs = 7,
                    EnsureInputDocument = true
                };

                // Documents whose "A" value is a multiple of 3 pass through unchanged
                // (no "GroupMetadata"); all others get { A % 3, 3 } as group metadata.
                ForEachDocument meta = new ExecuteConfig(
                    Config.FromDocument(d =>
                    {
                        int remainder = d.GetInt("A") % 3;
                        return remainder == 0
                            ? d
                            : d.Clone(new MetadataItems
                            {
                                { "GroupMetadata", new object[] { remainder, 3 } }
                            });
                    })).ForEachDocument();
                GroupDocuments groupByMany = new GroupDocuments("GroupMetadata");

                // Records the group key of every group document.
                ForEachDocument gatherData = new ExecuteConfig(
                    Config.FromDocument(d =>
                    {
                        groupKeys.Add(d.GetInt(Keys.GroupKey));
                        return (object)null;
                    })).ForEachDocument();

                // When
                await ExecuteAsync(count, meta, groupByMany, gatherData);

                // Then
                CollectionAssert.AreEquivalent(new[] { 1, 2, 3 }, groupKeys);
            }
            /// <summary>
            /// Checks that executing an <c>ExecuteConfig</c> whose context delegate yields
            /// <c>null</c> against an empty input array does not throw.
            /// </summary>
            public void ContextConfigDoesNotThrowForNullResult()
            {
                // Given
                var module = new ExecuteConfig(Config.FromContext(_ => (object)null));
                var noInputs = Array.Empty<TestDocument>();

                // When, Then
                // The execution is awaited synchronously inside the assertion delegate.
                Should.NotThrow(() => ExecuteAsync(noInputs, module).GetAwaiter().GetResult());
            }
            /// <summary>
            /// Checks that executing an <c>ExecuteConfig</c> whose document delegate yields
            /// <c>null</c> completes; the test passes as long as nothing throws.
            /// </summary>
            public async Task DocumentConfigDoesNotThrowForNullResult()
            {
                // Given
                var module = new ExecuteConfig(Config.FromDocument((doc, ctx) => (object)null));

                // When
                await ExecuteAsync(module);

                // Then
                // No explicit assertion: reaching this point means no exception was thrown.
            }
            /// <summary>
            /// Checks that an <c>ExecuteConfig</c> built from a single string value
            /// produces exactly one document whose content is that string.
            /// </summary>
            public async Task ValueConfigReturnsNewDocumentWithStringContent()
            {
                // Given
                var module = new ExecuteConfig("Foo");

                // When
                IReadOnlyList<TestDocument> outputs = await ExecuteAsync(module);

                // Then
                TestDocument only = outputs.Single();
                only.Content.ShouldBe("Foo");
            }
            /// <summary>
            /// Checks that an <c>ExecuteConfig</c> built from a string array produces
            /// one document per string, in the same order.
            /// </summary>
            public async Task ValueConfigReturnsNewDocumentsWithStringContent()
            {
                // Given
                var module = new ExecuteConfig(new[] { "Foo", "Bar" });

                // When
                IReadOnlyList<TestDocument> outputs = await ExecuteAsync(module);

                // Then
                outputs.Select(x => x.Content).ShouldBe(new[] { "Foo", "Bar" });
            }
            /// <summary>
            /// Checks that an <c>ExecuteConfig</c> whose context delegate returns a
            /// document outputs that document as its single result.
            /// </summary>
            public async Task ContextConfigReturnsDocumentForSingleResultDocument()
            {
                // Given
                var expected = new TestDocument();
                var module = new ExecuteConfig(Config.FromContext(_ => expected));

                // When
                TestDocument actual = await ExecuteAsync(Array.Empty<TestDocument>(), module).SingleAsync();

                // Then
                actual.ShouldBe(expected);
            }
Пример #18
0
        /// <summary>
        /// Parses the current document as JSON, selects the tokens matching the
        /// configured path pattern, and returns each token's string value.
        /// </summary>
        /// <param name="executeConfig">Step configuration; its <c>Pattern</c> is passed to <c>SelectTokens</c>.</param>
        /// <returns>
        /// One GUID-keyed entry per selected token; an empty dictionary when the
        /// document is null or empty or does not deserialize to a token.
        /// </returns>
        public Dictionary<string, string> ScrapeData(ExecuteConfig executeConfig)
        {
            var result = new Dictionary<string, string>();

            // Nothing to parse: return the empty result instead of failing inside
            // the deserializer.
            if (string.IsNullOrEmpty(_document))
            {
                return result;
            }

            var jDocument = JsonConvert.DeserializeObject<JToken>(_document);

            // Guard against a document that deserializes to null, so SelectTokens is
            // never invoked on a null reference.
            if (jDocument == null)
            {
                return result;
            }

            foreach (var scrapeData in jDocument.SelectTokens(executeConfig.Pattern))
            {
                result.Add(Guid.NewGuid().ToString(), scrapeData.Value<string>());
            }

            return result;
        }
Пример #19
0
        /// <summary>
        /// Creates the execute configuration matching the "ScrapeType" value of the
        /// given JSON step definition.
        /// </summary>
        /// <param name="config">JSON token describing one step; must contain "ScrapeType".</param>
        /// <returns>
        /// The deserialized configuration for the step type, or <c>null</c> for the
        /// WebDriver and Header types and for any type not handled here.
        /// </returns>
        protected ExecuteConfig BuildExecuteConfig(JToken config)
        {
            string typeName = config.SelectToken("ScrapeType").Value<string>();
            var scrapingType = (ScrapingType)Enum.Parse(typeof(ScrapingType), typeName);

            switch (scrapingType)
            {
                // Plain value/document steps share the base configuration type.
                case ScrapingType.HTMLValue:
                case ScrapingType.HTMLDoc:
                case ScrapingType.JsonDoc:
                case ScrapingType.JsonVlue:
                    return JsonConvert.DeserializeObject<ExecuteConfig>(config.ToString());

                case ScrapingType.ApiUrl:
                case ScrapingType.WebUrl:
                    return JsonConvert.DeserializeObject<NavigateExecuteConfig>(config.ToString());

                case ScrapingType.Text:
                {
                    // Text steps populate themselves from the raw token rather than
                    // being deserialized.
                    ExecuteConfig textConfig = new TextExecutorConfig();
                    textConfig.SetData(config);
                    return textConfig;
                }

                case ScrapingType.HTMLAttribute:
                    return JsonConvert.DeserializeObject<HTMLAttributeExecuteConfig>(config.ToString());

                case ScrapingType.ReplaceTemp:
                    return JsonConvert.DeserializeObject<ReplaceTempExecuteConfig>(config.ToString());

                case ScrapingType.Paging:
                    return JsonConvert.DeserializeObject<PagingExecuteConfig>(config.ToString());

                // No configuration is produced for these step types.
                case ScrapingType.WebDriver:
                case ScrapingType.Header:
                default:
                    return null;
            }
        }
            /// <summary>
            /// Checks the single output of an <c>ExecuteConfig</c> whose document
            /// delegate returns a fixed document for the one input produced by the
            /// count module.
            /// </summary>
            public async Task DocumentConfigReturnsDocumentForSingleResultDocument()
            {
                // Given
                var expected = new TestDocument();
                var count = new CountModule("A")
                {
                    EnsureInputDocument = true
                };
                var module = new ExecuteConfig(Config.FromDocument((doc, ctx) => expected));

                // When
                IReadOnlyList<TestDocument> outputs = await ExecuteAsync(count, module);

                // Then
                // NOTE(review): AreEquivalent compares the two documents as collections
                // rather than checking that the same document came back -- confirm
                // whether an identity check (e.g. Assert.AreSame) is intended.
                TestDocument actual = outputs.Single();
                CollectionAssert.AreEquivalent(expected, actual);
            }
            /// <summary>
            /// Checks that an <c>ExecuteConfig</c> whose context delegate yields
            /// <c>null</c> outputs the input documents.
            /// </summary>
            public async Task ContextConfigReturnsInputsForNullResult()
            {
                // Given
                var inputs = new TestDocument[]
                {
                    new TestDocument(),
                    new TestDocument()
                };
                var module = new ExecuteConfig(Config.FromContext(_ => (object)null));

                // When
                IReadOnlyList<TestDocument> outputs = await ExecuteAsync(inputs, module);

                // Then
                CollectionAssert.AreEqual(inputs, outputs);
            }
            /// <summary>
            /// Checks that an <c>ExecuteConfig</c> built from a document action (a
            /// delegate with no result) outputs documents with the inputs' content.
            /// </summary>
            public async Task DocumentConfigReturnsInputDocumentsForAction()
            {
                // Given
                var inputs = new TestDocument[]
                {
                    new TestDocument("Foo"),
                    new TestDocument("Bar")
                };

                // The action body is a placeholder; only the "no result" shape matters.
                var module = new ExecuteConfig(Config.FromDocument((doc, ctx) => { _ = 1 + 1; }));

                // When
                IReadOnlyList<TestDocument> outputs = await ExecuteAsync(inputs, module);

                // Then
                outputs.Select(x => x.Content).ShouldBe(new[] { "Foo", "Bar" });
            }
            /// <summary>
            /// Checks that an <c>ExecuteConfig</c> whose document delegate returns a
            /// non-document value produces documents whose content is that value.
            /// </summary>
            public async Task DocumentConfigSetsNewContent()
            {
                // Given
                var inputs = new TestDocument[]
                {
                    new TestDocument(),
                    new TestDocument()
                };

                // Each invocation yields the current counter value, then increments it.
                int nextValue = 0;
                var module = new ExecuteConfig(Config.FromDocument((d, c) => (object)nextValue++));

                // When
                IReadOnlyList<TestDocument> outputs = await ExecuteAsync(inputs, module);

                // Then
                // The trailing 'true' asks ShouldBe to ignore element order.
                outputs.Select(x => x.Content).ShouldBe(new[] { "0", "1" }, true);
            }
Пример #24
0
            /// <summary>
            /// Creates four documents from content strings and checks that each
            /// document's content can be read back in order.
            /// </summary>
            public async Task ContentReturnsCorrectDocuments()
            {
                // Given
                var contents = new List<string>();
                CreateDocuments documents = new CreateDocuments("A", "B", "C", "D");

                // Captures the content of every document.
                ForEachDocument gatherData = new ExecuteConfig(
                    Config.FromDocument(async d =>
                    {
                        contents.Add(await d.GetStringAsync());
                        return (object)null;
                    })).ForEachDocument();

                // When
                await ExecuteAsync(documents, gatherData);

                // Then
                Assert.AreEqual(4, contents.Count);
                CollectionAssert.AreEqual(new[] { "A", "B", "C", "D" }, contents);
            }
Пример #25
0
        /// <summary>
        /// Applies the configured text handler (split, regex or replace) and returns
        /// its output under a freshly generated GUID key.
        /// </summary>
        /// <param name="ExecuteConfig">
        /// Step configuration; it is cast to <c>TextExecutorConfig</c>.
        /// </param>
        /// <returns>
        /// A dictionary with exactly one entry. The value is the handler output, or an
        /// empty string when the current document is null or empty, or when the handle
        /// type is Join or any other type without a handler.
        /// </returns>
        public Dictionary<string, string> ScrapeData(ExecuteConfig ExecuteConfig)
        {
            var textConfig = (TextExecutorConfig)ExecuteConfig;
            var handleConfig = textConfig.TextHandleConfig;
            string output = "";

            // Handlers only run when there is a document to work on.
            if (!string.IsNullOrEmpty(_document))
            {
                var handleType = handleConfig.TextHanldeType;
                if (handleType == TextHanldeType.Split)
                {
                    output = SlipHandler((TextSplitingHandleConfig)handleConfig);
                }
                else if (handleType == TextHanldeType.Regex)
                {
                    output = RegexHandler((TextRegexHandleConfig)handleConfig);
                }
                else if (handleType == TextHanldeType.Replace)
                {
                    output = ReplaceText((TextReplaceHandleConfig)handleConfig);
                }

                // Join has no handler here; it and every other type leave the output empty.
            }

            return new Dictionary<string, string>
            {
                { Guid.NewGuid().ToString(), output }
            };
        }
            /// <summary>
            /// Checks that a context-based <c>ExecuteConfig</c> delegate is invoked
            /// exactly once when the module runs against two input documents.
            /// </summary>
            public async Task ContextConfigRunsModuleAgainstInputDocuments()
            {
                // Given
                var inputs = new TestDocument[]
                {
                    new TestDocument(),
                    new TestDocument()
                };

                // Counts how many times the delegate is called.
                int invocations = 0;
                var module = new ExecuteConfig(Config.FromContext(c =>
                {
                    invocations++;
                    return (object)null;
                }));

                // When
                await ExecuteAsync(inputs, module);

                // Then
                invocations.ShouldBe(1);
            }
        /// <summary>
        /// Performs the web-driver action selected by the configuration's event type
        /// and then, when a driver is available, reads data back from it.
        /// </summary>
        /// <param name="config">
        /// Step configuration; it is cast to <c>WebDriveExecuteConfig</c>.
        /// </param>
        /// <returns>
        /// The value of <c>GetDataFromDriver</c> for the configured response type, or
        /// an empty string when no driver is set.
        /// </returns>
        public string Execute(ExecuteConfig config)
        {
            var driveConfig = (WebDriveExecuteConfig)config;

            // Dispatch the requested action; unknown event types are ignored.
            switch (driveConfig.EventType)
            {
                case EventType.Click:
                    Click();
                    break;

                case EventType.Navigate:
                    Navigate(driveConfig.Pattern);
                    break;

                case EventType.Find:
                    FindElement(driveConfig.FindType, driveConfig.Pattern);
                    break;

                case EventType.Get:
                    GetElement(driveConfig.FindType, driveConfig.Pattern);
                    break;

                case EventType.FindFromElements:
                    FindFromElements(driveConfig.FindType, driveConfig.Pattern);
                    break;
            }

            var responseType = driveConfig.ResponseType;

            // Data is only read back when a driver instance exists.
            return _driver != null ? GetDataFromDriver(responseType) : "";
        }
            /// <summary>
            /// Groups documents by their "Tag" values using
            /// <c>StringComparer.OrdinalIgnoreCase</c> and checks the resulting group keys.
            /// </summary>
            public async Task CaseInsensitiveStringComparer()
            {
                // Given
                var groupKeys = new List<object>();

                // Six documents whose tags differ only by case ("b"/"B", "C"/"c") or
                // by type (the integer 1 and the string "1").
                var meta = new ExecuteConfig(Config.FromContext(
                    c => new IDocument[]
                    {
                        c.CreateDocument(new MetadataItems { { "Tag", new object[] { "A", "b" } } }),
                        c.CreateDocument(new MetadataItems { { "Tag", new object[] { "B" } } }),
                        c.CreateDocument(new MetadataItems { { "Tag", new object[] { "C" } } }),
                        c.CreateDocument(new MetadataItems { { "Tag", new object[] { "c" } } }),
                        c.CreateDocument(new MetadataItems { { "Tag", new object[] { 1 } } }),
                        c.CreateDocument(new MetadataItems { { "Tag", new object[] { "1" } } })
                    }));
                GroupDocuments groupByMany = new GroupDocuments("Tag").WithComparer(StringComparer.OrdinalIgnoreCase);

                // Records the group key of every group document.
                ForEachDocument gatherData = new ExecuteConfig(
                    Config.FromDocument(d =>
                    {
                        groupKeys.Add(d.Get(Keys.GroupKey));
                        return (object)null;
                    })).ForEachDocument();

                // When
                await ExecuteAsync(meta, groupByMany, gatherData);

                // Then
                CollectionAssert.AreEquivalent(new object[] { "A", "b", "C", 1 }, groupKeys);
            }
Пример #29
0
        /// <summary>
        /// Placeholder scraper: performs no work and always returns an empty dictionary.
        /// </summary>
        /// <param name="executeConfig">Step configuration; currently unused.</param>
        /// <returns>A new, empty dictionary.</returns>
        public Dictionary<string, string> ScrapeData(ExecuteConfig executeConfig)
        {
            // TODO: header-based scraping is not implemented. An earlier, disabled
            // draft fetched the document via _scrape.GetDocumentWithHeader using the
            // config's Pattern, HeaderConfigs and Interval, and keyed the result by
            // AttributeName (falling back to a new GUID).
            return new Dictionary<string, string>();
        }
            /// <summary>
            /// Groups counted documents by two keys each ("A" modulo 3, and the constant 3)
            /// and checks the group key stored on every resulting group document.
            /// </summary>
            public async Task SetsCorrectMetadata()
            {
                // Given
                var groupKeys = new List<int>();
                var count = new CountModule("A")
                {
                    AdditionalOutputs = 7,
                    EnsureInputDocument = true
                };
                GroupDocuments groupByMany = new GroupDocuments(
                    Config.FromDocument(d => new[] { d.GetInt("A") % 3, 3 }));

                // Records the group key and passes the document through unchanged.
                ForEachDocument gatherData = new ExecuteConfig(
                    Config.FromDocument(d =>
                    {
                        groupKeys.Add(d.GetInt(Keys.GroupKey));
                        return d;
                    })).ForEachDocument();

                // When
                await ExecuteAsync(count, groupByMany, gatherData);

                // Then
                CollectionAssert.AreEquivalent(new[] { 0, 1, 2, 3 }, groupKeys);
            }