Example #1
0
        /// <summary>
        /// Runs the Booker Prize sample pipeline. It starts from a <c>ReceiverHtmlTables</c>
        /// pointed at the Booker Prize Wikipedia page, pushes the data through a fixed sequence
        /// of filters and transformers, and finally hands it to <c>SenderExcel</c> and
        /// <c>SenderRazorTableOneByOne</c>. A single <c>Verifier</c> instance is re-run between stages.
        /// </summary>
        /// <remarks>
        /// Output locations (<c>D:\test\booker.xlsx</c>, <c>D:\test\</c>) and the template file
        /// name (<c>sqliteCreation.txt</c>, resolved against the current directory) are hard-coded.
        /// Stages are order-dependent: each one consumes the data produced by the previous one.
        /// </remarks>
        /// <returns>A task that completes once the last stage has finished.</returns>
        static async Task BookerPrize()
        {
            var verifier = new Verifier();

            // Stage 1: receive from the Wikipedia page.
            var prizePage = new ReceiverHtmlTables("https://en.wikipedia.org/wiki/Booker_Prize", Encoding.UTF8);
            var data = await prizePage.TransformData(null);
            data = await verifier.TransformData(data);

            // Stage 2: table filter keyed on the "Author" column.
            var authorTables = new FilterTablesWithColumn("Author");
            data = await authorTables.TransformData(data);
            data = await verifier.TransformData(data);

            // Stage 3: row filter on "Author" using the expression below.
            var authorRows = new FilterColumnData("Author", "Author not like '*ohn*'");
            data = await authorRows.TransformData(data);
            data = await verifier.TransformData(data);

            // Stage 4: "href" attribute of "Author_html" goes into a new "AuthorWiki" column.
            var authorLink = new TransformerHTMLAttribute("Author_html", "href", "AuthorWiki");
            data = await authorLink.TransformData(data);
            data = await verifier.TransformData(data);

            // Stage 5: prefix the site root; the expression is applied to the first table in the metadata.
            var firstTableName = data.Metadata.Tables.First().Name;
            var absoluteLink = new TransformerAddColumnExpressionByTable(firstTableName, "'https://en.wikipedia.org'+ AuthorWiki ", "AuthorFullWiki");
            data = await absoluteLink.TransformData(data);
            data = await verifier.TransformData(data);

            // Stage 6: fan out per "AuthorFullWiki" value through ReceiverHtmlList + TransformerToOneTable.
            var receiverArguments = new StankinsCommon.CtorDictionary()
            {
                { nameof(Encoding), Encoding.UTF8 }
            };
            var gatherLaureatesWiki = new TransformerOneTableToMulti<BaseObjectInSerial<ReceiverHtmlList, TransformerToOneTable>>(
                "file", "AuthorFullWiki", receiverArguments);
            data = await gatherLaureatesWiki.TransformData(data);
            data = await verifier.TransformData(data);

            // Stage 7: regex filter on "li" (pattern matches "(dddd" or ", dddd").
            var yearMention = new FilterColumnDataWithRegex("li", @"(\([0-9]{4})|(, [0-9]{4})");
            data = await yearMention.TransformData(data);
            data = await verifier.TransformData(data);

            // Disabled step, kept for reference:
            //   new ChangeTableNamesRegex(@"(?:.+\/)((?<name>.+))")
            // The verification pass that followed it still runs.
            data = await verifier.TransformData(data);

            // Stage 8: rename "li" to "bookName".
            var renameToBook = new ChangeColumnName("li", "bookName");
            data = await renameToBook.TransformData(data);
            data = await verifier.TransformData(data);

            // Stage 9: drop the *_html columns one after another, in this exact order.
            string[] htmlColumns =
            {
                "li_html",
                "Year_html",
                "Author_html",
                "Title_html",
                "Genre(s)_html",
                "Country_html",
            };
            foreach (var column in htmlColumns)
            {
                data = await new FilterRemoveColumn(column).TransformData(data);
            }
            data = await verifier.TransformData(data);

            // Stage 10: trim.
            var trim = new TransformTrim();
            data = await trim.TransformData(data);
            data = await verifier.TransformData(data);

            // Stage 11: Excel output.
            var excel = new SenderExcel(@"D:\test\booker.xlsx");
            data = await excel.TransformData(data);
            data = await verifier.TransformData(data);

            // Stage 12: Razor output, using the text of sqliteCreation.txt as the template content.
            var template = System.IO.File.ReadAllText("sqliteCreation.txt");
            var razor = new SenderRazorTableOneByOne(template, @"D:\test\");
            data = await razor.TransformData(data);
        }
Example #2
0
        /// <summary>
        /// Scenario: writes <paramref name="fileContents"/> to a file, reads that file through a
        /// <c>ReceiverHtmlTables</c>, and expects exactly one table containing
        /// <paramref name="numberRows"/> rows.
        /// </summary>
        /// <param name="fileContents">Text written to the scenario's input file.</param>
        /// <param name="numberRows">Expected number of rows in the single resulting table.</param>
        public void TestSimpleTable(string fileContents, int numberRows)
        {
            IReceive receiver = null;
            IDataToSent data = null;

            // The file name is derived from the test class and method names.
            string fileName = nameof(TestReceiverHTMLTables) + nameof(TestSimpleTable);

            // Each step is declared through the string extension `w`; the lambdas capture and
            // assign the locals above, so later steps see what earlier ones produced.
            $"Given the file {fileName} with Content {fileContents}".w(async() =>
            {
                await File.WriteAllTextAsync(fileName, fileContents);
            });
            $"When I create the receiver html for the {fileName}".w(() => receiver = new ReceiverHtmlTables(fileName, null));
            "And I read the data".w(async() => data = await receiver.TransformData(null));
            "Then should be a data".w(() => data.Should().NotBeNull());
            "With a table".w(() =>
            {
                data.DataToBeSentFurther.Should().NotBeNull();
                data.DataToBeSentFurther.Count.Should().Be(1);
            });
            $"The number of rows should be {numberRows}".w(() => data.DataToBeSentFurther[0].Rows.Count.Should().Be(numberRows));
        }
Example #3
0
        /// <summary>
        /// Runs the Nobel Prize in Literature sample pipeline. It starts from a
        /// <c>ReceiverHtmlTables</c> pointed at the Wikipedia list of laureates, pushes the data
        /// through a fixed sequence of filters and transformers, and hands it to <c>SenderExcel</c>
        /// and <c>SenderRazorTableOneByOne</c>. A single <c>Verifier</c> instance is re-run
        /// between most stages.
        /// </summary>
        /// <remarks>
        /// Output locations (<c>D:\test\nobel.xlsx</c>, <c>D:\test\</c>) and the template file
        /// name (<c>sqliteCreation.txt</c>, resolved against the current directory) are hard-coded.
        /// Stages are order-dependent: each one consumes the data produced by the previous one.
        /// </remarks>
        /// <returns>A task that completes once the last stage has finished.</returns>
        static async Task Nobel()
        {
            // Same pattern is used for an added column and for renaming tables further down.
            const string lastSegmentAsName = @"(?:.+\/)((?<name>.+))";

            var verifier = new Verifier();

            // Receive from the Wikipedia page.
            var data = await new ReceiverHtmlTables("https://en.wikipedia.org/wiki/List_of_Nobel_laureates_in_Literature", Encoding.UTF8).TransformData(null);
            data = await verifier.TransformData(data);

            // Table filter keyed on the "Laureate" column.
            data = await new FilterTablesWithColumn("Laureate").TransformData(data);
            data = await verifier.TransformData(data);

            // Row filter on "Laureate" using the expression below.
            data = await new FilterColumnData("Laureate", "Laureate not like '*ohn*'").TransformData(data);
            data = await verifier.TransformData(data);

            // "href" of "Laureate_html" -> "LaureateWiki"; "src" of "Picture_html" -> "PictureUrl".
            data = await new TransformerHTMLAttribute("Laureate_html", "href", "LaureateWiki").TransformData(data);
            data = await verifier.TransformData(data);

            data = await new TransformerHTMLAttribute("Picture_html", "src", "PictureUrl").TransformData(data);
            data = await verifier.TransformData(data);

            // Prefix the site root; the expression is applied to the first table in the metadata.
            data = await new TransformerAddColumnExpressionByTable(data.Metadata.Tables.First().Name, "'https://en.wikipedia.org'+ LaureateWiki ", "LaureateFullWiki").TransformData(data);
            data = await verifier.TransformData(data);

            // Fan out per "LaureateFullWiki" value through ReceiverHtmlList + TransformerToOneTable.
            var gatherLaureatesWiki = new TransformerOneTableToMulti<BaseObjectInSerial<ReceiverHtmlList, TransformerToOneTable>>(
                "file",
                "LaureateFullWiki",
                new StankinsCommon.CtorDictionary()
                {
                    { nameof(Encoding), Encoding.UTF8 }
                });
            data = await gatherLaureatesWiki.TransformData(data);
            data = await verifier.TransformData(data);

            // Two regex filters on "li_html": first "(dddd" or ", dddd", then a literal "<i>".
            data = await new FilterColumnDataWithRegex("li_html", @"(\([0-9]{4})|(, [0-9]{4})").TransformData(data);
            data = await verifier.TransformData(data);

            data = await new FilterColumnDataWithRegex("li_html", @"[<]i[>]").TransformData(data);
            data = await verifier.TransformData(data);

            data = await new TransformTrim().TransformData(data);
            data = await verifier.TransformData(data);

            // Length-based filter on "li_html" with a limit of 400 (no verification after this stage).
            data = await new FilterRemoveColumnDataGreaterThanLength("li_html", 400).TransformData(data);

            // Drop helper columns one after another, in this exact order.
            string[] columnsToDrop =
            {
                "li_html",
                "Picture",
                "Year_html",
                "Genre(s)_html",
                "LaureateWiki",
                "Country",
                "Picture_html",
                "Laureate_html",
                "Country_html",
                "Language(s)_html",
                "Citation_html",
            };
            foreach (var column in columnsToDrop)
            {
                data = await new FilterRemoveColumn(column).TransformData(data);
            }
            data = await verifier.TransformData(data);

            // Regex-derived columns from the wiki URL columns.
            data = await new AddColumnRegex("LaureateFullWiki_origin", @"(?:.+\/)((?<nameAuthor>.+))").TransformData(data);
            data = await verifier.TransformData(data);

            data = await new AddColumnRegex("LaureateFullWiki", lastSegmentAsName).TransformData(data);
            data = await new FilterRemoveColumn("LaureateFullWiki_origin").TransformData(data);

            data = await new ChangeTableNamesRegex(lastSegmentAsName).TransformData(data);
            data = await verifier.TransformData(data);

            data = await new ChangeColumnName("li", "bookName").TransformData(data);
            data = await verifier.TransformData(data);

            // Excel output (no verification directly after this stage).
            data = await new SenderExcel(@"D:\test\nobel.xlsx").TransformData(data);

            // Table filter keyed on "bookName", then TransformerToOneTable.
            data = await new FilterTablesWithColumn("bookName").TransformData(data);
            data = await verifier.TransformData(data);

            data = await new TransformerToOneTable().TransformData(data);
            data = await verifier.TransformData(data);

            // Razor output, using the text of sqliteCreation.txt as the template content.
            var template = System.IO.File.ReadAllText("sqliteCreation.txt");
            data = await new SenderRazorTableOneByOne(template, @"D:\test\").TransformData(data);
            data = await verifier.TransformData(data);
        }