/// <summary>
/// Sample pipeline for the Wikipedia "Booker Prize" article: receives the page's HTML tables,
/// pushes them through a fixed sequence of filter/transformer stages, then hands the result to
/// an Excel sender and a Razor-template sender.
/// </summary>
/// <remarks>
/// A single <c>Verifier</c> instance is run on the data after almost every stage.
/// The Razor template text is read from <c>sqliteCreation.txt</c> (relative path) and both
/// senders are pointed at <c>D:\test\</c>.
/// </remarks>
static async Task BookerPrize()
{
    // One definition of the output location used by both senders below.
    const string outputFolder = @"D:\test\";

    var v = new Verifier();

    // Receive the tables from the article.
    var booker = new ReceiverHtmlTables("https://en.wikipedia.org/wiki/Booker_Prize", Encoding.UTF8);
    var data = await booker.TransformData(null);
    data = await v.TransformData(data);

    // Table filter and row filter keyed on the "Author" column.
    data = await new FilterTablesWithColumn("Author").TransformData(data);
    data = await v.TransformData(data);
    data = await new FilterColumnData("Author", "Author not like '*ohn*'").TransformData(data);
    data = await v.TransformData(data);

    // Pull the href attribute out of Author_html into AuthorWiki, then prefix it with the site
    // root to build AuthorFullWiki (the expression string is passed through verbatim).
    data = await new TransformerHTMLAttribute("Author_html", "href", "AuthorWiki").TransformData(data);
    data = await v.TransformData(data);
    var firstTableName = data.Metadata.Tables.First().Name;
    data = await new TransformerAddColumnExpressionByTable(
        firstTableName,
        "'https://en.wikipedia.org'+ AuthorWiki ",
        "AuthorFullWiki").TransformData(data);
    data = await v.TransformData(data);

    // Fan out over the AuthorFullWiki column with ReceiverHtmlList + TransformerToOneTable.
    // NOTE(review): presumably one HTML-list fetch per author URL - confirm in TransformerOneTableToMulti.
    var gatherLaureatesWiki = new TransformerOneTableToMulti<BaseObjectInSerial<ReceiverHtmlList, TransformerToOneTable>>(
        "file",
        "AuthorFullWiki",
        new StankinsCommon.CtorDictionary()
        {
            { nameof(Encoding), Encoding.UTF8 }
        });
    data = await gatherLaureatesWiki.TransformData(data);
    data = await v.TransformData(data);

    // Regex filter on the "li" column; the pattern matches "(dddd" or ", dddd".
    data = await new FilterColumnDataWithRegex("li", @"(\([0-9]{4})|(, [0-9]{4})").TransformData(data);
    data = await v.TransformData(data);

    // A table-rename stage that used to sit here is disabled; the verification that followed
    // it is retained so the sequence of verifier calls is unchanged.
    data = await v.TransformData(data);

    data = await new ChangeColumnName("li", "bookName").TransformData(data);
    data = await v.TransformData(data);

    // Drop the helper "*_html" columns, one FilterRemoveColumn per name, in this order.
    var columnsToRemove = new[]
    {
        "li_html",
        "Year_html",
        "Author_html",
        "Title_html",
        "Genre(s)_html",
        "Country_html"
    };
    foreach (var column in columnsToRemove)
    {
        data = await new FilterRemoveColumn(column).TransformData(data);
    }
    data = await v.TransformData(data);

    data = await new TransformTrim().TransformData(data);
    data = await v.TransformData(data);

    data = await new SenderExcel(outputFolder + "booker.xlsx").TransformData(data);
    data = await v.TransformData(data);

    // Read the template asynchronously instead of blocking inside an async method.
    var content = await System.IO.File.ReadAllTextAsync("sqliteCreation.txt");
    data = await new SenderRazorTableOneByOne(content, outputFolder).TransformData(data);
}
// Scenario: stores fileContents in a file named after this test class and method, reads that
// file through ReceiverHtmlTables, and expects a non-null result holding exactly one table
// with numberRows rows.
//   fileContents - text written to the input file (presumably an HTML snippet with one table).
//   numberRows   - expected row count of the single table.
// Each step is registered through the string extension `.w`, defined elsewhere in the project.
public void TestSimpleTable(string fileContents, int numberRows)
{
    IReceive receiver = null;
    IDataToSent data = null;
    string fileName = nameof(TestReceiverHTMLTables) + nameof(TestSimpleTable);

    $"Given the file {fileName} with Content {fileContents}".w(async () =>
    {
        await File.WriteAllTextAsync(fileName, fileContents);
    });

    // A null second constructor argument is passed here (the sample pipelines pass an Encoding).
    $"When I create the receiver html for the {fileName}".w(() => receiver = new ReceiverHtmlTables(fileName, null));

    $"And I read the data".w(async () => data = await receiver.TransformData(null));

    $"Then should be a data".w(() => data.Should().NotBeNull());

    $"With a table".w(() =>
    {
        data.DataToBeSentFurther.Should().NotBeNull();
        data.DataToBeSentFurther.Count.Should().Be(1);
    });

    $"The number of rows should be {numberRows}".w(() => data.DataToBeSentFurther[0].Rows.Count.Should().Be(numberRows));
}
/// <summary>
/// Sample pipeline for the Wikipedia "List of Nobel laureates in Literature" article: receives
/// the page's HTML tables, pushes them through a fixed sequence of filter/transformer stages,
/// then hands the result to an Excel sender and a Razor-template sender.
/// </summary>
/// <remarks>
/// A single <c>Verifier</c> instance is run on the data after most stages.
/// The Razor template text is read from <c>sqliteCreation.txt</c> (relative path) and both
/// senders are pointed at <c>D:\test\</c>.
/// </remarks>
static async Task Nobel()
{
    // One definition of the output location used by both senders below.
    const string outputFolder = @"D:\test\";

    var v = new Verifier();

    // Receive the tables from the article.
    var nobelLiterature = new ReceiverHtmlTables("https://en.wikipedia.org/wiki/List_of_Nobel_laureates_in_Literature", Encoding.UTF8);
    var data = await nobelLiterature.TransformData(null);
    data = await v.TransformData(data);

    // Table filter and row filter keyed on the "Laureate" column.
    data = await new FilterTablesWithColumn("Laureate").TransformData(data);
    data = await v.TransformData(data);
    data = await new FilterColumnData("Laureate", "Laureate not like '*ohn*'").TransformData(data);
    data = await v.TransformData(data);

    // Pull attributes out of the raw HTML columns: href -> LaureateWiki, src -> PictureUrl.
    data = await new TransformerHTMLAttribute("Laureate_html", "href", "LaureateWiki").TransformData(data);
    data = await v.TransformData(data);
    data = await new TransformerHTMLAttribute("Picture_html", "src", "PictureUrl").TransformData(data);
    data = await v.TransformData(data);

    // Prefix LaureateWiki with the site root to build LaureateFullWiki
    // (the expression string is passed through verbatim).
    var firstTableName = data.Metadata.Tables.First().Name;
    data = await new TransformerAddColumnExpressionByTable(
        firstTableName,
        "'https://en.wikipedia.org'+ LaureateWiki ",
        "LaureateFullWiki").TransformData(data);
    data = await v.TransformData(data);

    // Fan out over the LaureateFullWiki column with ReceiverHtmlList + TransformerToOneTable.
    // NOTE(review): presumably one HTML-list fetch per laureate URL - confirm in TransformerOneTableToMulti.
    var gatherLaureatesWiki = new TransformerOneTableToMulti<BaseObjectInSerial<ReceiverHtmlList, TransformerToOneTable>>(
        "file",
        "LaureateFullWiki",
        new StankinsCommon.CtorDictionary()
        {
            { nameof(Encoding), Encoding.UTF8 }
        });
    data = await gatherLaureatesWiki.TransformData(data);
    data = await v.TransformData(data);

    // Two regex filters on li_html: first "(dddd" or ", dddd", then a literal "<i>" tag.
    data = await new FilterColumnDataWithRegex("li_html", @"(\([0-9]{4})|(, [0-9]{4})").TransformData(data);
    data = await v.TransformData(data);
    data = await new FilterColumnDataWithRegex("li_html", @"[<]i[>]").TransformData(data);
    data = await v.TransformData(data);

    data = await new TransformTrim().TransformData(data);
    data = await v.TransformData(data);

    // Length-based filter on li_html with a limit of 400 (no verification after this stage).
    data = await new FilterRemoveColumnDataGreaterThanLength("li_html", 400).TransformData(data);

    // Drop columns that are not wanted downstream, one FilterRemoveColumn per name, in this order.
    var columnsToRemove = new[]
    {
        "li_html",
        "Picture",
        "Year_html",
        "Genre(s)_html",
        "LaureateWiki",
        "Country",
        "Picture_html",
        "Laureate_html",
        "Country_html",
        "Language(s)_html",
        "Citation_html"
    };
    foreach (var column in columnsToRemove)
    {
        data = await new FilterRemoveColumn(column).TransformData(data);
    }
    data = await v.TransformData(data);

    // Regex stages: the pattern captures everything after the last '/' into a named group.
    data = await new AddColumnRegex("LaureateFullWiki_origin", @"(?:.+\/)((?<nameAuthor>.+))").TransformData(data);
    data = await v.TransformData(data);
    data = await new AddColumnRegex("LaureateFullWiki", @"(?:.+\/)((?<name>.+))").TransformData(data);
    data = await new FilterRemoveColumn("LaureateFullWiki_origin").TransformData(data);
    data = await new ChangeTableNamesRegex(@"(?:.+\/)((?<name>.+))").TransformData(data);
    data = await v.TransformData(data);

    data = await new ChangeColumnName("li", "bookName").TransformData(data);
    data = await v.TransformData(data);

    // The Excel sender runs before the bookName table filter and the merge into one table.
    data = await new SenderExcel(outputFolder + "nobel.xlsx").TransformData(data);
    data = await new FilterTablesWithColumn("bookName").TransformData(data);
    data = await v.TransformData(data);
    data = await new TransformerToOneTable().TransformData(data);
    data = await v.TransformData(data);

    // Read the template asynchronously instead of blocking inside an async method.
    var content = await System.IO.File.ReadAllTextAsync("sqliteCreation.txt");
    data = await new SenderRazorTableOneByOne(content, outputFolder).TransformData(data);
    data = await v.TransformData(data);
}