/// <summary>
/// Runs the Booker Prize pipeline: starts from a <c>ReceiverHtmlTables</c> pointed at the
/// Wikipedia "Booker_Prize" page, pushes the data through a fixed sequence of filters and
/// transformers, and finally hands it to <c>SenderExcel</c> and <c>SenderRazorTableOneByOne</c>.
/// The <c>Verifier</c> is invoked between steps exactly where the pipeline asks for it.
/// </summary>
/// <returns>A task that completes when the last sender has finished.</returns>
static async Task BookerPrize()
{
    // Console sender left disabled:
    //var writeData = new SenderToConsole();
    var verifier = new Verifier();

    // Step 1: receive the tables of the Booker Prize page.
    var bookerPage = new ReceiverHtmlTables("https://en.wikipedia.org/wiki/Booker_Prize", Encoding.UTF8);
    var current = await bookerPage.TransformData(null);
    current = await verifier.TransformData(current);

    // Step 2: table filter keyed on the "Author" column.
    var authorTables = new FilterTablesWithColumn("Author");
    current = await authorTables.TransformData(current);
    current = await verifier.TransformData(current);

    // Step 3: row filter on "Author" using the expression below.
    var authorRows = new FilterColumnData("Author", "Author not like '*ohn*'");
    current = await authorRows.TransformData(current);
    current = await verifier.TransformData(current);

    // Step 4: take the "href" attribute of "Author_html" into the "AuthorWiki" column.
    var authorLink = new TransformerHTMLAttribute("Author_html", "href", "AuthorWiki");
    current = await authorLink.TransformData(current);
    current = await verifier.TransformData(current);

    // Step 5: prefix the relative link with the site root, targeting the first table
    // present in the metadata at this point of the pipeline.
    var firstTableName = current.Metadata.Tables.First().Name;
    var absoluteLink = new TransformerAddColumnExpressionByTable(
        firstTableName,
        "'https://en.wikipedia.org'+ AuthorWiki ",
        "AuthorFullWiki");
    current = await absoluteLink.TransformData(current);
    current = await verifier.TransformData(current);

    // Step 6: for each "AuthorFullWiki" value, run ReceiverHtmlList followed by
    // TransformerToOneTable (constructed with the UTF-8 encoding argument).
    var receiverArguments = new StankinsCommon.CtorDictionary()
    {
        { nameof(Encoding), Encoding.UTF8 }
    };
    var authorPages = new TransformerOneTableToMulti<BaseObjectInSerial<ReceiverHtmlList, TransformerToOneTable>>(
        "file",
        "AuthorFullWiki",
        receiverArguments);
    current = await authorPages.TransformData(current);
    current = await verifier.TransformData(current);

    // Step 7: regex filter on "li"; the pattern matches "(" + four digits or ", " + four digits.
    var yearLikeItems = new FilterColumnDataWithRegex("li", @"(\([0-9]{4})|(, [0-9]{4})");
    current = await yearLikeItems.TransformData(current);
    current = await verifier.TransformData(current);

    // Table renaming is disabled here; the verifier still runs a second time in a row.
    //data = await new ChangeTableNamesRegex(@"(?:.+\/)((?<name>.+))").TransformData(data);
    current = await verifier.TransformData(current);

    // Step 8: rename "li" to "bookName".
    var renameItem = new ChangeColumnName("li", "bookName");
    current = await renameItem.TransformData(current);
    current = await verifier.TransformData(current);

    // Step 9: drop the raw HTML companion columns, one remover per column, in this order.
    var htmlColumns = new[]
    {
        "li_html",
        "Year_html",
        "Author_html",
        "Title_html",
        "Genre(s)_html",
        "Country_html"
    };
    foreach (var columnName in htmlColumns)
    {
        var remover = new FilterRemoveColumn(columnName);
        current = await remover.TransformData(current);
    }
    current = await verifier.TransformData(current);

    // Step 10: trim.
    var trimmer = new TransformTrim();
    current = await trimmer.TransformData(current);
    current = await verifier.TransformData(current);

    // Step 11: Excel output.
    var excelSender = new SenderExcel(@"D:\test\booker.xlsx");
    current = await excelSender.TransformData(current);
    current = await verifier.TransformData(current);

    // Step 12: Razor output, using the template text read from "sqliteCreation.txt".
    var razorTemplate = System.IO.File.ReadAllText("sqliteCreation.txt");
    var razorSender = new SenderRazorTableOneByOne(razorTemplate, @"D:\test\");
    current = await razorSender.TransformData(current);
}
/// <summary>
/// Runs the "Bill Gates books" pipeline: extracts links from a saved page with
/// <c>ReceiverHtmlRegex</c>, retains the ones containing "ooks", loads each of them through
/// <c>ReceiverHtmlMeta</c>, keeps the "keywords" meta entries and sends the result to Excel.
/// </summary>
/// <remarks>
/// The Excel output file is <c>bg.xlsx</c>, resolved against the current directory. It was
/// previously spelled <c>bg.xslx</c>, a transposed extension that does not match the
/// <c>.xlsx</c> files written by the sibling pipelines.
/// </remarks>
/// <returns>A task that completes when the Excel sender and the final verification finish.</returns>
private static async Task BillGates()
{
    var v = new Verifier();

    // Alternative receivers that were tried and left disabled:
    //var dt = new ReceiverHtmlAHref(@"https://www.gatesnotes.com/Books#All",Encoding.UTF8);
    //var dt = new ReceiverHtmlRegex(@"C:\Users\Surface1\Documents\bg.txt", Encoding.UTF8, @".(?:href=)(?<book>.+?)(?:#disqus).*?");

    // NOTE(review): the input path is machine-specific; confirm before running elsewhere.
    // The pattern expects the href value to be introduced by a literal backslash + quote
    // (href=\"), and captures everything up to "#disqus" in the "book" group.
    var dt = new ReceiverHtmlRegex(
        @"C:\Users\Surface1\Documents\bg.txt",
        Encoding.UTF8,
        @".(?:href=\\"")(?<book>.+?)(?:#disqus).*?");
    var data = await dt.TransformData(null);
    await v.TransformData(data);

    // Retain rows of the first column whose value contains "ooks".
    var books = new FilterRetainColumnDataContains(data.Metadata.Columns[0].Name, "ooks");
    data = await books.TransformData(data);
    await v.TransformData(data);

    // Feed each value of the first column to a ReceiverHtmlMeta.
    var t = new TransformerOneTableToMulti<ReceiverHtmlMeta>(
        "file",
        data.Metadata.Columns[0].Name,
        new CtorDictionary());
    data = await t.TransformData(data);
    await v.TransformData(data);

    data = await new FilterTablesWithColumn("meta_name").TransformData(data);
    await v.TransformData(data);

    // NOTE(review): same filter applied a second time; looks like a copy/paste
    // leftover. Kept as-is because its idempotence is not visible from here.
    data = await new FilterTablesWithColumn("meta_name").TransformData(data);
    await v.TransformData(data);

    data = await new TransformerToOneTable().TransformData(data);
    await v.TransformData(data);

    // Retain only the rows whose "meta_name" contains "keywords".
    books = new FilterRetainColumnDataContains("meta_name", "keywords");
    data = await books.TransformData(data);
    await v.TransformData(data);

    // Fixed: extension was misspelled ".xslx".
    var excel = new SenderExcel(@"bg.xlsx");
    data = await excel.TransformData(data);
    data = await v.TransformData(data);
}
/// <summary>
/// Runs the Nobel Prize in Literature pipeline: starts from a <c>ReceiverHtmlTables</c> pointed
/// at the Wikipedia list of laureates, applies a fixed sequence of filters and transformers,
/// and sends the result to <c>SenderExcel</c> and then to <c>SenderRazorTableOneByOne</c>.
/// The <c>Verifier</c> runs between steps exactly where the pipeline asks for it.
/// </summary>
/// <returns>A task that completes after the final verification.</returns>
static async Task Nobel()
{
    // Used twice below (extra column and table renaming). The "name" group captures
    // whatever follows the last '/' of the input.
    const string afterLastSlashPattern = @"(?:.+\/)((?<name>.+))";

    // Console sender left disabled (see the end of the method):
    //var writeData = new SenderToConsole();
    var checker = new Verifier();

    // Receive the tables of the laureates page.
    var result = await new ReceiverHtmlTables(
        "https://en.wikipedia.org/wiki/List_of_Nobel_laureates_in_Literature",
        Encoding.UTF8).TransformData(null);
    result = await checker.TransformData(result);

    // Table filter keyed on the "Laureate" column.
    result = await new FilterTablesWithColumn("Laureate").TransformData(result);
    result = await checker.TransformData(result);

    // Row filter on "Laureate" using the expression below.
    result = await new FilterColumnData("Laureate", "Laureate not like '*ohn*'").TransformData(result);
    result = await checker.TransformData(result);

    // "href" of "Laureate_html" goes into "LaureateWiki".
    result = await new TransformerHTMLAttribute("Laureate_html", "href", "LaureateWiki").TransformData(result);
    result = await checker.TransformData(result);

    // "src" of "Picture_html" goes into "PictureUrl".
    result = await new TransformerHTMLAttribute("Picture_html", "src", "PictureUrl").TransformData(result);
    result = await checker.TransformData(result);

    // Prefix the relative link with the site root, targeting the first table
    // present in the metadata at this point of the pipeline.
    result = await new TransformerAddColumnExpressionByTable(
        result.Metadata.Tables.First().Name,
        "'https://en.wikipedia.org'+ LaureateWiki ",
        "LaureateFullWiki").TransformData(result);
    result = await checker.TransformData(result);

    // For each "LaureateFullWiki" value, run ReceiverHtmlList followed by
    // TransformerToOneTable (constructed with the UTF-8 encoding argument).
    result = await new TransformerOneTableToMulti<BaseObjectInSerial<ReceiverHtmlList, TransformerToOneTable>>(
        "file",
        "LaureateFullWiki",
        new StankinsCommon.CtorDictionary()
        {
            { nameof(Encoding), Encoding.UTF8 }
        }).TransformData(result);
    result = await checker.TransformData(result);

    // Single-page experiment left disabled:
    //var h = new ReceiverHtmlList("https://en.wikipedia.org/wiki/Sully_Prudhomme",Encoding.UTF8);
    //var data2 = await h.TransformData(null);

    // Regex filter on "li_html": "(" + four digits or ", " + four digits.
    result = await new FilterColumnDataWithRegex("li_html", @"(\([0-9]{4})|(, [0-9]{4})").TransformData(result);
    result = await checker.TransformData(result);

    // Regex filter on "li_html": a literal "<i>" tag.
    result = await new FilterColumnDataWithRegex("li_html", @"[<]i[>]").TransformData(result);
    result = await checker.TransformData(result);

    result = await new TransformTrim().TransformData(result);
    result = await checker.TransformData(result);

    // Length-based filter on "li_html" with limit 400; no verification follows this step.
    result = await new FilterRemoveColumnDataGreaterThanLength("li_html", 400).TransformData(result);

    // Diagnostics left disabled:
    //data = await (new TransformerAddColumnExpressionByColumn("li_html", "Len(li_html)", "liLen")).TransformData(data);
    //var csv = new SenderFileCSV(@"D:\test");
    //data = await csv.TransformData(data);

    // Drop the columns that are no longer needed, one remover per column, in this order.
    var discardedColumns = new[]
    {
        "li_html",
        "Picture",
        "Year_html",
        "Genre(s)_html",
        "LaureateWiki",
        "Country",
        "Picture_html",
        "Laureate_html",
        "Country_html",
        "Language(s)_html",
        "Citation_html"
    };
    foreach (var discarded in discardedColumns)
    {
        result = await new FilterRemoveColumn(discarded).TransformData(result);
    }
    result = await checker.TransformData(result);

    // Regex-derived columns: "nameAuthor" from "LaureateFullWiki_origin", then "name"
    // from "LaureateFullWiki"; the "_origin" column is removed afterwards.
    result = await new AddColumnRegex("LaureateFullWiki_origin", @"(?:.+\/)((?<nameAuthor>.+))").TransformData(result);
    result = await checker.TransformData(result);
    result = await new AddColumnRegex("LaureateFullWiki", afterLastSlashPattern).TransformData(result);
    result = await new FilterRemoveColumn("LaureateFullWiki_origin").TransformData(result);

    // Rename tables with the same pattern.
    result = await new ChangeTableNamesRegex(afterLastSlashPattern).TransformData(result);
    result = await checker.TransformData(result);

    result = await new ChangeColumnName("li", "bookName").TransformData(result);
    result = await checker.TransformData(result);

    // Excel output happens before the "bookName" table filter.
    result = await new SenderExcel(@"D:\test\nobel.xlsx").TransformData(result);
    result = await new FilterTablesWithColumn("bookName").TransformData(result);
    result = await checker.TransformData(result);

    result = await new TransformerToOneTable().TransformData(result);
    result = await checker.TransformData(result);

    // Razor output, using the template text read from "sqliteCreation.txt".
    var razorTemplate = System.IO.File.ReadAllText("sqliteCreation.txt");
    result = await new SenderRazorTableOneByOne(razorTemplate, @"D:\test\").TransformData(result);
    result = await checker.TransformData(result);

    //data = await writeData.TransformData(data);
}