コード例 #1
0
ファイル: Program.cs プロジェクト: vivekraj59/stankins
        /// <summary>
        /// Sample pipeline for the Booker Prize page on Wikipedia. Data received from
        /// <c>ReceiverHtmlTables</c> is pushed through a fixed sequence of filters and
        /// transformers, with a <c>Verifier</c> pass after most steps, and is finally handed to
        /// <c>SenderExcel</c> (<c>D:\test\booker.xlsx</c>) and <c>SenderRazorTableOneByOne</c>
        /// (template text read from <c>sqliteCreation.txt</c>, target <c>D:\test\</c>).
        /// </summary>
        /// <returns>A task that completes once the final sender step has been awaited.</returns>
        static async Task BookerPrize()
        {
            var verifier = new Verifier();
            var receiver = new ReceiverHtmlTables("https://en.wikipedia.org/wiki/Booker_Prize", Encoding.UTF8);

            var data = await receiver.TransformData(null);
            data = await verifier.TransformData(data);

            // Table-level filter on the "Author" column, followed by the row filter expression.
            var authorTables = new FilterTablesWithColumn("Author");
            data = await authorTables.TransformData(data);
            data = await verifier.TransformData(data);

            var authorRows = new FilterColumnData("Author", "Author not like '*ohn*'");
            data = await authorRows.TransformData(data);
            data = await verifier.TransformData(data);

            // Presumably copies the href attribute found in Author_html into a new AuthorWiki column.
            var hrefToColumn = new TransformerHTMLAttribute("Author_html", "href", "AuthorWiki");
            data = await hrefToColumn.TransformData(data);
            data = await verifier.TransformData(data);

            // The table name comes from the data as it looks right now, so this transformer
            // can only be constructed at this point of the pipeline.
            var firstTableName = data.Metadata.Tables.First().Name;
            var fullWikiColumn = new TransformerAddColumnExpressionByTable(firstTableName, "'https://en.wikipedia.org'+ AuthorWiki ", "AuthorFullWiki");
            data = await fullWikiColumn.TransformData(data);
            data = await verifier.TransformData(data);

            // Constructor arguments forwarded to the nested receiver/transformer pair.
            var nestedArguments = new StankinsCommon.CtorDictionary()
            {
                { nameof(Encoding), Encoding.UTF8 }
            };
            var authorPages = new TransformerOneTableToMulti<BaseObjectInSerial<ReceiverHtmlList, TransformerToOneTable>>(
                "file", "AuthorFullWiki", nestedArguments);
            data = await authorPages.TransformData(data);
            data = await verifier.TransformData(data);

            // The pattern matches "(" or ", " followed by four digits (looks like a year marker).
            var yearInListItem = new FilterColumnDataWithRegex("li", @"(\([0-9]{4})|(, [0-9]{4})");
            data = await yearInListItem.TransformData(data);
            data = await verifier.TransformData(data);

            // A ChangeTableNamesRegex step used to sit here and is disabled; only its
            // verification pass is still executed.
            data = await verifier.TransformData(data);

            var renameListItem = new ChangeColumnName("li", "bookName");
            data = await renameListItem.TransformData(data);
            data = await verifier.TransformData(data);

            // Remove the *_html columns one after another, in this exact order.
            string[] htmlColumns =
            {
                "li_html",
                "Year_html",
                "Author_html",
                "Title_html",
                "Genre(s)_html",
                "Country_html",
            };
            foreach (var columnName in htmlColumns)
            {
                data = await new FilterRemoveColumn(columnName).TransformData(data);
            }
            data = await verifier.TransformData(data);

            var trim = new TransformTrim();
            data = await trim.TransformData(data);
            data = await verifier.TransformData(data);

            var excel = new SenderExcel(@"D:\test\booker.xlsx");
            data = await excel.TransformData(data);
            data = await verifier.TransformData(data);

            // The template is read only after the Excel step, from the current working directory.
            var template = System.IO.File.ReadAllText("sqliteCreation.txt");
            var razor    = new SenderRazorTableOneByOne(template, @"D:\test\");
            data = await razor.TransformData(data);
        }
コード例 #2
0
ファイル: Program.cs プロジェクト: vivekraj59/stankins
        /// <summary>
        /// Sample pipeline that starts from a local file read with <c>ReceiverHtmlRegex</c>,
        /// keeps values containing "ooks", loads one <c>ReceiverHtmlMeta</c> result per remaining
        /// value, narrows the result to <c>meta_name</c> entries containing "keywords" and hands
        /// the data to <c>SenderExcel</c> (<c>bg.xlsx</c>, relative to the working directory).
        /// A <c>Verifier</c> pass runs after every step.
        /// </summary>
        /// <returns>A task that completes once the final verification has been awaited.</returns>
        private static async Task BillGates()
        {
            var v = new Verifier();

            // Earlier, now disabled variants read https://www.gatesnotes.com/Books#All through
            // ReceiverHtmlAHref, or used a pattern without the escaped quote after "href=".
            // In this verbatim string \\ is two backslash characters and "" is one double quote,
            // so the regex requires a literal backslash followed by a double quote after "href=".
            var dt   = new ReceiverHtmlRegex(@"C:\Users\Surface1\Documents\bg.txt", Encoding.UTF8, @".(?:href=\\"")(?<book>.+?)(?:#disqus).*?");
            var data = await dt.TransformData(null);

            await v.TransformData(data);

            // The first column of the received data is used both as the filter column here and
            // as the source column for the per-value receiver below.
            var bookLinks = new FilterRetainColumnDataContains(data.Metadata.Columns[0].Name, "ooks");

            data = await bookLinks.TransformData(data);

            await v.TransformData(data);

            var t = new TransformerOneTableToMulti <ReceiverHtmlMeta>("file", data.Metadata.Columns[0].Name, new CtorDictionary());

            data = await t.TransformData(data);

            await v.TransformData(data);

            data = await new FilterTablesWithColumn("meta_name").TransformData(data);
            await v.TransformData(data);

            // NOTE(review): this repeats the previous filter step unchanged and looks redundant;
            // confirm that FilterTablesWithColumn is idempotent before removing it.
            data = await new FilterTablesWithColumn("meta_name").TransformData(data);
            await v.TransformData(data);

            data = await new TransformerToOneTable().TransformData(data);
            await v.TransformData(data);

            var keywordRows = new FilterRetainColumnDataContains("meta_name", "keywords");
            data = await keywordRows.TransformData(data);

            await v.TransformData(data);

            // Fixed: the extension was misspelled "xslx"; the workbook extension is "xlsx",
            // which is also what the other pipelines in this file write.
            var excel = new SenderExcel(@"bg.xlsx");

            data = await excel.TransformData(data);

            data = await v.TransformData(data);
        }
コード例 #3
0
ファイル: Program.cs プロジェクト: vivekraj59/stankins
        /// <summary>
        /// Sample pipeline for the Wikipedia list of Nobel laureates in Literature. Data received
        /// from <c>ReceiverHtmlTables</c> goes through a fixed sequence of filters and
        /// transformers, with a <c>Verifier</c> pass after most steps. The result is sent to
        /// <c>SenderExcel</c> (<c>D:\test\nobel.xlsx</c>), then reduced to one table and passed to
        /// <c>SenderRazorTableOneByOne</c> (template text read from <c>sqliteCreation.txt</c>,
        /// target <c>D:\test\</c>).
        /// </summary>
        /// <returns>A task that completes once the final verification has been awaited.</returns>
        static async Task Nobel()
        {
            // Greedy match up to the last '/', the remainder is captured in the group "name".
            const string afterLastSlash = @"(?:.+\/)((?<name>.+))";

            var verifier = new Verifier();
            var receiver = new ReceiverHtmlTables("https://en.wikipedia.org/wiki/List_of_Nobel_laureates_in_Literature", Encoding.UTF8);

            var data = await receiver.TransformData(null);
            data = await verifier.TransformData(data);

            // Table-level filter on the "Laureate" column, followed by the row filter expression.
            data = await new FilterTablesWithColumn("Laureate").TransformData(data);
            data = await verifier.TransformData(data);

            data = await new FilterColumnData("Laureate", "Laureate not like '*ohn*'").TransformData(data);
            data = await verifier.TransformData(data);

            // Presumably copies an HTML attribute value into a new column: href -> LaureateWiki,
            // src -> PictureUrl.
            data = await new TransformerHTMLAttribute("Laureate_html", "href", "LaureateWiki").TransformData(data);
            data = await verifier.TransformData(data);

            data = await new TransformerHTMLAttribute("Picture_html", "src", "PictureUrl").TransformData(data);
            data = await verifier.TransformData(data);

            // The table name comes from the data as it looks right now, so this transformer
            // can only be constructed at this point of the pipeline.
            var firstTableName = data.Metadata.Tables.First().Name;
            data = await new TransformerAddColumnExpressionByTable(firstTableName, "'https://en.wikipedia.org'+ LaureateWiki ", "LaureateFullWiki").TransformData(data);
            data = await verifier.TransformData(data);

            // Constructor arguments forwarded to the nested receiver/transformer pair.
            var nestedArguments = new StankinsCommon.CtorDictionary()
            {
                { nameof(Encoding), Encoding.UTF8 }
            };
            var laureatePages = new TransformerOneTableToMulti<BaseObjectInSerial<ReceiverHtmlList, TransformerToOneTable>>(
                "file", "LaureateFullWiki", nestedArguments);
            data = await laureatePages.TransformData(data);
            data = await verifier.TransformData(data);

            // Two regex filters on li_html: "(" or ", " followed by four digits, then a literal "<i>".
            data = await new FilterColumnDataWithRegex("li_html", @"(\([0-9]{4})|(, [0-9]{4})").TransformData(data);
            data = await verifier.TransformData(data);

            data = await new FilterColumnDataWithRegex("li_html", @"[<]i[>]").TransformData(data);
            data = await verifier.TransformData(data);

            data = await new TransformTrim().TransformData(data);
            data = await verifier.TransformData(data);

            // No verification between the length filter and the column removals below.
            data = await new FilterRemoveColumnDataGreaterThanLength("li_html", 400).TransformData(data);

            // Remove the columns one after another, in this exact order.
            string[] columnsToRemove =
            {
                "li_html",
                "Picture",
                "Year_html",
                "Genre(s)_html",
                "LaureateWiki",
                "Country",
                "Picture_html",
                "Laureate_html",
                "Country_html",
                "Language(s)_html",
                "Citation_html",
            };
            foreach (var columnName in columnsToRemove)
            {
                data = await new FilterRemoveColumn(columnName).TransformData(data);
            }
            data = await verifier.TransformData(data);

            data = await new AddColumnRegex("LaureateFullWiki_origin", @"(?:.+\/)((?<nameAuthor>.+))").TransformData(data);
            data = await verifier.TransformData(data);

            // These two steps run back to back without a verification pass in between.
            data = await new AddColumnRegex("LaureateFullWiki", afterLastSlash).TransformData(data);
            data = await new FilterRemoveColumn("LaureateFullWiki_origin").TransformData(data);

            data = await new ChangeTableNamesRegex(afterLastSlash).TransformData(data);
            data = await verifier.TransformData(data);

            data = await new ChangeColumnName("li", "bookName").TransformData(data);
            data = await verifier.TransformData(data);

            // The Excel output is written before the tables are filtered and merged below.
            data = await new SenderExcel(@"D:\test\nobel.xlsx").TransformData(data);

            data = await new FilterTablesWithColumn("bookName").TransformData(data);
            data = await verifier.TransformData(data);

            data = await new TransformerToOneTable().TransformData(data);
            data = await verifier.TransformData(data);

            // The template is read only at this point, from the current working directory.
            var template = System.IO.File.ReadAllText("sqliteCreation.txt");
            data = await new SenderRazorTableOneByOne(template, @"D:\test\").TransformData(data);
            data = await verifier.TransformData(data);
        }