/// <summary>
/// Resolves <c>YtWeb</c> from the test context and materializes the video listing
/// of one hard-coded channel into a list.
/// </summary>
public static async Task ChannelVideos()
{
    const string channelId = "UChN7H3JFqeFC-WB8NCxhn7g";

    using var ctx = await TestSetup.TextCtx();
    var web = ctx.Scope.Resolve<YtWeb>();

    // Enumerate the whole async sequence so every page is actually requested.
    var videoStream = web.ChannelVideos(channelId, ctx.Log);
    var videos = await videoStream.ToListAsync();
}
/// <summary>
/// Requests only the <c>ExtraPart.EComment</c> part of the extra data for a single
/// hard-coded video through <c>YtWeb.GetExtra</c>.
/// </summary>
public static async Task ExtraParts()
{
    using var testCtx = await TestSetup.TextCtx();
    var web = testCtx.Scope.Resolve<YtWeb>();

    var requestedParts = new[] { ExtraPart.EComment };
    var videoExtra = await web.GetExtra(testCtx.Log, "O63NEnuJupU", requestedParts);
}
/// <summary>
/// Builds a <c>VideoExtraPlans</c> from a hand-picked list of video ids and runs
/// <c>YtCollector.GetExtras</c> over it, materializing the results.
/// </summary>
public static async Task WebRecsAndExtra()
{
    using var testCtx = await TestSetup.TextCtx();

    // Only the first id is active. The rest are kept as a catalogue of interesting
    // cases that can be re-enabled when investigating a specific problem.
    var videoIds = new[] {
        "Su1FQUkMojU", // JP video with lots of comments
        // "V8kxdw0UASE", // should work. looks like it was errored and then re-instated
        // "XztR0CnVKNo", // normal
        // "JPiiySjShng", // nbc suspected parsing problem
        // "OijWK4Y6puI", // unlisted
        // "-sc6JCu5rZk",
        // "y3oMtX8NyqY", // copyright2
        // "EqulyMs_M2M", // copyright1
        // "-6oswxLuRyk",
        // "tdUxfq6DYXY", // when retrieved was var ytInitialData instead of window["ytInitialData"]
        // "gRJnTYHID3w", // var ytInitialData instead of window["ytInitialData"]
        // "MbXbFchrTgw",
        // "rBu0BRTx2x8", // region restricted (not available in AU, but is in US)
        // "-ryPLVEExA0", // private
    };
    var extraPlans = new VideoExtraPlans(videoIds);

    var ytCollector = testCtx.Scope.Resolve<YtCollector>();
    var extras = await ytCollector.GetExtras(extraPlans, testCtx.Log).ToListAsync();
}
/// <summary>
/// Calls <c>YtClient.ChannelData</c> with <c>full: true</c> for three hard-coded
/// channel ids, dispatching the calls through <c>BlockFunc</c>.
/// </summary>
public static async Task ChannelData()
{
    using var testCtx = await TestSetup.TextCtx();
    var client = testCtx.Resolve<YtClient>();

    var channelIds = new[] {
        "UCMDxbhGcsE7EnknxPEzC_Iw",
        "UCHEf6T_gVq4tlW5i91ESiWg",
        "UCYeF244yNGuFefuFKqxIAXw"
    };
    var channels = await channelIds.BlockFunc(id => client.ChannelData(id, full: true));
}
/// <summary>
/// Lists the caption tracks of a hard-coded video, picks the first track whose
/// language code is "en", and downloads that closed-caption track.
/// </summary>
public static async Task Captions()
{
    using var testCtx = await TestSetup.TextCtx();
    var web = testCtx.Scope.Resolve<YtWeb>();

    var availableTracks = await web.GetCaptionTracks("yu_C_K3TuyY", testCtx.Log);
    var englishTrack = availableTracks.First(track => track.Language.Code == "en");

    var closedCaptions = await web.GetClosedCaptionTrackAsync(englishTrack, testCtx.Log);
}
/// <summary>
/// Opens a connection through <c>SnowflakeConnectionProvider</c> and runs a small
/// fixed query, mapping the rows to <c>VideoResult</c>.
/// </summary>
public static async Task TestSfQuery()
{
    const string sql = "select video_id from video_latest limit 10";

    using var testCtx = await TestSetup.TextCtx();
    var provider = testCtx.Scope.Resolve<SnowflakeConnectionProvider>();

    using var connection = await provider.Open(testCtx.Log);
    var rows = await connection.Query<VideoResult>("test", sql);
}
/// <summary>
/// Parses every <c>*.html</c> file under <c>Tests/WatchPageHtml</c> in the solution
/// directory and feeds each parsed document to <c>YtWeb.GetRecs2</c> with a
/// placeholder video id.
/// </summary>
public static async Task WatchPageParsing()
{
    using var x = await TestSetup.TextCtx();
    var docs = Setup.SolutionDir.Combine("Tests", "WatchPageHtml")
        .Files("*.html")
        .Select(f =>
        {
            // Dispose the reader once the file content has been read, so the
            // file handle is not left open for every fixture.
            using var reader = f.OpenText();
            return Html.ParseDocument(reader.ReadToEnd());
        });
    var scrape = x.Resolve<YtWeb>();

    // ToList forces the lazy pipeline: files are read and parsed here.
    var clientObjects = docs.Select(d => scrape.GetRecs2(x.Log, d, "(fake video id)")).ToList();
}
/// <summary>
/// Fetches the comment part of the extra data for a hard-coded video and writes
/// the comments to <c>comments.jsonl</c>.
/// </summary>
public static async Task VideoComments()
{
    // Open question this was written to answer: does the watch page html carry the comments?
    using var testCtx = await TestSetup.TextCtx();
    var web = testCtx.Scope.Resolve<YtWeb>();

    var requestedParts = new[] { ExtraPart.EComment };
    var videoExtra = await web.GetExtra(testCtx.Log, "NjJ2YEBK3Ic", requestedParts);

    var comments = videoExtra.Comments;
    await comments.ToJsonl("comments.jsonl");
}
/// <summary>
/// Runs <c>PipeApp.MakeAndSum</c> locally through a <c>PipeCtx</c> backed by a
/// development blob store and asserts that the run metadata reports no error.
/// </summary>
/// <remarks>
/// Relies on a local dev instance: use VS Code to start an Azurite blob service
/// with a container called <c>pipe</c>.
/// </remarks>
public static async Task TestPipeApp()
{
    // Dispose the test context like the other tests in this file do.
    using var ctx = await TestSetup.TextCtx();

    var b = new ContainerBuilder();
    b.RegisterType<PipeApp>();
    b.Register(_ => ctx.Log).As<ILogger>();
    var scope = b.Build();

    var store = new AzureBlobFileStore("UseDevelopmentStorage=true", "pipe", ctx.Log);
    var pipeCtx = new PipeCtx(new (), new (scope, typeof(PipeApp)), store, ctx.Log);

    var res = await pipeCtx.Run(
        (PipeApp app) => app.MakeAndSum((int)15L, 1.Thousands(), DataStoreType.Backup),
        new () { Location = PipeRunLocation.Local });

    res.Metadata.Error.Should().BeFalse();
}
/// <summary>
/// Runs <c>ChromeScraper.GetRecsAndExtra</c> over a short list of hard-coded
/// video ids.
/// </summary>
public static async Task ChromeRecsAndExtra()
{
    using var testCtx = await TestSetup.TextCtx();
    var scraper = testCtx.Resolve<ChromeScraper>();

    var videoIds = new[] {
        "rBu0BRTx2x8", // region restricted (not available in AU, but is in US)
        "-ryPLVEExA0", // private
        // Disabled cases:
        // "Ms9WOSXU5tY", "n_vzBGB3F_Y",
        // "xxQOtOCbASs", // tall
        // "DLq1DUcMh1Q"
    };

    var recsAndExtra = await scraper.GetRecsAndExtra(videoIds, testCtx.Log);
}
/// <summary>
/// Mocks HTTP 429 responses for youtube.com calls, starts a <c>GetExtra</c>
/// request, re-enables real HTTP after five seconds, and asserts that the
/// scraper's client ended up with <c>UseProxy</c> set.
/// </summary>
public static async Task TestProxyFallback()
{
    using var testCtx = await TestSetup.TextCtx();
    var web = testCtx.Scope.Resolve<YtWeb>();

    using var http = new HttpTest();
    var throttledCalls = http.ForCallsTo("*youtube.com*")
        .RespondWith("mock too many requests failure", status: 429);

    // Start the request without awaiting, so it keeps hitting the mocked 429s during the delay.
    var requestedParts = new[] { ExtraPart.EComment };
    var pendingExtra = web.GetExtra(testCtx.Log, "Su1FQUkMojU", requestedParts);

    await 5.Seconds().Delay();
    throttledCalls.AllowRealHttp();
    var videoExtra = await pendingExtra;

    // By now it should have fallen back to the proxy and retried once or twice within the 5 seconds.
    web.Client.UseProxy.Should().Be(true);
}
/// <summary>
/// Runs a three-task <c>TaskGraph</c> (<c>Shorten</c>, <c>Generate</c>,
/// <c>NotDependent</c>) with a parallelism of 2 and asserts the final status of
/// each task: <c>Generate</c> ends in Error, <c>Shorten</c> is Cancelled, and
/// <c>NotDependent</c> succeeds.
/// </summary>
public async Task TestGraphRunner()
{
    // Dispose the test context like the other tests in this file do.
    using var ctx = await TestSetup.TextCtx();

    var res = await TaskGraph.FromMethods(
            (l, c) => Shorten(l),
            (l, c) => Generate(l, true),
            (l, c) => NotDependent(l))
        .Run(parallel: 2, ctx.Log, CancellationToken.None);

    var resByName = res.ToKeyedCollection(r => r.Name);
    resByName[nameof(Generate)].FinalStatus.Should().Be(GraphTaskStatus.Error);
    resByName[nameof(Shorten)].FinalStatus.Should().Be(GraphTaskStatus.Cancelled);
    resByName[nameof(NotDependent)].FinalStatus.Should().Be(GraphTaskStatus.Success);

    ctx.Log.Information("Res {Res}, Shortened {Values}", res.Join("\n"), shortened);
}
/// <summary>
/// Pushes ten million items from <c>MakeItems</c> through <c>BlockBatch</c>,
/// where each batch callback waits one second and returns the batch's item
/// count, then logs the measured duration and asserts that the counts sum to the
/// number of items produced.
/// </summary>
public static async Task TestBatchBlock()
{
    // Dispose the test context like the other tests in this file do. The
    // callback below captures it, but all batches finish before the method exits.
    using var ctx = await TestSetup.TextCtx();
    ctx.Log.Information("TestBatchBlock started");

    var numItems = 10_000_000;
    // NOTE(review): 100_000 and 8 look like batch size and parallelism; confirm against BlockBatch.
    var (res, dur) = await MakeItems(numItems, ctx.Log)
        .BlockBatch(async (b, i) =>
        {
            await 1.Seconds().Delay();
            ctx.Log.Debug("batch {Batch} processed", i);
            return b.Count;
        }, 100_000, 8)
        .WithDuration();

    ctx.Log.Information("Processing {Items} took {Duration} {Speed}", numItems, dur.HumanizeShort(), (numItems / 1000).Speed("K items", dur).Humanize());
    res.Sum().Should().Be(numItems);
}
/// <summary>
/// Enables the proxy on the <c>AmazonWeb</c> client and runs
/// <c>ProcessLinks</c> over a hand-picked list of Amazon links.
/// </summary>
public static async Task TestAmazonProduct()
{
    using var testCtx = await TestSetup.TextCtx();
    var amazon = testCtx.Scope.Resolve<AmazonWeb>();
    amazon.FlurlClient.UseProxy = true;

    // Only the first link is active; the rest document previously investigated cases.
    var productLinks = new[] {
        "https://www.amazon.com/shop/sadiealdis", // unable to be decoded
        // "https://amzn.to/2DGXN96", // empty response
        // "https://amzn.to/39tl3nX", // empty response
        // "https://www.amazon.com/Manfrotto-MKCOMPACTACN-BK-Compact-Action-Tripod/dp/B07JMQJKC8?th=1", // comments leaking into props
        // "https://www.amazon.com/gp/product/B005VYCFXA", // product details in bullet form
        // "https://www.amazon.com/gp/product/B07TC76671",
        // "https://amzn.to/2ZMojrd"
    };

    var processed = await amazon.ProcessLinks(productLinks, testCtx.Log, cancel: default);
}
/// <summary>
/// For each hard-coded channel id, loads the channel through
/// <c>YtWeb.Channel</c> and materializes its subscriptions and videos,
/// collecting the results into a list.
/// </summary>
public static async Task Channels()
{
    using var testCtx = await TestSetup.TextCtx();
    var web = testCtx.Scope.Resolve<YtWeb>();

    var channelIds = new[] {
        "UCdfQFG50Hu88-1CpRmPDA2A", // user channel with pagination of subs
        "UChN7H3JFqeFC-WB8NCxhn7g", // error - unavailable
        "UCaJ8FsMMnefU7NXdMaXW8WQ", // error - terminated
        "UCdQ5jrBSBEOUKr91f6zucag", // user
        "UCUowFWIWGw6Pv2JqfEj8njQ", // channel
    };

    var channels = await channelIds
        .BlockTrans(async channelId =>
        {
            var channel = await web.Channel(testCtx.Log, channelId);
            // Subscriptions are fetched before videos, one after the other.
            var subscriptions = await channel.Subscriptions().ToListAsync();
            var videos = await channel.Videos().ToListAsync();
            return new { Chan = channel, Subscriptions = subscriptions, Vids = videos };
        })
        .ToListAsync();
}