/// <summary>
/// Parses the command line, crawls the input URL with a <c>ParticipantCrawler</c>,
/// extracts the participants and their events, and writes the combined
/// <c>Competition</c> as indented JSON to the output path given in the options.
/// </summary>
/// <param name="args">Raw command-line arguments; parsed into an <c>Options</c> instance.</param>
private static void StartCrawl(string[] args)
{
    var options = new Options();
    if (!CommandLine.Parser.Default.ParseArguments(args, options))
    {
        // The arguments could not be parsed, so there is nothing to crawl.
        return;
    }

    var url = new Uri(options.InputUrl);
    var rootCrawler = new CompositeCrawler();
    var extractor = new ParticipantExtractor(new EventExtractor());
    var serializer = new JsonSerializer
    {
        Formatting = Formatting.Indented,
    };

    rootCrawler.SubCrawlers.Add(new ParticipantCrawler(new System.Reactive.Concurrency.EventLoopScheduler()));

    // Published so that the two queries below share one subscription to the crawl;
    // nothing is pulled from the source until Connect() is called at the end.
    var extraction = rootCrawler.Crawl(url)
        .Do(x => Console.WriteLine("Extracting: " + x.Uri))
        .Take(10) // only the first 10 crawled items are passed on to the extractor
        .SelectMany(extractor.Extract)
        .Publish();

    // Every event across all dancers, de-duplicated by event name, gathered into one list.
    var dances = extraction
        .SelectMany(dancer => dancer.Events)
        .Distinct(ev => ev.Name)
        .ToList();

    // Every extracted dancer, gathered into one list (logged as each one arrives).
    var participants = extraction
        .Do(dancer => Console.WriteLine("Processed: {0} with {1} dances", dancer.Name, dancer.Events.Count))
        .ToList();

    // TimerDisposable lives for the duration of the subscription.
    // NOTE(review): presumably it reports the elapsed time when disposed - confirm.
    Observable.Using(
            () => new TimerDisposable(),
            _ => dances.Zip(
                participants,
                (left, right) => new Competition { Dancers = right, Events = left, Version = 5 }))
        .Do(_ => Console.WriteLine("Finished processing. Starting write back"))
        .Subscribe(
            body =>
            {
                // FileMode.Create creates the output file or overwrites an existing one.
                using (var writer = new JsonTextWriter(new StreamWriter(File.Open(options.Output, FileMode.Create))))
                {
                    serializer.Serialize(writer, body);
                }
            },
            () => Console.WriteLine("Write back completed!"));

    // Start the shared crawl now that every subscriber is attached.
    extraction.Connect();
}
/// <summary>
/// Crawls http://results.o2cm.com/ using a competition crawler that has a
/// scoresheet sub-crawler, runs each crawled item through a <c>FormExtractor</c>
/// and prints every extracted value to the console.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
private static void CrawlO2CM(string[] args)
{
    var root = new CompositeCrawler();

    // Competition crawler with its extractor, plus a nested scoresheet crawler.
    var competitions = new CompetitionCrawler();
    competitions.Extractor = new CompetitionExtractor();
    competitions.SubCrawlers.Add(new ScoresheetCrawler());
    root.SubCrawlers.Add(competitions);

    var forms = new FormExtractor();

    var crawled = root.Crawl(new Uri("http://results.o2cm.com/"));
    var extracted = crawled.SelectMany(forms.Extract);

    // Log each extracted item as it arrives.
    extracted.Subscribe(item => Console.WriteLine("Read event: " + item));
}