Exemplo n.º 1
0
        /// <summary>
        /// Parses the command-line options, crawls starting at the input URL,
        /// extracts participants and their events from the crawled items, and
        /// serializes the combined <c>Competition</c> as indented JSON to the
        /// output file named in the options.
        /// </summary>
        /// <param name="args">Raw command-line arguments; parsed into <c>Options</c>.</param>
        private static void StartCrawl(string[] args)
        {
            var options = new Options();

            // Nothing to do when the arguments cannot be parsed.
            if (!CommandLine.Parser.Default.ParseArguments(args, options))
            {
                return;
            }

            var url = new Uri(options.InputUrl);

            var rootCrawler = new CompositeCrawler();
            var extractor   = new ParticipantExtractor(new EventExtractor());
            var serializer  = new JsonSerializer
            {
                Formatting = Formatting.Indented,
            };

            rootCrawler.SubCrawlers.Add(new ParticipantCrawler(new System.Reactive.Concurrency.EventLoopScheduler()));

            // Publish() lets both pipelines below share one crawl; nothing flows
            // until Connect() is called at the end of this method.
            var extraction = rootCrawler.Crawl(url)
                             .Do(x => Console.WriteLine("Extracting: " + x.Uri))
                             .Take(10) // only the first 10 crawled items are extracted
                             .SelectMany(extractor.Extract)
                             .Publish();

            // All events across every dancer, de-duplicated by event name.
            var dances = extraction.SelectMany(dancer => dancer.Events)
                         .Distinct(ev => ev.Name).ToList();

            var participants = extraction
                               .Do(dancer => Console.WriteLine("Processed: {0} with {1} dances", dancer.Name, dancer.Events.Count))
                               .ToList();

            // Pair the collected event list with the collected participant list
            // into one Competition. The TimerDisposable lives for the duration of
            // the subscription (presumably to time the run; confirm against its
            // definition).
            var competition = Observable.Using(
                () => new TimerDisposable(),
                _ => dances.Zip(participants, (left, right) => new Competition
                {
                    Dancers = right,
                    Events  = left,
                    Version = 5
                }));

            competition
                .Do(_ => Console.WriteLine("Finished processing.  Starting write back"))
                .Subscribe(
                    body =>
                    {
                        // The output file is opened only here, once the result is ready.
                        using (var writer = new JsonTextWriter(new StreamWriter(File.Open(options.Output, FileMode.Create))))
                        {
                            serializer.Serialize(writer, body);
                        }
                    },
                    () => Console.WriteLine("Write back completed!"));

            // Start the shared crawl now that every subscriber is attached.
            extraction.Connect();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds a crawler tree (root, then competition crawler, then scoresheet
        /// crawler), crawls http://results.o2cm.com/, runs the form extractor over
        /// every crawled item and prints each extracted value to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        private static void CrawlO2CM(string [] args)
        {
            var root = new CompositeCrawler();

            // Competition crawler with its extractor, plus a nested scoresheet crawler.
            var competitions = new CompetitionCrawler();
            competitions.Extractor = new CompetitionExtractor();
            competitions.SubCrawlers.Add(new ScoresheetCrawler());

            root.SubCrawlers.Add(competitions);

            var forms = new FormExtractor();

            var startUri  = new Uri("http://results.o2cm.com/");
            var extracted = root.Crawl(startUri).SelectMany(forms.Extract);

            // Subscribing kicks off the crawl; each extracted item is echoed.
            extracted.Subscribe(item => Console.WriteLine("Read event: " + item));
        }