Exemple #1
0
        /// <summary>
        /// Creates a queue proxy pre-filled with one <see cref="CrawlingQueueItem"/> per entry
        /// of <paramref name="resourceLinks"/>.
        /// </summary>
        /// <param name="initializationLink">
        /// Optional initialization link; when <c>null</c> the <c>InitializationLink</c> property
        /// is left at whatever value it already has.
        /// </param>
        /// <param name="resourceLinks">Links to enqueue, in enumeration order. Must not be <c>null</c>.</param>
        /// <param name="dependencies">Proxies forwarded unchanged to the base constructor.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="resourceLinks"/> is <c>null</c>.
        /// </exception>
        public CollectionCrawlingQueueProxy(InitializationLink initializationLink, ICollection <ResourceLink> resourceLinks, params CollectionCrawlingQueueProxy[] dependencies)
            : base(dependencies)
        {
            // Fail with a descriptive exception instead of a bare NullReferenceException
            // raised from inside the foreach below.
            if (resourceLinks == null)
            {
                throw new System.ArgumentNullException("resourceLinks");
            }

            // A null initialization link is tolerated: the property is simply not assigned.
            if (initializationLink != null)
            {
                InitializationLink = initializationLink;
            }

            foreach (var resourceLink in resourceLinks)
            {
                QueueItems.Add(new CrawlingQueueItem(resourceLink));
            }
        }
Exemple #2
0
        /// <summary>
        /// Reads an <c>initialization</c> element from <paramref name="reader"/> and builds the
        /// corresponding <see cref="InitializationLink"/>.
        /// </summary>
        /// <remarks>
        /// The <c>url</c> and <c>method</c> attributes are read from the current node
        /// (<c>method</c> falls back to HTTP GET). Child elements named <c>parameters</c>,
        /// <c>headers</c> and <c>extraction</c> are parsed by their dedicated section readers;
        /// any other child element is ignored.
        /// </remarks>
        /// <param name="reader">Reader expected to be positioned on the <c>initialization</c> start element.</param>
        /// <param name="config">Website configuration passed to the extraction reader and to the resulting link.</param>
        /// <param name="job">Website job passed to the resulting link.</param>
        /// <returns>
        /// The constructed link. Its <c>ExtractionItemsOverride</c> is an empty dictionary when
        /// no <c>extraction</c> child is present; parameters and headers are <c>null</c> when
        /// their sections are absent.
        /// </returns>
        private static InitializationLink ReadInitializationDocumentSection(XmlReader reader, WebsiteConfig config, WebsiteJob job)
        {
            // A self-closing <initialization ... /> never produces an EndElement node, so the
            // loop below would otherwise read past it into unrelated sibling content. Capture
            // this before touching the reader any further.
            var hasContent = !reader.IsEmptyElement;

            var url        = reader.GetAttribute("url");
            var httpMethod = reader.GetAttribute <string>("method", System.Net.WebRequestMethods.Http.Get);

            IDictionary <string, string>         parameters = null;
            IDictionary <string, string>         headers    = null;
            IDictionary <string, ExtractionItem> extractionItemsOverride = new Dictionary <string, ExtractionItem>();

            // Advance until the closing </initialization> tag (or end of input) is reached.
            while (hasContent && !(reader.Name == "initialization" && reader.NodeType == XmlNodeType.EndElement) && reader.Read())
            {
                if (!reader.IsStartElement())
                {
                    continue;
                }

                switch (reader.Name)
                {
                case "parameters":
                    parameters = ReadExtractionLinkParametersSection(reader).ToDictionary(pair => pair.Key, pair => pair.Value.FormatString);
                    break;

                case "headers":
                    headers = ReadHttpHeadersSection(reader).ToDictionary(pair => pair.Key, pair => pair.Value.FormatString);
                    break;

                case "extraction":
                    extractionItemsOverride = ReadExtractionItemsSection(reader, config);
                    break;
                }
            }

            // NOTE(review): the meaning of the two boolean arguments is defined by the
            // InitializationLink constructor, which is not visible here - confirm.
            var result = new InitializationLink(
                url,
                httpMethod,
                parameters,
                headers,
                true,
                true,
                config,
                job
                );

            result.ExtractionItemsOverride = extractionItemsOverride;

            return result;
        }