/// <summary>
/// Creates a fresh <see cref="ExtractionLink"/>, fills it from the current position of
/// <paramref name="reader"/> via the three-argument overload, and returns it.
/// </summary>
/// <param name="reader">XML reader the link definition is read from.</param>
/// <param name="config">Website configuration forwarded unchanged to the populating overload.</param>
/// <returns>The newly created and populated extraction link.</returns>
private static ExtractionLink ReadExtractionLinkSection(XmlReader reader, WebsiteConfig config)
{
    var result = new ExtractionLink();

    // All actual parsing lives in the overload that fills an existing instance.
    ReadExtractionLinkSection(reader, config, result);

    return result;
}
/// <summary>
/// Lazily builds one <see cref="ResourceLink"/> for every value stored in
/// <c>ExtractedItems</c> under the name of <paramref name="extractionLink"/>.
/// </summary>
/// <remarks>
/// This method is an iterator: no work is done until the returned sequence is enumerated,
/// and each link is built only when the consumer asks for it. If nothing was extracted
/// under the link's name the sequence is empty.
/// </remarks>
/// <param name="extractionLink">
/// Link definition supplying the name to look up, the link type, HTTP method,
/// parameters, headers and the predefined extraction items.
/// </param>
/// <returns>
/// A <see cref="DocumentLink"/>, <see cref="FileLink"/> or <see cref="AutoDetectLink"/>
/// per extracted value, chosen by <c>extractionLink.Type</c>.
/// </returns>
/// <exception cref="NotSupportedException">
/// Thrown during enumeration when <c>extractionLink.Type</c> is not one of
/// <c>Document</c>, <c>File</c> or <c>Auto</c>.
/// </exception>
protected IEnumerable<ResourceLink> ExtractResourceLinks(ExtractionLink extractionLink)
{
    if (!ExtractedItems.ContainsKey(extractionLink.Name))
    {
        yield break;
    }

    // Look the value list up once instead of re-indexing the dictionary
    // on every loop test and every element access.
    var linkValues = ExtractedItems[extractionLink.Name];

    for (var i = 0; i < linkValues.Count; i++)
    {
        var linkValue = linkValues[i];
        var linkInDocumentPositionPointer = new ResponseParserPositionPointer(extractionLink.Location, i);

        // Predefined items are either located relative to this particular link occurrence
        // or not scoped at all (null); the choice is the same for every item of the link.
        var predefinedItemsScope = extractionLink.IsPredefinedExtractionItemsLocationRelativeToLink
            ? linkInDocumentPositionPointer
            : (ResponseParserPositionPointer?)null;

        // Each produced link gets its own private bag of extracted items.
        var linkScopedExtractedItems = new CollectionDictionary<string, string>();
        foreach (var extractionItem in extractionLink.PredefinedExtractionItems.Values)
        {
            ExtractItem(
                extractionItem,
                extractionLink.PredefinedExtractionItems,
                linkScopedExtractedItems,
                predefinedItemsScope
            );
        }

        // Parameters and headers are resolved separately for every link on purpose:
        // the resolved instances are not shared between links.
        ResourceLink resourceLink;
        switch (extractionLink.Type)
        {
            case ExtractionLink.LinkTypes.Document:
                resourceLink = new DocumentLink(
                    linkValue,
                    extractionLink.HttpMethod,
                    DependencyDataSource.Resolve(extractionLink.Parameters),
                    DependencyDataSource.Resolve(extractionLink.Headers),
                    extractionLink.ExtractLinks,
                    extractionLink.ExtractData,
                    _documentLink.Config,
                    _documentLink.Job,
                    linkScopedExtractedItems,
                    _documentLink
                );
                break;

            case ExtractionLink.LinkTypes.File:
                // File links take no HTTP method, extraction flags or scoped items.
                resourceLink = new FileLink(
                    linkValue,
                    DependencyDataSource.Resolve(extractionLink.Parameters),
                    DependencyDataSource.Resolve(extractionLink.Headers),
                    _documentLink.Config,
                    _documentLink.Job,
                    _documentLink
                );
                break;

            case ExtractionLink.LinkTypes.Auto:
                resourceLink = new AutoDetectLink(
                    linkValue,
                    extractionLink.HttpMethod,
                    DependencyDataSource.Resolve(extractionLink.Parameters),
                    DependencyDataSource.Resolve(extractionLink.Headers),
                    extractionLink.ExtractLinks,
                    extractionLink.ExtractData,
                    _documentLink.Config,
                    _documentLink.Job,
                    linkScopedExtractedItems,
                    _documentLink
                );
                break;

            default:
                throw new NotSupportedException(
                    $"Link type '{extractionLink.Type}' is not supported (link '{extractionLink.Name}').");
        }

        yield return resourceLink;
    }
}
/// <summary>
/// Registers the resource links produced for <paramref name="extractionLink"/> in
/// <c>ExtractedLinks</c>, keyed by the link's name.
/// </summary>
/// <param name="extractionLink">Link definition whose resource links are to be collected.</param>
protected void ExtractLink(ExtractionLink extractionLink)
{
    var linkName = extractionLink.Name;

    // ExtractResourceLinks is an iterator, so this only creates the sequence;
    // when it is enumerated is decided by AddValues.
    var resourceLinks = ExtractResourceLinks(extractionLink);

    ExtractedLinks.AddValues(linkName, resourceLinks);
}
/// <summary>
/// Populates an existing <see cref="ExtractionLink"/> from the element <paramref name="reader"/>
/// is positioned on: first its attributes, then its child sections.
/// </summary>
/// <remarks>
/// Attributes read here: <c>extract_links</c>, <c>extract_data</c>, <c>method</c>, <c>type</c>
/// and <c>depends_on</c>. Child sections handled: <c>predefined_items</c>, <c>parameters</c>,
/// <c>headers</c> and <c>post_processors</c>; any other child name is ignored by the callback.
/// </remarks>
/// <param name="reader">XML reader the link definition is read from.</param>
/// <param name="config">Website configuration forwarded to the nested section readers.</param>
/// <param name="extractionLink">Instance to fill; its current values are passed as fallbacks for most attributes.</param>
private static void ReadExtractionLinkSection(XmlReader reader, WebsiteConfig config, ExtractionLink extractionLink)
{
    // Attributes handled by the shared extraction-item reader come first.
    ReadExtractionItemAttributes(extractionLink, reader, config);

    // For these three the link's current value is passed as the fallback argument.
    extractionLink.ExtractLinks = reader.GetAttribute("extract_links", extractionLink.ExtractLinks);
    extractionLink.ExtractData = reader.GetAttribute("extract_data", extractionLink.ExtractData);
    extractionLink.HttpMethod = reader.GetAttribute<string>("method", extractionLink.HttpMethod);

    // Unlike the attributes above, "type" is given a fixed fallback of Auto.
    extractionLink.Type = reader.GetAttribute("type", ExtractionLink.LinkTypes.Auto);

    // "depends_on" is a list separated by ',', ';' or '|'; empty entries are dropped.
    // When the attribute read yields null, DependsOn is set to null as well.
    var dependsOnSeparators = new[] { ',', ';', '|' };
    var dependsOnRaw = reader.GetAttribute<string>("depends_on", null);
    extractionLink.DependsOn = dependsOnRaw?.Split(dependsOnSeparators, StringSplitOptions.RemoveEmptyEntries);

    reader.ProcessChildren((sectionName, sectionReader) =>
    {
        switch (sectionName)
        {
            case "predefined_items":
                // The "relative" attribute is read before the section's items are parsed.
                extractionLink.IsPredefinedExtractionItemsLocationRelativeToLink = sectionReader.GetAttribute(
                    "relative",
                    extractionLink.IsPredefinedExtractionItemsLocationRelativeToLink);
                extractionLink.PredefinedExtractionItems = ReadExtractionItemsSection(sectionReader, config);
                break;

            case "parameters":
                extractionLink.Parameters = ReadExtractionLinkParametersSection(sectionReader);
                break;

            case "headers":
                extractionLink.Headers = ReadHttpHeadersSection(sectionReader);
                break;

            case "post_processors":
                ReadExtractionItemPostProcessors(extractionLink, sectionReader);
                break;
        }
    });
}