/// <summary>
/// Resolves every dependency referenced by <paramref name="stringWithDependencies"/> and yields
/// the strings produced by <c>FormatAllDependencyCombinations</c> for the resolved values.
/// </summary>
/// <param name="stringWithDependencies">The string to resolve; may be <c>null</c>.</param>
/// <returns>
/// A single <c>null</c> element when the argument is <c>null</c>; only the unmodified
/// <c>FormatString</c> when <c>RequiresResolve</c> is false; otherwise every combination
/// returned by <c>FormatAllDependencyCombinations</c>.
/// </returns>
/// <remarks>
/// This is an iterator method, so none of the code below runs until the result is enumerated.
/// Each dependency name is looked up in <c>ExtractedItems</c> first, then in
/// <c>ConfigPredefinedValues</c>, then in <c>JobPredefinedValues</c> (when it is not null).
/// A name found in none of them is resolved to an empty string and reported via
/// <c>Trace.TraceError</c>.
/// </remarks>
public IEnumerable<string> ResolveAll(StringWithDependencies stringWithDependencies) {
    if (stringWithDependencies == null) {
        yield return null;
        // Stop here: continuing would dereference the null argument below.
        yield break;
    }

    if (!stringWithDependencies.RequiresResolve) {
        yield return stringWithDependencies.FormatString;
        // Nothing to substitute, so the format string is the only result.
        yield break;
    }

    var resolvedValues = new CollectionDictionary<string>();
    foreach (var dependencyName in stringWithDependencies.DependencyNames) {
        if (ExtractedItems.ContainsKey(dependencyName)) {
            foreach (var dependencyValue in ExtractedItems[dependencyName]) {
                resolvedValues.AddValue(dependencyName, dependencyValue);
            }
        } else if (ConfigPredefinedValues.Dictionary.ContainsKey(dependencyName)) {
            resolvedValues.AddValues(dependencyName, ConfigPredefinedValues.Dictionary[dependencyName]);
        } else if (JobPredefinedValues != null && JobPredefinedValues.Dictionary.ContainsKey(dependencyName)) {
            resolvedValues.AddValues(dependencyName, JobPredefinedValues.Dictionary[dependencyName]);
        } else {
            // Fall back to an empty value so formatting can still proceed, and log the miss.
            resolvedValues.AddValue(dependencyName, string.Empty);

            var dependencyList = string.Join(",", stringWithDependencies.DependencyNames);
            // Join the per-item summaries explicitly; interpolating the Select(...) result
            // directly would print the enumerable's type name instead of its contents.
            var extractedSummary = string.Join(
                ", ",
                ExtractedItems.Select(pred => string.Format("[{0}: {1}]", pred.Key, string.Join(",", pred.Value)))
            );
            Trace.TraceError($"{GetType().Name}.ResolveAll: Could not resolve item {dependencyName} with dependencies [{dependencyList}] based on extracted items {extractedSummary}");
        }
    }

    foreach (var combination in FormatAllDependencyCombinations(stringWithDependencies, resolvedValues)) {
        yield return combination;
    }
}
/// <summary>
/// Extracts the values of a single item and stores them in <paramref name="extractedItems"/>
/// under the item's name. Does nothing when the name is already present there.
/// </summary>
/// <param name="extractionItem">The item to extract.</param>
/// <param name="extractionItems">All known extraction items; passed on to <c>ExtractDependencies</c>.</param>
/// <param name="extractedItems">Destination collection for the extracted values.</param>
/// <param name="relativeLocationBase">Optional base position handed to the location-based extraction.</param>
protected void ExtractItem(
    ExtractionItem extractionItem,
    IDictionary<string, ExtractionItem> extractionItems,
    CollectionDictionary<string, string> extractedItems,
    ResponseParserPositionPointer? relativeLocationBase = null
) {
    // Already extracted earlier as a dependency of some other item.
    if (extractedItems.ContainsKey(extractionItem.Name)) {
        return;
    }

    ExtractDependencies(extractionItem, extractionItems, extractedItems, relativeLocationBase);

    var values = new List<string>();

    // Constant value taken from the config, when one is specified.
    if (extractionItem.Value != null) {
        var constantValue = DependencyDataSource.Resolve(extractionItem.Value);
        values.Add(constantValue);
    }

    // Values pulled out of the page with the item's selector.
    if (extractionItem.Location != null) {
        var parsers = new List<DocumentProcessor>();

        if (extractionItem.Context != null) {
            // With a Context, a separate document parser exists per context item. The parsers
            // are created once per context item and cached, so the same document is not
            // re-parsed for every element extracted from it.
            var contextItemName = DependencyDataSource.Resolve(extractionItem.Context.ContextItemName);
            if (!ContextItemDocumentParsers.TryGetValue(contextItemName, out var contextParsers)) {
                // First request for this context item: register the (still empty) list before
                // filling it, then build one parser per context document.
                contextParsers = new List<DocumentProcessor>();
                ContextItemDocumentParsers[contextItemName] = contextParsers;

                var frames = ExtractedFrames[contextItemName];
                var links = frames.OfType<DocumentLink>().ToArray();
                Debug.Assert(frames.Count == links.Length);

                var documentStrings = ExtractedItems.GetValues(contextItemName);
                Debug.Assert(links.Length == documentStrings.Count);

                for (var index = 0; index < links.Length; index++) {
                    // TODO: Documents and Downloaded strings order is not the same. Refactor.
                    var documentString = documentStrings[index];
                    var documentLink = links[index];
                    var contextParser = _documentLink.Config.CreateDocumentStringParser(
                        documentString,
                        documentLink,
                        extractionItem.Context.ContextDocumentType
                    );

                    // The parser cache and the extracted collections are shared with the new
                    // parser; the original note says that without this sharing the extraction
                    // recurses infinitely.
                    contextParser.ContextItemDocumentParsers = ContextItemDocumentParsers;
                    contextParser.ExtractedItems = ExtractedItems;
                    contextParser.ExtractedLinks = ExtractedLinks;
                    contextParser.ExtractedFrames = ExtractedFrames;

                    contextParsers.Add(contextParser);
                    contextParser.Parse();
                }
            }

            parsers.AddRange(contextParsers);
        } else {
            // No Context: the item comes straight from the base document handled by this instance.
            parsers.Add(this);
        }

        foreach (var parser in parsers) {
            var located = parser.ExtractItemValuesFromLocation(extractionItem.Location, relativeLocationBase);
            values.AddRange(located.Select(found => found.ExtractedValue));
        }
    }

    // TODO: Reconsider architecture
    // Links are not post-processed at all and frames are post-processed on download only
    // Post-processing is applied here only when the item asks for it on extraction.
    extractedItems.AddValues(
        extractionItem.Name,
        extractionItem.PostProcessOnExtraction
            ? PostProcess(values, extractionItem.PostProcessors, DependencyDataSource)
            : values
    );
}