示例#1
0
        /// <summary>
        /// Resolves every dependency referenced by <paramref name="stringWithDependencies"/> and yields
        /// each string produced by <c>FormatAllDependencyCombinations</c> for the resolved values.
        /// </summary>
        /// <param name="stringWithDependencies">
        /// The format string together with the names of the dependencies it references. May be <c>null</c>.
        /// </param>
        /// <returns>
        /// A single <c>null</c> element when the argument is <c>null</c>; the unmodified
        /// <c>FormatString</c> when no resolution is required; otherwise every formatted combination.
        /// </returns>
        /// <remarks>
        /// Dependencies are looked up in <c>ExtractedItems</c> first, then in <c>ConfigPredefinedValues</c>,
        /// then in <c>JobPredefinedValues</c> (when it is set). A dependency that is found nowhere is
        /// substituted with an empty string and an error is traced.
        /// </remarks>
        public IEnumerable<string> ResolveAll(StringWithDependencies stringWithDependencies)
        {
            if (stringWithDependencies == null)
            {
                yield return null;
                // Stop here: without this the iterator would continue and dereference the null argument.
                yield break;
            }

            if (!stringWithDependencies.RequiresResolve)
            {
                yield return stringWithDependencies.FormatString;
                // Nothing to resolve, so the format string is the only result.
                yield break;
            }

            var resolvedValues = new CollectionDictionary<string>();

            foreach (var dependencyName in stringWithDependencies.DependencyNames)
            {
                if (ExtractedItems.ContainsKey(dependencyName))
                {
                    var dependencyValues = ExtractedItems[dependencyName];
                    foreach (var dependencyValue in dependencyValues)
                    {
                        resolvedValues.AddValue(dependencyName, dependencyValue);
                    }
                }
                else if (ConfigPredefinedValues.Dictionary.ContainsKey(dependencyName))
                {
                    var dependencyValue = ConfigPredefinedValues.Dictionary[dependencyName];
                    resolvedValues.AddValues(dependencyName, dependencyValue);
                }
                else if (JobPredefinedValues != null && JobPredefinedValues.Dictionary.ContainsKey(dependencyName))
                {
                    var dependencyValue = JobPredefinedValues.Dictionary[dependencyName];
                    resolvedValues.AddValues(dependencyName, dependencyValue);
                }
                else
                {
                    // Fall back to an empty value so formatting can still proceed, and report the problem.
                    resolvedValues.AddValue(dependencyName, string.Empty);

                    // Join the per-item descriptions explicitly; interpolating the LINQ query itself
                    // would only print the iterator's type name.
                    var extractedItemsDescription = string.Join(
                        ", ",
                        ExtractedItems.Select(pred => string.Format("[{0}: {1}]", pred.Key, string.Join(",", pred.Value))));
                    var dependencyNamesDescription = string.Join(",", stringWithDependencies.DependencyNames);

                    Trace.TraceError($"{GetType().Name}.ResolveAll: Could not resolve item {dependencyName} with dependencies [{dependencyNamesDescription}] based on extracted items {extractedItemsDescription}");
                }
            }

            foreach (var combination in FormatAllDependencyCombinations(stringWithDependencies, resolvedValues))
            {
                yield return combination;
            }
        }
示例#2
0
        /// <summary>
        /// Extracts the values of a single extraction item and stores them in
        /// <paramref name="extractedItems"/> under the item's name.
        /// </summary>
        /// <param name="extractionItem">The item to extract.</param>
        /// <param name="extractionItems">All known extraction items; passed on to <c>ExtractDependencies</c>.</param>
        /// <param name="extractedItems">Receives the extracted values; also used to detect items that are already extracted.</param>
        /// <param name="relativeLocationBase">Optional base position passed on to dependency and location extraction.</param>
        /// <remarks>
        /// Values come from the item's constant <c>Value</c> (resolved through <c>DependencyDataSource</c>)
        /// and from its <c>Location</c>. When the item has a <c>Context</c>, the location is evaluated by one
        /// parser per context document instead of by this parser; those parsers are cached per context item
        /// name in <c>ContextItemDocumentParsers</c>.
        /// </remarks>
        protected void ExtractItem(
            ExtractionItem extractionItem,
            IDictionary<string, ExtractionItem> extractionItems,
            CollectionDictionary<string, string> extractedItems,
            ResponseParserPositionPointer? relativeLocationBase = null
            )
        {
            // Nothing to do if the item was already extracted as someone's dependency
            if (extractedItems.ContainsKey(extractionItem.Name))
            {
                return;
            }

            ExtractDependencies(extractionItem, extractionItems, extractedItems, relativeLocationBase);

            var values = new List<string>();

            // Constant value, if specified in config
            if (extractionItem.Value != null)
            {
                var constantValue = DependencyDataSource.Resolve(extractionItem.Value);
                values.Add(constantValue);
            }

            // Values extracted from the page using the selector
            if (extractionItem.Location != null)
            {
                List<DocumentProcessor> parsers;

                if (extractionItem.Context != null)
                {
                    // A Context is specified: a separate parser exists for every context item.
                    // Parsers are created once per context item so the document is not re-parsed
                    // for each element extracted.
                    var contextName = DependencyDataSource.Resolve(extractionItem.Context.ContextItemName);

                    if (!ContextItemDocumentParsers.TryGetValue(contextName, out var contextParsers))
                    {
                        // First use of this context item: register the (still empty) parser list
                        // before any parser is created or run.
                        contextParsers = new List<DocumentProcessor>();
                        ContextItemDocumentParsers[contextName] = contextParsers;

                        var frames = ExtractedFrames[contextName];
                        var links  = frames.OfType<DocumentLink>().ToArray();

                        Debug.Assert(frames.Count == links.Length);

                        var documentStrings = ExtractedItems.GetValues(contextName);

                        Debug.Assert(links.Length == documentStrings.Count);

                        for (var index = 0; index < links.Length; index++)
                        {
                            // TODO: Documents and Downloaded strings order is not the same. Refactor.
                            var documentString = documentStrings[index];
                            var documentLink   = links[index];

                            var parser = _documentLink.Config.CreateDocumentStringParser(
                                documentString,
                                documentLink,
                                extractionItem.Context.ContextDocumentType
                                );

                            // Document parsers and extracted data are shared globally inside the config;
                            // otherwise we get infinite recursion
                            parser.ContextItemDocumentParsers = ContextItemDocumentParsers;
                            parser.ExtractedItems             = ExtractedItems;
                            parser.ExtractedLinks             = ExtractedLinks;
                            parser.ExtractedFrames            = ExtractedFrames;

                            // The parser is added to the shared list before it is run
                            contextParsers.Add(parser);
                            parser.Parse();
                        }
                    }

                    // Work on a copy of the cached list
                    parsers = new List<DocumentProcessor>(contextParsers);
                }
                else
                {
                    // No Context: the item is extracted from the base document, without any complexities
                    parsers = new List<DocumentProcessor> { this };
                }

                foreach (var parser in parsers)
                {
                    var locationValues = parser
                        .ExtractItemValuesFromLocation(extractionItem.Location, relativeLocationBase)
                        .Select(extracted => extracted.ExtractedValue);

                    values.AddRange(locationValues);
                }
            }

            // TODO: Reconsider architecture
            // Links are not post-processed at all and frames are post-processed on download only

            // Store the values, applying post-processing if specified
            extractedItems.AddValues(
                extractionItem.Name,
                extractionItem.PostProcessOnExtraction
                    ? PostProcess(values, extractionItem.PostProcessors, DependencyDataSource)
                    : values
                );
        }