/// <summary>
/// Creates one DownloadWorkItem per entry in _urlsToDownload, places each on
/// _downloadQueue, executes DoWork through a ParallelExecution configured with
/// _threadCount, and then gathers the FilePath of every work item.
/// </summary>
/// <returns>
/// A DownloadResults containing one (url, FilePath) result for each URL that was queued.
/// </returns>
public DownloadResults Download()
{
    // One progress object, constructed with the URL count, is shared by all work items.
    TickableProgressTick sharedProgress = new TickableProgressTick(_progressHost, _urlsToDownload.Count);

    // Remember each work item by URL so its FilePath can be read back afterwards.
    Hashtable itemsByUrl = new Hashtable();
    foreach (string url in _urlsToDownload.Keys)
    {
        // The value stored against each URL is passed as the work item's timeout.
        int timeout = (int)_urlsToDownload[url];
        DownloadWorkItem item = new DownloadWorkItem(url, timeout, sharedProgress);
        itemsByUrl.Add(url, item);
        _downloadQueue.Enqueue(item);
    }

    // NOTE(review): the results are read immediately after Execute(), so Execute()
    // presumably blocks until the workers are done - confirm against ParallelExecution.
    new ParallelExecution(new ThreadStart(DoWork), _threadCount).Execute();

    DownloadResults results = new DownloadResults();
    foreach (DictionaryEntry entry in itemsByUrl)
    {
        DownloadWorkItem finished = (DownloadWorkItem)entry.Value;
        results.AddResult((string)entry.Key, finished.FilePath);
    }

    return results;
}
/// <summary>
/// Initializes a work item by storing the supplied URL, timeout and progress object.
/// </summary>
/// <param name="url">Stored in _url.</param>
/// <param name="timeout">Stored in _timeout (units are not visible from this constructor).</param>
/// <param name="tickableProgress">Stored in _tickableProgress.</param>
public DownloadWorkItem(string url, int timeout, TickableProgressTick tickableProgress)
{
    // Plain field captures; no validation is performed here.
    _tickableProgress = tickableProgress;
    _timeout = timeout;
    _url = url;
}
/// <summary>
/// Builds thinned HTML text starting at the given element by delegating the
/// traversal to StripChildNodes.
/// </summary>
/// <param name="startElement">Element to start from; when null nothing is traversed.</param>
/// <param name="preserveImages">Forwarded unchanged to StripChildNodes.</param>
/// <param name="progressHost">Host passed to the TickableProgressTick created for this run.</param>
/// <returns>The text accumulated by StripChildNodes, or an empty string when startElement is null.</returns>
public static string Thin(IHTMLElement startElement, bool preserveImages, IProgressHost progressHost)
{
    if (startElement == null)
    {
        return string.Empty;
    }

    // Progress is constructed with the length of startElement.all plus one
    // (presumably the extra one accounts for startElement itself - confirm).
    int tickCount = ((IHTMLElementCollection)startElement.all).length + 1;
    TickableProgressTick progress = new TickableProgressTick(progressHost, tickCount);

    // NOTE(review): StripChildNodes also continues through the nextSibling chain of the
    // node it is given, so siblings following startElement are processed too - confirm
    // that this is intended.
    StringBuilder thinned = new StringBuilder();
    StripChildNodes((IHTMLDOMNode)startElement, thinned, preserveImages, progress);
    return thinned.ToString();
}
/// <summary>
/// Builds a DownloadWorkItem for every URL in _urlsToDownload, enqueues them on
/// _downloadQueue, executes DoWork via ParallelExecution using _threadCount, and
/// reports the FilePath recorded on each work item.
/// </summary>
/// <returns>A DownloadResults with one (url, FilePath) result per queued URL.</returns>
public DownloadResults Download()
{
    // All work items share one progress object constructed with the number of URLs.
    int urlCount = _urlsToDownload.Count;
    TickableProgressTick progress = new TickableProgressTick(_progressHost, urlCount);

    // url -> work item, kept so the outcome of each item can be collected later.
    Hashtable pending = new Hashtable();
    foreach (string url in _urlsToDownload.Keys)
    {
        // The value mapped to the URL is used as the work item's timeout argument.
        DownloadWorkItem queued = new DownloadWorkItem(url, (int)_urlsToDownload[url], progress);
        pending.Add(url, queued);
        _downloadQueue.Enqueue(queued);
    }

    ThreadStart worker = new ThreadStart(DoWork);
    ParallelExecution runner = new ParallelExecution(worker, _threadCount);
    runner.Execute();

    DownloadResults collected = new DownloadResults();
    foreach (string url in pending.Keys)
    {
        DownloadWorkItem completed = (DownloadWorkItem)pending[url];
        collected.AddResult(url, completed.FilePath);
    }

    return collected;
}
/// <summary>
/// Used as a part of HTML thinning: walks <paramref name="node"/>, its descendants and
/// its following siblings, appending a reduced form of the markup to
/// <paramref name="escapedText"/>. Text nodes are HTML-encoded. Tags found in the active
/// preserve list are kept (their attributes are written by StripAttributes). Tags found in
/// ReplaceTags are emitted under their replacement name. Any other tag is dropped, but its
/// children are still processed.
/// </summary>
/// <param name="node">The first node to process; processing continues along its nextSibling chain. A null node is a no-op.</param>
/// <param name="escapedText">Receives the thinned HTML</param>
/// <param name="preserveImages">When true PreserveTagsWithImages is used as the preserve list, otherwise PreserveTags</param>
/// <param name="progress">Ticked once for every non-text node visited</param>
private static void StripChildNodes(IHTMLDOMNode node, StringBuilder escapedText, bool preserveImages, TickableProgressTick progress)
{
    ArrayList preserveTags = preserveImages ? PreserveTagsWithImages : PreserveTags;

    // Siblings are walked with a loop rather than by recursion so that a long run of
    // sibling nodes does not add one stack frame per sibling; only descending into
    // children recurses.
    for (IHTMLDOMNode current = node; current != null; current = current.nextSibling)
    {
        // is this a text node? If so, just append its encoded text
        if (current.nodeType == HTMLDocumentHelper.HTMLDOMNodeTypes.TextNode)
        {
            escapedText.Append(HttpUtility.HtmlEncode(current.nodeValue.ToString()));
            continue;
        }

        progress.Tick();

        string nodeName = current.nodeName;

        // Markup to append once the children have been written; null when nothing was
        // left open. It is decided here, at the point the tag is opened, so that the
        // closing markup always matches the branch that produced the opening markup
        // (the preserve list in effect may be PreserveTagsWithImages, not PreserveTags).
        string closingMarkup = null;

        if (preserveTags.Contains(nodeName))
        {
            // Append the opening tag element, with any extraneous attributes stripped
            escapedText.Append("<" + nodeName);
            StripAttributes((IHTMLElement)current, escapedText);

            if (!current.hasChildNodes())
            {
                // No children: close the tag right away. IFrame is written with an
                // explicit end tag instead of the self-closing form.
                if (nodeName == HTMLTokens.IFrame)
                {
                    escapedText.Append("></" + nodeName + ">");
                }
                else
                {
                    escapedText.Append("/>");
                }
            }
            else
            {
                // the element has children, leave the tag open
                escapedText.Append(">");
                closingMarkup = "</" + nodeName + ">";
            }
        }
        else if (ReplaceTags.Contains(nodeName))
        {
            string replacement = (string)ReplaceTags[nodeName];

            if (!current.hasChildNodes())
            {
                // If there are no children, just emit the replacement tag
                escapedText.Append("<" + replacement + "/>");
            }
            else if (IsChildlessTag(replacement))
            {
                // A childless replacement cannot wrap the children, so it is emitted
                // in self-closed form after them.
                closingMarkup = "<" + replacement + "/>";
            }
            else
            {
                escapedText.Append("<" + replacement + ">");
                closingMarkup = "</" + replacement + ">";
            }
        }

        // Children are processed whether or not the current tag itself was emitted.
        IHTMLDOMNode firstChild = current.firstChild;
        if (firstChild != null)
        {
            StripChildNodes(firstChild, escapedText, preserveImages, progress);
        }

        // put a closing tag in for the current element (because we left it open in case of children)
        if (closingMarkup != null)
        {
            escapedText.Append(closingMarkup);
        }
    }
}