Example #1
0
        /// <summary>
        /// Builds one work item per URL in _urlsToDownload, queues them all, runs DoWork
        /// through a ParallelExecution configured with _threadCount, and then gathers the
        /// file path recorded on every work item.
        /// </summary>
        /// <returns>A DownloadResults holding one (url, FilePath) entry per queued URL</returns>
        public DownloadResults Download()
        {
            // a single progress object, sized to the number of URLs, is handed to every work item
            TickableProgressTick sharedProgress = new TickableProgressTick(_progressHost, _urlsToDownload.Count);
            Hashtable itemsByUrl = new Hashtable();

            foreach (string url in _urlsToDownload.Keys)
            {
                // the value stored against each URL is passed on as the work item's timeout
                int timeout = (int)_urlsToDownload[url];
                DownloadWorkItem item = new DownloadWorkItem(url, timeout, sharedProgress);

                itemsByUrl.Add(url, item);
                _downloadQueue.Enqueue(item);
            }

            // NOTE(review): results are read straight after Execute(), so it presumably
            // blocks until the workers are finished - confirm against ParallelExecution
            ThreadStart worker = new ThreadStart(DoWork);
            ParallelExecution execution = new ParallelExecution(worker, _threadCount);
            execution.Execute();

            DownloadResults results = new DownloadResults();
            foreach (DictionaryEntry entry in itemsByUrl)
            {
                DownloadWorkItem finished = (DownloadWorkItem)entry.Value;
                results.AddResult((string)entry.Key, finished.FilePath);
            }

            return results;
        }
Example #2
0
 /// <summary>
 /// Creates a work item that remembers the URL, the timeout and the progress
 /// object it was given.
 /// </summary>
 /// <param name="url">Stored in _url</param>
 /// <param name="timeout">Stored in _timeout</param>
 /// <param name="tickableProgress">Stored in _tickableProgress</param>
 public DownloadWorkItem(string url, int timeout, TickableProgressTick tickableProgress)
 {
     this._tickableProgress = tickableProgress;
     this._timeout = timeout;
     this._url = url;
 }
Example #3
0
        /// <summary>
        /// Produces the thinned text for an element by running StripChildNodes over it.
        /// </summary>
        /// <param name="startElement">The element to start from; may be null</param>
        /// <param name="preserveImages">Passed through unchanged to StripChildNodes</param>
        /// <param name="progressHost">Host handed to the TickableProgressTick created for this run</param>
        /// <returns>The text accumulated by StripChildNodes, or an empty string when startElement is null</returns>
        public static string Thin(IHTMLElement startElement, bool preserveImages, IProgressHost progressHost)
        {
            // nothing to thin: an empty result, exactly what an untouched builder would yield
            if (startElement == null)
            {
                return string.Empty;
            }

            StringBuilder thinned = new StringBuilder();

            // progress is sized to the number of entries in the element's "all" collection plus one
            IHTMLElementCollection descendants = (IHTMLElementCollection)startElement.all;
            int tickCount = descendants.length + 1;
            TickableProgressTick ticker = new TickableProgressTick(progressHost, tickCount);

            StripChildNodes((IHTMLDOMNode)startElement, thinned, preserveImages, ticker);

            return thinned.ToString();
        }
        /// <summary>
        /// Creates and queues a DownloadWorkItem for each URL in _urlsToDownload, executes
        /// DoWork via ParallelExecution using _threadCount, then reports each work item's
        /// FilePath keyed by its URL.
        /// </summary>
        /// <returns>The collected (url, FilePath) pairs</returns>
        public DownloadResults Download()
        {
            int urlCount = _urlsToDownload.Count;
            TickableProgressTick progress = new TickableProgressTick(_progressHost, urlCount);

            // remembers every queued item so its FilePath can be read back after execution
            Hashtable queuedItems = new Hashtable();
            foreach (string url in _urlsToDownload.Keys)
            {
                DownloadWorkItem queued = new DownloadWorkItem(
                    url,
                    (int)_urlsToDownload[url],   // value stored per URL becomes the timeout argument
                    progress);

                queuedItems.Add(url, queued);
                _downloadQueue.Enqueue(queued);
            }

            ParallelExecution runner = new ParallelExecution(new ThreadStart(DoWork), _threadCount);
            runner.Execute();

            DownloadResults collected = new DownloadResults();
            foreach (string url in queuedItems.Keys)
            {
                DownloadWorkItem done = (DownloadWorkItem)queuedItems[url];
                collected.AddResult(url, done.FilePath);
            }

            return collected;
        }
Example #5
0
        /// <summary>
        /// Used as a part of HTML thinning: appends a thinned rendering of a node, of its
        /// descendants and of all its following siblings to <paramref name="escapedText"/>.
        /// Text nodes are HTML-encoded; tags found in the preserve list are re-emitted with
        /// their attributes passed through StripAttributes; tags found in ReplaceTags are
        /// emitted under their replacement name; any other tag is dropped while its
        /// children are still processed.
        /// </summary>
        /// <param name="node">The first node to process; its following siblings are processed as well</param>
        /// <param name="escapedText">The builder that receives the thinned markup</param>
        /// <param name="preserveImages">When true, PreserveTagsWithImages is used as the preserve list instead of PreserveTags</param>
        /// <param name="progress">Ticked once for every non-text node that is visited</param>
        private static void StripChildNodes(IHTMLDOMNode node, StringBuilder escapedText, bool preserveImages, TickableProgressTick progress)
        {
            // the preserve list cannot change during the walk, so pick it once
            ArrayList preserveTags = preserveImages ? PreserveTagsWithImages : PreserveTags;

            // Siblings are walked with a loop rather than by recursion so that the stack
            // depth follows the nesting depth of the markup, not the number of siblings.
            for (IHTMLDOMNode current = node; current != null; current = current.nextSibling)
            {
                // is this a text node?  If so, just append its encoded text
                if (current.nodeType == HTMLDocumentHelper.HTMLDOMNodeTypes.TextNode)
                {
                    escapedText.Append(HttpUtility.HtmlEncode(current.nodeValue.ToString()));
                    continue;
                }

                progress.Tick();

                string tagName = current.nodeName;

                // Markup to append once the children have been written; null when nothing
                // is pending. It is decided at the moment the tag is opened so that it
                // always matches the list used for opening (the closing check used to look
                // at PreserveTags even when PreserveTagsWithImages had opened the tag).
                string closingMarkup = null;

                // if we're in an element node (a tag) and we should preserve the tag,
                // append it to the returned text
                if (preserveTags.Contains(tagName))
                {
                    // Append the opening tag element, with any extraneous
                    // attributes stripped
                    escapedText.Append("<" + tagName);
                    StripAttributes((IHTMLElement)current, escapedText);

                    if (current.hasChildNodes())
                    {
                        // the element has children, leave the tag open
                        escapedText.Append(">");
                        closingMarkup = "</" + tagName + ">";
                    }
                    else if (tagName == HTMLTokens.IFrame)
                    {
                        // an empty iframe is written with an explicit closing tag
                        escapedText.Append("></" + tagName + ">");
                    }
                    else
                    {
                        // no children, so the tag can simply be closed out
                        escapedText.Append("/>");
                    }
                }
                else if (ReplaceTags.Contains(tagName))
                {
                    string replacement = (string)ReplaceTags[tagName];

                    if (!current.hasChildNodes())
                    {
                        // If there are no children, just emit the replacement tag
                        escapedText.Append("<" + replacement + "/>");
                    }
                    else if (IsChildlessTag(replacement))
                    {
                        // Since there are children, the childless replacement tag is
                        // emitted at the end of this node, after the children
                        closingMarkup = "<" + replacement + "/>";
                    }
                    else
                    {
                        escapedText.Append("<" + replacement + ">");
                        closingMarkup = "</" + replacement + ">";
                    }
                }

                // children are processed even when the tag itself was dropped
                IHTMLDOMNode firstChild = current.firstChild;
                if (firstChild != null)
                {
                    StripChildNodes(firstChild, escapedText, preserveImages, progress);
                }

                // put in the closing markup for the current element (it was left open in case of children)
                if (closingMarkup != null)
                {
                    escapedText.Append(closingMarkup);
                }
            }
        }
 /// <summary>
 /// Initializes the work item by storing its three arguments in the matching fields.
 /// </summary>
 /// <param name="url">Value kept in _url</param>
 /// <param name="timeout">Value kept in _timeout</param>
 /// <param name="tickableProgress">Value kept in _tickableProgress</param>
 public DownloadWorkItem(
     string url,
     int timeout,
     TickableProgressTick tickableProgress)
 {
     this._url = url;
     this._timeout = timeout;
     this._tickableProgress = tickableProgress;
 }