Example #1
0
            /// <summary>
            /// Downloads the content at <c>_url</c> into a newly created temp file and, once the
            /// transfer has completed, records that file's path in <c>_filePath</c>.
            /// </summary>
            /// <remarks>
            /// Reports the URL being indexed before the transfer starts and ticks the progress
            /// after it finishes. If the transfer throws, <c>_filePath</c> is left unchanged.
            /// </remarks>
            public void Download()
            {
                _tickableProgress.Message("Indexing " + _url);
                string filePath = TempFileManager.Instance.CreateTempFile();
                WebRequestWithCache request = new WebRequestWithCache(_url);

                // Each stream is acquired inside its own using statement so that the response
                // stream is still disposed if opening the destination file throws.
                using (Stream response = request.GetResponseStream(WebRequestWithCache.CacheSettings.CHECKCACHE, _timeout))
                {
                    // FileMode.Open requires the file to exist already.
                    // NOTE(review): presumably CreateTempFile creates the file on disk - confirm.
                    using (FileStream fileStream = new FileStream(filePath, FileMode.Open))
                    {
                        StreamHelper.Transfer(response, fileStream);
                    }
                }

                // Only publish the path after the content has been written successfully.
                _filePath = filePath;

                _tickableProgress.Tick();
            }
Example #2
0
        /// <summary>
        /// Used as a part of HTML thinning: walks <paramref name="node"/>, its following siblings
        /// and all of their descendants, appending a thinned version of the markup to
        /// <paramref name="escapedText"/>.
        /// </summary>
        /// <remarks>
        /// Text nodes are appended HTML-encoded. Elements whose name is in the active preserve
        /// list are kept (their attributes are written by StripAttributes). Elements with an
        /// entry in ReplaceTags are emitted under their replacement name. Any other element is
        /// dropped, but its children are still processed.
        /// </remarks>
        /// <param name="node">The first node to process; a null node produces no output</param>
        /// <param name="escapedText">The builder that receives the thinned markup</param>
        /// <param name="preserveImages">
        /// When true, PreserveTagsWithImages is used as the preserve list instead of PreserveTags
        /// </param>
        /// <param name="progress">Ticked once for every non-text node that is visited</param>
        private static void StripChildNodes(IHTMLDOMNode node, StringBuilder escapedText, bool preserveImages, TickableProgressTick progress)
        {
            // Siblings are walked with a loop rather than by recursion so that the stack depth
            // depends only on how deeply the nodes are nested, not on how many siblings there are.
            for (IHTMLDOMNode current = node; current != null; current = current.nextSibling)
            {
                // is this a text node?  If so, just append its encoded text and move on
                if (current.nodeType == HTMLDocumentHelper.HTMLDOMNodeTypes.TextNode)
                {
                    escapedText.Append(HttpUtility.HtmlEncode(current.nodeValue.ToString()));
                    continue;
                }

                progress.Tick();

                ArrayList preserveTags = PreserveTags;
                if (preserveImages)
                {
                    preserveTags = PreserveTagsWithImages;
                }

                string nodeName = current.nodeName;

                // Markup to append once the children have been written. It is decided at the
                // point where the tag is opened, so the closing markup always matches the
                // opening markup (including tags that are only preserved in image mode).
                string closingMarkup = null;

                if (preserveTags.Contains(nodeName))
                {
                    // Append the opening tag element, with any extraneous
                    // attributes stripped
                    escapedText.Append("<" + nodeName);
                    StripAttributes((IHTMLElement)current, escapedText);

                    if (current.hasChildNodes())
                    {
                        // the element has children, leave the tag open
                        escapedText.Append(">");
                        closingMarkup = "</" + nodeName + ">";
                    }
                    else if (nodeName == HTMLTokens.IFrame)
                    {
                        // an empty iframe is written with an explicit end tag
                        escapedText.Append("></" + nodeName + ">");
                    }
                    else
                    {
                        // no children, so the tag can simply be closed out
                        escapedText.Append("/>");
                    }
                }
                else if (ReplaceTags.Contains(nodeName))
                {
                    string replacement = (string)ReplaceTags[nodeName];

                    if (!current.hasChildNodes())
                    {
                        // If there are no children, just emit the replacement tag
                        escapedText.Append("<" + replacement + "/>");
                    }
                    else if (IsChildlessTag(replacement))
                    {
                        // The replacement cannot wrap content, so it is emitted as a
                        // self-closed tag after this node's children
                        closingMarkup = "<" + replacement + "/>";
                    }
                    else
                    {
                        escapedText.Append("<" + replacement + ">");
                        closingMarkup = "</" + replacement + ">";
                    }
                }

                // Children are processed even when the element itself was dropped
                if (current.firstChild != null)
                {
                    StripChildNodes(current.firstChild, escapedText, preserveImages, progress);
                }

                // finish the current element (it was left open in case of children)
                if (closingMarkup != null)
                {
                    escapedText.Append(closingMarkup);
                }
            }
        }