Пример #1
0
        /// <summary>
        /// Stores a binary resource to the local file system.
        /// </summary>
        /// <remarks>
        /// An already existing file at the target location is deleted first.
        /// Nothing is written when <paramref name="binaryContent"/> is null
        /// or empty. Exceptions raised while deleting or writing are traced
        /// and ignored, so the resource info is still returned in that case.
        /// </remarks>
        /// <param name="binaryContent">The bytes to write; may be null or empty.</param>
        /// <param name="uriInfo">The URI info of the resource being stored.</param>
        /// <returns>Return the info about the stored data.</returns>
        public DownloadedResourceInfo StoreBinary(
            byte[] binaryContent,
            UriResourceInfo uriInfo)
        {
            DownloadedResourceInfo result =
                new DownloadedResourceInfo(
                    uriInfo,
                    _settings.Options.DestinationFolderPath);

            try
            {
                if (result.LocalFilePath.Exists)
                {
                    result.LocalFilePath.Delete();
                }

                if (binaryContent != null && binaryContent.Length > 0)
                {
                    // Make sure the target folder exists, same as StoreHtml does.
                    if (!result.LocalFilePath.Directory.Exists)
                    {
                        result.LocalFilePath.Directory.Create();
                    }

                    Trace.WriteLine($"Writing binary content to file '{result.LocalFilePath}'.");

                    // Open with FileMode.Create directly. Calling
                    // FileInfo.Create() first would return an open stream
                    // that, if left undisposed, keeps the file locked and
                    // makes the subsequent open for writing fail.
                    using (FileStream s = new FileStream(
                               result.LocalFilePath.FullName,
                               FileMode.Create,
                               FileAccess.Write))
                    {
                        s.Write(binaryContent, 0, binaryContent.Length);
                    }
                }
            }
            catch (Exception x)
            {
                // Deliberate best effort: a failed store must not abort the caller.
                Trace.WriteLine($"Ignoring exception while storing binary file: '{ x.Message}'.");
            }

            return(result);
        }
Пример #2
0
 /// <summary>
 /// Copy constructor. Takes over all field values of another instance.
 /// </summary>
 /// <param name="copyFrom">The instance whose field values are copied.</param>
 public UriResourceInfo(UriResourceInfo copyFrom)
 {
     // Plain field-by-field copy; the assignments are independent of each other.
     this._linkType = copyFrom._linkType;
     this._absoluteUri = copyFrom._absoluteUri;
     this._baseUri = copyFrom._baseUri;
     this._relativeUri = copyFrom._relativeUri;
     this._originalUrl = copyFrom._originalUrl;
     this._options = copyFrom._options;
 }
Пример #3
0
 /// <summary>
 /// Creates a parser that keeps the given settings, URI info and text
 /// for later use.
 /// </summary>
 /// <param name="settings">The settings.</param>
 /// <param name="uriInfo">The URI info.</param>
 /// <param name="textContent">Content of the text.</param>
 public ResourceParser(Settings settings, UriResourceInfo uriInfo, string textContent)
 {
     // Only stores the arguments; no parsing happens here.
     this._textContent = textContent;
     this._uriInfo = uriInfo;
     this._settings = settings;
 }
Пример #4
0
 /// <summary>
 /// Creates an instance from an existing URI info and remembers the
 /// given local folders.
 /// </summary>
 /// <param name="copyFrom">The URI info handed to the base copy constructor.</param>
 /// <param name="folderPath">The folder path.</param>
 /// <param name="baseFolderPath">The base folder path.</param>
 public DownloadedResourceInfo(UriResourceInfo copyFrom, DirectoryInfo folderPath, DirectoryInfo baseFolderPath)
     : base(copyFrom)
 {
     // Only the two folder references are stored here.
     this._localBaseFolderPath = baseFolderPath;
     this._localFolderPath = folderPath;
 }
Пример #5
0
        /// <summary>
        /// Detects URLs in styles.
        /// </summary>
        /// <remarks>
        /// Only attributes named "style" (case-insensitive) are inspected.
        /// Every <c>url(...)</c> token found in the value is turned into a
        /// resource link. Surrounding single or double quotes are removed
        /// from the token, and tokens that do not form a valid URI are
        /// skipped instead of aborting the extraction.
        /// </remarks>
        /// <param name="baseUri">The base URI.</param>
        /// <param name="attributeName">Name of the attribute.</param>
        /// <param name="attributeValue">The attribute value.</param>
        /// <returns>
        /// The links that pass the same-site and processable checks; an
        /// empty list when there are none.
        /// </returns>
        private List <UriResourceInfo> ExtractStyleUrls(
            Uri baseUri,
            string attributeName,
            string attributeValue)
        {
            List <UriResourceInfo> result =
                new List <UriResourceInfo>();

            // Nothing to do for other attributes or for blank values.
            if (!string.Equals(attributeName, @"style", StringComparison.OrdinalIgnoreCase) ||
                attributeValue == null ||
                attributeValue.Trim().Length == 0)
            {
                return(result);
            }

            MatchCollection matches = Regex.Matches(
                attributeValue,
                @"url\s*\(\s*([^\)\s]+)\s*\)",
                RegexOptions.Singleline | RegexOptions.IgnoreCase);

            foreach (Match match in matches)
            {
                // CSS permits url('x') and url("x"); the quotes are not part
                // of the address and would otherwise end up inside the URI.
                string url = match.Groups[1].Value.Trim('\'', '"');

                // Skip empty or malformed tokens rather than letting a
                // UriFormatException discard all links found so far.
                Uri parsedUri;
                if (!Uri.TryCreate(url, UriKind.RelativeOrAbsolute, out parsedUri))
                {
                    continue;
                }

                UriResourceInfo ui =
                    new UriResourceInfo(
                        _settings.Options,
                        url,
                        parsedUri,
                        baseUri,
                        UriType.Resource);

                bool isOnSameSite =
                    ui.IsOnSameSite(baseUri);

                if ((isOnSameSite ||
                     !_settings.Options.StayOnSite) &&
                    ui.IsProcessableUri)
                {
                    result.Add(ui);
                }
            }

            return(result);
        }
Пример #6
0
        /// <summary>
        /// Creates an instance from an existing URI info and computes the
        /// local file path and local file name below the given base folder.
        /// </summary>
        /// <param name="copyFrom">The copy from.</param>
        /// <param name="baseFolderPath">The base folder path.</param>
        public DownloadedResourceInfo(UriResourceInfo copyFrom, DirectoryInfo baseFolderPath)
            : base(copyFrom)
        {
            this._localBaseFolderPath = baseFolderPath;

            // Read the folder first, then build the name, so the evaluation
            // order stays the same as in a single nested expression.
            string targetFolder = baseFolderPath.FullName;
            string generatedName = MakeLocalFileName(
                copyFrom.AbsoluteUri,
                copyFrom.BaseUri,
                copyFrom.LinkType).Name;

            this._localFilePath = new FileInfo(Path.Combine(targetFolder, generatedName));

            // The file name is kept separately, without any folder part.
            this._localFileName = new FileInfo(this._localFilePath.Name);
        }
Пример #7
0
        /// <summary>
        /// Writes an HTML resource to the local file system as text, using
        /// the given encoding. Hyperlinks are left untouched.
        /// </summary>
        /// <remarks>
        /// An existing target file is deleted and a missing target folder is
        /// created before writing. Any exception raised in the process is
        /// traced and ignored.
        /// </remarks>
        /// <param name="textContent">The text to write.</param>
        /// <param name="encoding">The encoding used for writing the text.</param>
        /// <param name="uriInfo">The URI info of the resource being stored.</param>
        /// <returns>Return the info about the stored data.</returns>
        public DownloadedResourceInfo StoreHtml(
            string textContent,
            Encoding encoding,
            UriResourceInfo uriInfo)
        {
            DownloadedResourceInfo storedInfo = new DownloadedResourceInfo(
                uriInfo,
                _settings.Options.DestinationFolderPath);

            try
            {
                FileInfo target = storedInfo.LocalFilePath;

                // Remove a previous copy, if any.
                if (target.Exists)
                {
                    target.Delete();
                }

                // Create the containing folder on demand.
                DirectoryInfo folder = target.Directory;
                if (!folder.Exists)
                {
                    folder.Create();
                }

                Trace.WriteLine($"Writing text content to file '{target}'.");

                using (FileStream output = new FileStream(target.FullName, FileMode.Create, FileAccess.Write))
                {
                    using (StreamWriter writer = new StreamWriter(output, encoding))
                    {
                        writer.Write(textContent);
                    }
                }
            }
            catch (Exception error)
            {
                Trace.WriteLine($"Ignoring IO exception while storing HTML file: '{error.Message}'.");
            }

            return storedInfo;
        }
Пример #8
0
        /// <summary>
        /// Reads the given document and collects the links found in it.
        /// </summary>
        /// <remarks>
        /// Comment nodes are parsed as nested documents and searched
        /// recursively. For element nodes, every attribute is searched for
        /// style URLs; for link elements, the link attributes are collected
        /// as well. Attribute values that do not form a valid URI (for
        /// example an empty <c>href</c>) are skipped.
        /// </remarks>
        /// <param name="xml">The XML.</param>
        /// <param name="uriInfo">The URI info.</param>
        /// <returns>The collected links; an empty list when there are none.</returns>
        private List <UriResourceInfo> DoExtractLinks(
            XmlReader xml,
            UriResourceInfo uriInfo)
        {
            List <UriResourceInfo> links = new List <UriResourceInfo>();

            while (xml.Read())
            {
                switch (xml.NodeType)
                {
                // Added 2016-10-16: Inside comments, too.

                case XmlNodeType.Comment:
                    // The nested reader is only needed for this comment,
                    // so release it as soon as its links are collected.
                    using (XmlReader childXml = GetDocReader(xml.Value, uriInfo.BaseUri))
                    {
                        links.AddRange(DoExtractLinks(childXml, uriInfo));
                    }
                    break;

                // A node element.

                case XmlNodeType.Element:
                    string[] linkAttributeNames;
                    UriType  linkType;

                    // If this is a link element, store the URLs to modify.
                    bool isLinkElement = IsLinkElement(
                        xml.Name,
                        out linkAttributeNames,
                        out linkType);

                    while (xml.MoveToNextAttribute())
                    {
                        // Style attributes are looked at on every element.
                        links.AddRange(
                            ExtractStyleUrls(
                                uriInfo.BaseUriWithFolder,
                                xml.Name,
                                xml.Value));

                        if (!isLinkElement)
                        {
                            continue;
                        }

                        foreach (string a in linkAttributeNames)
                        {
                            if (string.Compare(a, xml.Name, true) != 0)
                            {
                                continue;
                            }

                            string url = xml.Value;

                            // Empty or malformed values must not abort the
                            // extraction for the whole document.
                            Uri parsedUri;
                            if (!Uri.TryCreate(url, UriKind.RelativeOrAbsolute, out parsedUri))
                            {
                                continue;
                            }

                            UriResourceInfo ui =
                                new UriResourceInfo(
                                    _settings.Options,
                                    url,
                                    parsedUri,
                                    uriInfo.BaseUriWithFolder,
                                    linkType);

                            bool isOnSameSite =
                                ui.IsOnSameSite(uriInfo.BaseUri);

                            if ((isOnSameSite ||
                                 !_settings.Options.StayOnSite) &&
                                ui.IsProcessableUri)
                            {
                                links.Add(ui);
                            }
                        }
                    }
                    break;
                }
            }

            return(links);
        }
Пример #9
0
        /// <summary>
        /// Replace URIs inside a given HTML document that was previously
        /// downloaded with the local URIs.
        /// </summary>
        /// <remarks>
        /// Each extracted link that is to be followed or is a resource is
        /// replaced in three forms: inside double quotes, inside single
        /// quotes and inside parentheses (style <c>url(...)</c>). Matching is
        /// case-insensitive.
        /// </remarks>
        /// <param name="textContent">The HTML text to process.</param>
        /// <param name="uriInfo">The URI info of the document.</param>
        /// <returns>Returns the content text with the replaced links.</returns>
        /// <exception cref="ApplicationException">
        /// A link differs from its local file name, no replacement took
        /// place and the link's absolute path was not handled before.
        /// </exception>
        public string ReplaceLinks(
            string textContent,
            UriResourceInfo uriInfo)
        {
            ResourceParser parser = new ResourceParser(
                _settings,
                uriInfo,
                textContent);

            List <UriResourceInfo> linkInfos = parser.ExtractLinks();

            // For remembering duplicates.
            HashSet <string> replacedLinks = new HashSet <string>();

            foreach (UriResourceInfo linkInfo in linkInfos)
            {
                if (!linkInfo.WantFollowUri && !linkInfo.IsResourceUri)
                {
                    continue;
                }

                DownloadedResourceInfo dlInfo =
                    new DownloadedResourceInfo(
                        linkInfo,
                        _settings.Options.DestinationFolderPath);

                if (string.IsNullOrEmpty(linkInfo.OriginalUrl))
                {
                    continue;
                }

                string textContentBefore = textContent;

                string link =
                    Regex.Escape(linkInfo.OriginalUrl);

                // The replacement argument of Regex.Replace is a pattern in
                // which '$' starts a substitution. Escape it so that a file
                // name containing '$' is inserted literally.
                string localName =
                    $"{dlInfo.LocalFileName}".Replace("$", "$$");

                const RegexOptions options =
                    RegexOptions.IgnoreCase | RegexOptions.Multiline;

                // Double-quoted occurrences.
                textContent = Regex.Replace(
                    textContent,
                    "\"" + link + "\"",
                    "\"" + localName + "\"",
                    options);

                // Single-quoted occurrences.
                textContent = Regex.Replace(
                    textContent,
                    "'" + link + "'",
                    "'" + localName + "'",
                    options);

                // For style-"url(...)"-links.
                textContent = Regex.Replace(
                    textContent,
                    @"\(\s*" + link + @"\s*\)",
                    "(" + localName + ")",
                    options);

                // Some checking.
                // 2016-10-16, Uwe Keim.
                if (linkInfo.OriginalUrl != dlInfo.LocalFileName.Name &&
                    textContentBefore == textContent &&
                    !replacedLinks.Contains(linkInfo.AbsoluteUri.AbsolutePath))
                {
                    throw new ApplicationException($"Failed to replace URI '{linkInfo.OriginalUrl}' with URI '{dlInfo.LocalFileName}' in HTML text '{textContent}'.");
                }

                // Remember, so that a later duplicate of this link (which
                // finds nothing left to replace) is not reported as failure.
                replacedLinks.Add(linkInfo.AbsoluteUri.AbsolutePath);
            }

            return(textContent);
        }