Example #1
0
        /// <summary>
        /// Downloads the file at <c>FileUrl</c>, parses it as a mail message and adds one
        /// <c>FileLink</c> to <c>Results</c> for every <c>&lt;a href&gt;</c> element found in the
        /// message body.
        /// </summary>
        /// <remarks>
        /// The HTML body is preferred; the plain-text body is used when the HTML body is null or
        /// empty. Anchors whose href starts with <c>#</c>, <c>javascript:</c> or <c>mailto:</c>
        /// are ignored. Any exception raised along the way is reported as a single
        /// <c>FileLink</c> flagged with <c>hasError</c> whose <c>LinkAddress</c> carries the
        /// exception message.
        /// </remarks>
        public override void Parse()
        {
            try
            {
                byte[] content = Utility.GetFileByteArray(FileUrl);
                if (content == null)
                {
                    // Nothing was downloaded, so there is nothing to report.
                    return;
                }

                using (MemoryStream contentStream = new MemoryStream(content, false))
                {
                    Mail_Message message  = Mail_Message.ParseFromStream(contentStream);
                    HtmlDocument document = new HtmlDocument();

                    string htmlBody = message.BodyHtmlText;
                    string markup   = string.IsNullOrEmpty(htmlBody) ? message.BodyText : htmlBody;
                    if (string.IsNullOrEmpty(markup))
                    {
                        return;
                    }

                    document.LoadHtml(markup);

                    foreach (HtmlNode anchor in document.DocumentNode.SelectNodesOrEmpty("//a[@href]"))
                    {
                        string href = anchor.Attributes["href"].Value;

                        // In-page fragments, script pseudo-links and mail addresses are not
                        // collected.
                        bool ignored = href.StartsWith("#", StringComparison.InvariantCultureIgnoreCase) ||
                                       href.StartsWith("javascript:", StringComparison.InvariantCultureIgnoreCase) ||
                                       href.StartsWith("mailto:", StringComparison.InvariantCultureIgnoreCase);
                        if (ignored)
                        {
                            continue;
                        }

                        // With zero or one child node the anchor's own text is used; otherwise
                        // only the text of the last child node is taken.
                        HtmlNode lastChild = anchor.LastChild;
                        string   caption   = anchor.FirstChild == lastChild
                                             ? anchor.InnerText
                                             : lastChild.InnerText;

                        Results.Add(new FileLink()
                        {
                            ParentFileUrl = FileUrl,
                            LinkText      = caption.Replace("\r\n", " "),
                            LinkAddress   = href
                        });
                    }
                }
            }
            catch (Exception ex)
            {
                // Surface the failure as a result entry instead of letting it propagate.
                Results.Add(new FileLink()
                {
                    ParentFileUrl = FileUrl,
                    LinkText      = "Error occurred when parsing this file.",
                    LinkAddress   = ex.Message,
                    hasError      = true
                });
            }
        }
Example #2
0
        /// <summary>
        /// Downloads the file at <c>FileUrl</c>, opens it read-only as a Word document and adds
        /// one <c>FileLink</c> to <c>Results</c> for every external hyperlink found in the main
        /// document part.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Hyperlink elements without a relationship id, hyperlinks whose id has no matching
        /// relationship in the main document part, and non-external relationships are skipped.
        /// External links with a relative URI are recorded with <c>hasError</c> set; absolute
        /// <c>mailto:</c> links are not recorded.
        /// </para>
        /// <para>
        /// Any exception raised while downloading or parsing is reported as a single
        /// <c>FileLink</c> flagged with <c>hasError</c> whose <c>LinkAddress</c> carries the
        /// exception message.
        /// </para>
        /// </remarks>
        public override void Parse()
        {
            try
            {
                byte[] fileByteArray = Utility.GetFileByteArray(FileUrl);
                if (fileByteArray == null)
                {
                    return;
                }

                using (MemoryStream fileStream = new MemoryStream(fileByteArray, false))
                using (WordprocessingDocument doc = WordprocessingDocument.Open(fileStream, false))
                {
                    Document mainDocument = doc.MainDocumentPart.Document;

                    // Iterate through the hyperlink elements in the main document part.
                    foreach (DocumentFormat.OpenXml.Wordprocessing.Hyperlink hyperlink in mainDocument.Descendants <DocumentFormat.OpenXml.Wordprocessing.Hyperlink>())
                    {
                        if (hyperlink.Id == null)
                        {
                            continue;
                        }

                        // The display text may be spread across several text elements, so
                        // concatenate every text descendant of the hyperlink element.
                        StringBuilder hyperlinkText = new StringBuilder();
                        foreach (DocumentFormat.OpenXml.Wordprocessing.Text text in hyperlink.Descendants <DocumentFormat.OpenXml.Wordprocessing.Text>())
                        {
                            hyperlinkText.Append(text.InnerText);
                        }

                        // The element's Id attribute names the relationship holding the target.
                        string hyperlinkRelationshipId = hyperlink.Id.Value;

                        // SingleOrDefault (not Single): an id without a matching relationship
                        // yields null and is skipped by the guard below, instead of throwing
                        // and aborting the scan of the remaining hyperlinks.
                        HyperlinkRelationship hyperlinkRelationship = doc
                                                                      .MainDocumentPart.HyperlinkRelationships
                                                                      .SingleOrDefault(c => c.Id == hyperlinkRelationshipId);

                        if (hyperlinkRelationship == null || !hyperlinkRelationship.IsExternal)
                        {
                            continue;
                        }

                        if (hyperlinkRelationship.Uri.IsAbsoluteUri == false)
                        {
                            // A relative external target is recorded as an error entry with
                            // the URI exactly as it was written in the document.
                            Results.Add(new FileLink()
                            {
                                ParentFileUrl = FileUrl,
                                LinkText      = hyperlinkText.ToString(),
                                LinkAddress   = hyperlinkRelationship.Uri.OriginalString,
                                hasError      = true
                            });
                        }
                        else if (hyperlinkRelationship.Uri.Scheme != Uri.UriSchemeMailto)
                        {
                            Results.Add(new FileLink()
                            {
                                ParentFileUrl = FileUrl,
                                LinkText      = hyperlinkText.ToString(),
                                LinkAddress   = hyperlinkRelationship.Uri.AbsoluteUri
                            });
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // Surface the failure as a result entry instead of letting it propagate.
                Results.Add(new FileLink()
                {
                    ParentFileUrl = FileUrl,
                    LinkText      = "Error occurred when parsing this file.",
                    LinkAddress   = ex.Message,
                    hasError      = true
                });
            }
        }
Example #3
0
        /// <summary>
        /// Checks whether the target of <paramref name="fileLink"/> can currently be reached.
        /// </summary>
        /// <remarks>
        /// <para>
        /// An address that is not an absolute URI is only accepted when it starts with
        /// <c>/</c>; it is then resolved against <c>fileLink.ParentFileUrl</c>.
        /// </para>
        /// <para>Per scheme:</para>
        /// <list type="bullet">
        /// <item><description>http / https: a HEAD request must answer 200 OK; when HEAD answers
        /// 404 the check is repeated once with GET. Hosts whose name failed to resolve are
        /// remembered in <c>errorHost</c> and rejected without another request.</description></item>
        /// <item><description>ftp: a directory-listing request must yield a response.</description></item>
        /// <item><description>file: a path with an extension must exist as a file; another path
        /// with more than one segment must exist as a directory; otherwise the host name must
        /// resolve to at least one address.</description></item>
        /// <item><description>mailto: always considered valid.</description></item>
        /// </list>
        /// <para>Any other scheme, and any exception during the check, yields <c>false</c>.</para>
        /// </remarks>
        /// <param name="fileLink">The link to check; <c>null</c> is treated as invalid.</param>
        /// <returns><c>true</c> when the target was reached as described above; otherwise <c>false</c>.</returns>
        private bool UrlIsValid(FileLink fileLink)
        {
            if (fileLink == null || fileLink.hasError)
            {
                return false;
            }

            // Nothing to probe without an address.
            string address = fileLink.LinkAddress;
            if (string.IsNullOrEmpty(address))
            {
                return false;
            }

            Uri uri;
            if (!Uri.TryCreate(address, UriKind.Absolute, out uri))
            {
                // Only server-rooted relative addresses ("/...") are resolved. Resolving
                // through Uri keeps the parent's scheme and authority intact, including
                // IPv6 hosts and schemes that have no port.
                Uri  parentUri;
                bool resolved = address.StartsWith("/", StringComparison.Ordinal) &&
                                Uri.TryCreate(fileLink.ParentFileUrl, UriKind.Absolute, out parentUri) &&
                                Uri.TryCreate(parentUri, address, out uri);
                if (!resolved)
                {
                    return false;
                }
            }

            try
            {
                if (uri.Scheme == Uri.UriSchemeHttp || uri.Scheme == Uri.UriSchemeHttps)
                {
                    return HttpTargetResponds(uri);
                }

                if (uri.Scheme == Uri.UriSchemeFtp)
                {
                    FtpWebRequest ftpRequest = WebRequest.Create(uri) as FtpWebRequest;
                    if (ftpRequest == null)
                    {
                        return false;
                    }

                    ftpRequest.Method    = WebRequestMethods.Ftp.ListDirectory;
                    ftpRequest.KeepAlive = false;

                    using (FtpWebResponse ftpResponse = ftpRequest.GetResponse() as FtpWebResponse)
                    {
                        return ftpResponse != null;
                    }
                }

                if (uri.Scheme == Uri.UriSchemeFile)
                {
                    if (System.IO.Path.HasExtension(uri.LocalPath))
                    {
                        return new FileInfo(uri.LocalPath).Exists;
                    }

                    if (uri.Segments.Length > 1)
                    {
                        return new DirectoryInfo(uri.LocalPath).Exists;
                    }

                    // Only a host is named: it counts as valid when the name resolves.
                    IPHostEntry hostEntry = Dns.GetHostEntry(uri.Host);
                    return hostEntry != null && hostEntry.AddressList.Length > 0;
                }

                if (uri.Scheme == Uri.UriSchemeMailto)
                {
                    return true;
                }
            }
            catch (Exception)
            {
                // Best effort: any failure while probing means the link could not be
                // confirmed, which is reported as invalid below.
            }

            return false;
        }

        /// <summary>
        /// Probes an http/https target with HEAD, retrying once with GET when HEAD answers 404.
        /// </summary>
        /// <param name="uri">Absolute http or https URI to probe.</param>
        /// <returns><c>true</c> only when a probe answers 200 OK.</returns>
        /// <remarks>
        /// Exceptions other than <see cref="WebException"/> are not handled here; the caller
        /// treats them as an invalid link.
        /// </remarks>
        private bool HttpTargetResponds(Uri uri)
        {
            if (errorHost.Contains(uri.Host))
            {
                // The host name already failed to resolve; do not try again.
                return false;
            }

            if (uri.Scheme == Uri.UriSchemeHttps)
            {
                // Remove-then-add keeps a single registration of the shared delegate no matter
                // how many links are checked, instead of growing the invocation list by one
                // per HTTPS link.
                // NOTE(review): the callback is process-wide, so every HTTPS request in this
                // process accepts any certificate once the first HTTPS link has been checked.
                ServicePointManager.ServerCertificateValidationCallback -= AcceptAnyServerCertificate;
                ServicePointManager.ServerCertificateValidationCallback += AcceptAnyServerCertificate;
            }

            try
            {
                HttpWebRequest headRequest = CreateProbeRequest(uri, "HEAD");
                using (HttpWebResponse headResponse = headRequest.GetResponse() as HttpWebResponse)
                {
                    return headResponse != null && headResponse.StatusCode == HttpStatusCode.OK;
                }
            }
            catch (WebException wex)
            {
                if (wex.Status == WebExceptionStatus.NameResolutionFailure)
                {
                    if (errorHost.Contains(uri.Host) == false)
                    {
                        errorHost.Add(uri.Host);
                    }

                    return false;
                }

                // Dispose the error response after reading its status so it is not left
                // for the garbage collector to release.
                bool headAnsweredNotFound;
                using (HttpWebResponse errorResponse = wex.Response as HttpWebResponse)
                {
                    headAnsweredNotFound = wex.Status == WebExceptionStatus.ProtocolError &&
                                           errorResponse != null &&
                                           errorResponse.StatusCode == HttpStatusCode.NotFound;
                }

                if (!headAnsweredNotFound)
                {
                    return false;
                }
            }

            // HEAD answered 404: ask again with GET before declaring the link broken.
            try
            {
                HttpWebRequest getRequest = CreateProbeRequest(uri, "GET");
                using (HttpWebResponse getResponse = getRequest.GetResponse() as HttpWebResponse)
                {
                    // Capture the status first so the result does not depend on the state of
                    // the response after the request has been aborted.
                    bool ok = getResponse != null && getResponse.StatusCode == HttpStatusCode.OK;

                    try
                    {
                        // Only the status is needed; abort rather than read the body.
                        getRequest.Abort();
                    }
                    catch (Exception)
                    {
                        // Aborting is best effort and must not change the outcome.
                    }

                    return ok;
                }
            }
            catch (WebException)
            {
                return false;
            }
        }

        /// <summary>
        /// Builds the request used to probe an http/https link.
        /// </summary>
        /// <param name="uri">Absolute http or https URI to request.</param>
        /// <param name="method">HTTP method to use, e.g. <c>HEAD</c> or <c>GET</c>.</param>
        /// <returns>A request with a 15 second timeout, default credentials, GZip decompression and keep-alive disabled.</returns>
        private static HttpWebRequest CreateProbeRequest(Uri uri, string method)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);

            request.AutomaticDecompression = DecompressionMethods.GZip;
            request.UseDefaultCredentials  = true;
            request.UserAgent = "uKnowlive Link Checker Tool";
            request.Timeout   = 15000;
            request.Method    = method;
            request.KeepAlive = false;

            return request;
        }

        /// <summary>
        /// Certificate validation callback that accepts every server certificate. Held in one
        /// static field so the same delegate can be removed and re-added.
        /// </summary>
        private static readonly System.Net.Security.RemoteCertificateValidationCallback AcceptAnyServerCertificate =
            (sender, certificate, chain, sslPolicyErrors) => true;
Example #4
0
        /// <summary>
        /// Downloads the file at <c>FileUrl</c>, opens it read-only as a PowerPoint presentation
        /// and adds one <c>FileLink</c> to <c>Results</c> for every external hyperlink found on
        /// its slides.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Hyperlink elements without a relationship id, hyperlinks whose id has no matching
        /// relationship in their slide part, and non-external relationships are skipped.
        /// External links with a relative URI are recorded with <c>hasError</c> set; absolute
        /// <c>mailto:</c> links are not recorded. The link text comes from
        /// <c>Utility.GetPPTHyperlinkText</c>.
        /// </para>
        /// <para>
        /// Any exception raised while downloading or parsing is reported as a single
        /// <c>FileLink</c> flagged with <c>hasError</c> whose <c>LinkAddress</c> carries the
        /// exception message.
        /// </para>
        /// </remarks>
        public override void Parse()
        {
            try
            {
                byte[] fileByteArray = Utility.GetFileByteArray(FileUrl);
                if (fileByteArray == null)
                {
                    return;
                }

                using (MemoryStream fileStream = new MemoryStream(fileByteArray, false))
                using (PresentationDocument document = PresentationDocument.Open(fileStream, false))
                {
                    // Iterate through all the slide parts in the presentation part.
                    foreach (SlidePart slidePart in document.PresentationPart.SlideParts)
                    {
                        // Iterate through all the links in the slide part.
                        foreach (DocumentFormat.OpenXml.Drawing.HyperlinkType hyperlink in slidePart.Slide.Descendants <DocumentFormat.OpenXml.Drawing.HyperlinkType>())
                        {
                            if (hyperlink.Id == null)
                            {
                                continue;
                            }

                            string hyperlinkText           = Utility.GetPPTHyperlinkText(hyperlink);
                            string hyperlinkRelationshipId = hyperlink.Id.Value;

                            // SingleOrDefault (not Single): an id without a matching
                            // relationship yields null and is skipped by the guard below,
                            // instead of throwing and aborting the scan of the remaining
                            // slides.
                            HyperlinkRelationship hyperlinkRelationship = slidePart
                                                                          .HyperlinkRelationships
                                                                          .SingleOrDefault(c => c.Id == hyperlinkRelationshipId);

                            if (hyperlinkRelationship == null || !hyperlinkRelationship.IsExternal)
                            {
                                continue;
                            }

                            if (hyperlinkRelationship.Uri.IsAbsoluteUri == false)
                            {
                                // A relative external target is recorded as an error entry
                                // with the URI exactly as it was written in the file.
                                Results.Add(new FileLink()
                                {
                                    ParentFileUrl = FileUrl,
                                    LinkText      = hyperlinkText,
                                    LinkAddress   = hyperlinkRelationship.Uri.OriginalString,
                                    hasError      = true
                                });
                            }
                            else if (hyperlinkRelationship.Uri.Scheme != Uri.UriSchemeMailto)
                            {
                                Results.Add(new FileLink()
                                {
                                    ParentFileUrl = FileUrl,
                                    LinkText      = hyperlinkText,
                                    LinkAddress   = hyperlinkRelationship.Uri.AbsoluteUri
                                });
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // Surface the failure as a result entry instead of letting it propagate.
                Results.Add(new FileLink()
                {
                    ParentFileUrl = FileUrl,
                    LinkText      = "Error occurred when parsing this file.",
                    LinkAddress   = ex.Message,
                    hasError      = true
                });
            }
        }