/// <summary>
/// Reads the mail message stored at <c>FileUrl</c>, scans its body for anchor
/// elements and adds one <c>FileLink</c> per usable link to <c>Results</c>.
/// </summary>
/// <remarks>
/// The HTML body is used when present, otherwise the plain-text body. Links whose
/// target starts with "#", "javascript:" or "mailto:" are skipped. Any exception
/// raised while loading or parsing is reported as a single error entry in
/// <c>Results</c> (with <c>hasError</c> set) instead of being rethrown.
/// </remarks>
public override void Parse()
{
    try
    {
        byte[] messageBytes = Utility.GetFileByteArray(FileUrl);
        if (messageBytes == null)
        {
            return;
        }

        using (MemoryStream messageStream = new MemoryStream(messageBytes, false))
        {
            Mail_Message message = Mail_Message.ParseFromStream(messageStream);
            HtmlDocument htmlDocument = new HtmlDocument();

            // Prefer the HTML body; fall back to the plain-text body.
            string markup;
            if (string.IsNullOrEmpty(message.BodyHtmlText))
            {
                markup = message.BodyText;
            }
            else
            {
                markup = message.BodyHtmlText;
            }

            if (string.IsNullOrEmpty(markup))
            {
                return;
            }

            htmlDocument.LoadHtml(markup);

            foreach (HtmlNode anchor in htmlDocument.DocumentNode.SelectNodesOrEmpty("//a[@href]"))
            {
                string target = anchor.Attributes["href"].Value;

                // In-page anchors, script pseudo-links and mail addresses are not checked.
                bool ignored =
                    target.StartsWith("#", StringComparison.InvariantCultureIgnoreCase) ||
                    target.StartsWith("javascript:", StringComparison.InvariantCultureIgnoreCase) ||
                    target.StartsWith("mailto:", StringComparison.InvariantCultureIgnoreCase);
                if (ignored)
                {
                    continue;
                }

                // With zero or one child the anchor's own text is used; otherwise
                // only the text of the last child node is taken.
                HtmlNode captionSource = anchor.FirstChild == anchor.LastChild
                    ? anchor
                    : anchor.LastChild;
                string caption = captionSource.InnerText.Replace("\r\n", " ");

                Results.Add(new FileLink()
                {
                    ParentFileUrl = FileUrl,
                    LinkText = caption,
                    LinkAddress = target
                });
            }
        }
    }
    catch (Exception ex)
    {
        FileLink failure = new FileLink()
        {
            ParentFileUrl = FileUrl,
            LinkText = "Error occurred when parsing this file.",
            LinkAddress = ex.Message,
            hasError = true
        };
        Results.Add(failure);
    }
}
/// <summary>
/// Opens the Word document stored at <c>FileUrl</c> and adds one <c>FileLink</c> to
/// <c>Results</c> for every external hyperlink found in the main document part.
/// </summary>
/// <remarks>
/// <para>
/// Hyperlinks without a relationship id, hyperlinks whose relationship cannot be found,
/// non-external relationships and "mailto:" targets are skipped. A relative target is
/// still recorded, but with <c>hasError</c> set and its original string as the address.
/// </para>
/// <para>
/// Any exception raised while loading or parsing is reported as a single error entry in
/// <c>Results</c> instead of being rethrown.
/// </para>
/// </remarks>
public override void Parse()
{
    try
    {
        byte[] fileByteArray = Utility.GetFileByteArray(FileUrl);
        if (fileByteArray == null)
        {
            return;
        }

        using (MemoryStream fileStream = new MemoryStream(fileByteArray, false))
        using (WordprocessingDocument doc = WordprocessingDocument.Open(fileStream, false))
        {
            MainDocumentPart mainPart = doc.MainDocumentPart;
            Document mainDocument = mainPart.Document;

            // Iterate through the hyperlink elements in the main document part.
            foreach (DocumentFormat.OpenXml.Wordprocessing.Hyperlink hyperlink in
                     mainDocument.Descendants<DocumentFormat.OpenXml.Wordprocessing.Hyperlink>())
            {
                if (hyperlink.Id == null)
                {
                    continue;
                }

                // The text associated with the hyperlink can be spread across several
                // text elements, so concatenate every Text descendant.
                StringBuilder hyperlinkText = new StringBuilder();
                foreach (DocumentFormat.OpenXml.Wordprocessing.Text text in
                         hyperlink.Descendants<DocumentFormat.OpenXml.Wordprocessing.Text>())
                {
                    hyperlinkText.Append(text.InnerText);
                }

                // The hyperlink element points at its target through an explicit
                // relationship; resolve it via the element's Id attribute.
                string hyperlinkRelationshipId = hyperlink.Id.Value;

                // FirstOrDefault rather than Single: Single throws when nothing matches,
                // which made the null check below unreachable and let one dangling id
                // abort the parsing of every remaining hyperlink in the document.
                HyperlinkRelationship hyperlinkRelationship = mainPart
                    .HyperlinkRelationships
                    .FirstOrDefault(c => c.Id == hyperlinkRelationshipId);

                if (hyperlinkRelationship == null || !hyperlinkRelationship.IsExternal)
                {
                    continue;
                }

                if (!hyperlinkRelationship.Uri.IsAbsoluteUri)
                {
                    // Relative targets are recorded, flagged as errors.
                    Results.Add(new FileLink()
                    {
                        ParentFileUrl = FileUrl,
                        LinkText = hyperlinkText.ToString(),
                        LinkAddress = hyperlinkRelationship.Uri.OriginalString,
                        hasError = true
                    });
                }
                else if (hyperlinkRelationship.Uri.Scheme != Uri.UriSchemeMailto)
                {
                    Results.Add(new FileLink()
                    {
                        ParentFileUrl = FileUrl,
                        LinkText = hyperlinkText.ToString(),
                        LinkAddress = hyperlinkRelationship.Uri.AbsoluteUri
                    });
                }
            }
        }
    }
    catch (Exception ex)
    {
        FileLink objLink = new FileLink()
        {
            ParentFileUrl = FileUrl,
            LinkText = "Error occurred when parsing this file.",
            LinkAddress = ex.Message,
            hasError = true
        };
        Results.Add(objLink);
    }
}
// Single shared accept-all certificate callback. Keeping one instance lets UrlIsValid
// remove and re-add it, so repeated HTTPS checks do not pile up duplicate handlers on
// the static ServicePointManager delegate.
private static readonly System.Net.Security.RemoteCertificateValidationCallback acceptAnyCertificate =
    (sender, certificate, chain, sslPolicyErrors) => true;

/// <summary>
/// Checks whether the address of <paramref name="fileLink"/> can be reached.
/// </summary>
/// <param name="fileLink">
/// The link to check. A root-relative address (starting with "/") is resolved against
/// the scheme, host and port of <c>ParentFileUrl</c>.
/// </param>
/// <returns>
/// <c>true</c> when the target responds as expected for its scheme:
/// HTTP 200 for http/https (HEAD first, then GET if HEAD answers 404), a response for
/// an FTP directory listing, an existing file/directory or resolvable host for file
/// URIs, and always for mailto. <c>false</c> for links already flagged with
/// <c>hasError</c>, for empty or unparsable addresses, for hosts previously recorded
/// in <c>errorHost</c>, and whenever the probe fails or throws.
/// </returns>
private bool UrlIsValid(FileLink fileLink)
{
    if (fileLink.hasError)
    {
        return false;
    }

    string formatedURL = fileLink.LinkAddress;

    // A null address previously caused a NullReferenceException in StartsWith below,
    // outside of any try block.
    if (string.IsNullOrEmpty(formatedURL))
    {
        return false;
    }

    Uri uri;
    if (!Uri.TryCreate(formatedURL, UriKind.Absolute, out uri))
    {
        if (formatedURL.StartsWith("/", StringComparison.Ordinal))
        {
            // Root-relative link: borrow scheme, host and port from the parent file.
            Uri originalURI;
            if (Uri.TryCreate(fileLink.ParentFileUrl, UriKind.Absolute, out originalURI))
            {
                formatedURL = originalURI.Scheme + "://" + originalURI.Host + ":" + originalURI.Port + formatedURL;
            }
        }

        if (!Uri.TryCreate(formatedURL, UriKind.Absolute, out uri))
        {
            return false;
        }
    }

    try
    {
        if (uri.Scheme == Uri.UriSchemeHttp || uri.Scheme == Uri.UriSchemeHttps)
        {
            return HttpUrlIsValid(uri);
        }

        if (uri.Scheme == Uri.UriSchemeFtp)
        {
            FtpWebRequest ftpWebRequest = (FtpWebRequest)WebRequest.Create(uri);
            ftpWebRequest.Method = WebRequestMethods.Ftp.ListDirectory;
            ftpWebRequest.KeepAlive = false;
            using (FtpWebResponse ftpRes = ftpWebRequest.GetResponse() as FtpWebResponse)
            {
                return ftpRes != null;
            }
        }

        if (uri.Scheme == Uri.UriSchemeFile)
        {
            if (System.IO.Path.HasExtension(uri.LocalPath))
            {
                return new FileInfo(uri.LocalPath).Exists;
            }

            if (uri.Segments.Length > 1)
            {
                return new DirectoryInfo(uri.LocalPath).Exists;
            }

            // Only a host is given: treat the link as valid if the host resolves.
            IPHostEntry iph = Dns.GetHostEntry(uri.Host);
            return iph != null && iph.AddressList.Length > 0;
        }

        if (uri.Scheme == Uri.UriSchemeMailto)
        {
            return true;
        }
    }
    catch
    {
        // Best-effort check: any failure while probing means "not valid".
    }

    return false;
}

// Probes an http/https URI with HEAD, retrying with GET when HEAD answers 404.
// Hosts that fail name resolution are remembered in errorHost and rejected up front.
private bool HttpUrlIsValid(Uri uri)
{
    if (errorHost.Contains(uri.Host))
    {
        return false;
    }

    if (uri.Scheme == Uri.UriSchemeHttps)
    {
        // Certificate problems are deliberately ignored by this checker. Remove before
        // add so the process-wide callback holds at most one copy of our handler.
        ServicePointManager.ServerCertificateValidationCallback -= acceptAnyCertificate;
        ServicePointManager.ServerCertificateValidationCallback += acceptAnyCertificate;
    }

    try
    {
        HttpWebRequest headRequest = CreateProbeRequest(uri, "HEAD");
        using (HttpWebResponse headResponse = headRequest.GetResponse() as HttpWebResponse)
        {
            return headResponse != null && headResponse.StatusCode == HttpStatusCode.OK;
        }
    }
    catch (WebException wex)
    {
        if (wex.Status == WebExceptionStatus.NameResolutionFailure)
        {
            if (!errorHost.Contains(uri.Host))
            {
                errorHost.Add(uri.Host);
            }
        }
        else if (wex.Status == WebExceptionStatus.ProtocolError)
        {
            // Dispose the failed response; a null Response is fine for using.
            using (WebResponse failedResponse = wex.Response)
            {
                HttpWebResponse failedHttp = failedResponse as HttpWebResponse;
                if (failedHttp != null && failedHttp.StatusCode == HttpStatusCode.NotFound)
                {
                    // HEAD answered 404; retry once with GET before giving up.
                    try
                    {
                        HttpWebRequest getRequest = CreateProbeRequest(uri, "GET");
                        using (HttpWebResponse getResponse = getRequest.GetResponse() as HttpWebResponse)
                        {
                            // Only the status line matters; abort to skip the body.
                            try
                            {
                                getRequest.Abort();
                            }
                            catch
                            {
                                // Abort is best-effort.
                            }

                            return getResponse != null && getResponse.StatusCode == HttpStatusCode.OK;
                        }
                    }
                    catch
                    {
                        // The GET retry failed as well: fall through to "not valid".
                    }
                }

                // Other protocol errors (including 401) are reported as invalid;
                // no retry with alternative credentials is attempted.
            }
        }
    }

    return false;
}

// Builds the request used for link probing with the checker's standard settings.
private static HttpWebRequest CreateProbeRequest(Uri uri, string method)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);
    request.AutomaticDecompression = DecompressionMethods.GZip;
    request.UseDefaultCredentials = true;
    request.UserAgent = "uKnowlive Link Checker Tool";
    request.Timeout = 15000;
    request.Method = method;
    request.KeepAlive = false;
    return request;
}
/// <summary>
/// Opens the presentation stored at <c>FileUrl</c> and adds one <c>FileLink</c> to
/// <c>Results</c> for every external hyperlink found on its slides.
/// </summary>
/// <remarks>
/// <para>
/// Hyperlinks without a relationship id, hyperlinks whose relationship cannot be found
/// in the slide part, non-external relationships and "mailto:" targets are skipped.
/// A relative target is still recorded, but with <c>hasError</c> set and its original
/// string as the address.
/// </para>
/// <para>
/// Any exception raised while loading or parsing is reported as a single error entry in
/// <c>Results</c> instead of being rethrown.
/// </para>
/// </remarks>
public override void Parse()
{
    try
    {
        byte[] fileByteArray = Utility.GetFileByteArray(FileUrl);
        if (fileByteArray == null)
        {
            return;
        }

        using (MemoryStream fileStream = new MemoryStream(fileByteArray, false))
        using (PresentationDocument document = PresentationDocument.Open(fileStream, false))
        {
            // Iterate through all the slide parts in the presentation part.
            foreach (SlidePart slidePart in document.PresentationPart.SlideParts)
            {
                IEnumerable<DocumentFormat.OpenXml.Drawing.HyperlinkType> links =
                    slidePart.Slide.Descendants<DocumentFormat.OpenXml.Drawing.HyperlinkType>();

                // Iterate through all the links in the slide part.
                foreach (DocumentFormat.OpenXml.Drawing.HyperlinkType hyperlink in links)
                {
                    if (hyperlink.Id == null)
                    {
                        continue;
                    }

                    string hyperlinkText = Utility.GetPPTHyperlinkText(hyperlink);
                    string hyperlinkRelationshipId = hyperlink.Id.Value;

                    // FirstOrDefault rather than Single: Single throws when nothing
                    // matches, which made the null check below unreachable and let one
                    // dangling id abort the parsing of every remaining hyperlink.
                    HyperlinkRelationship hyperlinkRelationship = slidePart
                        .HyperlinkRelationships
                        .FirstOrDefault(c => c.Id == hyperlinkRelationshipId);

                    if (hyperlinkRelationship == null || !hyperlinkRelationship.IsExternal)
                    {
                        continue;
                    }

                    if (!hyperlinkRelationship.Uri.IsAbsoluteUri)
                    {
                        // Relative targets are recorded, flagged as errors.
                        Results.Add(new FileLink()
                        {
                            ParentFileUrl = FileUrl,
                            LinkText = hyperlinkText,
                            LinkAddress = hyperlinkRelationship.Uri.OriginalString,
                            hasError = true
                        });
                    }
                    else if (hyperlinkRelationship.Uri.Scheme != Uri.UriSchemeMailto)
                    {
                        Results.Add(new FileLink()
                        {
                            ParentFileUrl = FileUrl,
                            LinkText = hyperlinkText,
                            LinkAddress = hyperlinkRelationship.Uri.AbsoluteUri
                        });
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        FileLink objLink = new FileLink()
        {
            ParentFileUrl = FileUrl,
            LinkText = "Error occurred when parsing this file.",
            LinkAddress = ex.Message,
            hasError = true
        };
        Results.Add(objLink);
    }
}