Пример #1
0
        /// <summary>
        /// Writes the data collected for one page to the writer <c>sw</c>: a header line
        /// containing the page title and URL, followed by every entry of the metadata,
        /// anchor and image lists, one entry per line.
        /// </summary>
        /// <param name="pageData">Parsed page whose title, URL and lists are written out.</param>
        /// <remarks>
        /// Best-effort: any exception raised while writing is logged through
        /// <c>Program.log</c> and not rethrown, so a failure part-way through can leave
        /// partial output for the page.
        /// </remarks>
        public void WritePageStatic(HtmlData pageData)
        {
            try
            {
                // Header line identifying the page the following entries belong to.
                sw.WriteLine("Current Page : " + pageData.Title + "  URL :" + pageData.Url);

                foreach (var line in pageData.MetadataList)
                {
                    sw.WriteLine(line);
                }

                foreach (var line in pageData.AnchorList)
                {
                    sw.WriteLine(line);
                }

                foreach (var line in pageData.ImageList)
                {
                    sw.WriteLine(line);
                }
            }
            catch (Exception ex)
            {
                Program.log.Error("Error writing page statistic");
                // ToString() keeps the exception type and stack trace; Message alone
                // is usually not enough to diagnose a swallowed failure.
                Program.log.Error(ex.ToString());
            }
        }
Пример #2
0
        /// <summary>
        /// Fetches the document at <paramref name="URL"/>, extracts its title, raw HTML,
        /// <c>meta</c> elements, image sources and anchor targets into an
        /// <c>HtmlData</c>, and hands the result to <paramref name="stmapper"/> for writing.
        /// </summary>
        /// <param name="URL">Address of the page to fetch and parse.</param>
        /// <param name="stmapper">Writer that receives the parsed page data.</param>
        /// <returns>
        /// The parsed page data. If fetching or parsing fails, the error is logged and the
        /// object is returned with whatever had been filled in up to that point.
        /// </returns>
        private static HtmlData ParseURL(string URL, SiteMapWriter stmapper)
        {
            HtmlData parsedData = new HtmlData();

            try
            {
                Document document = GetDocumentFromUrl(URL);

                // Kept from the original; presumably resets the lists — confirm against HtmlData.
                parsedData.Clear();
                parsedData.Url     = document.BaseUri;
                parsedData.RawHtml = document.Html();
                parsedData.Title   = document.Title;

                foreach (Element meta in document.Select("meta"))
                {
                    parsedData.MetadataList.Add(meta.OuterHtml());
                }

                foreach (Element image in document.Select("img"))
                {
                    parsedData.ImageList.Add(image.Attr("src"));
                }

                foreach (Element anchor in document.Select("a"))
                {
                    // Read the attribute once instead of re-querying it for every use.
                    string href = anchor.Attr("href");
                    string baseUri = anchor.BaseUri;

                    // An href equal to the base URI (ignoring trailing slashes) is recorded
                    // as the base URI itself and bypasses the URLS.IsValidUri check.
                    bool pointsToBase = baseUri.TrimEnd('/') == href;
                    string candidate = pointsToBase ? baseUri : href;

                    // TryCreate instead of new Uri(...): a single malformed link must not
                    // throw into the outer catch and abort parsing of the whole page.
                    Uri link;
                    if ((pointsToBase || URLS.IsValidUri(href))
                        && Uri.TryCreate(candidate, UriKind.Absolute, out link))
                    {
                        parsedData.AnchorList.Add(link);
                    }
                    else
                    {
                        log.Info("A href was invalid : " + href);
                    }
                }

                // Record links to static content (such as images) and to external URLs;
                // they are written out here but not visited.
                stmapper.WritePageStatic(parsedData);
            }
            catch (Exception ex)
            {
                log.Error("There was a problem retrieving URL : " + URL);
                // ToString() keeps the exception type and stack trace for diagnosis.
                log.Error(ex.ToString());
            }

            return parsedData;
        }