Beispiel #1
0
        /// <summary>
        /// Downloads an XML document, parses it, and queues the entries it lists.
        /// If the document contains <c>sitemap</c> elements, their locations are added to
        /// the XML queue; otherwise its <c>url</c> elements are added to the URL queue.
        /// </summary>
        /// <param name="xml">
        /// Value passed to <c>DownloadText</c>; the text returned by that call is what
        /// gets parsed as XML.
        /// </param>
        /// <remarks>
        /// Entries without a <c>loc</c> child are skipped. Dated entries are queued only
        /// when <c>IsRecentUrl</c> accepts them; entries with no date at all are queued
        /// unconditionally, exactly once.
        /// </remarks>
        private void ParseXml(string xml)
        {
            XmlDocument doc = new XmlDocument();

            doc.LoadXml(DownloadText(xml));

            XmlNodeList sitemaps = doc.GetElementsByTagName("sitemap");

            if (sitemaps.Count <= 0)
            {
                // xml doc is a list of urls, not sitemaps
                XmlNodeList sites = doc.GetElementsByTagName("url");
                foreach (XmlNode node in sites)
                {
                    XmlElement locElement = node["loc"];
                    if (locElement == null)
                    {
                        // Entry has no location, so there is nothing to queue.
                        continue;
                    }

                    string urlToAdd = locElement.InnerText.Trim();

                    // Prefer <lastmod>; fall back to the news publication date.
                    XmlElement lastModElement = node["lastmod"];
                    XmlElement newsElement = node["news:news"];
                    XmlElement publicationDateElement =
                        newsElement != null ? newsElement["news:publication_date"] : null;

                    string lastMod;
                    if (lastModElement != null)
                    {
                        lastMod = lastModElement.InnerText.Trim();
                    }
                    else if (publicationDateElement != null)
                    {
                        lastMod = publicationDateElement.InnerText.Trim();
                    }
                    else
                    {
                        // Generic url with no date: queue it once and skip the recency
                        // check, which would otherwise be able to queue it a second time.
                        AddToUrlQueue(urlToAdd);
                        continue;
                    }

                    if (IsRecentUrl(urlToAdd, lastMod))
                    {
                        AddToUrlQueue(urlToAdd);
                    }
                }
            }
            else
            {
                // xml is sitemaps
                foreach (XmlNode node in sitemaps)
                {
                    XmlElement locElement = node["loc"];
                    if (locElement == null)
                    {
                        // Entry has no location, so there is nothing to queue.
                        continue;
                    }

                    string xmlToAdd = locElement.InnerText.Trim();

                    // <lastmod> is optional on sitemap entries. With no date there is
                    // nothing to filter on, so queue the sitemap the same way undated
                    // url entries are queued.
                    XmlElement lastModElement = node["lastmod"];
                    if (lastModElement == null
                        || IsRecentUrl(xmlToAdd, lastModElement.InnerText.Trim()))
                    {
                        connection.AddToXmlQueue(xmlToAdd);
                    }
                }
            }
        }
Beispiel #2
0
 /// <summary>
 /// Adds <paramref name="url"/> to the connection's XML queue and then resumes the
 /// connection's workers.
 /// </summary>
 /// <param name="url">Value handed unchanged to <c>connection.AddToXmlQueue</c>.</param>
 /// <remarks>
 /// NOTE(review): the name suggests a manual/debugging entry point for injecting a
 /// single sitemap — confirm against callers.
 /// </remarks>
 public void AddXmlDebug(string url)
 {
     connection.AddToXmlQueue(url);
     // Resume only after the item is queued, keeping the original call order.
     connection.ResumeWorkers();
 }