/// <summary>
/// Downloads the text identified by <paramref name="xml"/> via <c>DownloadText</c>,
/// parses it as XML and queues the entries it contains.
/// </summary>
/// <remarks>
/// If the document contains <c>sitemap</c> elements, it is treated as a list of
/// sitemaps and each entry's <c>loc</c> is passed to <c>connection.AddToXmlQueue</c>.
/// Otherwise its <c>url</c> elements are read and each <c>loc</c> is passed to
/// <c>AddToUrlQueue</c>.
/// An entry that carries a date (<c>lastmod</c>, or for urls
/// <c>news:news/news:publication_date</c>) is queued only when
/// <c>IsRecentUrl</c> accepts it. An entry with no date is queued unconditionally,
/// exactly once. An entry without a <c>loc</c> child is skipped.
/// </remarks>
/// <param name="xml">
/// Value handed to <c>DownloadText</c>; presumably the URL of a sitemap or
/// sitemap index document (confirm against callers).
/// </param>
private void ParseXml(string xml)
{
    XmlDocument doc = new XmlDocument();
    doc.LoadXml(DownloadText(xml));

    XmlNodeList sitemaps = doc.GetElementsByTagName("sitemap");
    if (sitemaps.Count == 0)
    {
        // xml doc is a list of urls, not sitemaps
        XmlNodeList sites = doc.GetElementsByTagName("url");
        foreach (XmlNode node in sites)
        {
            XmlElement loc = node["loc"];
            if (loc == null)
            {
                // Nothing to queue without a location; skip instead of throwing.
                continue;
            }

            string urlToAdd = loc.InnerText.Trim();

            // Prefer <lastmod>; fall back to the news publication date.
            XmlElement dateElement = node["lastmod"];
            if (dateElement == null)
            {
                XmlElement news = node["news:news"];
                if (news != null)
                {
                    dateElement = news["news:publication_date"];
                }
            }

            if (dateElement == null)
            {
                // generic url: no date to test, so queue it once and move on
                // (previously this fell through to IsRecentUrl with an empty date
                // and could queue the same url a second time).
                AddToUrlQueue(urlToAdd);
                continue;
            }

            if (IsRecentUrl(urlToAdd, dateElement.InnerText.Trim()))
            {
                AddToUrlQueue(urlToAdd);
            }
        }
    }
    else
    {
        // xml is sitemaps
        foreach (XmlNode node in sitemaps)
        {
            XmlElement loc = node["loc"];
            if (loc == null)
            {
                // Nothing to queue without a location; skip instead of throwing.
                continue;
            }

            string xmlToAdd = loc.InnerText.Trim();

            // <lastmod> is optional on a sitemap entry. When it is absent, queue the
            // sitemap unconditionally, mirroring how undated urls are handled above.
            XmlElement lastModElement = node["lastmod"];
            if (lastModElement == null || IsRecentUrl(xmlToAdd, lastModElement.InnerText.Trim()))
            {
                connection.AddToXmlQueue(xmlToAdd);
            }
        }
    }
}
/// <summary>
/// Adds <paramref name="url"/> to the connection's XML queue and then asks the
/// connection to resume its workers.
/// </summary>
/// <param name="url">
/// Value forwarded unchanged to <c>connection.AddToXmlQueue</c>; presumably the
/// address of an XML document to process (confirm against callers).
/// </param>
public void AddXmlDebug(string url)
{
    // Queue first, then resume, so the new entry is already queued when workers resume.
    connection.AddToXmlQueue(url);
    connection.ResumeWorkers();
}