// Disabled (commented-out) draft of the crawler entry point: the whole next line is a comment,
// so neither the Seed constant nor Main below is compiled.
// What the disabled code does when enabled: creates a Spider, LinkTable, ParseHtml and InvertedIndex,
// then, while linkTable.HasLink() is true, takes a link, crawls it, passes the crawl result to
// parser.GetDocument, extracts links and words from the resulting document, stores (link, words) in
// the index and adds the extracted links back to the link table.
// An iteration is skipped when webPage.Result is null or not a success status, when
// webPage.ToString().Length exceeds 10000000, or when webPage/htmlDoc report Faulted or Canceled.
// NOTE(review): Seed is declared but never referenced in this snippet — presumably LinkTable is
// seeded elsewhere; confirm before re-enabling.
// NOTE(review): webPage exposes Result/Status/IsFaulted, so it looks like a Task. If so, reading
// webPage.Result before the Status checks blocks the thread and would throw on a faulted task, and
// webPage.ToString().Length measures the task's string form rather than the page size — TODO confirm.
//const string Seed = "http://kenrockwell.com"; static void Main(string[] args) { Spider spider = new Spider(); LinkTable linkTable = new LinkTable(); ParseHtml parser = new ParseHtml(); InvertedIndex store = new InvertedIndex(); while (linkTable.HasLink()) { var link = linkTable.GetLink(); var webPage = spider.Crawl(link); if (webPage.Result == null || !webPage.Result.IsSuccessStatusCode || webPage.ToString().Length > 10000000 || webPage.Status == TaskStatus.Canceled || webPage.Status == TaskStatus.Faulted || webPage.IsFaulted) { continue; } var htmlDoc = parser.GetDocument(webPage.Result); if (htmlDoc.Status == TaskStatus.Faulted || htmlDoc.Status == TaskStatus.Canceled) { continue; } var linksOnPage = parser.GetLinks(htmlDoc.Result); var wordsOnPage = parser.GetWords(htmlDoc.Result); store.Add(link, wordsOnPage); linkTable.Add(linksOnPage); } }
// Second disabled (commented-out) copy of the crawler entry point; nothing on the next line is compiled.
// It performs the same crawl -> parse -> index loop as the commented-out line directly above it; the
// only visible difference is that the first `continue` is written without braces.
// NOTE(review): the same caveats apply here — Seed is never referenced in the snippet, and webPage
// looks like a Task whose Result is read before its Status is checked — confirm before re-enabling,
// and consider keeping only one of the two disabled copies.
//const string Seed = "http://kenrockwell.com"; static void Main(string[] args) { Spider spider = new Spider(); LinkTable linkTable = new LinkTable(); ParseHtml parser = new ParseHtml(); InvertedIndex store = new InvertedIndex(); while (linkTable.HasLink()) { var link = linkTable.GetLink(); var webPage = spider.Crawl(link); if (webPage.Result == null || !webPage.Result.IsSuccessStatusCode || webPage.ToString().Length > 10000000 || webPage.Status == TaskStatus.Canceled || webPage.Status == TaskStatus.Faulted || webPage.IsFaulted ) continue; var htmlDoc = parser.GetDocument(webPage.Result); if (htmlDoc.Status == TaskStatus.Faulted || htmlDoc.Status == TaskStatus.Canceled) { continue; } var linksOnPage = parser.GetLinks(htmlDoc.Result); var wordsOnPage = parser.GetWords(htmlDoc.Result); store.Add(link, wordsOnPage); linkTable.Add(linksOnPage); } }