//const string Seed = "http://kenrockwell.com";
// NOTE(review): the seed URL above is commented out; unless LinkTable seeds
// itself, the crawl loop below presumably never runs - confirm.

/// <summary>
/// Crawl loop: takes links from the link table one at a time, downloads each
/// page, parses it, stores its words in the inverted index and queues the
/// links found on the page.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    Spider spider = new Spider();
    LinkTable linkTable = new LinkTable();
    ParseHtml parser = new ParseHtml();
    InvertedIndex store = new InvertedIndex();

    while (linkTable.HasLink())
    {
        var link = linkTable.GetLink();
        var webPage = spider.Crawl(link);

        // Wait for the download before touching Result. Reading Result on a
        // faulted or canceled task throws AggregateException, which previously
        // crashed the crawler before the Faulted/Canceled checks were reached.
        try
        {
            webPage.Wait();
        }
        catch (System.AggregateException)
        {
            // Download failed or was canceled: skip this link.
            continue;
        }

        // Wait() returned normally, so the task ran to completion; only the
        // response itself still needs validating.
        // NOTE(review): webPage.ToString() is called on the task object, not on
        // the page body, so this size guard likely never triggers - confirm and
        // measure the response content instead.
        if (webPage.Result == null
            || !webPage.Result.IsSuccessStatusCode
            || webPage.ToString().Length > 10000000)
        {
            continue;
        }

        var htmlDoc = parser.GetDocument(webPage.Result);

        // Same reasoning as above: the task must finish before its outcome can
        // be known, and a failed parse should skip the page rather than throw.
        try
        {
            htmlDoc.Wait();
        }
        catch (System.AggregateException)
        {
            continue;
        }

        var linksOnPage = parser.GetLinks(htmlDoc.Result);
        var wordsOnPage = parser.GetWords(htmlDoc.Result);

        store.Add(link, wordsOnPage);
        linkTable.Add(linksOnPage);
    }
}
/// <summary>
/// Creates an evaluater for the tree rooted at <paramref name="root"/>:
/// sets up an empty token queue and an empty result stack, remembers the
/// index, runs <c>evaluaterOrder</c> on the root and stores the value popped
/// from the stack in <c>result</c>.
/// </summary>
/// <param name="root">Root node handed to <c>evaluaterOrder</c>.</param>
/// <param name="index">Inverted index kept in <c>_index</c>.</param>
public Evaluater(Node root, InvertedIndex index)
{
    // All working state has to exist before the traversal starts.
    _index = index;
    stack = new Stack<IEnumerable<string>>();
    queue = new Queue<Token>();

    evaluaterOrder(root);

    // Presumably evaluaterOrder leaves the final set on top of the stack;
    // Pop() throws if the stack is empty.
    result = stack.Pop();
}
/// <summary>
/// Initializes the evaluater and immediately evaluates the given tree.
/// A fresh token queue and result stack are created, the index is stored,
/// <c>evaluaterOrder</c> is invoked on <paramref name="root"/>, and the top
/// of the stack is popped into <c>result</c>.
/// </summary>
/// <param name="root">Root node passed to <c>evaluaterOrder</c>.</param>
/// <param name="index">Inverted index assigned to <c>_index</c>.</param>
public Evaluater(Node root, InvertedIndex index)
{
    // Prepare the containers and the index first; the traversal follows.
    stack = new Stack<IEnumerable<string>>();
    queue = new Queue<Token>();
    _index = index;

    // Run the traversal, then take whatever ended up on top of the stack.
    evaluaterOrder(root);
    result = stack.Pop();
}
/// <summary>
/// Replaces <c>index</c> with a new inverted index and fills it with six
/// documents (doc1..doc6), each with four words.
/// </summary>
public void Init()
{
    // Note: fill test data into the inverted index -- in progress.
    index = new InvertedIndex();

    var wordsForDoc1 = new List<string> { "word1", "word2", "word3", "word4" };
    index.Add(doc1, wordsForDoc1);

    var wordsForDoc2 = new List<string> { "word2", "word4", "word6", "word8" };
    index.Add(doc2, wordsForDoc2);

    var wordsForDoc3 = new List<string> { "word3", "word6", "word9", "word12" };
    index.Add(doc3, wordsForDoc3);

    var wordsForDoc4 = new List<string> { "word4", "word8", "word12", "word16" };
    index.Add(doc4, wordsForDoc4);

    var wordsForDoc5 = new List<string> { "word5", "word10", "word15", "word20" };
    index.Add(doc5, wordsForDoc5);

    var wordsForDoc6 = new List<string> { "word6", "word12", "word18", "word24" };
    index.Add(doc6, wordsForDoc6);
}
//const string Seed = "http://kenrockwell.com";
// NOTE(review): with the seed URL commented out, the loop below presumably
// only runs if LinkTable provides its own starting links - confirm.

/// <summary>
/// Entry point of the crawler. While the link table has links, it downloads
/// the next link, parses the response into a document, adds the page's words
/// to the inverted index and adds the page's links to the link table.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    Spider spider = new Spider();
    LinkTable linkTable = new LinkTable();
    ParseHtml parser = new ParseHtml();
    InvertedIndex store = new InvertedIndex();

    while (linkTable.HasLink())
    {
        var link = linkTable.GetLink();

        var webPage = spider.Crawl(link);
        try
        {
            // Result throws AggregateException on a faulted or canceled task,
            // so the old Status/IsFaulted checks placed after a Result access
            // could never skip a failed download. Wait first and skip on failure.
            webPage.Wait();
        }
        catch (System.AggregateException)
        {
            continue;
        }

        // The task completed successfully here; validate the response itself.
        // NOTE(review): ToString() is invoked on the task, not the page body,
        // so the length limit likely has no effect - confirm.
        bool unusableResponse =
            webPage.Result == null
            || !webPage.Result.IsSuccessStatusCode
            || webPage.ToString().Length > 10000000;
        if (unusableResponse)
        {
            continue;
        }

        var htmlDoc = parser.GetDocument(webPage.Result);
        try
        {
            // The status of a task that has just been started says nothing
            // about its outcome; wait for it and skip the page if parsing fails.
            htmlDoc.Wait();
        }
        catch (System.AggregateException)
        {
            continue;
        }

        var linksOnPage = parser.GetLinks(htmlDoc.Result);
        var wordsOnPage = parser.GetWords(htmlDoc.Result);

        store.Add(link, wordsOnPage);
        linkTable.Add(linksOnPage);
    }
}