/// <summary>
/// Scrapes the description for <paramref name="fileidentifier"/> and stores the
/// resulting entry in the database, reporting the outcome on the console.
/// </summary>
/// <param name="fileidentifier">Identifier handed to the <c>DescriptionScraper</c>.</param>
public void CreateDatabaseEntry(string fileidentifier)
{
    // The second constructor argument is false here; presumably it marks this as
    // the first (non-retry) attempt — confirm against DescriptionScraper.
    Entry scraped = new DescriptionScraper(fileidentifier, false).Scrape();

    // The scraper signals failure with the sentinel identifier "ERROR".
    if (scraped.Identifier == "ERROR")
    {
        System.Console.WriteLine("An Error Occurred.");
        return;
    }

    System.Console.WriteLine("Entry Added For " + scraped.Identifier);
    this.db.AddEntryObject(scraped);
}
/// <summary>
/// Loads the page at <c>url</c> and extracts the title, abstract and descriptor
/// tags from its <c>&lt;p&gt;</c> elements, which are expected to look like
/// "label: value".
/// </summary>
/// <returns>
/// An <c>Entry</c> carrying <c>identifier</c>, title, abstract and tags when all
/// three fields were found; otherwise an <c>Entry</c> whose <c>Identifier</c> is
/// the sentinel <c>"ERROR"</c>.
/// </returns>
/// <remarks>
/// If parsing throws and this instance is not already a retry, a second
/// <c>DescriptionScraper</c> is created with the retry flag set and its result is
/// returned instead. Exceptions thrown while loading the page are not caught here.
/// </remarks>
public Entry Scrape()
{
    HtmlWeb hw = new HtmlWeb();

    // NOTE(review): the load sits outside the try block, so a failure to fetch
    // the page propagates to the caller rather than triggering the retry below.
    HtmlDocument doc = hw.Load(url);
    if (doc == null)
    {
        return new Entry { Identifier = "ERROR" };
    }

    // Start as null so the completeness check below can tell "never found"
    // apart from "found".
    string title = null;
    string abstr = null;
    string[] tags = null;

    try
    {
        // SelectNodes yields null (not an empty collection) when nothing matches.
        var paragraphs = doc.DocumentNode.SelectNodes("//p");
        if (paragraphs != null)
        {
            foreach (HtmlNode node in paragraphs)
            {
                // Split on the first colon only, so a value that itself contains
                // a colon is kept whole.
                // NOTE(review): the whole paragraph is lower-cased, so stored
                // titles and abstracts are lower-case too — confirm this is wanted.
                string[] parts = node.InnerText.ToLower().Split(new[] { ':' }, 2);

                // Skip paragraphs that are not "label: value" instead of letting
                // one stray paragraph abort the whole scrape.
                if (parts.Length < 2 || parts[0].Length == 0)
                {
                    continue;
                }

                // The label is matched without its final character, as before.
                // NOTE(review): this means a label written exactly as "title:"
                // does not match — confirm the page format relies on this.
                string label = parts[0].Substring(0, parts[0].Length - 1);

                // NOTE(review): the first Replace strips the whitespace character
                // in the literal; confirm whether a non-breaking space was meant,
                // since removing ordinary spaces joins the words of the value.
                string value = parts[1].Trim().Replace(" ", "").Replace("*", "");

                if (label.Contains("title"))
                {
                    title = value;
                }

                if (label.Contains("abstract"))
                {
                    abstr = value;
                }

                if (label.Contains("descriptors"))
                {
                    tags = value.Split(',');
                    for (int i = 0; i < tags.Length; i++)
                    {
                        tags[i] = tags[i].Trim();
                    }
                }
            }
        }

        if (title == null || abstr == null || tags == null)
        {
            return new Entry { Identifier = "ERROR" };
        }

        return new Entry { Identifier = identifier, Title = title, Abstr = abstr, Tags = tags };
    }
    catch (Exception)
    {
        if (retry)
        {
            // Already the second attempt: give up.
            return new Entry { Identifier = "ERROR" };
        }

        // First failure: try once more with a fresh scraper flagged as a retry.
        DescriptionScraper secondAttempt = new DescriptionScraper(identifier, true);
        return secondAttempt.Scrape();
    }
}
}