/// <summary>
/// Requests the page at <c>url</c> and stores its tag-stripped text in <c>source</c>.
/// </summary>
/// <remarks>
/// If the request cannot be created or fails, or the response is not declared as
/// <c>text/html</c>, <c>source</c> is set to the empty string. For HTML responses,
/// <c>title</c> is assigned from <c>extractTitle()</c> after the raw body has been
/// stored in <c>source</c> and before the markup is removed from it.
/// Errors raised while reading or processing the body are not caught here; the
/// response is still released in that case.
/// </remarks>
private void load()
{
    HttpWebResponse wr;
    try
    {
        HttpWebRequest wrq = (HttpWebRequest)WebRequest.Create(url);
        wrq.UserAgent = "LOTW UNI Crawler/computer science research project";
        wrq.Timeout = 5000; // milliseconds
        wr = (HttpWebResponse)wrq.GetResponse();
    }
    catch (Exception)
    {
        // Best effort: any failure to obtain a response (bad URL, timeout,
        // HTTP error status, ...) is treated as "no content".
        source = "";
        return;
    }

    // Dispose the response on every path so the underlying connection is
    // released even if reading or post-processing throws.
    using (wr)
    {
        // Media type names are case-insensitive, so compare ordinally while
        // ignoring case; parameters such as "; charset=..." may follow the type.
        if (wr.ContentType.IndexOf("text/html", StringComparison.OrdinalIgnoreCase) > -1)
        {
            using (StreamReader reader = new StreamReader(wr.GetResponseStream()))
            {
                source = reader.ReadToEnd();
            }

            // Called while source still holds the raw markup.
            title = extractTitle();

            TagRemover tr = new TagRemover();
            source = tr.ProcessText(new StringReader(source));
        }
        else
        {
            source = "";
        }
    }
}
/// <summary>
/// Program entry point: runs each processing step once, in a fixed order.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    // NOTE(review): the step order below is kept exactly as originally written.
    // Later steps presumably depend on output of earlier ones - confirm before reordering.

    // Tag removal, constructed with the pages path and the docs path.
    i.TagRemover tagRemover = new i.TagRemover(i.Helper.PAGES_PATH, i.Helper.DOCS_PATH);
    tagRemover.RemoveTags();

    // Title extraction over the pages path.
    i.TitleExtractor titleExtractor = new i.TitleExtractor();
    titleExtractor.Extract(i.Helper.PAGES_PATH);

    i.LinkProcessor linkProcessor = new i.LinkProcessor();
    linkProcessor.Run();

    i.DataHelper dataHelper = new i.DataHelper();
    dataHelper.AddTitleUrlTags();

    i.TermParser termParser = new i.TermParser();
    termParser.ExtractTerms();

    // Both calculations run on the same Calculator instance.
    i.Calculator calculator = new i.Calculator();
    calculator.CalculateIdfs();
    calculator.CalculateIdfsA();

    i.AnchorTextProcessor anchorTextProcessor = new IrProject.Indexing.AnchorTextProcessor();
    anchorTextProcessor.AddAnchorText();

    Tester tester = new Tester();
    tester.Run();

    TextConverter textConverter = new TextConverter();
    textConverter.ConvertTermDocTable();
}