Пример #1
0
        /// <summary>
        /// Fetches the resource at <c>url</c> and, when the response is HTML, stores the
        /// result of <c>TagRemover.ProcessText</c> in <c>source</c> and the result of
        /// <c>extractTitle()</c> in <c>title</c>.
        /// </summary>
        /// <remarks>
        /// <c>source</c> is set to an empty string when the request fails or when the
        /// response content type does not contain "text/html". <c>title</c> is left
        /// untouched in those cases. Exceptions thrown while reading or processing the
        /// body still propagate to the caller, but the response is now always released.
        /// </remarks>
        private void load()
        {
            HttpWebResponse response;

            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                request.UserAgent = "LOTW UNI Crawler/computer science research project";
                request.Timeout   = 5000; // milliseconds
                response          = (HttpWebResponse)request.GetResponse();
            }
            catch (Exception)
            {
                // Best effort: any failure to obtain a response is treated as an empty page.
                source = "";
                return;
            }

            // Dispose the response on every path, including when reading/processing throws.
            using (response)
            {
                // Media type names are case-insensitive, so compare ordinally ignoring case.
                bool isHtml = response.ContentType.IndexOf("text/html", StringComparison.OrdinalIgnoreCase) > -1;
                if (!isHtml)
                {
                    source = "";
                    return;
                }

                using (StreamReader reader = new StreamReader(response.GetResponseStream()))
                {
                    source = reader.ReadToEnd();
                }

                // extractTitle() runs before the markup is replaced below
                // (presumably it reads the raw markup from 'source' - confirm against its definition).
                title = extractTitle();

                TagRemover tagRemover = new TagRemover();
                source = tagRemover.ProcessText(new StringReader(source));
            }
        }
Пример #2
0
        // Entry point: runs every processing step exactly once, in the order written below.
        // NOTE(review): later steps presumably consume the output of earlier ones -
        // do not reorder without confirming against the individual classes.
        static void Main(string[] args)
        {
            string pagesPath = i.Helper.PAGES_PATH;
            string docsPath  = i.Helper.DOCS_PATH;

            // Tag removal, using the pages path as source and the docs path as target.
            new i.TagRemover(pagesPath, docsPath).RemoveTags();

            // Title extraction over the pages path.
            new i.TitleExtractor().Extract(i.Helper.PAGES_PATH);

            // Link processing.
            new i.LinkProcessor().Run();

            // Title / URL tag step.
            new i.DataHelper().AddTitleUrlTags();

            // Term extraction.
            new i.TermParser().ExtractTerms();

            // Both IDF calculations run on the same Calculator instance.
            i.Calculator calculator = new i.Calculator();
            calculator.CalculateIdfs();
            calculator.CalculateIdfsA();

            // Anchor text step.
            i.AnchorTextProcessor anchorText = new IrProject.Indexing.AnchorTextProcessor();
            anchorText.AddAnchorText();

            // Tester run.
            Tester tester = new Tester();
            tester.Run();

            // Term/document table conversion.
            TextConverter converter = new TextConverter();
            converter.ConvertTermDocTable();
        }