Exemplo n.º 1
0
        /// <summary>
        /// Loads one page of outbound links for the current id, binds it to
        /// <c>dgrPages</c>, and then lets the pager adjust itself.
        /// </summary>
        /// <remarks>
        /// The first table of the returned data set is used as the grid source; the
        /// first cell of the second table is handed to <c>pager.AdjustAfterBinding</c>
        /// (presumably the total row count -- confirm against GetOutboundLinksP).
        /// </remarks>
        private void BindData()
        {
            d.LinkData linkData = new d.LinkData();
            DataSet    result   = linkData.GetOutboundLinksP(
                getId(),
                pager.PageSize,
                pager.CurrentPage,
                dgrPages.SortExpression);

            // Bind the page of rows first; the pager is only updated afterwards.
            DataTable pageRows = result.Tables[0];
            dgrPages.DataSource = pageRows;
            dgrPages.DataBind();

            int pagerValue = Convert.ToInt32(result.Tables[1].Rows[0][0]);
            pager.AdjustAfterBinding(pagerValue);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Rebuilds the <c>inboundLinks</c> and <c>outboundLinks</c> tables from the database.
        /// </summary>
        /// <remarks>
        /// Each table maps a document id (boxed int) to an <c>int[]</c>. The arrays are
        /// sized from columns 1 and 2 of <c>GetLinkCounts()</c> and then filled from
        /// <c>GetLinksSortByTo()</c> (inbound) and <c>GetLinksSortByFrom()</c> (outbound).
        /// </remarks>
        private void load()
        {
            DataTable docs = new d.DocData().GetLinkCounts();

            inboundLinks  = new Hashtable();
            outboundLinks = new Hashtable();

            // Pre-allocate one array per document: column 0 = id, 1 = inbound count, 2 = outbound count.
            foreach (DataRow dr in docs.Rows)
            {
                int id = Convert.ToInt32(dr[0]);
                inboundLinks.Add(id, new int[Convert.ToInt32(dr[1])]);
                outboundLinks.Add(id, new int[Convert.ToInt32(dr[2])]);
            }

            d.LinkData linkData = new d.LinkData();

            // Both result sets have the owning document id in column 0 and the
            // document at the other end of the link in column 1.
            fillLinkArrays(linkData.GetLinksSortByTo(), inboundLinks);
            fillLinkArrays(linkData.GetLinksSortByFrom(), outboundLinks);
        }

        /// <summary>
        /// Copies the column-1 ids of <paramref name="links"/> into the pre-sized arrays of
        /// <paramref name="linksByDoc"/>, grouped by the column-0 id.
        /// </summary>
        /// <param name="links">Rows of (owner id, linked id); must be sorted ascending by owner id.</param>
        /// <param name="linksByDoc">Table mapping owner id to the <c>int[]</c> to fill.</param>
        /// <remarks>
        /// NOTE(review): as in the original inline loops, a row whose owner id has no entry in
        /// <paramref name="linksByDoc"/>, or more rows for an owner than its array holds, ends in
        /// a NullReferenceException / IndexOutOfRangeException. That only happens when the link
        /// rows and the link counts disagree.
        /// </remarks>
        private static void fillLinkArrays(DataTable links, Hashtable linksByDoc)
        {
            int   currId    = -1;
            int   cursor    = 0;
            int[] currLinks = null;

            foreach (DataRow dr in links.Rows)
            {
                int ownerId  = Convert.ToInt32(dr[0]);
                int linkedId = Convert.ToInt32(dr[1]);

                // A larger owner id marks the start of the next group: switch to
                // that document's array and restart writing at its first slot.
                if (currId < ownerId)
                {
                    cursor    = 0;
                    currId    = ownerId;
                    currLinks = (int[])linksByDoc[ownerId];
                }
                currLinks[cursor++] = linkedId;
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Writes every row returned by <c>LinkData.GetAll()</c> to the console, one line
        /// per row, in the form <c>fromid toid [[text]]</c>.
        /// </summary>
        /// <remarks>
        /// Columns 1, 2 and 3 of each row supply the from-id, to-id and link text.
        /// </remarks>
        public void ConvertLinkTable()
        {
            DataTable links = new d.LinkData().GetAll();

            foreach (DataRow row in links.Rows)
            {
                string fromid = row[1].ToString();
                string toid   = row[2].ToString();
                string text   = row[3].ToString();

                Console.WriteLine(fromid + " " + toid + " [[" + text + "]]");
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Re-parses the stored document files and records the anchor text of every link
        /// that points at a known document.
        /// </summary>
        /// <remarks>
        /// For each file in <c>Helper.DOCS_PATH</c> (from a fixed resume index onward) the
        /// document id is taken from the file name up to its first '.', the file's HTML is
        /// matched against the regex from <c>makeRegex()</c>, and for every match whose
        /// resolved URL maps to a document id &gt; 0 and whose text is non-empty, the
        /// whitespace-normalized text is stored through <c>LinkData.UpdateText</c>.
        /// A failure on one link is reported to standard error and does not stop the run.
        /// </remarks>
        public void Run()
        {
            // Files before this index were handled by an earlier run.
            // NOTE(review): this relies on Directory.GetFiles returning the files in the
            // same order on every run -- confirm before relying on the resume point.
            const int firstFileIndex = 545;

            UrlHelper urlHelper = new UrlHelper();
            Regex     regex     = makeRegex();

            // Matches a whole run of whitespace so it can be collapsed to one space in a single pass.
            Regex rSpace = new Regex(@"\s+");

            d.DocData  pd = new d.DocData();
            d.LinkData ld = new d.LinkData();

            string[] files = Directory.GetFiles(Helper.DOCS_PATH);

            for (int i = firstFileIndex; i < files.Length; i++)
            {
                Console.WriteLine("processing file #" + i);

                FileInfo fi      = new FileInfo(files[i]);
                int      pageId  = Convert.ToInt32(fi.Name.Substring(0, fi.Name.IndexOf(".")));
                d.Doc    p       = new d.Doc(pageId);
                Uri      baseUri = new Uri(p.Url);

                // Dispose the reader (and the underlying file stream) as soon as the text is read.
                string html;
                using (StreamReader sr = new StreamReader(fi.OpenRead()))
                {
                    html = sr.ReadToEnd();
                }

                MatchCollection mc = regex.Matches(html);

                Console.WriteLine("found " + mc.Count + " links");

                foreach (Match m in mc)
                {
                    try
                    {
                        // Group 3 carries the link target, group 4 the link text.
                        Uri    childUri      = new Uri(baseUri, urlHelper.MakeLink(m.Groups[3].ToString()));
                        string linkToProcess = urlHelper.NormalizeUrl(childUri.AbsoluteUri);
                        string linkText      = m.Groups[4].ToString();

                        int linkId = pd.GetIdByUrl(linkToProcess);
                        if (linkId > 0 && linkText != "")                         //found page!
                        {
                            // Collapse every whitespace run to one space, then drop leading/trailing space.
                            linkText = rSpace.Replace(linkText, " ").Trim();
                            ld.UpdateText(pageId, linkId, linkText);
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: one bad link must not abort the whole run, but report it.
                        Console.Error.WriteLine("skipping link in file #" + i + ": " + ex.Message);
                    }
                }
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// For every document id, counts the stemmed terms found in the text of the
        /// records returned by <c>LinkData.GetRecordsByToId</c> and stores each count via
        /// <c>TermDocData.UpdateAnchorTextCount</c>.
        /// </summary>
        /// <remarks>
        /// A term is counted only if, after lower-casing, trimming and stemming, it is not
        /// "home", is 1-24 characters long, is not in the stop list, and passes
        /// <c>ParseHelper.IsAsciiLetters</c>. The row index is printed every tenth document.
        /// </remarks>
        public void AddAnchorText()
        {
            d.StopList    stopWords  = new d.StopList();
            ParseHelper   helper     = new ParseHelper();
            char[]        separators = helper.GetDelims();
            PorterStemmer porter     = new PorterStemmer();
            d.LinkData    linkData   = new d.LinkData();
            d.TermDocData termDocs   = new d.TermDocData();
            DataTable     docIds     = new d.DocData().GetIds();

            for (int row = 0; row < docIds.Rows.Count; row++)
            {
                // Progress output: every tenth document.
                if (row % 10 == 0)
                {
                    Console.WriteLine(row);
                }

                int docId = (int)docIds.Rows[row][0];

                // Concatenate column 0 of every record for this document, space separated.
                StringBuilder anchorText = new StringBuilder();
                foreach (DataRow link in linkData.GetRecordsByToId(docId).Rows)
                {
                    anchorText.Append(link[0].ToString());
                    anchorText.Append(' ');
                }

                // Tally accepted terms: term -> occurrence count.
                Hashtable counts = new Hashtable();
                foreach (string raw in anchorText.ToString().Split(separators))
                {
                    string term = porter.stemTerm(raw.ToLower().Trim());

                    if (term == "home")
                    {
                        continue;
                    }
                    if (term.Length == 0 || term.Length >= 25)
                    {
                        continue;
                    }
                    if (stopWords.Contains(term))
                    {
                        continue;
                    }
                    if (!helper.IsAsciiLetters(term))
                    {
                        continue;
                    }

                    // Stored counts are never null, so null means "not seen yet".
                    object seen = counts[term];
                    if (seen == null)
                    {
                        counts.Add(term, 1);
                    }
                    else
                    {
                        counts[term] = (int)seen + 1;
                    }
                }

                // Persist each term's count for this document.
                foreach (DictionaryEntry entry in counts)
                {
                    termDocs.UpdateAnchorTextCount(entry.Key.ToString(), docId, (int)entry.Value);
                }
            }
        }