/// <summary>
/// Loads the current page of outbound links into the grid and updates the
/// pager with the total row count returned by the query.
/// </summary>
private void BindData()
{
    // Tables[0] holds the requested page of link rows;
    // Tables[1] holds a single cell with the total record count.
    // DataSet is IDisposable — dispose it once binding is done.
    using (DataSet ds = new d.LinkData().GetOutboundLinksP(getId(), pager.PageSize, pager.CurrentPage, dgrPages.SortExpression))
    {
        dgrPages.DataSource = ds.Tables[0];
        dgrPages.DataBind();
        pager.AdjustAfterBinding(Convert.ToInt32(ds.Tables[1].Rows[0][0]));
    }
}
/// <summary>
/// Builds the in-memory inbound/outbound link tables: for every document id,
/// an int[] of the ids linking to it (inboundLinks) and an int[] of the ids it
/// links to (outboundLinks).
/// NOTE(review): assumes GetLinksSortByTo/GetLinksSortByFrom return rows sorted
/// ascending by the grouping id, and that every id they mention also appears in
/// GetLinkCounts — otherwise currLinks is null/short and this throws. Confirm
/// against the queries.
/// </summary>
private void load()
{
    // One row per document: [0]=doc id, [1]=inbound count, [2]=outbound count.
    DataTable docs = new d.DocData().GetLinkCounts();
    inboundLinks = new Hashtable();
    outboundLinks = new Hashtable();
    int id;
    // Pre-size one int[] slot per document so the sweeps below can write by cursor.
    foreach (DataRow dr in docs.Rows)
    {
        id = Convert.ToInt32(dr[0]);
        inboundLinks.Add(id, new int[Convert.ToInt32(dr[1])]);
        outboundLinks.Add(id, new int[Convert.ToInt32(dr[2])]);
    }
    d.LinkData linkData = new d.LinkData();
    // First sweep: rows are (toid, fromid) grouped by toid. When the toid
    // changes we switch to that document's inbound array and restart its cursor.
    DataTable dt = linkData.GetLinksSortByTo();
    int currId = -1;
    int cursor = 0;
    int toid;
    int fromid;
    int[] currLinks = null;
    foreach (DataRow dr in dt.Rows)
    {
        toid = Convert.ToInt32(dr[0]);
        fromid = Convert.ToInt32(dr[1]);
        if (currId < toid)
        {
            // New target document: reset write position and switch arrays.
            cursor = 0;
            currId = toid;
            currLinks = (int[])inboundLinks[toid];
        }
        currLinks[cursor++] = fromid;
    }
    // Second sweep: same pattern, rows are (fromid, toid) grouped by fromid,
    // filling each document's outbound array.
    dt = linkData.GetLinksSortByFrom();
    currId = -1;
    cursor = 0;
    currLinks = null;
    foreach (DataRow dr in dt.Rows)
    {
        fromid = Convert.ToInt32(dr[0]);
        toid = Convert.ToInt32(dr[1]);
        if (currId < fromid)
        {
            cursor = 0;
            currId = fromid;
            currLinks = (int[])outboundLinks[fromid];
        }
        currLinks[cursor++] = toid;
    }
}
/// <summary>
/// Writes every link record to the console, one per line, in the form
/// "fromid toid [[text]]" (columns 1, 2, 3 of the link table).
/// </summary>
public void ConvertLinkTable()
{
    DataTable links = new d.LinkData().GetAll();
    foreach (DataRow row in links.Rows)
    {
        string line = string.Format("{0} {1} [[{2}]]", row[1], row[2], row[3]);
        Console.WriteLine(line);
    }
}
/// <summary>
/// Scans each saved document file for anchor links, resolves each href against
/// the document's base URL, and stores the normalized anchor text for links
/// that point to known pages.
/// </summary>
/// <param name="startIndex">File index to resume from. Defaults to 545 because
/// earlier files were already processed in a previous run; pass 0 for a full
/// re-scan.</param>
public void Run(int startIndex = 545)
{
    UrlHelper urlHelper = new UrlHelper();
    Regex regex = makeRegex();
    // Collapses any run of whitespace (spaces, tabs, newlines) to one space in
    // a single pass — replaces the old chain of repeated "  " -> " " replaces.
    Regex rWhitespace = new Regex(@"\s+");
    d.DocData pd = new d.DocData();
    d.LinkData ld = new d.LinkData();
    string path = Helper.DOCS_PATH;
    string[] files = Directory.GetFiles(path);
    for (int i = startIndex; i < files.Length; i++)
    {
        Console.WriteLine("processing file #" + i);
        FileInfo fi = new FileInfo(files[i]);
        // File name is "<pageId>.<ext>"; the numeric prefix keys the Doc record.
        int pageId = Convert.ToInt32(fi.Name.Substring(0, fi.Name.IndexOf(".")));
        d.Doc p = new d.Doc(pageId);
        Uri baseUri = new Uri(p.Url);
        string html;
        // using ensures the file stream is closed (was leaked before).
        using (StreamReader sr = new StreamReader(fi.OpenRead()))
        {
            html = sr.ReadToEnd();
        }
        MatchCollection mc = regex.Matches(html);
        Console.WriteLine("found " + mc.Count + " links");
        foreach (Match m in mc)
        {
            try
            {
                // Group 3 = href value, group 4 = anchor text — presumably as
                // captured by makeRegex(); confirm against its pattern.
                Uri childUri = new Uri(baseUri, urlHelper.MakeLink(m.Groups[3].ToString()));
                string linkToProcess = urlHelper.NormalizeUrl(childUri.AbsoluteUri);
                string linkText = m.Groups[4].ToString();
                int linkId = pd.GetIdByUrl(linkToProcess);
                if (linkId > 0 && linkText != "") //found page!
                {
                    // Normalize internal whitespace to single spaces, trim ends.
                    linkText = rWhitespace.Replace(linkText, " ").Trim();
                    ld.UpdateText(pageId, linkId, linkText);
                }
            }
            catch (Exception ex)
            {
                // Best-effort per link: malformed URLs etc. are skipped,
                // but no longer silently swallowed.
                Console.WriteLine("skipping link in file #" + i + ": " + ex.Message);
            }
        }
    }
}
/// <summary>
/// For each document, aggregates the anchor text of all inbound links, stems
/// and filters the resulting terms, and writes per-term occurrence counts to
/// the term/doc table via UpdateAnchorTextCount.
/// </summary>
public void AddAnchorText()
{
    d.StopList stopList = new d.StopList();
    ParseHelper parseHelper = new ParseHelper();
    char[] delims = parseHelper.GetDelims();
    PorterStemmer stemmer = new PorterStemmer();
    d.LinkData ld = new d.LinkData();
    d.TermDocData tdd = new d.TermDocData();

    DataTable dt = new d.DocData().GetIds();
    for (int i = 0; i < dt.Rows.Count; i++)
    {
        if (i % 10 == 0)
        {
            Console.WriteLine(i); // progress indicator
        }

        // Accumulate all inbound-link anchor text for this doc into one string.
        StringBuilder sb = new StringBuilder();
        int docId = (int)dt.Rows[i][0];
        DataTable linksTable = ld.GetRecordsByToId(docId);
        foreach (DataRow dr in linksTable.Rows)
        {
            sb.AppendFormat("{0} ", dr[0]);
        }

        // Tally stemmed terms, skipping "home", stop words, empty/overlong
        // tokens, and anything that is not pure ASCII letters.
        Hashtable currTerms = new Hashtable();
        string[] terms = sb.ToString().Split(delims);
        for (int j = 0; j < terms.Length; j++)
        {
            string term = stemmer.stemTerm(terms[j].ToLower().Trim());
            if (term != "home" && term.Length > 0 && term.Length < 25
                && !stopList.Contains(term) && parseHelper.IsAsciiLetters(term))
            {
                // Single hashtable lookup: null means term not seen yet
                // (replaces the Contains + indexer double lookup).
                object count = currTerms[term];
                currTerms[term] = (count == null) ? 1 : (int)count + 1;
            }
        }

        // Persist the counts. Enumerating DictionaryEntry gives key and value
        // together, avoiding the old re-lookup of each key.
        foreach (DictionaryEntry entry in currTerms)
        {
            tdd.UpdateAnchorTextCount(entry.Key.ToString(), docId, (int)entry.Value);
        }
    }
}