/// <summary>
/// Looks up the stored row for a page and returns the fields used to render its preview.
/// </summary>
/// <param name="key">Value matched against the row's "PartitionKey".</param>
/// <param name="url">Page URL; the value of <c>LinkRow.CreateMD5(url)</c> is matched against "RowKey".</param>
/// <returns>
/// The row's title, URL, image link and body, in that order; or a single
/// "No Preview Found" entry when no row matches or the lookup fails.
/// </returns>
public List<string> Preview(string key, string url)
{
    List<string> results = new List<string>();

    string rowKeyFilter = TableQuery.GenerateFilterCondition("RowKey", QueryComparisons.Equal, LinkRow.CreateMD5(url));
    string partitionKeyFilter = TableQuery.GenerateFilterCondition("PartitionKey", QueryComparisons.Equal, key);

    // CombineFilters produces "(expr1) Operator (expr2)", so passing a complex
    // expression results in a logical grouping.
    string combinedFilter = TableQuery.CombineFilters(rowKeyFilter, TableOperators.And, partitionKeyFilter);

    LinkRow match = null;
    try
    {
        TableQuery<LinkRow> query = new TableQuery<LinkRow>().Where(combinedFilter);

        // FirstOrDefault yields null for an empty result instead of relying on an
        // out-of-range indexer exception to signal "not found".
        match = DBConnect.GetLinkTable().ExecuteQuery(query).FirstOrDefault();
    }
    catch
    {
        // Best effort: a failed lookup is reported to the caller the same way as a miss.
        match = null;
    }

    if (match == null)
    {
        results.Add("No Preview Found");
        return results;
    }

    results.Add(match.Title);
    results.Add(match.URL);
    results.Add(match.Img);
    results.Add(match.Body);
    return results;
}
/// <summary>
/// Creates a row control for <paramref name="link"/>, hooks its events up to the
/// corresponding actions, and appends it to both the row list and the table.
/// </summary>
/// <param name="link">The link the new row represents.</param>
void AddLinkRow(SyncLink link)
{
    LinkRow row = new LinkRow(link);

    // Each handler ignores the event arguments and acts on the captured link.
    row.LinkDeletionRequested += delegate { DataManager.RemoveLink(link.Title); };
    row.EditLinkRequested += delegate { new LinkDataForm(link).ShowDialog(); };
    row.LinkRowSelected += delegate { SelectLinkRow(link); };
    row.SyncStartRequested += delegate { StartSync(link); };
    row.SyncCancellationRequested += delegate { CancelSync(link); };

    row.Anchor = AnchorStyles.Top | AnchorStyles.Left | AnchorStyles.Right;
    row.Width = dataTable.Width;

    _linkRows.Add(row);

    // The position is the list count *after* the insertion above.
    int position = _linkRows.Count;
    dataTable.Controls.Add(row, 0, position);
}
/// <summary>
/// Refreshes the overall progress display and the sync data of every link row
/// whose link currently has sync info.
/// </summary>
void UpdateSyncInfo()
{
    // Overall progress: bar value plus a one-decimal percentage label.
    float totalProgress = DataManager.GetTotalProgress();
    progressBar_total.Value = totalProgress;
    label_p.Text = $"{totalProgress:0.0}%";

    // Per-link refresh; links without sync info are left untouched.
    foreach (SyncLink link in DataManager.Links)
    {
        LinkRow row = GetLinkRow(link);
        if (link.SyncInfo != null)
        {
            row.UpdateSyncData();
        }
    }
}
/// <summary>
/// Scores a result row against the search words.
/// </summary>
/// <param name="res">Row whose Title and URL are inspected.</param>
/// <param name="words">Search words; compared as-is against the lower-cased title.</param>
/// <returns>
/// For a URL containing "wikipedia": 4 when every word occurs in the title
/// (including when <paramref name="words"/> is empty), otherwise 0.
/// For any other URL: the number of words that occur in the title.
/// </returns>
private int calculateScore(LinkRow res, List<string> words)
{
    // Both values are loop-invariant; compute them once instead of once per word.
    bool isWikipedia = res.URL.Contains("wikipedia");
    string lowerTitle = (res.Title ?? string.Empty).ToLower();

    int matched = 0;
    foreach (string word in words)
    {
        if (lowerTitle.Contains(word))
        {
            matched++;
        }
    }

    if (!isWikipedia)
    {
        return matched;
    }

    // Wikipedia pages earn no per-word points, only an all-or-nothing bonus.
    return matched == words.Count ? 4 : 0;
}
/// <summary>
/// Crawls one page: loads it, extracts image link, title, publication date, outgoing
/// links and body text, queues links that have not been crawled yet, and stores one
/// table row per title keyword.
/// </summary>
/// <param name="url">URL of the page to load.</param>
/// <remarks>
/// Only title words are indexed; the body text is stored on each row but not indexed.
/// Any exception that escapes an individual step (for example a failed page load) is
/// reported through <c>Error("404", url)</c>.
/// </remarks>
public static void ProcessHTML(string url)
{
    System.Diagnostics.Debug.WriteLine("ProcessHTML");
    try
    {
        HtmlWeb hw = new HtmlWeb();
        HtmlDocument doc = hw.Load(url);
        System.Diagnostics.Debug.WriteLine("HTML Loaded");

        // Image: first <img> whose class contains 'media__image', if it has a src.
        string imglink = "none";
        HtmlNode img = doc.DocumentNode.SelectSingleNode("//img[contains(@class, 'media__image')]");
        if (img != null && img.Attributes["src"] != null)
        {
            imglink = img.Attributes["src"].Value;
        }

        // Title: prefer //head/title, fall back to any <title> element.
        HtmlNode titleNode = doc.DocumentNode.SelectSingleNode("//head/title")
            ?? doc.DocumentNode.Descendants("title").FirstOrDefault();
        string title = titleNode != null ? titleNode.InnerText : "no title found";

        List<string> masterIndex = ExtractTitleKeywords(title);

        // Publication date: 'pubdate' meta, then 'og:pubdate' meta, then the current time.
        DateTime date;
        if (!TryReadMetaDate(doc, "//head/meta[@name='pubdate']", out date)
            && !TryReadMetaDate(doc, "//head/meta[@property='og:pubdate']", out date))
        {
            date = DateTime.Now;
        }

        // Outgoing links. SelectNodes returns null (not an empty collection) when nothing matches.
        var anchors = doc.DocumentNode.SelectNodes("//a[@href]");
        if (anchors == null)
        {
            System.Diagnostics.Debug.WriteLine("No <a> tags");
        }
        else
        {
            try
            {
                foreach (HtmlNode link in anchors)
                {
                    string href = link.Attributes["href"].Value;

                    // ".html" always contains ".htm", so a single check covers both.
                    bool isCnn = href.Contains("cnn.com");
                    bool isBleacher = href.Contains("bleacherreport.com");
                    if (!href.Contains(".htm") || !(isCnn || isBleacher))
                    {
                        continue;
                    }

                    System.Diagnostics.Debug.WriteLine("Link on page: " + href);

                    // bleacherreport.com links are only followed when they also contain "nba".
                    if (!CrawledURLs.Contains(href)
                        && VerifyLink(href)
                        && (isCnn || (isBleacher && href.Contains("nba"))))
                    {
                        CloudQueueMessage newQueueLink = new CloudQueueMessage(href);
                        LogIfFaulted(
                            DBConnect.GetLinkQueue().AddMessageAsync(newQueueLink),
                            "Queue add failed for url: " + href);
                        CrawledURLs.Add(href);
                        System.Diagnostics.Debug.WriteLine("Added to LinkQueue: " + href);
                    }
                }
            }
            catch (Exception e)
            {
                // As before, a failure here stops link processing but not the rest of the crawl.
                System.Diagnostics.Debug.WriteLine("Link processing failed: " + e + " url: " + url);
            }
        }

        // Body text: these hosts use plain <p>; other pages use the 'zn-body__paragraph' class.
        StringBuilder sb = new StringBuilder();
        string bodyXPath = (url.Contains("money.cnn") || url.Contains("bleacherreport"))
            ? "//p"
            : "//*[contains(@class, 'zn-body__paragraph')]";
        var paragraphs = doc.DocumentNode.SelectNodes(bodyXPath);
        if (paragraphs == null)
        {
            System.Diagnostics.Debug.WriteLine("No body text found: " + url);
        }
        else
        {
            foreach (HtmlNode text in paragraphs)
            {
                sb.Append(text.InnerText);
            }
        }

        // Build the body string once rather than once per keyword.
        string body = sb.ToString();

        foreach (string key in masterIndex)
        {
            LinkRow lr = new LinkRow(key, url, title, date, body, imglink, "");
            TableOperation insertOperation = TableOperation.InsertOrReplace(lr);
            try
            {
                // The insert is fire-and-forget; the continuation makes asynchronous
                // failures visible, which the surrounding catch cannot do.
                LogIfFaulted(
                    DBConnect.GetLinkTable().ExecuteAsync(insertOperation),
                    "Table Insert Exception for url: " + url);
                System.Diagnostics.Debug.WriteLine("key: " + key + " url: " + url);
            }
            catch (Exception e)
            {
                System.Diagnostics.Debug.WriteLine("Table Insert Exception: " + e + " url: " + url);
            }
        }

        Task.Run(() => UpdateStats("Crawling"));
    }
    catch
    {
        Error("404", url);
    }
}

/// <summary>
/// Splits a page title into lower-cased index keywords, skipping stop words,
/// one-character words and words already covered by an existing keyword.
/// </summary>
private static List<string> ExtractTitleKeywords(string title)
{
    List<string> keywords = new List<string>();
    if (title == "no title found")
    {
        return keywords;
    }

    string[] titleWords = title.Split(new Char[] { ' ', ',', '.', ':', '!', '?', '-', '%', '"' });
    foreach (string wd in titleWords)
    {
        if (wd.Length <= 1)
        {
            continue;
        }

        // Compare in lower case throughout: the index only ever holds lower-case entries,
        // so the "word minus last character" check must be lower-cased as well.
        string lower = wd.ToLower();
        if (stopwordstitle.Contains(lower)
            || keywords.Contains(lower)
            || keywords.Contains(lower + "s")
            || keywords.Contains(lower.Substring(0, lower.Length - 1)))
        {
            continue;
        }

        // Apostrophes are stripped from the stored keyword; avoid re-adding a duplicate.
        string keyword = lower.Replace("'", "");
        if (!keywords.Contains(keyword))
        {
            keywords.Add(keyword);
        }
    }

    return keywords;
}

/// <summary>
/// Reads the 'content' attribute of the node selected by <paramref name="xpath"/>
/// and parses it as a date. Returns false when the node, the attribute or a
/// parsable value is missing.
/// </summary>
private static bool TryReadMetaDate(HtmlDocument doc, string xpath, out DateTime date)
{
    date = default(DateTime);
    HtmlNode meta = doc.DocumentNode.SelectSingleNode(xpath);
    if (meta == null || meta.Attributes["content"] == null)
    {
        return false;
    }

    return DateTime.TryParse(meta.Attributes["content"].Value, out date);
}

/// <summary>
/// Attaches a continuation that writes the task's exception to the debug output if it faults.
/// </summary>
private static void LogIfFaulted(Task task, string context)
{
    task.ContinueWith(
        t => System.Diagnostics.Debug.WriteLine(context + " : " + t.Exception),
        TaskContinuationOptions.OnlyOnFaulted);
}
/// <summary>
/// Initializes the event data with the affected row and the action applied to it.
/// </summary>
/// <param name="row">Row stored in <c>eventRow</c>.</param>
/// <param name="action">Action stored in <c>eventAction</c>.</param>
public LinkRowChangeEvent(LinkRow row, global::System.Data.DataRowAction action)
{
    eventAction = action;
    eventRow = row;
}
/// <summary>
/// Creates a new LinkParams row, fills its second and third columns from the first
/// column of the given parent rows (when supplied), adds it to the table and returns it.
/// </summary>
/// <param name="parentLinkRowByLinkLinkParams">Parent link row, or null to leave column 1 empty.</param>
/// <param name="parentParamRowByParamLinkParams">Parent param row, or null to leave column 2 empty.</param>
/// <returns>The row that was added.</returns>
public LinkParamsRow AddLinkParamsRow(LinkRow parentLinkRowByLinkLinkParams, ParamRow parentParamRowByParamLinkParams)
{
    LinkParamsRow newRow = (LinkParamsRow)NewRow();

    object linkValue = null;
    if (parentLinkRowByLinkLinkParams != null)
    {
        linkValue = parentLinkRowByLinkLinkParams[0];
    }

    object paramValue = null;
    if (parentParamRowByParamLinkParams != null)
    {
        paramValue = parentParamRowByParamLinkParams[0];
    }

    // Column 0 is always passed as null.
    newRow.ItemArray = new object[] { null, linkValue, paramValue };
    Rows.Add(newRow);
    return newRow;
}
/// <summary>
/// Creates a new ProcessClassLinks row, fills its second and third columns from the
/// first column of the given parent rows (when supplied), adds it to the table and returns it.
/// </summary>
/// <param name="parentProcessClassRowByProcessClassProcessClassLinks">Parent process-class row, or null to leave column 1 empty.</param>
/// <param name="parentLinkRowByLinkProcessClassLinks">Parent link row, or null to leave column 2 empty.</param>
/// <returns>The row that was added.</returns>
public ProcessClassLinksRow AddProcessClassLinksRow(ProcessClassRow parentProcessClassRowByProcessClassProcessClassLinks, LinkRow parentLinkRowByLinkProcessClassLinks)
{
    ProcessClassLinksRow newRow = (ProcessClassLinksRow)NewRow();

    object processClassValue = null;
    if (parentProcessClassRowByProcessClassProcessClassLinks != null)
    {
        processClassValue = parentProcessClassRowByProcessClassProcessClassLinks[0];
    }

    object linkValue = null;
    if (parentLinkRowByLinkProcessClassLinks != null)
    {
        linkValue = parentLinkRowByLinkProcessClassLinks[0];
    }

    // Column 0 is always passed as null.
    newRow.ItemArray = new object[] { null, processClassValue, linkValue };
    Rows.Add(newRow);
    return newRow;
}
/// <summary>
/// Removes <paramref name="row"/> from this table's row collection.
/// </summary>
/// <param name="row">The row to remove.</param>
public void RemoveLinkRow(LinkRow row)
{
    Rows.Remove(row);
}
/// <summary>
/// Adds <paramref name="row"/> to this table's row collection.
/// </summary>
/// <param name="row">The row to add.</param>
public void AddLinkRow(LinkRow row)
{
    Rows.Add(row);
}
/// <summary>
/// Looks up a stored page by URL and returns its title and publication date.
/// </summary>
/// <param name="q">Page URL; the value of <c>LinkRow.CreateMD5(q)</c> is matched against "RowKey".</param>
/// <returns>
/// Two entries: the row's title and "Published: " followed by its publication date.
/// When no row matches or the lookup fails, the two entries are
/// "No Page Found" and "Try a different URL".
/// </returns>
public List<string> GetTitle(string q)
{
    List<string> titledate = new List<string>();
    try
    {
        TableQuery<LinkRow> rangeQuery = new TableQuery<LinkRow>().Where(
            TableQuery.GenerateFilterCondition("RowKey", QueryComparisons.Equal, LinkRow.CreateMD5(q)));

        LinkRow row = DBConnect.GetLinkTable().ExecuteQuery(rangeQuery).FirstOrDefault();
        if (row != null)
        {
            titledate.Add(row.Title);
            titledate.Add("Published: " + row.DatePublished.ToString());
            return titledate;
        }
    }
    catch
    {
        // Best effort: discard any partial output and report the failure like a miss.
        titledate.Clear();
    }

    // Reached both when the lookup failed and when it succeeded without a match, so
    // callers always receive exactly two entries.
    titledate.Add("No Page Found");
    titledate.Add("Try a different URL");
    return titledate;
}
/*
 * public cLinkCategory this[int viIndex]
 * {
 *     get { return (cLinkCategory)base.List[viIndex]; }
 * }
 */

/// <summary>
/// Loads link categories from the file named by <c>msFileName</c> into <c>oCatagories</c>.
/// </summary>
/// <remarks>
/// Each record is one line of '|'-separated fields; fields 1, 2 and 3 become the
/// category's Name, FileName and LinkDefine. Lines of three or fewer non-blank
/// characters, or with fewer than four fields, are skipped.
/// When the file does not exist, an empty "LinkCategories.txt" is created and
/// nothing is loaded.
/// </remarks>
/// <returns>True when the file exists and contains at least one line; otherwise false.</returns>
public bool Load1()
{
    string sPath = msFileName;

    if (!File.Exists(sPath))
    {
        // Dispose the stream File.Create returns; leaving it open keeps the new file locked.
        File.Create("LinkCategories.txt").Dispose();
        return false;
    }

    bool readAnyLine = false;

    // The using block closes the reader on every exit path, including exceptions.
    using (StreamReader reader = File.OpenText(sPath))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            readAnyLine = true;

            if (line.Trim().Length <= 3)
            {
                continue;
            }

            string[] fields = line.Split('|');
            if (fields.Length < 4)
            {
                // Malformed record: not enough fields to populate a category.
                continue;
            }

            cLinkCategory category = new cLinkCategory();
            category.Name = fields[1];
            category.FileName = fields[2];
            category.LinkDefine = fields[3];
            oCatagories.Add(category);
        }
    }

    return readAnyLine;
}