示例#1
0
        /// <summary>
        /// Looks up the stored row for <paramref name="url"/> in partition <paramref name="key"/>
        /// and returns its preview fields.
        /// </summary>
        /// <param name="key">Value matched against the row's PartitionKey.</param>
        /// <param name="url">Page URL; the result of <c>LinkRow.CreateMD5(url)</c> is matched against the RowKey.</param>
        /// <returns>
        /// Title, URL, Img and Body (in that order) of the first matching row, or a single
        /// "No Preview Found" entry when no row matches or the query fails.
        /// </returns>
        public List <string> Preview(string key, string url)
        {
            List <string> results = new List <string>();

            string rowKeyFilter       = TableQuery.GenerateFilterCondition("RowKey", QueryComparisons.Equal, LinkRow.CreateMD5(url));
            string partitionKeyFilter = TableQuery.GenerateFilterCondition("PartitionKey", QueryComparisons.Equal, key);

            // CombineFilters produces "(Expression1) Operator (Expression2)", so each operand is
            // grouped logically even when it is itself a complex expression.
            string combinedFilter = TableQuery.CombineFilters(rowKeyFilter, TableOperators.And, partitionKeyFilter);

            LinkRow match = null;
            try
            {
                TableQuery <LinkRow> pointQuery = new TableQuery <LinkRow>().Where(combinedFilter);
                match = DBConnect.GetLinkTable().ExecuteQuery(pointQuery).FirstOrDefault();
            }
            catch
            {
                // Best effort: a failed lookup is reported to the caller the same way as "not found".
                match = null;
            }

            // An empty result is an expected outcome, so it is handled with a null check
            // rather than by indexing into an empty list and catching the exception.
            if (match == null)
            {
                results.Add("No Preview Found");
                return(results);
            }

            results.Add(match.Title);
            results.Add(match.URL);
            results.Add(match.Img);
            results.Add(match.Body);
            return(results);
        }
示例#2
0
        /// <summary>
        /// Creates the row control for <paramref name="link"/>, hooks its events up to the
        /// corresponding actions and appends it to the table.
        /// </summary>
        /// <param name="link">Link the new row represents.</param>
        void AddLinkRow(SyncLink link)
        {
            LinkRow row = new LinkRow(link);

            // Each request raised by the row is forwarded to the operation for this link.
            row.LinkDeletionRequested     += delegate { DataManager.RemoveLink(link.Title); };
            row.EditLinkRequested         += delegate { new LinkDataForm(link).ShowDialog(); };
            row.LinkRowSelected           += delegate { SelectLinkRow(link); };
            row.SyncStartRequested        += delegate { StartSync(link); };
            row.SyncCancellationRequested += delegate { CancelSync(link); };

            // Anchored to the left, top and right edges; initial width taken from the table.
            row.Anchor = AnchorStyles.Top | AnchorStyles.Left | AnchorStyles.Right;
            row.Width  = dataTable.Width;

            // The row is tracked first, so the table position equals the new row count.
            _linkRows.Add(row);
            int tableRow = _linkRows.Count;
            dataTable.Controls.Add(row, 0, tableRow);
        }
示例#3
0
        /// <summary>
        /// Refreshes the overall progress display and the sync data shown by each link row.
        /// </summary>
        void UpdateSyncInfo()
        {
            // Overall progress: bar value plus a percentage label with one decimal place.
            float totalProgress = DataManager.GetTotalProgress();

            progressBar_total.Value = totalProgress;
            label_p.Text            = $"{totalProgress:0.0}%";

            // Per-link data: only rows whose link carries sync info are refreshed.
            foreach (SyncLink link in DataManager.Links)
            {
                LinkRow row = GetLinkRow(link);

                if (link.SyncInfo != null)
                {
                    row.UpdateSyncData();
                }
            }
        }
示例#4
0
        /// <summary>
        /// Scores a result row against the search words.
        /// </summary>
        /// <remarks>
        /// Rows whose URL does not contain "wikipedia" score one point per word found in the
        /// lower-cased title. Rows whose URL contains "wikipedia" score 4 when every word is
        /// found in the title and 0 otherwise. Words are compared case-sensitively against the
        /// lower-cased title, so callers are expected to pass lower-case words. A null title or
        /// URL is treated as empty instead of throwing.
        /// </remarks>
        /// <param name="res">Row to score.</param>
        /// <param name="words">Search words to look for in the title.</param>
        /// <returns>The score for <paramref name="res"/>.</returns>
        private int calculateScore(LinkRow res, List <string> words)
        {
            // Both values are loop-invariant, so they are computed once instead of once per word.
            string title       = (res.Title ?? string.Empty).ToLower();
            bool   isWikipedia = res.URL != null && res.URL.Contains("wikipedia");

            int matched = 0;
            foreach (string word in words)
            {
                if (title.Contains(word))
                {
                    matched++;
                }
            }

            if (isWikipedia)
            {
                // All-or-nothing bonus: a single missing word forfeits it.
                return(matched == words.Count ? 4 : 0);
            }

            return(matched);
        }
示例#5
0
        /// <summary>Placeholder title used when the page has no title element.</summary>
        private const string NoTitleFound = "no title found";

        /// <summary>
        /// Downloads the page at <paramref name="url"/>, queues the crawlable links found on it and
        /// stores one <c>LinkRow</c> per indexed title word.
        /// </summary>
        /// <remarks>
        /// Only title words are indexed; the body text is stored on every row but is not tokenised.
        /// Queue and table writes are started without being awaited, so their failures are only
        /// written to the debug output. Any failure not handled by a step is reported through
        /// <c>Error("404", url)</c>.
        /// </remarks>
        /// <param name="url">Address of the page to download and index.</param>
        public static void ProcessHTML(string url)
        {
            System.Diagnostics.Debug.WriteLine("ProcessHTML");
            try
            {
                HtmlWeb      hw  = new HtmlWeb();
                HtmlDocument doc = hw.Load(url);
                // Logged after Load so the message only appears once the page has been fetched.
                System.Diagnostics.Debug.WriteLine("HTML Loaded");

                string        imglink     = ExtractImageLink(doc);
                string        title       = ExtractTitle(doc);
                List <string> masterIndex = BuildTitleIndex(title);
                DateTime      date        = ExtractPublishDate(doc);

                QueueOutboundLinks(doc);

                // Built once; every row for this page stores the same body text.
                string body = ExtractBodyText(doc, url);

                foreach (string key in masterIndex)
                {
                    LinkRow        lr = new LinkRow(key, url, title, date, body, imglink, "");
                    TableOperation insertOperation = TableOperation.InsertOrReplace(lr);
                    try
                    {
                        // Not awaited: asynchronous failures surface through the continuation,
                        // synchronous ones through the catch below.
                        LogIfFaulted(DBConnect.GetLinkTable().ExecuteAsync(insertOperation), "Table Insert Exception:  ", url);
                        System.Diagnostics.Debug.WriteLine("key: " + key + " url: " + url);
                    }
                    catch (Exception e)
                    {
                        System.Diagnostics.Debug.WriteLine("Table Insert Exception:  " + e + " url: " + url);
                    }
                }
                Task.Run(() => UpdateStats("Crawling"));
            }
            catch (Exception)
            {
                Error("404", url);
            }
        }

        /// <summary>Returns the src of the first img whose class contains 'media__image', or "none".</summary>
        private static string ExtractImageLink(HtmlDocument doc)
        {
            // SelectNodes yields null when nothing matches, hence the explicit checks.
            var images = doc.DocumentNode.SelectNodes("//img[contains(@class, 'media__image')]");
            var image  = images == null ? null : images.FirstOrDefault();
            var source = image == null ? null : image.Attributes["src"];

            return(source == null ? "none" : source.Value);
        }

        /// <summary>Returns the text of //head/title, else of the first title element, else the placeholder.</summary>
        private static string ExtractTitle(HtmlDocument doc)
        {
            HtmlNode titleNode = doc.DocumentNode.SelectSingleNode("//head/title");

            if (titleNode == null)
            {
                titleNode = doc.DocumentNode.Descendants("title").FirstOrDefault();
            }

            return(titleNode == null ? NoTitleFound : titleNode.InnerText);
        }

        /// <summary>Splits the title into lower-cased index words, skipping stop words and near-duplicates.</summary>
        private static List <string> BuildTitleIndex(string title)
        {
            List <string> index = new List <string>();

            if (title == NoTitleFound)
            {
                return(index);
            }

            string[] titleWords = title.Split(new Char[] { ' ', ',', '.', ':', '!', '?', '-', '%', '"' });
            foreach (string wd in titleWords)
            {
                if (wd.Length <= 1)
                {
                    continue;
                }

                string lower = wd.ToLower();
                if (stopwordstitle.Contains(lower) || index.Contains(lower) || index.Contains(lower + "s"))
                {
                    continue;
                }

                // Skip a word whose form without its last character is already indexed. The
                // comparison uses the lower-cased word because the index only holds lower-case
                // entries; comparing the raw word never matched capitalised title words.
                if (index.Contains(lower.Substring(0, lower.Length - 1)))
                {
                    continue;
                }

                // Apostrophes are dropped from the stored key.
                index.Add(lower.Replace("'", ""));
            }

            return(index);
        }

        /// <summary>Reads the publish date from the 'pubdate' or 'og:pubdate' meta tag, defaulting to the current time.</summary>
        private static DateTime ExtractPublishDate(HtmlDocument doc)
        {
            string[] metaSelectors =
            {
                "//head/meta[@name='pubdate']",
                "//head/meta[@property='og:pubdate']"
            };

            foreach (string selector in metaSelectors)
            {
                HtmlNode meta    = doc.DocumentNode.SelectSingleNode(selector);
                var      content = meta == null ? null : meta.Attributes["content"];
                DateTime parsed;

                if (content != null && DateTime.TryParse(content.Value, out parsed))
                {
                    return(parsed);
                }
            }

            return(DateTime.Now);
        }

        /// <summary>Queues every not-yet-crawled, verified cnn.com / bleacherreport.com (nba) page link on the page.</summary>
        private static void QueueOutboundLinks(HtmlDocument doc)
        {
            var anchors = doc.DocumentNode.SelectNodes("//a[@href]");

            if (anchors == null)
            {
                System.Diagnostics.Debug.WriteLine("No <a> tags");
                return;
            }

            try
            {
                foreach (HtmlNode anchor in anchors)
                {
                    string href = anchor.Attributes["href"].Value;

                    // ".htm" also covers ".html".
                    bool isPage      = href.Contains(".htm");
                    bool isKnownSite = href.Contains("cnn.com") || href.Contains("bleacherreport.com");
                    if (!isPage || !isKnownSite)
                    {
                        continue;
                    }

                    System.Diagnostics.Debug.WriteLine("Link on page: " + href);
                    if (!CrawledURLs.Contains(href) && VerifyLink(href) &&
                        (href.Contains("cnn.com") || (href.Contains("bleacherreport.com") && href.Contains("nba"))))
                    {
                        CloudQueueMessage newQueueLink = new CloudQueueMessage(href);
                        LogIfFaulted(DBConnect.GetLinkQueue().AddMessageAsync(newQueueLink), "Queue Add Exception:  ", href);
                        CrawledURLs.Add(href);
                        System.Diagnostics.Debug.WriteLine("Added to LinkQueue: " + href);
                    }
                }
            }
            catch (Exception e)
            {
                // Link discovery is best effort; a failure here must not stop the page being indexed.
                System.Diagnostics.Debug.WriteLine("Failed to queue links: " + e);
            }
        }

        /// <summary>Concatenates the page's paragraph text; the selector depends on which site the URL belongs to.</summary>
        private static string ExtractBodyText(HtmlDocument doc, string url)
        {
            string selector = (url.Contains("money.cnn") || url.Contains("bleacherreport"))
                              ? "//p"
                              : "//*[contains(@class, 'zn-body__paragraph')]";

            var paragraphs = doc.DocumentNode.SelectNodes(selector);
            if (paragraphs == null)
            {
                System.Diagnostics.Debug.WriteLine("No body text found: " + url);
                return(string.Empty);
            }

            StringBuilder sb = new StringBuilder();
            foreach (HtmlNode text in paragraphs)
            {
                sb.Append(text.InnerText);
            }

            return(sb.ToString());
        }

        /// <summary>Writes the exception of a fire-and-forget task to the debug output if the task faults.</summary>
        private static void LogIfFaulted(Task operation, string prefix, string url)
        {
            operation.ContinueWith(
                t => { System.Diagnostics.Debug.WriteLine(prefix + t.Exception + " url: " + url); },
                TaskContinuationOptions.OnlyOnFaulted);
        }
 /// <summary>
 /// Creates the event data for a change to a link row.
 /// </summary>
 /// <param name="row">Row the event refers to.</param>
 /// <param name="action">Row action associated with the event.</param>
 public LinkRowChangeEvent(LinkRow row, global::System.Data.DataRowAction action) {
     eventRow = row;
     eventAction = action;
 }
 /// <summary>
 /// Creates a new LinkParams row linked to the given parent rows, adds it to the table and returns it.
 /// </summary>
 /// <param name="parentLinkRowByLinkLinkParams">Parent link row, or null to leave slot 1 empty.</param>
 /// <param name="parentParamRowByParamLinkParams">Parent param row, or null to leave slot 2 empty.</param>
 /// <returns>The row that was added.</returns>
 public LinkParamsRow AddLinkParamsRow(LinkRow parentLinkRowByLinkLinkParams, ParamRow parentParamRowByParamLinkParams) {
     LinkParamsRow newRow = (LinkParamsRow)this.NewRow();

     // Slot 0 is always null; slots 1 and 2 receive item 0 of each parent when one is supplied.
     object linkValue = null;
     if (parentLinkRowByLinkLinkParams != null) {
         linkValue = parentLinkRowByLinkLinkParams[0];
     }

     object paramValue = null;
     if (parentParamRowByParamLinkParams != null) {
         paramValue = parentParamRowByParamLinkParams[0];
     }

     newRow.ItemArray = new object[] { null, linkValue, paramValue };
     this.Rows.Add(newRow);
     return newRow;
 }
 /// <summary>
 /// Creates a new ProcessClassLinks row linked to the given parent rows, adds it to the table and returns it.
 /// </summary>
 /// <param name="parentProcessClassRowByProcessClassProcessClassLinks">Parent process-class row, or null to leave slot 1 empty.</param>
 /// <param name="parentLinkRowByLinkProcessClassLinks">Parent link row, or null to leave slot 2 empty.</param>
 /// <returns>The row that was added.</returns>
 public ProcessClassLinksRow AddProcessClassLinksRow(ProcessClassRow parentProcessClassRowByProcessClassProcessClassLinks, LinkRow parentLinkRowByLinkProcessClassLinks) {
     ProcessClassLinksRow newRow = (ProcessClassLinksRow)this.NewRow();

     // Slot 0 is always null; slots 1 and 2 receive item 0 of each parent when one is supplied.
     object processClassValue = null;
     if (parentProcessClassRowByProcessClassProcessClassLinks != null) {
         processClassValue = parentProcessClassRowByProcessClassProcessClassLinks[0];
     }

     object linkValue = null;
     if (parentLinkRowByLinkProcessClassLinks != null) {
         linkValue = parentLinkRowByLinkProcessClassLinks[0];
     }

     newRow.ItemArray = new object[] { null, processClassValue, linkValue };
     this.Rows.Add(newRow);
     return newRow;
 }
 /// <summary>
 /// Removes <paramref name="row"/> from this table's row collection.
 /// </summary>
 /// <param name="row">Row to remove.</param>
 public void RemoveLinkRow(LinkRow row) {
     Rows.Remove(row);
 }
 /// <summary>
 /// Adds <paramref name="row"/> to this table's row collection.
 /// </summary>
 /// <param name="row">Row to add.</param>
 public void AddLinkRow(LinkRow row) {
     Rows.Add(row);
 }
示例#11
0
        /// <summary>
        /// Looks up the stored row whose RowKey equals <c>LinkRow.CreateMD5(q)</c> and returns
        /// its title and publish date.
        /// </summary>
        /// <param name="q">Value hashed with <c>LinkRow.CreateMD5</c> to form the RowKey filter.</param>
        /// <returns>
        /// Two entries: the row's title and "Published: " followed by its publish date. When no
        /// row matches or the lookup fails, the two entries are "No Page Found" and
        /// "Try a different URL".
        /// </returns>
        public List <string> GetTitle(string q)
        {
            try
            {
                string rowKeyFilter = TableQuery.GenerateFilterCondition("RowKey", QueryComparisons.Equal, LinkRow.CreateMD5(q));
                TableQuery <LinkRow> rowQuery = new TableQuery <LinkRow>().Where(rowKeyFilter);

                LinkRow row = DBConnect.GetLinkTable().ExecuteQuery(rowQuery).FirstOrDefault();

                if (row != null)
                {
                    List <string> titledate = new List <string>();
                    titledate.Add(row.Title);
                    titledate.Add("Published: " + row.DatePublished.ToString());
                    return(titledate);
                }
            }
            catch
            {
                // Best effort: a failed lookup is reported the same way as a missing page.
            }

            // Reached for both "no matching row" and "lookup failed". Previously a query with no
            // match returned an empty list, so callers never saw the not-found message.
            List <string> notFound = new List <string>();
            notFound.Add("No Page Found");
            notFound.Add("Try a different URL");
            return(notFound);
        }
示例#12
0
        /*
         *
         * public cLinkCategory this[int viIndex]
         * {
         *  get { return (cLinkCategory)base.List[viIndex]; }
         * }
         */
        /// <summary>
        /// Loads link categories from the file named by <c>msFileName</c> into <c>oCatagories</c>.
        /// </summary>
        /// <remarks>
        /// Each record is one line of '|'-separated fields; fields 1, 2 and 3 become the
        /// category's Name, FileName and LinkDefine. Lines of three characters or fewer after
        /// trimming, and lines with fewer than four fields, are skipped. Every line of the file
        /// is processed and the reader is always closed. When the file does not exist, an empty
        /// "LinkCategories.txt" is created and nothing is loaded.
        /// </remarks>
        /// <returns>True when the file exists and contains at least one line; otherwise false.</returns>
        public bool Load1()
        {
            string sPath = msFileName;

            if (!File.Exists(sPath))
            {
                // Dispose the stream returned by File.Create right away; leaving it open keeps
                // the new file locked. A freshly created file has no records to load.
                File.Create("LinkCategories.txt").Dispose();
                return(false);
            }

            bool readAnyLine = false;

            using (StreamReader oFiler = File.OpenText(sPath))
            {
                string sReadLine;
                while ((sReadLine = oFiler.ReadLine()) != null)
                {
                    readAnyLine = true;

                    if (sReadLine.Trim().Length <= 3)
                    {
                        continue;
                    }

                    string[] sLinkFields = sReadLine.Split('|');
                    if (sLinkFields.Length < 4)
                    {
                        // Malformed record: not enough fields to fill a category.
                        continue;
                    }

                    cLinkCategory oLinkCategory = new cLinkCategory();
                    oLinkCategory.Name       = sLinkFields[1];
                    oLinkCategory.FileName   = sLinkFields[2];
                    oLinkCategory.LinkDefine = sLinkFields[3];
                    oCatagories.Add(oLinkCategory);
                }
            }

            return(readAnyLine);
        }