/// <summary>
/// Parses the supplied markup and returns the text of the <c>a.hnuser</c> selection.
/// </summary>
/// <param name="html">HTML markup to parse.</param>
/// <returns>The <c>Text</c> of the elements selected by <c>a.hnuser</c>.</returns>
public string GetAuthor(string html)
{
    var document = Dcsoup.Parse(html);
    var authorLinks = document.Select("a.hnuser");

    return authorLinks.Text;
}
/// <summary>
/// Parses a tiny input with <c>Parser.XmlParser</c> and checks the output syntax of the
/// resulting document. Note that, despite the "Html" in the method name, the assertion
/// expects <c>DocumentSyntax.Xml</c>.
/// </summary>
public void XmlParseDefaultsToHtmlOutputSyntax()
{
    var parsed = Dcsoup.Parse("x", "", Parser.XmlParser);
    var actualSyntax = parsed.OutputSettings.Syntax;

    Assert.AreEqual(DocumentSyntax.Xml, actualSyntax);
}
/// <summary>
/// Parses the supplied markup and returns the text of the
/// <c>td.title &gt; a.storylink</c> selection.
/// </summary>
/// <param name="html">HTML markup to parse.</param>
/// <returns>The <c>Text</c> of the elements selected by <c>td.title &gt; a.storylink</c>.</returns>
public string GetTitle(string html)
{
    var document = Dcsoup.Parse(html);
    var storyLinks = document.Select("td.title > a.storylink");

    return storyLinks.Text;
}
/// <summary>
/// Parses the supplied markup and returns the <c>href</c> attribute of the
/// <c>td.title &gt; a.storylink</c> selection.
/// </summary>
/// <param name="html">HTML markup to parse.</param>
/// <returns>The value returned by <c>Attr("href")</c> on the selection.</returns>
public string GetUrl(string html)
{
    var document = Dcsoup.Parse(html);
    var storyLinks = document.Select("td.title > a.storylink");

    return storyLinks.Attr("href");
}
/// <summary>
/// Downloads the beermerchants sitemap, extracts its links with <c>LinkFinder.Find</c> and
/// returns the leading part (everything before the first space of the link's string form)
/// of the link that best matches <paramref name="search"/>.
/// </summary>
/// <remarks>
/// Matching works in two stages: links are first ranked by how many space-separated words
/// of the search phrase occur (case-insensitively) in the part of the link text after its
/// first space; among the top-ranked links the one with the smallest Levenshtein distance
/// to the search phrase wins (first one on ties). Links containing any
/// <paramref name="unwanted"/> entry, or whose "glass" status differs from the search
/// phrase, are never returned. The special phrase "RandomBeer" returns a random eligible link.
/// </remarks>
/// <param name="search">Search phrase, or the literal "RandomBeer".</param>
/// <param name="unwanted">Substrings that disqualify a link when present in its text.</param>
/// <returns>
/// The leading part of the chosen link's text, or "Not found!" when nothing matches or no
/// eligible link remains.
/// </returns>
public static string Search(string search, List<string> unwanted)
{
    const string notFound = "Not found!";

    string sitemap = "https://www.beermerchants.com/catalog/sitemap/";
    string sm = Dcsoup.Parse(new Uri(sitemap), 5000).ToString();
    List<LinkItem> links = LinkFinder.Find(sm);

    // Nothing to search in: previously this fell through to Max() on an empty array and threw.
    if (links.Count == 0)
    {
        return notFound;
    }

    // Loop-invariant pieces of the search phrase, computed once instead of once per link.
    string[] searchWordsUpper = search.Split(' ').Select(word => word.ToUpper()).ToArray();
    bool searchWantsGlass = search.ToLower().Contains("glass");

    int[] distances = new int[links.Count];
    int[] matches = new int[links.Count];
    List<int> possibleIndices = new List<int>();

    for (int i = 0; i < links.Count; i++)
    {
        string linkText = links[i].ToString();
        string linkName = linkText.Substring(linkText.IndexOf(" ") + 1);
        string linkNameUpper = linkName.ToUpper();

        matches[i] = searchWordsUpper.Count(word => linkNameUpper.Contains(word));

        bool ignore = unwanted.Any(item => linkText.Contains(item));
        bool linkIsGlass = linkText.Contains("glass");

        // A link is eligible only when it is not unwanted and agrees with the search on "glass".
        if (!ignore && searchWantsGlass == linkIsGlass)
        {
            distances[i] = functions.LevenshteinDistance(search, linkName);
            possibleIndices.Add(i);
        }
        else
        {
            distances[i] = -1;
        }
    }

    if (search == "RandomBeer")
    {
        // Indexing into an empty candidate list would throw; report "not found" instead.
        if (possibleIndices.Count == 0)
        {
            return notFound;
        }

        int randomIndex = functions.GenIndex(possibleIndices.Count);
        string randomLink = links[possibleIndices[randomIndex]].ToString();
        return randomLink.Substring(0, randomLink.IndexOf(" "));
    }

    int bestMatch = matches.Max();
    if (bestMatch == 0)
    {
        return notFound;
    }

    // First eligible link with the best word-match count and the smallest distance.
    int closestIndex = -1;
    for (int i = 0; i < distances.Length; i++)
    {
        if (matches[i] != bestMatch || distances[i] < 0)
        {
            continue;
        }

        if (closestIndex < 0 || distances[i] < distances[closestIndex])
        {
            closestIndex = i;
        }
    }

    // All best-matching links were filtered out (unwanted / glass mismatch). The original
    // called Min() on an empty sequence here and threw InvalidOperationException.
    if (closestIndex < 0)
    {
        return notFound;
    }

    string closestMatch = links[closestIndex].ToString();
    return closestMatch.Substring(0, closestMatch.IndexOf(" "));
}
/// <summary>
/// Creates a scraper for a listing page by parsing its markup once and keeping the
/// parsed document in <c>_document</c>.
/// </summary>
/// <param name="pageHtml">HTML markup of the listing page.</param>
public ListingPageScraper(string pageHtml)
{
    var parsedPage = Dcsoup.Parse(pageHtml);

    _document = parsedPage;
}
/// <summary>
/// Downloads a product page and breaks it into the parts of a chat message.
/// </summary>
/// <remarks>
/// The text of the element with id <c>description</c> is scanned for the last occurrence of
/// the labels "brewery", "country", "styles", "ABV%" and "Size"; each label's value is taken
/// as the text between that label and the next one. Missing labels are simply skipped.
/// </remarks>
/// <param name="url">Address of the page to download.</param>
/// <param name="chnl">Not used by this method.</param>
/// <param name="search">Not used by this method.</param>
/// <returns>
/// Message parts in this order: image URL, beer name, then "Brewery", "Nationality",
/// "Style" and "ABV" lines for the labels that were found, the description, and the notes
/// (only when a "Size" label exists). Returns <c>null</c> when the page cannot be
/// downloaded or the expected elements cannot be read.
/// </returns>
public static List<string> Retrieve(string url, Channel chnl, string search)
{
    string description = null;
    string beername = null;
    string imageurl = null;

    try
    {
        var page = Dcsoup.Parse(new Uri(url), 5000);
        description = page.GetElementById("description").Text;
        beername = page.Select("h1").Text;

        // Read the attribute directly instead of slicing the serialized <img> tag at fixed
        // offsets between "src=" and "alt=", which threw whenever either was missing or
        // the attributes appeared in a different order.
        imageurl = page.Select("img.img-responsive.block-center").Attr("src");
    }
    catch
    {
        // Deliberate best effort: any download or parse failure is reported as null.
        return null;
    }

    List<string> MessageItem = new List<string>();
    MessageItem.Add(imageurl);
    MessageItem.Add(beername);

    // LastIndexOf already yields -1 for a missing label, so no separate Contains check is needed.
    int brwrymarker = description.LastIndexOf("brewery", StringComparison.Ordinal);
    int cntrymarker = description.LastIndexOf("country", StringComparison.Ordinal);
    int stylemarker = description.LastIndexOf("styles", StringComparison.Ordinal);
    int abvmarker = description.LastIndexOf("ABV%", StringComparison.Ordinal);
    int sizemarker = description.LastIndexOf("Size", StringComparison.Ordinal);
    int[] indices = { cntrymarker, stylemarker, abvmarker, brwrymarker, sizemarker };

    // The numeric arguments are the original offsets: characters to skip after the marker
    // (label plus separator) and characters to drop before the next marker.
    if (brwrymarker != -1)
    {
        MessageItem.Add("Brewery: *" + ExtractField(description, indices, brwrymarker, 8, 1) + "*");
    }

    if (cntrymarker != -1)
    {
        MessageItem.Add("Nationality: *" + ExtractField(description, indices, cntrymarker, 8, 6) + "*");
    }

    if (stylemarker != -1)
    {
        MessageItem.Add("Style: *" + ExtractField(description, indices, stylemarker, 7, 1) + "*");
    }

    if (abvmarker != -1)
    {
        // Up to five characters after "ABV% ", cut at the first space.
        string abv = SliceClamped(description, abvmarker + 5, abvmarker + 10);
        int spaceIndex = abv.IndexOf(' ');
        if (spaceIndex >= 0)
        {
            abv = abv.Substring(0, spaceIndex);
        }

        MessageItem.Add("ABV: " + abv);
    }

    // Notes are whatever follows the "Size" label and the word after it. Without a "Size"
    // label there are no notes (the original called Substring(-1) here and threw).
    string notes = null;
    if (sizemarker != -1)
    {
        notes = description.Substring(sizemarker);
        notes = notes.Substring(notes.IndexOf(' ') + 1);
        notes = notes.Substring(notes.IndexOf(' ') + 1);

        // Strings are immutable: the original discarded the result of Replace, so the
        // intended space-to-comma conversion never happened.
        notes = notes.Replace(' ', ',');
    }

    // The free-text description ends just before the first label. When no label was found
    // the whole text is kept (the original called Min() on an empty sequence and threw).
    int[] foundMarkers = indices.Where(x => x > 0).ToArray();
    if (foundMarkers.Length > 0)
    {
        description = description.Substring(0, foundMarkers.Min() - 1);
    }

    if (description.Contains("Sign up to get notified"))
    {
        int cut = description.IndexOf("Sign up");
        description = description.Substring(0, cut);
    }

    MessageItem.Add("Description:\n```" + description + "```");

    if (notes != null)
    {
        MessageItem.Add("Notes: *" + notes + "*");
    }

    return MessageItem;
}

// Returns the value that follows the label at `marker`: it starts `labelLength` characters
// after the marker and ends `gapBeforeNext` characters before the next marker, or at the end
// of the text when `marker` is the last one.
private static string ExtractField(string text, int[] markers, int marker, int labelLength, int gapBeforeNext)
{
    int start = marker + labelLength;
    int end = marker == markers.Max()
        ? text.Length
        : markers.Where(m => m > marker).Min() - gapBeforeNext;

    return SliceClamped(text, start, end);
}

// Returns text[start, end) with both bounds clamped into the string, so out-of-range or
// inverted bounds produce a shorter (possibly empty) result instead of an exception.
private static string SliceClamped(string text, int start, int end)
{
    int from = Math.Min(Math.Max(start, 0), text.Length);
    int to = Math.Min(Math.Max(end, from), text.Length);

    return text.Substring(from, to - from);
}
/// <summary>
/// Loads one section of a "month_day" wiki page and splits every list item into two columns.
/// </summary>
/// <remarks>
/// The section number is chosen from <paramref name="historicDataType"/> (Events = 1,
/// Births = 2, Deaths = 3, Holidays = 4) and is shifted by one for "january" / "1".
/// An unrecognised type leaves the <c>wikiSection</c> field at its previous value.
/// The result is also stored in the <c>listOfWikiData</c> field.
/// </remarks>
/// <param name="historicMonth">Month part of the page name (compared against "january").</param>
/// <param name="historicDay">Day part of the page name (compared against "1").</param>
/// <param name="historicDataType">"Events", "Births", "Deaths" or "Holidays".</param>
/// <returns>
/// One row per <c>li</c> element. For "Holidays" column 0 is empty and column 1 holds the
/// text up to and including the first ':' (or the whole text when there is none). For the
/// other types column 0 holds the text before the '–' separator and column 1 the text after
/// it; items without a usable separator get an empty column 0 and their full text in column 1.
/// </returns>
/// <exception cref="System.Net.WebException">
/// Propagated unchanged when loading the page fails.
/// </exception>
private string[,] RetrieveHistoricalDataFromWiki(string historicMonth, string historicDay, string historicDataType)
{
    switch (historicDataType)
    {
        case "Events":
            wikiSection = 1;
            break;
        case "Births":
            wikiSection = 2;
            break;
        case "Deaths":
            wikiSection = 3;
            break;
        case "Holidays":
            wikiSection = 4;
            break;
    }

    if (historicMonth == "january" && historicDay == "1")
    {
        wikiSection++;
    }

    // No try/catch here: the original caught WebException only to "throw ex;", which merely
    // reset the stack trace. Letting it propagate keeps the same exception type for callers
    // and preserves the original trace.
    string strFormattedWikiUrl = String.Format(strWikiUrl, historicMonth + "_" + historicDay, wikiSection);
    XmlDocument xmlDocWikiData = new XmlDocument();
    xmlDocWikiData.Load(strFormattedWikiUrl);

    var wikiData = Dcsoup.Parse(xmlDocWikiData.InnerText);
    var selectedData = wikiData.Select("li");
    listOfWikiData = new string[selectedData.Count, 2];

    bool isHolidays = historicDataType.Equals("Holidays");

    for (int i = 0; i < selectedData.Count; i++)
    {
        // Read the element text once per item instead of on every use.
        string itemText = selectedData[i].Text;

        if (isHolidays)
        {
            int colonIndex = itemText.IndexOf(':');
            listOfWikiData[i, 0] = String.Empty;
            listOfWikiData[i, 1] = colonIndex >= 0
                ? itemText.Substring(0, colonIndex + 1)
                : itemText;
            continue;
        }

        int dashIndex = itemText.IndexOf('–');
        if (dashIndex >= 1)
        {
            // Drop the single character on each side of the dash, as the original offsets did.
            listOfWikiData[i, 0] = itemText.Substring(0, dashIndex - 1);
            listOfWikiData[i, 1] = itemText.Substring(Math.Min(dashIndex + 2, itemText.Length));
        }
        else
        {
            // No separator (or nothing before it): the original Substring call threw here.
            listOfWikiData[i, 0] = String.Empty;
            listOfWikiData[i, 1] = itemText;
        }
    }

    return listOfWikiData;
}
/// <summary>
/// Initializes the component and then downloads and parses the timetable page.
/// </summary>
public Tunniplaan()
{
    InitializeComponent();

    var timetableAddress = new Uri("https://tpt.siseveeb.ee/veebivormid/tunniplaan");

    // NOTE(review): the parsed page is not used after this call, and 5000 is presumably a
    // timeout in milliseconds — confirm both.
    var timetablePage = Dcsoup.Parse(timetableAddress, 5000);
}
/// <summary>
/// Downloads the book list from alistofbooks.com, converts each entry to a <c>Book</c>,
/// sorts the result and returns one page of it to the view.
/// </summary>
/// <param name="sortOrder">
/// "author" or "title" to sort by that property; "rank" or any other value sorts by rank.
/// </param>
/// <param name="page">1-based page number; the first page is used when null.</param>
/// <returns>The view, given a paged list with three books per page.</returns>
public ActionResult Index(string sortOrder, int? page)
{
    ViewBag.CurrentSort = sortOrder;
    ViewBag.LikedBooks = new Dictionary<string, bool>();

    var doc = Dcsoup.Parse(new Uri("http://www.alistofbooks.com/"), 5000);
    var bookListElem = doc.Select("ul.book-list").First;

    List<Book> books = new List<Book>();

    // If the page has no "ul.book-list" element, render an empty list rather than failing
    // with a NullReferenceException on ChildNodes.
    if (bookListElem != null)
    {
        foreach (var book in bookListElem.ChildNodes)
        {
            // Each child node is re-parsed on its own so the selectors below apply to it alone.
            var tempBook = Dcsoup.Parse(book.OuterHtml);

            int rank = 0;
            int.TryParse(tempBook.Select("span.ranking-num").Text, out rank);

            // Nodes without a parsable, non-zero rank are dropped.
            if (rank == 0)
            {
                continue;
            }

            string author = tempBook.Select("p.book-author").Text.Replace("by ", "");
            string title = tempBook.Select("a.book-title").Text;
            string imageUrl = tempBook.Select("img.book-cover").Attr("src");
            string desc = tempBook.Select("p.first-words").Text.Replace("\"", "");

            books.Add(new Book
            {
                Rank = rank,
                Author = author,
                Title = title,
                ImageUrl = imageUrl,
                Description = HttpUtility.HtmlDecode(desc)
            });
        }
    }

    switch (sortOrder)
    {
        case "author":
            books = books.OrderBy(b => b.Author).ToList();
            break;
        case "title":
            books = books.OrderBy(b => b.Title).ToList();
            break;
        default:
            // "rank" and every unrecognised value sort by rank.
            books = books.OrderBy(b => b.Rank).ToList();
            break;
    }

    int pageSize = 3;
    int pageNumber = page ?? 1;

    return View(books.ToPagedList(pageNumber, pageSize));
}
/// <summary>
/// Builds and sends the notification email(s) for an event: one email per XSL template,
/// addressed to the user accounts that the "GetEventEmailRecipients" procedure returns
/// for that template.
/// </summary>
/// <remarks>
/// For each template the event detail is transformed to HTML, "format", "structure" and
/// "chart" elements are replaced (charts become inline image attachments), and the message
/// is sent unless an identical message was already recorded for a system event on the same
/// line. Failure to record the email in the database is logged and does not prevent sending.
/// </remarks>
/// <param name="eventID">ID of the event to generate emails for.</param>
private static void GenerateEmail(int eventID)
{
    SystemInfoDataContext systemInfo;
    MeterInfoDataContext meterInfo;
    FaultLocationInfoDataContext faultInfo;
    EventTableAdapter eventAdapter;
    EventTypeTableAdapter eventTypeAdapter;

    EventRow eventRow;
    EventDataTable systemEvent;
    int faultTypeID;
    string eventDetail;

    XDocument htmlDocument;
    List<Attachment> attachments;
    string subject;
    string html;
    bool alreadySent;

    // NOTE(review): systemInfo, meterInfo, faultInfo and faultTypeID are not read below.
    // They are kept because the calls that produce them may have side effects — confirm
    // before removing.
    systemInfo = s_dbAdapterContainer.GetAdapter<SystemInfoDataContext>();
    meterInfo = s_dbAdapterContainer.GetAdapter<MeterInfoDataContext>();
    faultInfo = s_dbAdapterContainer.GetAdapter<FaultLocationInfoDataContext>();
    eventAdapter = s_dbAdapterContainer.GetAdapter<EventTableAdapter>();
    eventTypeAdapter = s_dbAdapterContainer.GetAdapter<EventTypeTableAdapter>();

    faultTypeID = eventTypeAdapter.GetData()
        .Where(eventType => eventType.Name == "Fault")
        .Select(eventType => eventType.ID)
        .FirstOrDefault();

    // Load the system event before the eventDetail record to avoid race conditions causing missed emails
    eventRow = eventAdapter.GetDataByID(eventID)[0];
    systemEvent = eventAdapter.GetSystemEvent(eventRow.StartTime, eventRow.EndTime, s_timeTolerance);
    eventDetail = eventAdapter.GetEventDetail(eventID);

    // Recipients grouped by template: key = TemplateID, values = UserAccountIDs.
    List<IGrouping<int, Guid>> templateGroups;

    using (SqlCommand command = new SqlCommand("GetEventEmailRecipients", s_dbAdapterContainer.Connection))
    using (SqlDataAdapter adapter = new SqlDataAdapter(command))
    {
        DataTable recipientTable = new DataTable();

        command.CommandType = CommandType.StoredProcedure;
        command.Parameters.AddWithValue("@eventID", eventID);
        adapter.Fill(recipientTable);

        templateGroups = recipientTable
            .Select()
            .GroupBy(row => row.ConvertField<int>("TemplateID"), row => row.ConvertField<Guid>("UserAccountID"))
            .ToList();
    }

    foreach (IGrouping<int, Guid> templateGroup in templateGroups)
    {
        string template;
        List<string> recipients;

        using (AdoDataConnection connection = new AdoDataConnection(s_dbAdapterContainer.Connection, typeof(SqlDataAdapter), false))
        {
            template = connection.ExecuteScalar<string>("SELECT Template FROM XSLTemplate WHERE ID = {0}", templateGroup.Key);

            // Builds "{0},{1},…" so every user account ID is passed as a query parameter.
            string paramString = string.Join(",", templateGroup.Select((userAccountID, index) => $"{{{index}}}"));
            string sql = $"SELECT Email FROM UserAccount WHERE Email IS NOT NULL AND Email <> '' AND ID IN ({paramString})";
            DataTable emailTable = connection.RetrieveData(sql, templateGroup.Cast<object>().ToArray());
            recipients = emailTable.Select().Select(row => row.ConvertField<string>("Email")).ToList();
        }

        htmlDocument = XDocument.Parse(eventDetail.ApplyXSLTransform(template), LoadOptions.PreserveWhitespace);
        htmlDocument.TransformAll("format", element => element.Format());

        htmlDocument.TransformAll("structure", element =>
        {
            string structureString = "";
            string lat = "0";
            string lng = "0";

            try
            {
                var doc = Dcsoup.Parse(new Uri(element.Attribute("url").Value + $"?id={element.Value}"), 5000);
                structureString = doc.Select("span[id=strno]").Text;

                // Split once; a text without both coordinates still ends up in the catch
                // below through the out-of-range index.
                string[] parts = structureString.Split('(', ',', ')');
                lat = parts[1];
                lng = parts[2];
            }
            catch (Exception)
            {
                // Any lookup or parsing failure degrades to a plain placeholder.
                structureString = "Structure and location unavailable...";
                return new XElement("span", structureString);
            }

            return new XElement("a",
                new XAttribute("href", $"http://www.google.com/maps/place/{lat},{lng}"),
                new XElement("span", structureString));
        });

        attachments = new List<Attachment>();

        try
        {
            htmlDocument.TransformAll("chart", (element, index) =>
            {
                string cid = $"chart{index:00}.png";

                Stream image = ChartGenerator.ConvertToChartImageStream(s_dbAdapterContainer, element);
                Attachment attachment = new Attachment(image, cid);
                attachment.ContentId = attachment.Name;
                attachments.Add(attachment);

                return new XElement("img", new XAttribute("src", $"cid:{cid}"));
            });

            subject = (string)htmlDocument.Descendants("title").FirstOrDefault() ?? "Fault detected by openXDA";

            // XDocument serialization escapes '&' as "&amp;"; turn it back into a bare '&'.
            // The literal found here replaced "&" with "&", which is a no-op.
            // NOTE(review): confirm previously stored SentEmail.Message values use the
            // unescaped form, since the duplicate check below compares against this text.
            html = htmlDocument.ToString(SaveOptions.DisableFormatting).Replace("&amp;", "&");
            alreadySent = false;

            try
            {
                int sentEmailID;

                using (AdoDataConnection connection = new AdoDataConnection(s_dbAdapterContainer.Connection, typeof(SqlDataAdapter), false))
                {
                    string systemEventIDs = string.Join(",", systemEvent.Where(row => row.LineID == eventRow.LineID).Select(row => row.ID));

                    string query =
                        $"SELECT SentEmail.ID " +
                        $"FROM " +
                        $" SentEmail JOIN " +
                        $" EventSentEmail ON EventSentEmail.SentEmailID = SentEmail.ID " +
                        $"WHERE " +
                        $" EventSentEmail.EventID IN ({systemEventIDs}) AND " +
                        $" SentEmail.Message = {{0}}";

                    sentEmailID = connection.ExecuteScalar(-1, DataExtensions.DefaultTimeoutDuration, query, html);
                }

                alreadySent = (sentEmailID != -1);

                if (!alreadySent)
                {
                    sentEmailID = LoadSentEmail(recipients, subject, html);
                }

                LoadEventSentEmail(eventRow, systemEvent, sentEmailID);
            }
            catch (Exception ex)
            {
                // Failure to load the email into the database should
                // not prevent us from attempting to send the email
                Log.Error(ex.Message, ex);
            }

            if (!alreadySent)
            {
                SendEmail(recipients, subject, html, attachments);
            }
        }
        finally
        {
            foreach (Attachment attachment in attachments)
            {
                attachment.Dispose();
            }
        }
    }

    if (templateGroups.Any())
    {
        Log.Info($"All emails sent for event ID {eventID}.");
    }
}