/// <summary>
/// Downloads the page at <c>web.URL</c> and fills in <c>web.PostalCode</c> and <c>web.Phone</c> from the
/// text of its &lt;body&gt; element, using <c>getPostal</c> and <c>getPhone</c>.
/// </summary>
/// <remarks>
/// If the page contains a meta-refresh redirect, <c>web.URL</c> is replaced by the redirect target and that
/// page is scanned instead. When either value is still empty afterwards, the pages listed in
/// <c>web.subsites</c> (after calling <c>web.getSubSites()</c>) are scanned until both values are found.
/// Values that remain empty are set to "Intet fundet!". Failures are reported through
/// <c>web.PostalCode</c> instead of being thrown; when the scanned page has no &lt;body&gt;,
/// <c>web.Phone</c> is left untouched.
/// </remarks>
/// <param name="web">The site to scan; its URL, PostalCode and Phone members are updated in place.</param>
private void getPostalCodeAndPhone(WebsiteWithPostal web)
{
    try
    {
        // WebClient is IDisposable; release it as soon as the scan is finished.
        using (WebClient client = new WebClient())
        {
            HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
            string downloadString = client.DownloadString(web.URL);

            // The markup is lower-cased so the case-sensitive XPath below also matches "Refresh" / "URL=".
            // NOTE(review): this also lower-cases the redirect target, which can break case-sensitive paths.
            document.LoadHtml(downloadString.ToLower());

            var select = document.DocumentNode.SelectNodes("//meta[@http-equiv='refresh' and contains(@content, 'url')]");
            if (select != null)
            {
                string target = getRefreshTarget(select[0].Attributes["content"].Value, web.URL);

                // A refresh tag without a usable target is ignored; the page already loaded is scanned instead.
                if (target != "")
                {
                    web.URL = target;
                    downloadString = client.DownloadString(web.URL);
                    document.LoadHtml(downloadString);
                }
            }

            HtmlAgilityPack.HtmlNodeCollection bodyNodes = document.DocumentNode.SelectNodes("//body");
            if (bodyNodes == null)
            {
                web.PostalCode = "Ingen <body> på siden.";
                return;
            }

            string innerText = bodyNodes[0].InnerText;
            web.PostalCode = getPostal(innerText);
            web.Phone = getPhone(innerText);

            if (web.Phone == "" || web.PostalCode == "")
            {
                web.getSubSites();
                foreach (String sub in web.subsites)
                {
                    try
                    {
                        downloadString = client.DownloadString(web.URL.TrimEnd('/') + sub);
                    }
                    catch (WebException)
                    {
                        // One unreachable sub-page must not discard what was already found; try the next one.
                        continue;
                    }

                    document.LoadHtml(downloadString);
                    HtmlAgilityPack.HtmlNodeCollection subBodyNodes = document.DocumentNode.SelectNodes("//body");
                    if (subBodyNodes == null)
                    {
                        // SelectNodes returns null when nothing matches; skip pages without a <body>.
                        continue;
                    }

                    innerText = subBodyNodes[0].InnerText;
                    if (web.Phone == "")
                    {
                        web.Phone = getPhone(innerText);
                    }
                    if (web.PostalCode == "")
                    {
                        web.PostalCode = getPostal(innerText);
                    }
                    if (web.PostalCode != "" && web.Phone != "")
                    {
                        break;
                    }
                }
            }

            if (web.Phone == "")
            {
                web.Phone = "Intet fundet!";
            }
            if (web.PostalCode == "")
            {
                web.PostalCode = "Intet fundet!";
            }
        }
    }
    catch (WebException)
    {
        web.PostalCode = "Kunne ikke finde siden";
    }
    catch (Exception e)
    {
        web.PostalCode = e.Message;
    }
}

/// <summary>
/// Extracts the redirect target from the <c>content</c> attribute of a meta-refresh tag
/// (for example <c>0; url=http://example.com/?a=b</c>).
/// </summary>
/// <param name="content">The raw value of the <c>content</c> attribute.</param>
/// <param name="baseUrl">The URL of the page that contained the tag; used to resolve relative targets.</param>
/// <returns>The redirect target, or an empty string when <paramref name="content"/> contains none.</returns>
private static string getRefreshTarget(string content, string baseUrl)
{
    // Only the first '=' separates "url" from the target; later ones belong to the target's query string.
    int separator = content.IndexOf('=');
    if (separator < 0)
    {
        return "";
    }

    // The target is sometimes wrapped in quotes: content="0; url='http://example.com'".
    string target = content.Substring(separator + 1).Trim().Trim('\'', '"').Trim();
    if (target == "")
    {
        return "";
    }

    // Resolve relative targets such as "/contact" against the page that issued the redirect.
    Uri baseUri;
    Uri resolved;
    if (Uri.IsWellFormedUriString(target, UriKind.Relative)
        && Uri.TryCreate(baseUrl, UriKind.Absolute, out baseUri)
        && Uri.TryCreate(baseUri, target, out resolved))
    {
        return resolved.AbsoluteUri;
    }

    return target;
}
/// <summary>
/// Opens the Excel file named in <c>txtFilePath</c>, reads a URL from column 1 of every row of the
/// worksheet at index 1, looks up its postal code and phone number with
/// <c>getPostalCodeAndPhone</c>, and writes the results to columns 2 and 3 of the same row.
/// </summary>
/// <remarks>
/// The workbook is saved after every tenth row and once more when the loop ends. The loop stops early
/// when <c>worker.CancellationPending</c> is set. A missing file (or a path that makes
/// <c>FileInfo</c> throw <see cref="ArgumentException"/>) and I/O errors are reported with a message box.
/// <c>btnRun</c> is re-enabled in every case.
/// NOTE(review): this method touches UI members directly; if it runs on the worker thread, confirm
/// that this access is marshalled to the UI thread.
/// </remarks>
private void ReadExcel()
{
    try
    {
        System.IO.FileInfo file = new System.IO.FileInfo(txtFilePath.Text);
        if (!file.Exists)
        {
            // Report the missing file directly instead of throwing an exception as control flow.
            MessageBox.Show("Kunne ikke finde filen");
            return;
        }

        // ExcelPackage is IDisposable; dispose it so its resources are released even on failure.
        using (var package = new ExcelPackage(file))
        {
            ExcelWorksheet ws = package.Workbook.Worksheets[1];

            // Dimension is null for a worksheet without any cells; treat that as zero rows.
            var endRow = ws.Dimension == null ? 0 : ws.Dimension.End.Row;
            var lastRow = 0;

            for (int i = 1; i <= endRow; i++)
            {
                if (worker.CancellationPending)
                {
                    break;
                }

                SetMessage($"Kører række {i} af {endRow}");

                string link = ws.Cells[i, 1].Text.Trim();
                if (link != "")
                {
                    string address = ws.Cells[i, 1].Address;
                    WebsiteWithPostal web = new WebsiteWithPostal { URL = link, Address = address };
                    sites.Add(web);
                    getPostalCodeAndPhone(web);
                    ws.Cells[i, 2].Value = web.PostalCode;
                    ws.Cells[i, 3].Value = web.Phone;
                }

                // Save periodically so a long run does not lose all progress if it is interrupted.
                if (i % 10 == 0)
                {
                    package.Save();
                }

                lastRow = i;
            }

            package.Save();
            SetMessage($"{lastRow} rækker kørt og gemt!");
        }
    }
    catch (ArgumentException)
    {
        MessageBox.Show("Kunne ikke finde filen");
    }
    catch (System.IO.IOException e)
    {
        MessageBox.Show(e.Message);
    }
    finally
    {
        btnRun.Enabled = true;
    }
}