/// <summary>
/// Populates this listing's properties by running XPath queries through
/// <paramref name="selenium"/>.
/// </summary>
/// <param name="selenium">Browser wrapper used to select the nodes the values are read from.</param>
public override void Parse(SeleniumInterface selenium)
{
    // Header / contact section.
    Title = GetNodeText(selenium, "//div[contains(@class,'address-info')]//h1");
    ListingInfo = GetNodeText(selenium, "//div[contains(@class,'address-info')]//h3");
    Amenities = selenium.SelectNodes("//div[contains(@class,'address-info')]//li")
        .Select(l => l.InnerText)
        .ToList();
    Photos = selenium.SelectNodes("//div[@id='carousel']//img")
        .Select(l => l.GetAttributeValue("src", ""))
        .ToList();
    Address = GetNodeText(selenium, "//div[contains(@class, 'address-info')]/div[@class='bigText']");
    Contact = GetNodeText(selenium, "//div[contains(@class, 'address-info')]/div[@class='medText']");
    Email = GetNodeText(selenium, "//div[contains(@class, 'address-info')]//a[@class='mailtoLink']");
    WalkScore = GetNodeText(selenium, "//div[@class='walkscore']//span");

    // Detail cells: carriage returns, tabs and newlines are stripped from each
    // cell's text before the labelled values are looked up.
    var details = selenium.SelectNodes("//div[@class='col-md-6']")
        .Select(l => Regex.Replace(l.InnerText, "[\r\t\n]", ""))
        .ToList();

    // Each property is looked up exactly once, under its own label.
    Location = GetRentalItem(details, "City/Area");
    Rent = GetRentalItem(details, "Rent");
    SquareFootage = GetRentalItem(details, "Square Footage");
    Bedrooms = GetRentalItem(details, "Bedrooms");
    Pets = GetRentalItem(details, "Pets");
    StructureType = GetRentalItem(details, "Structure Type");
    Parking = GetRentalItem(details, "Parking");
    Available = GetRentalItem(details, "Available");
    Bathrooms = GetRentalItem(details, "Bathrooms");
    Furnished = GetRentalItem(details, "Furnished");
    LeaseType = GetRentalItem(details, "Lease Type");
    UnitDetails = GetRentalItem(details, "Unit Details");
    ListingType = GetRentalItem(details, "Listing Type");
}
/// <summary>
/// Visits every link found in the posts of the forum topic that is not already
/// listed in Completed.txt, saves each page's HTML and, for every frame that
/// contains a video element with a source, downloads the video file.
/// </summary>
/// <remarks>
/// Completed.txt (under DownloadPath) is rewritten after each processed link, and
/// links listed in it are skipped on the next run.
/// </remarks>
void DownloadPianoVideos()
{
    var selenium = new SeleniumInterface();
    var completedPath = $"{DownloadPath}Completed.txt";
    var tempVideoPath = $"{DownloadPath}Temp.mp4";

    // WebClient is IDisposable; release it when the run ends, even on failure.
    using (var client = new WebClient())
    {
        Logger.Log("Downloading main page...");

        // On the very first run Completed.txt does not exist yet (it is only
        // created by the WriteAllLines call below), so start from an empty list.
        var completedLinks = (File.Exists(completedPath) ? File.ReadAllLines(completedPath) : new string[0]).ToList();

        var links = selenium.GoTo("http://pianocareeracademy.com/forum/index.php/topic,207.msg1539.html")
            .SelectNodes("//div[@class='post']")
            .SelectMany(p => p.SelectNodes(".//a").Select(l => l.GetAttributeValue("href", "")))
            .Except(completedLinks)
            .ToList();
        Logger.Log($"Found {links.Count} links.");

        var linkIndex = 1;
        foreach (var link in links)
        {
            Logger.Log($"Downloading page '{link}' ({linkIndex++}/{links.Count})...");
            // NOTE(review): 5000 is presumably a load wait in milliseconds - confirm against SeleniumInterface.GoTo.
            selenium.GoTo(link, 5000);

            var pageSaved = false;
            for (var frameIndex = 0; frameIndex < selenium.BaseFrameCount; frameIndex++)
            {
                var videoElement = selenium.SelectNode(".//video", frameIndex);
                if (videoElement == null)
                {
                    continue;
                }

                // Read the source once; a missing or empty source means there is nothing to download.
                string videoSrc = videoElement.GetAttributeValue("src");
                if (string.IsNullOrEmpty(videoSrc))
                {
                    continue;
                }

                var title = RemoveInvalidCharacters(selenium.SelectNode(".//div[@class='title']//div[@class='headers']//h1", frameIndex).InnerText.Trim());
                var srcFilename = Path.GetFileNameWithoutExtension(new Uri(videoSrc).LocalPath);

                // A video counts as downloaded when any file name in DownloadPath contains
                // the source file name; file names are compared ordinally, ignoring case.
                var alreadyDownloaded = Directory.EnumerateFiles(DownloadPath, "*.*", SearchOption.TopDirectoryOnly)
                    .Select(Path.GetFileName)
                    .Any(f => f.IndexOf(srcFilename, StringComparison.OrdinalIgnoreCase) >= 0);

                if (!alreadyDownloaded)
                {
                    Logger.Log($"Downloading video '{title}'...");
                    // Download to a temporary name and rename afterwards, so a partially
                    // downloaded file never carries the final name.
                    File.Delete(tempVideoPath);
                    client.DownloadFile(videoSrc, tempVideoPath);
                    File.Move(tempVideoPath, $"{DownloadPath}{title} ({srcFilename}).mp4");
                }
                else
                {
                    Logger.Log($"Video '{title}' already downloaded.");
                }

                SaveWebPage(selenium, title);
                pageSaved = true;
            }

            // No frame had a video: still keep a copy of the page, named after its <title>.
            if (!pageSaved)
            {
                SaveWebPage(selenium, RemoveInvalidCharacters(selenium.SelectNode(".//title").InnerText.Trim()));
            }

            // Persist progress after every link.
            completedLinks.Add(link);
            File.WriteAllLines(completedPath, completedLinks);
        }
    }
}
/// <summary>
/// Logs the target file name, then writes the driver's current page source to
/// "<paramref name="title"/>.htm" under DownloadPath.
/// </summary>
/// <param name="selenium">Wrapper whose driver supplies the page source.</param>
/// <param name="title">File name (without extension) to save the page under.</param>
void SaveWebPage(SeleniumInterface selenium, string title)
{
    Logger.Log($"Saving page {title}.htm.");

    var targetFile = $"{DownloadPath}{title}.htm";
    var pageSource = selenium.Driver.PageSource;
    File.WriteAllText(targetFile, pageSource);
}