Beispiel #1
0
        /// <summary>
        /// Fills this listing's properties from the page exposed by <paramref name="selenium"/>,
        /// using XPath queries against the listing's markup.
        /// </summary>
        /// <param name="selenium">Browser wrapper that the XPath queries are evaluated against.</param>
        public override void Parse(SeleniumInterface selenium)
        {
            // Header block: everything here lives under the element whose class contains 'address-info'.
            Title       = GetNodeText(selenium, "//div[contains(@class,'address-info')]//h1");
            ListingInfo = GetNodeText(selenium, "//div[contains(@class,'address-info')]//h3");
            Amenities   = selenium.SelectNodes("//div[contains(@class,'address-info')]//li").Select(l => l.InnerText).ToList();
            // NOTE(review): the "" argument is presumably the fallback when an <img> has no src -- confirm.
            Photos      = selenium.SelectNodes("//div[@id='carousel']//img").Select(l => l.GetAttributeValue("src", "")).ToList();
            Address     = GetNodeText(selenium, "//div[contains(@class, 'address-info')]/div[@class='bigText']");
            Contact     = GetNodeText(selenium, "//div[contains(@class, 'address-info')]/div[@class='medText']");
            Email       = GetNodeText(selenium, "//div[contains(@class, 'address-info')]//a[@class='mailtoLink']");
            WalkScore   = GetNodeText(selenium, "//div[@class='walkscore']//span");

            // Inner text of every 'col-md-6' div with carriage returns, tabs and newlines stripped.
            // These strings are handed to GetRentalItem together with a label for each property below.
            var details = selenium.SelectNodes("//div[@class='col-md-6']").Select(l => Regex.Replace(l.InnerText, "[\r\t\n]", "")).ToList();

            // Rent is assigned exactly once, from the "Rent" label. An earlier copy-paste line that
            // first assigned it from "Square Footage" (and was immediately overwritten) has been removed.
            Location      = GetRentalItem(details, "City/Area");
            Rent          = GetRentalItem(details, "Rent");
            SquareFootage = GetRentalItem(details, "Square Footage");
            Bedrooms      = GetRentalItem(details, "Bedrooms");
            Pets          = GetRentalItem(details, "Pets");
            StructureType = GetRentalItem(details, "Structure Type");
            Parking       = GetRentalItem(details, "Parking");
            Available     = GetRentalItem(details, "Available");
            Bathrooms     = GetRentalItem(details, "Bathrooms");
            Furnished     = GetRentalItem(details, "Furnished");
            LeaseType     = GetRentalItem(details, "Lease Type");
            UnitDetails   = GetRentalItem(details, "Unit Details");
            ListingType   = GetRentalItem(details, "Listing Type");
        }
Beispiel #2
0
        /// <summary>
        /// Opens the forum topic, collects every link found inside its posts and, for each link
        /// not yet listed in "Completed.txt", saves the linked page's HTML and downloads the
        /// video embedded in any of its frames.
        /// </summary>
        /// <remarks>
        /// The list of completed links is rewritten to "Completed.txt" after every processed
        /// link, so links already handled are skipped on the next run.
        /// </remarks>
        void DownloadPianoVideos()
        {
            var selenium      = new SeleniumInterface();
            var completedPath = $"{DownloadPath}Completed.txt";
            var tempVideoPath = $"{DownloadPath}Temp.mp4";

            // WebClient is IDisposable; the using block releases it even if a download throws.
            using (var client = new WebClient())
            {
                Logger.Log("Downloading main page...");

                // A missing progress file simply means nothing has been completed yet (first run).
                var completedLinks = (File.Exists(completedPath) ? File.ReadAllLines(completedPath) : new string[0]).ToList();
                var links          =
                    selenium.GoTo("http://pianocareeracademy.com/forum/index.php/topic,207.msg1539.html")
                    .SelectNodes("//div[@class='post']")
                    .SelectMany(p => p.SelectNodes(".//a").Select(l => l.GetAttributeValue("href", "")))
                    // Anchors without an href yield "" here; there is nothing to navigate to for those.
                    .Where(href => !string.IsNullOrWhiteSpace(href))
                    .Except(completedLinks)
                    .ToList();

                Logger.Log($"Found {links.Count} links.");
                var linkIndex = 1;

                foreach (var link in links)
                {
                    Logger.Log($"Downloading page '{link}' ({linkIndex++}/{links.Count})...");
                    selenium.GoTo(link, 5000);
                    var pageSaved = false;

                    // Look for a <video> element in each of the page's frames.
                    for (var frameIndex = 0; frameIndex < selenium.BaseFrameCount; frameIndex++)
                    {
                        var videoElement = selenium.SelectNode(".//video", frameIndex);

                        // Read the source URL once instead of re-querying the element for every use.
                        var videoSrc = videoElement?.GetAttributeValue("src");
                        if (string.IsNullOrEmpty(videoSrc))
                        {
                            continue;
                        }

                        var srcFilename = Path.GetFileNameWithoutExtension(new Uri(videoSrc).LocalPath);

                        // Fall back to the source file name when the frame has no heading,
                        // rather than failing with a NullReferenceException.
                        var headingNode = selenium.SelectNode(".//div[@class='title']//div[@class='headers']//h1", frameIndex);
                        var title       = RemoveInvalidCharacters(headingNode != null ? headingNode.InnerText.Trim() : srcFilename);

                        // A video counts as downloaded when any file in DownloadPath contains its source
                        // file name. Ordinal, case-insensitive matching keeps this independent of the
                        // current culture.
                        var alreadyDownloaded = Directory
                            .EnumerateFiles(DownloadPath, "*.*", SearchOption.TopDirectoryOnly)
                            .Select(Path.GetFileName)
                            .Any(f => f.IndexOf(srcFilename, StringComparison.OrdinalIgnoreCase) >= 0);

                        if (!alreadyDownloaded)
                        {
                            Logger.Log($"Downloading video '{title}'...");

                            // Download to a temporary name first so an interrupted transfer never
                            // leaves a partial file under the final name.
                            File.Delete(tempVideoPath);
                            client.DownloadFile(videoSrc, tempVideoPath);
                            File.Move(tempVideoPath, $"{DownloadPath}{title} ({srcFilename}).mp4");
                        }
                        else
                        {
                            Logger.Log($"Video '{title}' already downloaded.");
                        }

                        SaveWebPage(selenium, title);
                        pageSaved = true;
                    }

                    if (!pageSaved)
                    {
                        // No video was found: save the page under its <title>, or under the link
                        // itself when the document has no <title> element.
                        var pageTitleNode = selenium.SelectNode(".//title");
                        SaveWebPage(selenium, RemoveInvalidCharacters(pageTitleNode != null ? pageTitleNode.InnerText.Trim() : link));
                    }

                    // Persist progress after every link.
                    completedLinks.Add(link);
                    File.WriteAllLines(completedPath, completedLinks);
                }
            }
        }
Beispiel #3
0
 /// <summary>
 /// Logs the save and writes the browser's current page source to a ".htm" file
 /// named after <paramref name="title"/>, prefixed with DownloadPath.
 /// </summary>
 /// <param name="selenium">Browser wrapper whose <c>Driver.PageSource</c> is written out.</param>
 /// <param name="title">File name without extension; NOTE(review): presumably already stripped of invalid file-name characters by the caller -- confirm.</param>
 void SaveWebPage(SeleniumInterface selenium, string title)
 {
     Logger.Log($"Saving page {title}.htm.");

     var targetPath = $"{DownloadPath}{title}.htm";
     var markup     = selenium.Driver.PageSource;

     File.WriteAllText(targetPath, markup);
 }