/// <summary>
/// Fills the price, volume and ratio properties from the two statistics columns.
/// </summary>
/// <param name="nodes">
/// Column nodes; index 0 is read as the first column and index 1 as the second.
/// A null or empty collection leaves every property untouched.
/// </param>
private void SetFinancialRatios( HtmlNodeCollection nodes )
{
    if ( nodes == null || nodes.Count( ) == 0 )
        return;

    // First Column contains ([] represents index in array):
    // Range[0], 52Week[1], Open[2], Vol/Avg[3], Market Cap[4], P/E[5]
    // Newlines become spaces and the first character of every entry is dropped.
    string [] firstCol = nodes [ 0 ].ChildNodes
        .Where( i => i.InnerText != "\n" )
        .Select( i => i.InnerText.Replace( "\n", " " ) )
        .Select( text => text.Substring( 1 ) )
        .ToArray( );

    var range = ParseRange( firstCol [ 0 ] );
    RangeLow = range [ 0 ];
    RangeHigh = range [ 1 ];

    var _52Week = Parse52Week( firstCol [ 1 ] );
    FiftyTwoWeekLow = _52Week [ 0 ];
    FiftyTwoWeekHigh = _52Week [ 1 ];

    // Parse once and reuse the result instead of invoking ParseOpen twice.
    var open = ParseOpen( firstCol [ 2 ] );
    Open = open != null ? Convert.ToDouble( open ) : 0;

    var vol = ParseVolume( firstCol [ 3 ] );
    VolumeAverage = vol [ 0 ];
    VolumeTotal = vol [ 1 ];

    MarketCap = ParseMarketCap( firstCol [ 4 ] );
    PriceEarnings = ParsePriceEarnings( firstCol [ 5 ] );

    // Second Column contains ([] represents index in array):
    // Div/Yield[0], EPS[1], Shares[2], Beta[3], Institutional Ownership[4]
    string [] secondCol = nodes [ 1 ].ChildNodes
        .Where( i => i.InnerText != "\n" )
        .Select( i => i.InnerText.Replace( "\n", " " ) )
        .ToArray( );

    var divs = ParseDividend( secondCol [ 0 ] );
    Dividend = divs != null ? divs [ 0 ] : null;
    DividendYield = divs != null ? divs [ 1 ] : null;

    EarningsPerShare = ParseEarningsPerShare( secondCol [ 1 ] );

    // Index 4 is read below, so all five entries must be present
    // (the previous ">= 4" check let a 4-element column run off the end of the array).
    if ( secondCol.Length >= 5 )
    {
        Shares = ParseShares( secondCol [ 2 ] );
        Beta = ParseBeta( secondCol [ 3 ] );
        InstitutionalOwnership = ParseInstituionalOwnership( secondCol [ 4 ] );
    }
    else
    {
        // TODO: Implement sparse information case here!
    }
}
/// <summary>
/// Reads Sector and Industry from node 6 and Description from node 7.
/// </summary>
/// <param name="nodes">
/// Page nodes; at least 8 are required. When the collection is null or too short, or the
/// sector/industry text does not contain the expected markers, no property is changed.
/// </param>
private void SetSectorsAndIndustries( HtmlNodeCollection nodes )
{
    if ( nodes == null || nodes.Count( ) < 8 )
        return;

    var sectorAndIndustry = nodes [ 6 ].ChildNodes
        .Where( i => i.InnerText != "\n" )
        .Select( i => i.InnerText )
        .ToArray( );
    if ( sectorAndIndustry.Length == 0 )
        return;

    const string sectorLabel = "Sector:";
    const string industryLabel = "Industry:";
    string text = sectorAndIndustry [ 0 ];

    // Ordinal search: these are fixed markup markers, not linguistic text.
    int sectorLabelAt = text.IndexOf( sectorLabel, StringComparison.Ordinal );
    int sectorEnd = text.IndexOf( " >", StringComparison.Ordinal );
    int industryLabelAt = text.IndexOf( industryLabel, StringComparison.Ordinal );
    int industryEnd = text.IndexOf( "\n\n", StringComparison.Ordinal );
    if ( sectorLabelAt == -1 || sectorEnd == -1 || industryLabelAt == -1 || industryEnd == -1 )
        return;

    // Each value starts one character after its label and runs up to its end marker.
    int sectorStart = sectorLabelAt + sectorLabel.Length + 1;
    int industryStart = industryLabelAt + industryLabel.Length + 1;
    if ( sectorEnd < sectorStart || industryEnd < industryStart )
        return;

    // InnerText keeps HTML entities encoded, so turn the ampersand entity back into '&'.
    // (The previous Replace( "&", "&" ) had no effect.)
    Sector = text.Substring( sectorStart, sectorEnd - sectorStart ).Replace( "&amp;", "&" );
    Industry = text.Substring( industryStart, industryEnd - industryStart ).Replace( "&amp;", "&" );

    string description = nodes [ 7 ].ChildNodes
        .Where( i => i.InnerText != "\n" )
        .Select( i => i.InnerText )
        .FirstOrDefault( );
    if ( description != null )
        Description = description.Replace( "More from Reuters »", "" ).Replace( "\n", "" );
}
/// <summary>
/// Decides whether the page should be filtered out, on top of the base-class check.
/// </summary>
/// <param name="pictureHtmlNode">Picture nodes found on the page.</param>
/// <param name="pageNum">
/// Set to 500 when this override filters the page; left as the base call leaves it otherwise.
/// NOTE(review): 500 presumably acts as an "end of paging" marker for the caller - confirm.
/// </param>
/// <returns>
/// True when the base check returns true, or when there is at most one picture node and it is
/// either missing or its "src" contains "bctp_28.gif"; false otherwise.
/// </returns>
protected override bool FilterPage(HtmlNodeCollection pictureHtmlNode, ref int pageNum)
{
    if (base.FilterPage(pictureHtmlNode, ref pageNum))
    {
        return true;
    }

    // Pages with several pictures are never filtered by this override.
    if (pictureHtmlNode.Count() > 1)
    {
        return false;
    }

    var onlyPicture = pictureHtmlNode.FirstOrDefault();
    bool filterOut = onlyPicture == null
        || onlyPicture.Attributes["src"].Value.Contains("bctp_28.gif");
    if (!filterOut)
    {
        return false;
    }

    pageNum = 500;
    return true;
}
/// <summary>
/// Rebuilds OfficersAndDirectors from the management table found in the last node.
/// </summary>
/// <param name="nodes">
/// Page nodes; only the last one is searched. When the collection is null or empty, or the
/// last node has no table whose class contains 'id-mgmt-table', OfficersAndDirectors is
/// left unchanged.
/// </param>
private void SetRelatedPersons( HtmlNodeCollection nodes )
{
    if ( nodes == null || nodes.Count( ) == 0 )
        return;

    var mgmtTable = nodes [ nodes.Count( ) - 1 ]
        .SelectSingleNode( ".//table[contains(@class, 'id-mgmt-table')]" );
    if ( mgmtTable == null )
        return;

    OfficersAndDirectors = new List<ImportantPerson>( );

    // SelectNodes yields null (not an empty collection) when nothing matches.
    var cells = mgmtTable.SelectNodes( ".//tr//td[contains(@class, 'p ')]" );
    if ( cells == null )
        return;

    foreach ( var cell in cells )
    {
        // A blank line separates name and role in the cell text; turn it into "Name: Role".
        string person = cell.InnerText.Replace( "\n\n", ": " ).Replace( "\n", "" );

        var parts = person.Split( ':' );
        if ( parts.Length < 2 )
            continue; // no name/role separator - not in the expected shape, skip it

        // Drop the character following ':' (the space inserted above) when there is one.
        string role = parts [ 1 ].Length > 0 ? parts [ 1 ].Substring( 1 ) : parts [ 1 ];

        OfficersAndDirectors.Add( new ImportantPerson { Name = parts [ 0 ], Role = role } );
    }
}
/// <summary>
/// Walks the listing table in <paramref name="documentText"/> (rows 2 and up), downloads the
/// detail page linked from each row and hands the extracted fields to StoreUserData.
/// </summary>
/// <param name="documentText">HTML of the listing page.</param>
/// <remarks>
/// This method never throws: failures are written to <see cref="System.Diagnostics.Trace"/>.
/// A failure while processing one row only skips that row.
/// </remarks>
void ExtractUsers(string documentText)
{
    try
    {
        HtmlAgilityPack.HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument();
        htmlDocument.LoadHtml(documentText);

        HtmlAgilityPack.HtmlNode userTable = htmlDocument.DocumentNode.SelectSingleNode("//*[@id='regPage']/div/table");
        if (userTable == null)
        {
            return;
        }

        HtmlAgilityPack.HtmlNodeCollection userRows = userTable.SelectNodes("./tr");
        if (userRows == null)
        {
            return;
        }

        int rowCount = userRows.Count();

        // One client serves every detail-page download.
        using (WebClient client = new WebClient())
        {
            // XPath positions are 1-based; the loop starts at row 2, skipping the first row.
            for (int k = 2; k <= rowCount; k++)
            {
                try
                {
                    StoreUserFromListingRow(userTable, k, client);
                }
                catch (Exception ex)
                {
                    System.Diagnostics.Trace.TraceError("ExtractUsers: skipping row {0}: {1}", k, ex);
                }
            }
        }
    }
    catch (Exception ex)
    {
        System.Diagnostics.Trace.TraceError("ExtractUsers failed: {0}", ex);
    }
}

/// <summary>
/// Downloads the detail page linked from listing row <paramref name="rowNumber"/> and stores
/// the user it describes. Rows without a usable link or detail table are skipped.
/// </summary>
private void StoreUserFromListingRow(HtmlAgilityPack.HtmlNode userTable, int rowNumber, WebClient client)
{
    HtmlAgilityPack.HtmlNode moreBtn = userTable.SelectSingleNode("./tr[" + rowNumber + "]/td[6]/a");
    if (moreBtn == null)
    {
        return;
    }

    string linkBtn = moreBtn.GetAttributeValue("onclick", null);
    if (linkBtn == null)
    {
        return;
    }

    // The relative link is the text between the first and second 'b' of the onclick value.
    string[] mainLink = linkBtn.Split('b');
    if (mainLink.Length < 2)
    {
        return;
    }

    string detailHtml = client.DownloadString("http://bloodhelpers.com/b" + mainLink[1]);

    HtmlAgilityPack.HtmlDocument userDocument = new HtmlAgilityPack.HtmlDocument();
    userDocument.LoadHtml(detailHtml);

    HtmlAgilityPack.HtmlNode selectTable = userDocument.DocumentNode.SelectSingleNode("//*[@id='search']/form/table");
    if (selectTable == null)
    {
        return;
    }

    // Detail rows, in order: name, email, blood group, gender, age, city,
    // mobile number, land line number, last donation date.
    string name = DetailCell(selectTable, 1).InnerText;
    // TODO (carried over): function to convert png image into jpg
    string email = ReadAttributeFragment(DetailCell(selectTable, 2));
    string bloodGroup = DetailCell(selectTable, 3).InnerText;
    string gender = DetailCell(selectTable, 4).InnerText;
    int age = Int32.Parse(DetailCell(selectTable, 5).InnerText.Replace("Years", ""));
    string city = DetailCell(selectTable, 6).InnerText;
    // TODO (carried over): function to convert png image into jpg
    string mobileNumber = ReadAttributeFragment(DetailCell(selectTable, 7));
    // TODO (carried over): function to convert png image into jpg
    string landLineNum = ReadAttributeFragment(DetailCell(selectTable, 8));
    string lastDonationDate = DetailCell(selectTable, 9).InnerText;

    // Store Data in DataBase
    StoreUserData(name, email, bloodGroup, gender, age, city, mobileNumber, landLineNum, lastDonationDate);
}

/// <summary>Returns the second cell of the given 1-based row of the detail table.</summary>
private static HtmlAgilityPack.HtmlNode DetailCell(HtmlAgilityPack.HtmlNode detailTable, int row)
{
    return detailTable.SelectSingleNode("./tr[" + row + "]/td[2]");
}

/// <summary>
/// Splits the cell's inner HTML on '=', rejoins the second and third pieces with '=' and
/// removes the text "border".
/// NOTE(review): presumably the cell holds an image tag whose source URL contains one '=' - confirm.
/// </summary>
private static string ReadAttributeFragment(HtmlAgilityPack.HtmlNode cell)
{
    string[] pieces = cell.InnerHtml.Split('=');
    return (pieces[1] + "=" + pieces[2]).Replace("border", "");
}