/// <summary>
/// Downloads the company list page and converts each data row of its fourth
/// table into a <see cref="Company"/>.
/// </summary>
/// <remarks>
/// For every parsed row <c>GetCompanyDetails</c> is called to fill
/// <c>DetailSnippet</c>. Progress is written to the console as a percentage
/// while rows are processed.
/// </remarks>
/// <returns>
/// The parsed companies. The list is empty when the response status is not
/// <see cref="HttpStatusCode.OK"/> or when the expected table or its rows
/// cannot be found. Rows that lack the expected cells or the profile link
/// are skipped.
/// </returns>
public List<Company> GetCompanies() {
    // Index of the table that holds the company rows, and the highest cell index read per row.
    const int CompanyTableIndex = 3;
    const int StatusColumn = 7;

    List<Company> companyList = new List<Company> ();

    RestClient client = new RestClient (@"http://internship.iitm.ac.in/students/comp_list.php");
    RestRequest request = new RestRequest (Method.GET);
    request.AddCookie (cookie.Name, cookie.Value);
    IRestResponse response = client.Execute (request);
    if (response.StatusCode != HttpStatusCode.OK) {
        return companyList;
    }

    string cleanHtml = CleanHtml (response.Content);

    // Parse table into Company objects
    HtmlDocument htmlDoc = new HtmlDocument ();
    htmlDoc.LoadHtml (cleanHtml);

    // SelectNodes returns null (not an empty collection) when nothing matches,
    // so both the null case and a too-short collection must be handled.
    HtmlNodeCollection tables = htmlDoc.DocumentNode.SelectNodes ("//table");
    if (tables == null || tables.Count <= CompanyTableIndex) {
        return companyList;
    }
    HtmlNode companyTable = tables [CompanyTableIndex];

    ConArt.Out ("Starting data collection...");

    // Select rows and parse data within
    HtmlNodeCollection rowsList = companyTable.SelectNodes ("tr");
    if (rowsList == null) {
        return companyList;
    }

    // Row 0 is skipped — presumably the table header; confirm against the page markup.
    for (int i = 1; i < rowsList.Count; ++i) {
        HtmlNodeCollection columnList = rowsList [i].SelectNodes ("td");
        HtmlNode profileLink = null;
        if (columnList != null && columnList.Count > StatusColumn) {
            profileLink = columnList [2].SelectSingleNode ("a");
        }

        // Only rows with all expected cells and a profile link can be turned into a Company.
        if (profileLink != null) {
            Company company = new Company ();
            company.Name = columnList [1].InnerText.Replace ("\n", "");
            company.Profile = columnList [2].InnerText;

            string profileAddress = profileLink.GetAttributeValue ("href", "");
            company.DetailUri = new Uri ("http://internship.iitm.ac.in/students/" + HttpUtility.HtmlDecode (profileAddress), UriKind.Absolute);

            // TryParse leaves DateTime.MinValue in the field when the cell is not a valid date.
            DateTime.TryParse (columnList [3].InnerText, out company.TalkDate);
            DateTime.TryParse (columnList [4].InnerText, out company.ResumeDeadline);
            DateTime.TryParse (columnList [5].InnerText, out company.TestDate);
            DateTime.TryParse (columnList [6].InnerText, out company.GdDate);

            // The last column read is handed to the status parser only. Parsing it
            // into TalkDate as well would replace the date read from column 3.
            company.Status = Parsers.ParseStatus (columnList [StatusColumn].InnerText);

            company.DetailSnippet = GetCompanyDetails (company);
            companyList.Add (company);
        }

        // Update progress bar
        Console.CursorLeft = 0;
        Console.Write (i * 100 / rowsList.Count + "% Complete");
    }

    Console.CursorLeft = 0;
    Console.WriteLine (" ");
    return companyList;
}
/// <summary>
/// Fetches the page at <c>company.DetailUri</c> and returns its text content
/// with line breaks removed and HTML entities decoded.
/// </summary>
/// <param name="company">Company whose <c>DetailUri</c> is requested.</param>
/// <returns>
/// The flattened, decoded inner text of the page, or <see cref="string.Empty"/>
/// when the response status is not <see cref="HttpStatusCode.OK"/>.
/// </returns>
string GetCompanyDetails(Company company) {
    RestClient detailClient = new RestClient (company.DetailUri.ToString ());

    // The request carries the same cookie name/value pair as the list request.
    RestRequest detailRequest = new RestRequest (Method.GET);
    detailRequest.AddCookie (cookie.Name, cookie.Value);

    IRestResponse detailResponse = detailClient.Execute (detailRequest);
    if (detailResponse.StatusCode != HttpStatusCode.OK) {
        return string.Empty;
    }

    HtmlDocument detailDoc = new HtmlDocument ();
    detailDoc.LoadHtml (CleanHtml (detailResponse.Content));

    // Strip line feeds first, then carriage returns, before decoding entities.
    string pageText = detailDoc.DocumentNode.InnerText;
    string withoutLineFeeds = pageText.Replace ("\n", string.Empty);
    string singleLine = withoutLineFeeds.Replace ("\r", string.Empty);
    return HttpUtility.HtmlDecode (singleLine);
}