/// <summary>
/// Parses one page of TV search results, stores every series that is not already in the
/// database, updates the found/new counters, and then either clicks through to the next
/// result page or reports that the update is complete.
/// </summary>
/// <param name="fileContent">Raw HTML of the result page; <c>null</c> is treated as an empty page.</param>
/// <param name="browser">Browser control whose document is searched for the "pagnNextLink" element.</param>
public void ParseTVPage(string fileContent, WebBrowser browser)
{
    List<string> tvContent = SplitTvResultItems(StripHtmlComments(fileContent ?? string.Empty));

    foundItems += tvContent.Count;
    tbFound.Text = foundItems.ToString();

    foreach (string tvShowHtml in tvContent)
    {
        Tvseries tvSeries = BuildTvSeriesFromItem(tvShowHtml);
        if (tvSeries == null)
        {
            // No usable title: the item cannot be de-duplicated or displayed, so skip it.
            continue;
        }

        bool alreadyStored = context.Tvseries.Any(
            t => t.Released == tvSeries.Released && t.Title == tvSeries.Title);
        if (alreadyStored)
        {
            // The freshly parsed entity was never added to the context, so there is nothing
            // to remove; simply drop it and move on to the next item.
            continue;
        }

        context.Add(tvSeries);
        newItems += 1;
        tbNew.Text = newItems.ToString();
        context.SaveChanges();
    }

    // Document is null until a page has been loaded; treat that the same as "no next page".
    var nextButton = browser.Document?.GetElementById("pagnNextLink");
    if (nextButton == null)
    {
        MessageBox.Show("Update complete");
        rbMovies.Enabled = true;
        rbTV.Enabled = true;
        btnSearch.Enabled = true;
    }
    else
    {
        nextButton.InvokeMember("click");

        // NOTE(review): the click is issued on `browser` but the wait polls `webBrowser1`;
        // presumably both refer to the same control - confirm against the caller.
        while (webBrowser1.ReadyState != WebBrowserReadyState.Complete)
        {
            Application.DoEvents();
        }

        timer1.Start();
    }
}

/// <summary>Removes every complete <c>&lt;!-- ... --&gt;</c> comment; an unterminated comment is left in place.</summary>
private static string StripHtmlComments(string html)
{
    while (true)
    {
        int open = html.IndexOf("<!--", StringComparison.Ordinal);
        if (open == -1)
        {
            return html;
        }

        int close = html.IndexOf("-->", open, StringComparison.Ordinal);
        if (close == -1)
        {
            return html;
        }

        html = html.Remove(open, close - open + 3);
    }
}

/// <summary>
/// Cuts the page into fragments that each start at a <c>data-asin="</c> marker and end with the
/// following <c>&lt;/li&gt;</c>. Returns an empty list when the page contains no such marker.
/// </summary>
private static List<string> SplitTvResultItems(string html)
{
    const string itemMarker = "data-asin=\"";
    const string itemEnd = "</li>";

    var items = new List<string>();
    int start = html.IndexOf(itemMarker, StringComparison.Ordinal);
    while (start != -1)
    {
        int end = html.IndexOf(itemEnd, start + 1, StringComparison.Ordinal);
        if (end == -1)
        {
            // Truncated page: the last item has no closing tag, so it is not complete.
            break;
        }

        end += itemEnd.Length;
        items.Add(html.Substring(start, end - start));
        start = html.IndexOf(itemMarker, end, StringComparison.Ordinal);
    }

    return items;
}

/// <summary>
/// Returns the text that follows the first occurrence of <paramref name="marker"/> up to (not
/// including) the next <paramref name="terminator"/>, or <c>null</c> when either is missing.
/// </summary>
private static string ReadValueAfterMarker(string html, string marker, char terminator)
{
    int markerPos = html.IndexOf(marker, StringComparison.Ordinal);
    if (markerPos == -1)
    {
        return null;
    }

    int valueStart = markerPos + marker.Length;
    int valueEnd = html.IndexOf(terminator, valueStart);
    return valueEnd == -1 ? null : html.Substring(valueStart, valueEnd - valueStart);
}

/// <summary>
/// Builds a <see cref="Tvseries"/> from a single result fragment, or returns <c>null</c> when the
/// fragment has no <c>title="</c> attribute.
/// </summary>
private static Tvseries BuildTvSeriesFromItem(string itemHtml)
{
    string rawTitle = ReadValueAfterMarker(itemHtml, "title=\"", '"');
    if (rawTitle == null)
    {
        return null;
    }

    Tvseries series = new Tvseries();
    series.DataSin = ReadValueAfterMarker(itemHtml, "data-asin=\"", '"') ?? string.Empty;
    series.Image = ReadValueAfterMarker(itemHtml, "src=\"", '"') ?? string.Empty;
    series.Url = ReadValueAfterMarker(itemHtml, "href=\"", '"') ?? string.Empty;

    series.Title = HttpUtility.HtmlDecode(rawTitle);
    series.SearchTitle = series.Title;
    if (series.Title.StartsWith("THE ", StringComparison.OrdinalIgnoreCase))
    {
        // Move the leading article to the end: "The Office" -> "Office, The".
        series.SearchTitle = series.Title.Substring(4) + ", " + series.Title.Substring(0, 3);
    }

    // First choice for the rating text: the whole text node that contains "out of 5 stars".
    int starsPos = itemHtml.IndexOf("out of 5 stars", StringComparison.Ordinal);
    if (starsPos != -1)
    {
        int textEnd = itemHtml.IndexOf('<', starsPos);
        int tagClose = itemHtml.LastIndexOf('>', starsPos);
        if (textEnd != -1 && tagClose != -1)
        {
            series.Stars = itemHtml.Substring(tagClose + 1, textEnd - tagClose - 1);
        }
    }

    // The text of the a-icon-alt element, when present, takes precedence over the value above.
    string iconAltStars = ReadValueAfterMarker(itemHtml, "a-icon-alt\">", '<');
    if (iconAltStars != null)
    {
        series.Stars = iconAltStars;
    }

    // Small secondary-colour texts: the first one that parses as an integer is taken as the
    // release year; a value of exactly "CC" sets the closed-caption flag.
    const string smallTextMarker = "class=\"a-size-small a-color-secondary\">";
    int markerPos = itemHtml.IndexOf(smallTextMarker, StringComparison.Ordinal);
    while (markerPos != -1)
    {
        int valueStart = markerPos + smallTextMarker.Length;
        int valueEnd = itemHtml.IndexOf('<', valueStart);
        if (valueEnd == -1)
        {
            break;
        }

        string value = itemHtml.Substring(valueStart, valueEnd - valueStart);
        int year;
        if (series.Released == 0 && int.TryParse(value, out year))
        {
            series.Released = year;
        }
        else if (value == "CC")
        {
            // Checked even when no year has been seen yet, so a "CC" that precedes the year
            // (or an item without a year) is not lost.
            series.ClosedCaptioned = true;
        }

        markerPos = itemHtml.IndexOf(smallTextMarker, valueStart, StringComparison.Ordinal);
    }

    return series;
}
/// <summary>
/// Parses the detail page of the series at <c>tvForDetails[itemsProcessed]</c>, updates that
/// entity (Prime flag, ratings, IMDb rating, plot, genres, director, cast), links it to its
/// genres, and then navigates to the next series or reports completion.
/// </summary>
/// <param name="result">Raw HTML of the detail page.</param>
public void ParseTVDetails(string result)
{
    try
    {
        List<string> genres = new List<string>();
        Tvseries tv = tvForDetails[itemsProcessed];
        context.Tvseries.Update(tv);

        tv.IsPrime = result.IndexOf("Watch for 0.00 with Prime", StringComparison.Ordinal) != -1;

        int ratingPos = result.IndexOf("av-icon--amazon_rating", StringComparison.Ordinal);
        if (ratingPos != -1)
        {
            // The rating count is the text inside the first parentheses after the icon.
            string ratings = SliceAfter(result, ratingPos, "(", ")");
            if (ratings != null)
            {
                tv.Ratings = ratings;
            }
        }

        int imdbPos = result.IndexOf("\"imdb-rating-badge\"", StringComparison.Ordinal);
        if (imdbPos != -1)
        {
            string imdbText = SliceAfter(result, imdbPos, ">", "<");
            float imdbRating;

            // Parsed with the invariant culture: the page uses '.' as the decimal separator
            // regardless of the regional settings of the machine running the crawl.
            if (imdbText != null
                && float.TryParse(
                    imdbText,
                    System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out imdbRating))
            {
                tv.Imdbrating = imdbRating;
            }
        }

        int synopsisPos = result.IndexOf("\"synopsis\"", StringComparison.Ordinal);
        if (synopsisPos != -1)
        {
            int paragraphPos = result.IndexOf("<p", synopsisPos, StringComparison.Ordinal);
            if (paragraphPos != -1)
            {
                string plot = SliceAfter(result, paragraphPos, ">", "</p>");
                if (plot != null)
                {
                    tv.Plot = plot.Replace("Now included with Prime.", "").Trim();
                }
            }
        }

        int detailsPos = result.IndexOf("Product details", StringComparison.Ordinal);
        if (detailsPos != -1)
        {
            // The details table is taken to end where "Customer Reviews" begins; when that
            // heading is absent the rest of the page is scanned instead of nothing at all.
            int sectionEnd = result.IndexOf("Customer Reviews", detailsPos, StringComparison.Ordinal);
            if (sectionEnd == -1)
            {
                sectionEnd = result.Length;
            }

            int genresPos = result.IndexOf("Genres", detailsPos, StringComparison.Ordinal);
            if (genresPos != -1 && genresPos <= sectionEnd)
            {
                genres = ReadRowLinkTexts(result, genresPos, sectionEnd);

                // Every genre, including the last, is followed by ", " - this is the format
                // the original code persisted, so it is kept for existing rows and consumers.
                tv.Genres = string.Concat(genres.Select(g => g + ", "));
            }

            int directorPos = result.IndexOf("Director", detailsPos, StringComparison.Ordinal);
            if (directorPos != -1)
            {
                // Only the first linked name is stored as the director.
                List<string> directors = ReadRowLinkTexts(result, directorPos, sectionEnd);
                if (directors.Count > 0)
                {
                    tv.Director = directors[0];
                }
            }

            int starringPos = result.IndexOf("Starring", detailsPos, StringComparison.Ordinal);
            if (starringPos != -1)
            {
                tv.Starring = JoinPersonNames(ReadRowLinkTexts(result, starringPos, sectionEnd));
            }

            int supportingPos = result.IndexOf("Supporting actors", detailsPos, StringComparison.Ordinal);
            if (supportingPos != -1)
            {
                tv.SupportingActors = JoinPersonNames(ReadRowLinkTexts(result, supportingPos, sectionEnd));
            }
        }

        context.SaveChanges();

        // Distinct: a genre listed twice on the page must not produce two link rows.
        foreach (string genreName in genres.Distinct())
        {
            Genre genre = context.Genre.SingleOrDefault(g => g.Name == genreName);
            if (genre == null)
            {
                genre = new Genre();
                genre.Name = genreName;
                context.Add(genre);
                context.SaveChanges();
            }

            TvseriesGenre tvseriesGenre = new TvseriesGenre();
            tvseriesGenre.Genre = genre;
            tvseriesGenre.Tvseries = tv;
            context.Add(tvseriesGenre);
            context.SaveChanges();
        }

        itemsProcessed++;
        lblDetailProgress.Text = $"{itemsProcessed} of {itemsToProcess} Bad URLs: {badURLs}";

        if (itemsProcessed < tvForDetails.Count())
        {
            nextURL = tvForDetails[itemsProcessed].Url;
            webBrowser1.Navigate(nextURL);
            while (webBrowser1.ReadyState != WebBrowserReadyState.Complete)
            {
                Application.DoEvents();
            }

            timer1.Start();
        }
        else
        {
            MessageBox.Show("Update complete");
        }
    }
    catch (Exception ex)
    {
        // Still best-effort (nothing is rethrown), but the failure is now traceable.
        // NOTE(review): itemsProcessed is not advanced on this path, so nothing in this method
        // moves on to the next item after a failure - confirm that is intended.
        System.Diagnostics.Trace.TraceError($"ParseTVDetails failed at item {itemsProcessed}: {ex}");
    }
}

/// <summary>
/// Returns the text between the first <paramref name="open"/> found at or after
/// <paramref name="from"/> and the next <paramref name="close"/>, or <c>null</c> when either
/// delimiter is missing.
/// </summary>
private static string SliceAfter(string html, int from, string open, string close)
{
    int openPos = html.IndexOf(open, from, StringComparison.Ordinal);
    if (openPos == -1)
    {
        return null;
    }

    int start = openPos + open.Length;
    int end = html.IndexOf(close, start, StringComparison.Ordinal);
    return end == -1 ? null : html.Substring(start, end - start);
}

/// <summary>
/// Collects the HTML-decoded text of each <c>&lt;a&gt;</c> element that starts after
/// <paramref name="labelPos"/> and before the nearest of: the next <c>&lt;/td&gt;</c>, the next
/// <c>&lt;tr</c>, or <paramref name="sectionEnd"/>. Always terminates, even on malformed markup.
/// </summary>
private static List<string> ReadRowLinkTexts(string html, int labelPos, int sectionEnd)
{
    var texts = new List<string>();

    // A boundary marker that is absent simply does not constrain the scan.
    int limit = sectionEnd;
    int cellEnd = html.IndexOf("</td>", labelPos, StringComparison.Ordinal);
    if (cellEnd != -1 && cellEnd < limit)
    {
        limit = cellEnd;
    }

    int nextRow = html.IndexOf("<tr", labelPos, StringComparison.Ordinal);
    if (nextRow != -1 && nextRow < limit)
    {
        limit = nextRow;
    }

    int anchor = html.IndexOf("<a", labelPos, StringComparison.Ordinal);
    while (anchor != -1 && anchor < limit)
    {
        int tagClose = html.IndexOf('>', anchor);
        if (tagClose == -1)
        {
            break;
        }

        int textStart = tagClose + 1;
        int textEnd = html.IndexOf("</a>", textStart, StringComparison.Ordinal);
        if (textEnd == -1)
        {
            // Unterminated anchor: stop instead of retrying the same position forever.
            break;
        }

        texts.Add(HttpUtility.HtmlDecode(html.Substring(textStart, textEnd - textStart)));
        anchor = html.IndexOf("<a", textEnd, StringComparison.Ordinal);
    }

    return texts;
}

/// <summary>Joins names with ", " and normalises the whitespace inside them.</summary>
private static string JoinPersonNames(IEnumerable<string> names)
{
    // The original Replace call swapped one whitespace string for another that looked identical
    // in the reviewed source (most likely a non-breaking or a doubled space), so both are
    // normalised here.
    string joined = string.Join(", ", names).Replace('\u00A0', ' ');
    while (joined.Contains("  "))
    {
        joined = joined.Replace("  ", " ");
    }

    return joined;
}