/// <summary>
/// Scans the HTML of an IMDB movie page line by line, fills a new <see cref="Movie"/>
/// from the markup it recognises and appends that movie to <c>Movies</c>.
/// </summary>
/// <remarks>
/// Every field except Genre is taken from the first place it is found; Genre accumulates
/// one entry per matching line. The poster image and the plot summary page are downloaded
/// synchronously while parsing, so this method performs network I/O.
/// </remarks>
/// <param name="pageContent">Raw HTML of the movie page.</param>
void ParserIMDB_MoviePage(string pageContent)
{
    Movie movie = new Movie();
    List<string> lines = SplitIntoTrimmedLines(pageContent);

    // The bound is Count - 1 on purpose: the Runtime section advances i by one and reads
    // lines[i], so the last line may only ever be consumed as a "following" line.
    for (int i = 0; i < lines.Count - 1; i++)
    {
        #region Name
        if (movie.Name == null)
        {
            // Alternative mask (disabled): @"<meta property='og:title' content=""*"""
            string mask = @"<h1 class=""header""> <span class=""itemprop"" itemprop=""name"">*</span>";
            if (lines[i].EqualByStrMask(mask))
            {
                movie.Name = lines[i].MaskedStrCopy(mask).Trim();
            }
        }
        #endregion

        #region Name2
        if (movie.Name2 == null)
        {
            if (lines[i] == @"<br/><span class=""title-extra"" itemprop=""name"">")
            {
                // The quoted original title sits on one of the lines after the opening span.
                for (int ii = i + 1; ii < lines.Count - 1; ii++)
                {
                    string mask = @"""*""";
                    if (lines[ii].EqualByStrMask(mask))
                    {
                        movie.Name2 = lines[ii].MaskedStrCopy(mask).Trim();
                        break;
                    }
                }
            }

            if (lines[i].Contains("Also Known As:"))
            {
                // NOTE(review): unlike every other mask in this method, this one has no '*'
                // wildcard, and the search starts on the line AFTER the one containing
                // "Also Known As:". Confirm against EqualByStrMask/MaskedStrCopy that this
                // captures what is intended.
                for (int ii = i + 1; ii < lines.Count - 1; ii++)
                {
                    string mask = @"<h4 class=""inline"">Also Known As:</h4> ";
                    if (lines[ii].EqualByStrMask(mask))
                    {
                        movie.Name2 = lines[ii].MaskedStrCopy(mask).Trim();
                        break;
                    }
                }
            }
        }
        #endregion

        #region Year#1
        if (movie.Year == null)
        {
            string mask = @"/?ref_=tt_ov_inf"" >";
            int maskPos = lines[i].IndexOf(mask, StringComparison.Ordinal);
            if (maskPos > -1)
            {
                int start = maskPos + mask.Length;
                // Search for the terminator after the marker and skip the line when it is
                // absent, instead of passing a negative length to Substring.
                int end = lines[i].IndexOf("</a>)</span>", start, StringComparison.Ordinal);
                if (end > -1)
                {
                    movie.Year = lines[i].Substring(start, end - start);
                }
            }
        }
        #endregion

        #region PicUrl&PicBin
        if (movie.PicUrl == null)
        {
            if (lines[i] == @"<div class=""image"">")
            {
                for (int ii = i + 1; ii < lines.Count - 1; ii++)
                {
                    string mask = @"src=""http://*.jpg""";
                    if (lines[ii].EqualByStrMask(mask))
                    {
                        movie.PicUrl = "http://" + lines[ii].MaskedStrCopy(mask).Trim() + ".jpg";
                        using (WebClient client = new WebClient())
                        {
                            movie.PicBin = new MemoryStream(client.DownloadData(movie.PicUrl));
                        }
                        break;
                    }
                }
            }
        }
        #endregion

        #region Genre
        if (lines[i].Contains(@"<a href=""/genre/")
            && lines[i].EndsWith(@"</span></a>", StringComparison.Ordinal))
        {
            if (movie.Genre == null)
            {
                movie.Genre = "";
            }

            string mask = @"""genre"">*</span>";
            string genre = lines[i].MaskedStrCopy(mask).Trim();
            // Comma-separated list; an entry already contained in the list is not added again.
            if (!movie.Genre.Contains(genre))
            {
                movie.Genre = (movie.Genre + "," + genre).SpecialTrim(",");
            }
        }
        #endregion

        #region Runtime
        if (movie.Runtime == TimeSpan.Zero)
        {
            if (lines[i].StartsWith(@"<time itemprop=""duration"" datetime=""", StringComparison.Ordinal)
                && lines[i].EndsWith(@""" >", StringComparison.Ordinal))
            {
                // The minutes text is on the line following the opening <time> tag. Note that
                // this advances the outer loop: the remaining sections below see the new line.
                i++;

                int spacePos = lines[i].IndexOf(' ');
                string minutesText = spacePos > -1 ? lines[i].Substring(0, spacePos) : lines[i];

                // Parse with the invariant culture (as Ratio does) so the result does not
                // depend on the machine's regional settings; leave Runtime unset on failure.
                double minutes;
                if (double.TryParse(minutesText, NumberStyles.Float | NumberStyles.AllowThousands,
                                    CultureInfo.InvariantCulture, out minutes))
                {
                    movie.Runtime = TimeSpan.FromMinutes(minutes);
                }
            }
        }
        #endregion

        #region Ratio
        if (movie.Ratio == 0)
        {
            if (lines[i].Contains(@"<span itemprop=""ratingValue"">"))
            {
                string mask = @"<strong><span itemprop=""ratingValue"">*</span></strong>";
                if (lines[i].EqualByStrMask(mask))
                {
                    // Stored as an integer: the decimal rating multiplied by ten.
                    movie.Ratio = (int)(double.Parse(lines[i].MaskedStrCopy(mask).Trim(), CultureInfo.InvariantCulture) * 10);
                }
            }
        }
        #endregion

        #region Director
        if (movie.Director == null)
        {
            if (lines[i].Contains(@"<div class=""txt-block"" itemprop=""director"""))
            {
                movie.Director = CollectItempropNames(lines, i);
            }
        }
        #endregion

        #region Writer
        if (movie.Writer == null)
        {
            if (lines[i].Contains(@"<div class=""txt-block"" itemprop=""creator"""))
            {
                movie.Writer = CollectItempropNames(lines, i);
            }
        }
        #endregion

        #region Actor
        if (movie.Actor == null)
        {
            if (lines[i].Contains(@">Cast<"))
            {
                // Cast entries are read until the first line containing "<script>".
                for (int ii = i + 1; (ii < lines.Count - 1) && !lines[ii].Contains("<script>"); ii++)
                {
                    // Alternative test (disabled):
                    // lines[ii].IndexOf(@"<span class=""itemprop"" itemprop=""name"">") > -1
                    if (lines[ii].EndsWith(@"</span>", StringComparison.Ordinal))
                    {
                        if (movie.Actor == null)
                        {
                            movie.Actor = "";
                        }

                        string mask = @""" itemprop='url'> <span class=""itemprop"" itemprop=""name"">*</span>";
                        string actor = lines[ii].MaskedStrCopy(mask).Trim();
                        if (!movie.Actor.Contains(actor))
                        {
                            movie.Actor = (movie.Actor + "," + actor).SpecialTrim(",");
                        }
                    }
                }
            }
        }
        #endregion

        #region Desc
        if (movie.Desc == null)
        {
            if (lines[i].Contains(@"plotsummary?ref_=tt_stry_pl"" >Plot Summary</a>"))
            {
                string mask = @"<a href=""*?ref_=tt_stry_pl"" >Plot Summary</a>";
                string url = lines[i].MaskedStrCopy(mask).Trim();

                string innerPageContent;
                using (WebClient client = new WebClient())
                {
                    innerPageContent = client.DownloadString(@"http://www.imdb.com" + url);
                }

                // NOTE(review): this dumps the downloaded page to a fixed path on every run;
                // it looks like a debugging aid - confirm it is still wanted.
                innerPageContent.SaveToFile(@"c:\txt.txt");

                List<string> innerLines = SplitIntoTrimmedLines(innerPageContent);
                for (int ii = 0; ii < innerLines.Count - 1; ii++)
                {
                    if (innerLines[ii].Contains(@"plotpar"))
                    {
                        // The text is taken from the line after the one containing "plotpar";
                        // successive matches are joined with newlines.
                        ii++;
                        movie.Desc = (movie.Desc + "\n" + innerLines[ii]).SpecialTrim("\n");
                    }
                }
            }
        }
        #endregion

        #region Url
        if (movie.Url == null)
        {
            if (lines[i].Contains(@"plotsummary?ref_=tt_stry_pl"" >Plot Summary</a>"))
            {
                // The movie URL is the plot-summary link with the "plotsummary..." tail cut off.
                string mask = @"<a href=""*plotsummary?ref_=tt_stry_pl"" >Plot Summary</a>";
                movie.Url = @"http://www.imdb.com" + lines[i].MaskedStrCopy(mask).Trim();
            }
        }
        #endregion
    }

    Movies.Add(movie);
}

/// <summary>Splits <paramref name="text"/> into lines and trims each of them.</summary>
private static List<string> SplitIntoTrimmedLines(string text)
{
    List<string> result = new List<string>();
    using (StringReader reader = new StringReader(text))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            result.Add(line.Trim());
        }
    }
    return result;
}

/// <summary>
/// Collects the itemprop="name" span values that follow <paramref name="headerIndex"/>,
/// stopping at the first line containing "div>", as a comma-separated list without repeats.
/// </summary>
/// <returns>The joined names, or <c>null</c> when no name span was found.</returns>
private static string CollectItempropNames(List<string> lines, int headerIndex)
{
    string names = null;
    for (int ii = headerIndex + 1; (ii < lines.Count - 1) && !lines[ii].Contains("div>"); ii++)
    {
        if (lines[ii].Contains(@"<span class=""itemprop"" itemprop=""name"">"))
        {
            if (names == null)
            {
                names = "";
            }

            string mask = @"<span class=""itemprop"" itemprop=""name"">*</span></a>";
            string name = lines[ii].MaskedStrCopy(mask).Trim();
            if (!names.Contains(name))
            {
                names = (names + "," + name).SpecialTrim(",");
            }
        }
    }
    return names;
}
/// <summary>
/// Parses an IMDB search result page: finds the line that starts with the first result row,
/// splits it into individual rows and appends one <see cref="Movie"/> per row (name, year,
/// URL and downloaded thumbnail) to <c>SearchResult_IMDB</c>.
/// </summary>
/// <remarks>
/// When the page contains no result row nothing is added. Thumbnails are downloaded
/// synchronously, so this method performs network I/O.
/// </remarks>
/// <param name="pageContent">Raw HTML of the search result page.</param>
void ParserIMDB_SearchResultPage(string pageContent)
{
    // NOTE(review): this dumps the page to a fixed path on every call; it looks like a
    // debugging aid - confirm it is still wanted.
    pageContent.SaveToFile(@"c:\txt.txt");

    string searchLine = null;
    using (StringReader stringReader = new StringReader(pageContent))
    {
        // Test ReadLine() for null BEFORE trimming: at end of input it returns null, and
        // calling Trim() on that would throw instead of ending the loop.
        string rawLine;
        while ((rawLine = stringReader.ReadLine()) != null)
        {
            string trimmed = rawLine.Trim();
            if (trimmed.StartsWith(@"<tr class=""findResult odd"">", StringComparison.Ordinal))
            {
                searchLine = trimmed;
                break;
            }
        }
    }

    if (searchLine == null)
    {
        // No result row on the page: nothing to add.
        return;
    }

    // All result rows are on the one matched line; each row starts with this prefix.
    string[] rows = searchLine.Split(new string[] { @"<tr class=""findResult" }, StringSplitOptions.RemoveEmptyEntries);

    using (WebClient client = new WebClient())
    {
        foreach (string item in rows)
        {
            Movie movie = new Movie();
            movie.Name = item.MaskedStrCopy(@">*</a>", item.LastIndexOf(@"fn_tt_tt", StringComparison.Ordinal)).Trim();

            // The year is the text from the first '(' through the last ')', parentheses
            // included. It is left unset when the row has no such pair.
            int open = item.IndexOf('(');
            int close = item.LastIndexOf(')');
            if (open > -1 && close > open)
            {
                movie.Year = item.Substring(open, close - open + 1);
            }

            movie.Url = @"http://www.imdb.com" + item.MaskedStrCopy(@"<a href=""*?ref").Trim();
            movie.PicUrl = item.MaskedStrCopy(@"<img src=""*"" />").Trim();
            movie.PicBin = new MemoryStream(client.DownloadData(movie.PicUrl));

            SearchResult_IMDB.Add(movie);
        }
    }
}