コード例 #1
0
        /// <summary>
        /// Builds a <see cref="Movie"/> from one node of the movie listing, then downloads the
        /// movie's detail page to fill in the trailer link, the description and the show times.
        /// </summary>
        /// <param name="movieNode">Listing node against which the relative XPath queries are evaluated.</param>
        /// <returns>
        /// The movie that was built. Any exception raised while parsing is logged and swallowed,
        /// so the returned object may be only partially populated.
        /// </returns>
        private Movie ParseMovie(HtmlNode movieNode)
        {
            string xpName = "div[2]/ul/li[1]";
            string xpProducer = "div[2]/ul/li[5]";
            string xpLength = "div[2]/ul/li[3]";
            string xpType = "div[2]/ul/li[2]";
            string xpImage = ".//img[@class='dn_imgmovies']";
            string xpDetail = "div[2]//a";
            Movie mv = new Movie();
            try
            {
                log.WriteLog("Begin Parse A Movie");
                mv.MvSource = this.GetType().Name;
                mv.Name = movieNode.SelectSingleNode(xpName).InnerText.Trim();

                mv.Producer = movieNode.SelectSingleNode(xpProducer).InnerText;
                mv.Producer = CleanLotteData(mv.Producer);

                mv.Length = movieNode.SelectSingleNode(xpLength).InnerText;
                mv.Length = CleanLotteData(mv.Length);

                mv.Type = movieNode.SelectSingleNode(xpType).InnerText.Trim();
                mv.Type = CleanLotteData(mv.Type);

                mv.Image = @"http://lottecinemavn.com/vi-vn" + movieNode.SelectSingleNode(xpImage).Attributes["src"].Value;
                mv.DetailLink = @"http://lottecinemavn.com/vi-vn" + movieNode.SelectSingleNode(xpDetail).Attributes["href"].Value;

                // Parse detail of a movie
                log.WriteLog("Parse movie detail at " + mv.DetailLink);
                HtmlWeb website = new HtmlWeb();
                HtmlAgilityPack.HtmlDocument doc = website.Load(mv.DetailLink);

                mv.TrailerLink = "http:" + doc.DocumentNode.SelectSingleNode("//iframe[contains(@src,'youtube')]").Attributes["src"].Value;
                mv.Info = doc.DocumentNode.SelectSingleNode("//div[@class='dn_contentmvdt dn_tabsmvdt1']").InnerText.Trim();

                // Parse ticket info
                log.WriteLog("Parse ticket info");
                HtmlAgilityPack.HtmlNodeCollection listNode = doc.DocumentNode.SelectNodes("//div[@class='dn_contentmvdt dn_tabsmvdt2']/div");
                List<Ticket> tickets = new List<Ticket>();

                // SelectNodes yields null when nothing matches; that simply means "no tickets".
                if (listNode != null)
                {
                    // Built once instead of once per schedule table.
                    Regex lineBreak = new Regex("\n|\r");
                    string sourceName = this.GetType().Name;

                    // Entries are consumed in groups of three: listNode[i] supplies the theater
                    // name and listNode[i + 2] the schedule tables (listNode[i + 1] is not read).
                    // Bounding on i + 2 keeps a trailing incomplete group from throwing and
                    // discarding the tickets collected so far.
                    for (int i = 0; i + 2 < listNode.Count; i += 3)
                    {
                        string strTheater = listNode[i].InnerText.Trim();
                        HtmlAgilityPack.HtmlNodeCollection scheduleNode = listNode[i + 2].SelectNodes(".//table");
                        if (scheduleNode == null)
                        {
                            // No schedule table for this theater; skip it rather than abort.
                            continue;
                        }

                        foreach (HtmlNode table in scheduleNode)
                        {
                            // First div holds the date, second the show times.
                            HtmlAgilityPack.HtmlNodeCollection cells = table.SelectNodes(".//div");
                            if (cells == null || cells.Count < 2)
                            {
                                continue;
                            }

                            string showDate = cells[0].InnerText.Trim().Replace("-", "/");
                            string showTime = lineBreak.Replace(cells[1].InnerText.Trim(), " ");
                            tickets.Add(new Ticket(sourceName, strTheater, showDate, showTime));
                        }
                    }
                }
                mv.Tickets = tickets;
            }
            catch (Exception e)
            {
                log.WriteLog("##############################################");
                log.WriteLog("Exception " + e.Source + " : " + e.Message);
            }
            log.WriteLog("Parsed : " + mv.DetailLink);
            return mv;
        }
コード例 #2
0
ファイル: Cgv.cs プロジェクト: khoatndse/MovieSchedulerParser
        /// <summary>
        /// Builds a <see cref="Movie"/> from one node of the movie listing, downloads the movie's
        /// detail page for producer, image, trailer and description, and then queries the
        /// session-time XML endpoint once per entry in <c>theaterList</c> to collect show times.
        /// </summary>
        /// <param name="movieNode">Listing node against which the relative XPath queries are evaluated.</param>
        /// <returns>
        /// The movie that was built. Any exception raised while parsing is logged and swallowed,
        /// so the returned object may be only partially populated.
        /// </returns>
        private Movie ParseMovie(HtmlNode movieNode)
        {
            string xpName = "div[@class='m_center']/a/span";
            string xpDetail = "div[@class='m_center']/a";
            string xpProducer = "//div[@class='info']/p[1]";
            string xpLength = "div[@class='m_center']/text()[3]";
            string xpType = "div[@class='m_center']/text()[4]";
            string xpImage = "//div[@class='movie-info']/div[@class='photo']/img";
            string dbName = "";
            Movie mv = new Movie();
            try
            {
                log.WriteLog("Begin Parse A Movie");
                mv.MvSource = this.GetType().Name;
                mv.Name = movieNode.SelectSingleNode(xpName).InnerText.Trim();
                mv.DetailLink = @"http://www.cgv.vn" + movieNode.SelectSingleNode(xpDetail).Attributes["href"].Value;
                mv.Length = movieNode.SelectSingleNode(xpLength).InnerText.Split(':')[1].Trim();
                // Decode the numeric character references that appear in the length text.
                mv.Length = mv.Length.Replace("&#7901;", "ờ").Replace("&#224;", "à").Replace("&#250;", "ú");

                mv.Type = movieNode.SelectSingleNode(xpType).InnerText.Split(':')[1].Trim();

                // The name used by the session-time endpoint is the last double-quoted
                // value inside the onclick attribute of the second session link.
                dbName = movieNode.SelectSingleNode("div[@class='m_right']//div[@class='q_session_time']/a[2]").Attributes["onclick"].Value.Replace("/vn/", " ").Trim();
                Regex cleanDbName = new Regex(".*\"(.*?)\"");
                Match extractDbName = cleanDbName.Match(dbName);
                dbName = extractDbName.Groups[1].Value;

                // Parse detail of a movie
                log.WriteLog("Parse movie detail at " + mv.DetailLink);
                HtmlWeb website = new HtmlWeb();
                HtmlAgilityPack.HtmlDocument doc = website.Load(mv.DetailLink);

                mv.Producer = doc.DocumentNode.SelectSingleNode(xpProducer).InnerText.Split(':')[1].Trim();
                mv.Image = @"http://www.cgv.vn" + doc.DocumentNode.SelectSingleNode(xpImage).Attributes["src"].Value;
                mv.TrailerLink = doc.DocumentNode.SelectSingleNode("//div[@id='tab1']").InnerHtml;

                // Pull the video id out of the "ytlink=" parameter and turn it into an embed URL.
                Regex pattern = new Regex("ytlink=(.*?)&");
                Match matcher = pattern.Match(mv.TrailerLink);
                mv.TrailerLink = @"http://www.youtube.com/embed/" + matcher.Groups[1].Value;

                mv.Info = doc.DocumentNode.SelectSingleNode("//div[@class='desc']").InnerText.Trim();

                // Parse ticket info
                log.WriteLog("Parse ticket info");
                List<Ticket> tickets = new List<Ticket>();

                // Built once instead of once per show time.
                Regex anchorText = new Regex("<a .*>(.*?)</a>");
                string sourceName = this.GetType().Name;

                XmlDocument xmlDoc = new XmlDocument();
                foreach (var item in theaterList)
                {
                    string ticketInfo = String.Format(@"https://www.cgv.vn/megastarXMLData.aspx?RequestType=GetSessionTimes&&CinemaID={0}&&MovieName={1}&&Time=TodayAndTomorrow&&visLang=1", item.Key, dbName);
                    xmlDoc.Load(ticketInfo);
                    log.WriteLog("Ticket info of " + dbName + " at " + ticketInfo);

                    // read detail
                    XmlNodeList dateList = xmlDoc.DocumentElement.GetElementsByTagName("date");
                    for (int i = 0; i < dateList.Count; i++)
                    {
                        XmlNode dateNode = dateList[i];
                        if (dateNode.Attributes == null || dateNode.Attributes.Count == 0)
                        {
                            continue;
                        }

                        // A <date> element without a "name" attribute carries no usable date;
                        // skip it instead of failing the whole movie.
                        XmlAttribute nameAttribute = dateNode.Attributes["name"];
                        if (nameAttribute == null)
                        {
                            continue;
                        }

                        string showDate = nameAttribute.Value;
                        string showTime = "";
                        for (int j = 0; j < dateNode.ChildNodes.Count; j++)
                        {
                            XmlNodeList timeList = dateNode.ChildNodes[j].ChildNodes;
                            for (int k = 0; k < timeList.Count; k++)
                            {
                                if (!timeList[k].Name.Equals("value"))
                                {
                                    continue;
                                }

                                // The text of the "value" node is an anchor tag; keep only its text.
                                string time = anchorText.Match(timeList[k].InnerText).Groups[1].Value;
                                showTime += NormalizeCgvShowTime(time) + " ";

                                // Only the first "value" node of each child is used.
                                break;
                            }
                        }
                        tickets.Add(new Ticket(sourceName, item.Value, showDate, showTime));
                    }
                }
                mv.Tickets = tickets;
            }
            catch (Exception e)
            {
                log.WriteLog("##############################################");
                log.WriteLog("Exception " + e.Source + " : " + e.Message);
            }
            log.WriteLog("Parsed : " + mv.DetailLink);
            return mv;
        }

        /// <summary>
        /// Strips the AM/PM marker from a 12-hour time and converts it to 24-hour form.
        /// </summary>
        /// <param name="time">Time text such as "9:30 AM" or "1:45PM"; text without a marker is returned unchanged.</param>
        /// <returns>
        /// The time without its marker. PM hours below 12 are shifted by 12 ("12:30 PM" stays
        /// "12:30" rather than becoming "24:30"), and "12:xx AM" becomes "0:xx". Text whose hour
        /// cannot be parsed is returned with only the marker removed.
        /// </returns>
        private static string NormalizeCgvShowTime(string time)
        {
            bool isAm = time.Contains("AM");
            bool isPm = !isAm && time.Contains("PM");
            if (!isAm && !isPm)
            {
                return time;
            }

            string stripped = time.Replace(isAm ? "AM" : "PM", " ").Trim();
            string[] parts = stripped.Split(':');
            int hour;
            if (parts.Length < 2 || !int.TryParse(parts[0], out hour))
            {
                return stripped;
            }

            if (isAm)
            {
                // Morning times keep their original text (including any leading zero);
                // only midnight needs rewriting.
                return hour == 12 ? "0:" + parts[1] : stripped;
            }

            if (hour < 12)
            {
                hour += 12;
            }
            return hour + ":" + parts[1];
        }
コード例 #3
0
        /// <summary>
        /// Builds a <see cref="Movie"/> from one node of the movie listing, then downloads the
        /// movie's detail page to fill in name, type, producer, length, description and show times.
        /// </summary>
        /// <param name="movieNode">Listing node against which the relative XPath queries are evaluated.</param>
        /// <returns>
        /// The movie that was built. Any exception raised while parsing is logged and swallowed,
        /// so the returned object may be only partially populated.
        /// </returns>
        private Movie ParseMovie(HtmlNode movieNode)
        {
            string xpName = "input[2]";
            string xpFixName = "//div[@class='fr movie-detail']/div[1]";
            string xpType = "//div[@class='fr movie-detail']/div[8]/span[2]";
            string xpLength = "//div[@class='fr movie-detail']/div[11]/span[2]";
            string xpProducer = "//div[@class='fr movie-detail']/div[10]/span[2]";
            string xpImage = "div[@class='img-movie-item']//div[@class='rel']/a[1]/img";
            string xpDetail = "div[@class='img-movie-item']//div[@class='rel']/a[1]";
            string xpInfo = "//div[@class='fr movie-detail']/div[18]";

            Movie mv = new Movie();
            try
            {
                log.WriteLog("Begin Parse A Movie");
                mv.MvSource = this.GetType().Name;
                mv.Name = movieNode.SelectSingleNode(xpName).Attributes["value"].Value.Trim();
                mv.TrailerLink = movieNode.SelectSingleNode("input[3]").Attributes["value"].Value.Trim().Replace("/v/", "/embed/");
                mv.Image = @"https://www.galaxycine.vn" + movieNode.SelectSingleNode(xpImage).Attributes["src"].Value;
                mv.DetailLink = @"https://www.galaxycine.vn" + movieNode.SelectSingleNode(xpDetail).Attributes["href"].Value;

                // Parse detail of a movie
                log.WriteLog("Parse movie detail at " + mv.DetailLink);

                HtmlWeb website = new HtmlWeb();
                HtmlAgilityPack.HtmlDocument doc = website.Load(mv.DetailLink);
                // The name taken from the listing is replaced by the one on the detail page.
                mv.Name = doc.DocumentNode.SelectSingleNode(xpFixName).InnerText.Trim();
                mv.Type = doc.DocumentNode.SelectSingleNode(xpType).InnerText.Trim();
                mv.Producer = doc.DocumentNode.SelectSingleNode(xpProducer).InnerText.Trim();
                mv.Length = doc.DocumentNode.SelectSingleNode(xpLength).InnerText.Trim();
                mv.Info = doc.DocumentNode.SelectSingleNode(xpInfo).InnerText.Trim();

                // Parse ticket info
                log.WriteLog("Parse ticket info");
                HtmlAgilityPack.HtmlNodeCollection listNode = doc.DocumentNode.SelectNodes("//div[@class='mov_sc']");
                List<Ticket> tickets = new List<Ticket>();

                // SelectNodes yields null when nothing matches; treat that as "no tickets"
                // instead of letting a NullReferenceException leave mv.Tickets unassigned.
                if (listNode != null)
                {
                    string sourceName = this.GetType().Name;
                    foreach (HtmlNode theaterNode in listNode)
                    {
                        HtmlNode theaterName = theaterNode.SelectSingleNode("span[1]");
                        HtmlAgilityPack.HtmlNodeCollection scheduleNode = theaterNode.SelectNodes("./div[2]/div");
                        if (theaterName == null || scheduleNode == null)
                        {
                            // Theater block without a name or schedule; skip it rather than abort.
                            continue;
                        }

                        string strTheater = theaterName.InnerText.Replace("GLX", " ").Trim();
                        foreach (HtmlNode dayNode in scheduleNode)
                        {
                            HtmlNode dateNode = dayNode.SelectSingleNode("span");
                            // Looked up once per day instead of on every loop test and iteration.
                            HtmlNode timesNode = dayNode.SelectSingleNode("div[2]");
                            if (dateNode == null || timesNode == null)
                            {
                                continue;
                            }

                            // The date is the part after the first comma; when there is no
                            // comma, fall back to the whole text instead of indexing past
                            // the end of the split result.
                            string[] dateParts = dateNode.InnerText.Trim().Split(',');
                            string rawDate = dateParts.Length > 1 ? dateParts[1] : dateParts[0];
                            string showDate = rawDate.Replace(".", "/").Trim();

                            string showTime = "";
                            foreach (HtmlNode timeNode in timesNode.ChildNodes)
                            {
                                showTime += timeNode.InnerText.Trim() + " ";
                            }

                            tickets.Add(new Ticket(sourceName, strTheater, showDate, showTime));
                        }
                    }
                }
                mv.Tickets = tickets;
            }
            catch (Exception e)
            {
                log.WriteLog("##############################################");
                log.WriteLog("Exception " + e.Source + " : " + e.Message);
            }
            log.WriteLog("Parsed : " + mv.DetailLink);
            return mv;
        }