示例#1
0
        /// <summary>
        /// Prepares an update run for an already stored novel: looks up a reference chapter,
        /// collects the <c>dd</c> nodes of the catalogue page into <c>col</c>, starts
        /// <c>backgroundWorker1</c> with the reference chapter as argument and shows the
        /// progress dialog modally.
        /// </summary>
        /// <param name="log">Catalog entry of the novel; its Id and Url are copied to CataId and CataUrl.</param>
        /// <param name="doc">Parsed catalogue page that is searched for <c>dd</c> elements.</param>
        private void UpdateNovel(Catalog log, HtmlDocument doc)
        {
            col = null;
            using (CrawelNovelDbContext context = new CrawelNovelDbContext())
            {
                // Reference chapter URL: the maximum stored URL when rbBQG is checked, the minimum otherwise.
                var storedChapters = context.Chapter.Where(c => c.NoteBookId.Equals(log.Id));
                string MaxUrl      = rbBQG.Checked
                                     ? storedChapters.Max(c => c.ChapterUrl)
                                     : storedChapters.Min(c => c.ChapterUrl);

                var chap = context.Chapter.FirstOrDefault(f => f.ChapterUrl.Equals(MaxUrl));

                col = doc.DocumentNode.SelectNodes("//dd");
                if (col == null)
                {
                    // SelectNodes yields null (not an empty collection) when nothing matches:
                    // there is nothing to process, so neither the worker nor the dialog is started.
                    return;
                }

                if (rbJPXS.Checked)
                {
                    // For this source the first and the last dd entry are dropped.
                    // Each removal is guarded so a very short list cannot raise an out-of-range error.
                    if (col.Count > 0)
                    {
                        col.RemoveAt(0);
                    }
                    if (col.Count > 0)
                    {
                        col.RemoveAt(col.Count - 1);
                    }
                }

                ProgressCount = col.Count;
                CataUrl       = log.Url;
                CataId        = log.Id;
                this.backgroundWorker1.RunWorkerAsync(chap);  // start the background worker
            }

            ProgressForm form = new ProgressForm(this.backgroundWorker1);  // progress dialog bound to the worker

            form.ShowDialog(this);
        }
示例#2
0
        /// <summary>
        /// Downloads the page at <c>sourceUrl</c> and refills <c>newsItems</c> with up to nine
        /// entries parsed from the children of the <c>lenta_news__day</c> block.
        /// </summary>
        /// <returns>
        /// <c>false</c> when the page cannot be loaded or the news block is missing; otherwise <c>true</c>.
        /// </returns>
        public bool StartParsing()
        {
            newsItems.Clear();
            WebPage page;

            try
            {
                page = web.NavigateToPage(new Uri(sourceUrl));
            }
            catch
            {
                // Best effort: any navigation failure is reported to the caller as "false".
                return(false);
            }

            HtmlNode node = page.Html.SelectSingleNode("//div[@class='lenta_news__day']");
            if (node == null)
            {
                // SelectSingleNode returns null when the block is absent (e.g. changed page layout).
                return(false);
            }

            HtmlNodeCollection childNodes = node.ChildNodes;

            // The first two child nodes are not news entries and are discarded.
            for (int skipped = 0; skipped < 2 && childNodes.Count > 0; skipped++)
            {
                childNodes.RemoveAt(0);
            }

            // Loop-invariant prefix for the relative links found in the block.
            string siteRoot = "https://" + new Uri(sourceUrl).Host;
            int    count    = 0;

            foreach (var child in childNodes)
            {
                if (count > 8)
                {
                    break;
                }
                if (child.Name == "#text")
                {
                    continue;
                }
                if (child.ChildNodes.Count < 2)
                {
                    // The link is read from the second child; skip entries that do not have one.
                    continue;
                }
                newsItems.Add(ParseWebPage(siteRoot + child.ChildNodes[1].GetAttributeValue("href", "")));
                count++;
            }
            return(true);
        }
示例#3
0
        /// <summary>
        /// Trims a chapter list in place by dropping entries from both ends.
        /// </summary>
        /// <param name="chapList">Collection to trim; it is modified and also returned.</param>
        /// <param name="startNode">Number of entries to remove from the front.</param>
        /// <param name="endNode">Number of entries to remove from the back.</param>
        /// <returns>The same <paramref name="chapList"/> instance after trimming.</returns>
        private HtmlNodeCollection RemoveSurplusChap(HtmlNodeCollection chapList, int startNode, int endNode)
        {
            // Leading entries: always take the current head.
            for (int leadingLeft = startNode; leadingLeft > 0; leadingLeft--)
            {
                chapList.RemoveAt(0);
            }

            // Trailing entries: always take the current tail.
            for (int trailingLeft = endNode; trailingLeft > 0; trailingLeft--)
            {
                int lastIndex = chapList.Count - 1;
                chapList.RemoveAt(lastIndex);
            }

            return(chapList);
        }
示例#4
0
        /// <summary>
        /// Searches so.biqusoso.com for the title entered in <c>txtBookName</c> and adds one
        /// <c>ListViewItem</c> per usable result row to <c>listView</c>.
        /// </summary>
        private void SoBookData()
        {
            var title = this.txtBookName.Text;

            // Escape the title so characters such as '&', '#', '+' or spaces cannot corrupt the query string.
            string  url = "https://so.biqusoso.com/s.php?ie=utf-8&siteid=biqukan.com&q=" + System.Uri.EscapeDataString(title ?? string.Empty);
            HtmlWeb web = new HtmlWeb();

            // Load the result page from the URL.
            HtmlAgilityPack.HtmlDocument doc = web.Load(url);
            HtmlNode headNode                = doc.DocumentNode.SelectSingleNode("//ul");

            if (headNode == null)
            {
                return;
            }

            // SelectNodes returns null (not an empty collection) when nothing matches.
            HtmlNodeCollection aCollection = headNode.SelectNodes("li");

            if (aCollection == null || aCollection.Count <= 0)
            {
                return;
            }

            // The first li is not treated as a result row.
            aCollection.RemoveAt(0);
            foreach (var item in aCollection)
            {
                // Evaluate the XPath once per row instead of once per column.
                HtmlNodeCollection spans = item.SelectNodes("span");
                if (spans == null || spans.Count < 3)
                {
                    continue;
                }

                HtmlNode link = spans[1].SelectSingleNode("a");
                if (link == null)
                {
                    // Rows without a link cannot be opened later, so they are skipped.
                    continue;
                }

                ListViewItem tt = new ListViewItem();
                tt.SubItems[0].Text = spans[0].InnerText;
                tt.SubItems.Add(spans[1].InnerText);
                tt.SubItems.Add(spans[2].InnerText);
                tt.SubItems.Add(link.GetAttributeValue("href", string.Empty));
                listView.Add(tt);
            }
        }
示例#5
0
        /// <summary>
        /// Fills the recommendation table from a combined search and, when that search yields
        /// too little (or nothing), from separate searches; finally raises <c>swapScreen</c>
        /// with a new <c>SecondWindow</c>.
        /// </summary>
        public void getAnimeList()
        {
            // Upper bound on how many recommendations are requested.
            const int maxRecommendations = 15;

            // Ids of the anime that may be recommended.
            List <String> candidateIds = getInfo.TblGetIDString(collector.sort_remove_select_gen());

            // Clear anything a previous attempt left in the table.
            getInfo.resetRecommendationTable();

            HtmlNodeCollection searchNodes = getSearchNode(candidateIds);

            if (searchNodes != null)
            {
                searchNodes.RemoveAt(0);
                extractNodes(searchNodes, maxRecommendations);

                // Top up with separate searches when the combined search fell short.
                int shortfall = maxRecommendations - searchNodes.Count;
                if (shortfall > 0)
                {
                    extract_seperateNodes(candidateIds, (int)Math.Round((double)shortfall / 3));
                }
            }
            else
            {
                // No combined result at all: rely on the separate searches only.
                extract_seperateNodes(candidateIds, maxRecommendations / 3);
            }

            swapScreen?.Invoke(this, new SecondWindow());
        }
示例#6
0
        /// <summary>
        /// Populates <c>_cachedElements</c>. When "elements.json" exists the list is built from
        /// <c>Parse()</c>; otherwise the element table is downloaded from science.co.il and scraped.
        /// </summary>
        /// <remarks>
        /// Numbers are parsed with the invariant culture so the result does not depend on the
        /// regional settings of the machine (for example a ',' decimal separator).
        /// NOTE(review): a page without the expected table still fails, because SelectNodes
        /// returns null in that case — confirm whether callers need a softer failure.
        /// </remarks>
        public static void FetchScienceIl()
        {
            if (File.Exists("elements.json"))
            {
                _cachedElements = new List <Element>(Parse());
                return;
            }

            // Only allocated when the table really has to be scraped.
            var results   = new List <Element>();
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            using (var wc = new WebClient())
            {
                const string elementsUrl  = "http://www.science.co.il/PTelements.asp?s=Weight";
                const string elementsPath = "//table[@class='tabint8']/tr";

                string src = wc.DownloadString(elementsUrl);
                var    doc = new HtmlDocument();

                doc.LoadHtml(src);

                HtmlNode           root       = doc.DocumentNode;
                HtmlNodeCollection xPathQuery = root.SelectNodes(elementsPath);

                // The first and the last table row are not element rows.
                xPathQuery.RemoveAt(0);
                xPathQuery.RemoveAt(xPathQuery.Count - 1);

                foreach (var node in xPathQuery)
                {
                    Element    element    = new Element();
                    HtmlNode[] properties = node.Elements("td").ToArray();

                    element.AtomicWeight          = float.Parse(properties[2].InnerText, invariant);
                    element.Name                  = properties[3].InnerText;
                    element.Symbol                = properties[4].InnerText;
                    element.MeltingPoint          = ParseInvariantFloatOrZero(properties[5].InnerText);
                    element.BoilingPoint          = ParseInvariantFloatOrZero(properties[6].InnerText);
                    element.Density               = ParseInvariantFloatOrZero(properties[7].InnerText);
                    element.AbundanceInEarth      = ParseInvariantFloatOrZero(properties[8].InnerText);
                    element.Group                 = int.Parse(properties[10].InnerText, invariant);
                    element.FirstIonizationEnergy = ParseInvariantFloatOrZero(properties[12].InnerText);

                    results.Add(element);
                }
            }

            _cachedElements = new List <Element>(results);
        }

        /// <summary>
        /// Parses a table cell as a culture-invariant float; an empty or null cell counts as 0.
        /// </summary>
        private static float ParseInvariantFloatOrZero(string text)
        {
            return(float.Parse(string.IsNullOrEmpty(text) ? "0" : text, System.Globalization.CultureInfo.InvariantCulture));
        }
        /// <summary>
        /// Parses a station page into a <c>StationDTO</c> with separate departure and arrival lists.
        /// </summary>
        /// <param name="html">Raw HTML of the station page.</param>
        /// <returns>
        /// The parsed station. Both lists are empty when the expected table is missing or has
        /// fewer than two rows.
        /// </returns>
        private static StationDTO ParseStationHTML(string html)
        {
            StationDTO station = new StationDTO();

            station.arrives = new List <StationEntryDTO>();
            station.departs = new List <StationEntryDTO>();

            HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument();

            htmlDocument.LoadHtml(html);
            HtmlNode infoTable = htmlDocument.DocumentNode.SelectSingleNode("html/body/table/tr[2]/td/table");

            // SelectSingleNode / SelectNodes return null when nothing matches.
            HtmlNodeCollection rows = infoTable == null ? null : infoTable.SelectNodes("tr");

            if (rows == null || rows.Count < 2)
            {
                return(station);
            }

            // The first two rows are not timetable entries.
            rows.RemoveAt(0);
            rows.RemoveAt(0);

            bool departing = true;

            foreach (var row in rows)
            {
                // Entry rows carry bgcolor "#f5f8fa". Any other row (including one without the
                // attribute) is treated as a separator, after which entries count as arrivals.
                string background = row.GetAttributeValue("bgcolor", string.Empty);
                if (!string.Equals(background, "#f5f8fa", System.StringComparison.OrdinalIgnoreCase))
                {
                    departing = false;
                    continue;
                }

                HtmlNodeCollection columns = row.SelectNodes("td");
                if (columns == null || columns.Count < 3)
                {
                    // Not enough cells for station / train / time.
                    continue;
                }

                StationEntryDTO entry = new StationEntryDTO();
                entry.station = columns[0].InnerText.Trim();
                entry.train   = columns[1].InnerText.Trim();
                entry.time    = columns[2].InnerText.Trim();

                if (departing)
                {
                    station.departs.Add(entry);
                }
                else
                {
                    station.arrives.Add(entry);
                }
            }

            return(station);
        }
示例#8
0
        /// <summary>
        /// Fetches the page via <c>request()</c> (retrying once a minute, with a beep, while the
        /// result is empty), scans its table rows in groups of three non-empty cells and shows a
        /// balloon tip for every matching block that still has free places.
        /// </summary>
        void parser()
        {
            string html = request();

            while (html == "")
            {
                // Audible error signal, then wait a minute before retrying.
                System.Media.SystemSounds.Beep.Play();
                Thread.Sleep(1000 * 60);
                html = request();
            }

            var document = new HtmlAgilityPack.HtmlDocument();

            document.LoadHtml(html);
            HtmlNodeCollection collection = document.DocumentNode.SelectNodes("//tr");

            // SelectNodes returns null when the page contains no rows at all.
            if (collection == null || collection.Count == 0)
            {
                return;
            }

            // The first row is skipped.
            collection.RemoveAt(0);

            foreach (HtmlNode rows in collection)
            {
                var cells = rows.SelectNodes("th|td");
                if (cells == null)
                {
                    continue;
                }

                // Drop blank cells; iterate backwards so removals do not shift pending indexes.
                for (int i = cells.Count - 1; i >= 0; i--)
                {
                    if (cells[i].InnerHtml.Trim().Length == 0)
                    {
                        cells.RemoveAt(i);
                    }
                }

                // Cells come in triples (group, block, people). Stop before an incomplete
                // trailing triple instead of indexing past the end.
                for (int i = 0; i + 2 < cells.Count; i += 3)
                {
                    string block  = cells[i + 1].InnerText;
                    string people = cells[i + 2].InnerHtml;

                    bool wantedSlot = block.Contains("Mo. 1. Block") || block.Contains("Mo. 5. Block");
                    if (!wantedSlot || !block.Contains("kyb"))
                    {
                        continue;
                    }

                    // "people" is split at '/' into two integers; malformed cells are skipped.
                    var anVer = people.Split('/');
                    int an;
                    int ver;
                    if (anVer.Length < 2 || !int.TryParse(anVer[0], out an) || !int.TryParse(anVer[1], out ver))
                    {
                        continue;
                    }

                    if (an < ver)
                    {
                        notifyIcon1.BalloonTipText  = block + " " + an + "/" + ver + " Plätze";
                        notifyIcon1.BalloonTipTitle = "Gruppe frei!";
                        notifyIcon1.ShowBalloonTip(19000);
                    }
                }
            }
        }
示例#9
0
        /// <summary>
        /// Reads the course table contained in <c>course_result</c> and stores every course
        /// that is not yet recorded for <c>stuid</c>.
        /// </summary>
        public void ProcessCourse()
        {
            var htmlDoc = new HtmlDocument();

            htmlDoc.LoadHtml(course_result);
            HtmlNode           courseTable = htmlDoc.DocumentNode.SelectSingleNode("//table[@class='table listTable']");
            HtmlNodeCollection tableNodes  = courseTable.ChildNodes;
            HtmlNodeCollection courseRows  = new HtmlNodeCollection(courseTable);

            // Keep only the direct tr children of the table.
            for (int i = 0; i < tableNodes.Count; i++)
            {
                HtmlNode candidate = tableNodes[i];
                if (candidate.Name != "tr")
                {
                    continue;
                }
                courseRows.Add(candidate);
            }

            // The first tr is not a course entry, so it is dropped.
            courseRows.RemoveAt(0);

            foreach (var row in courseRows)
            {
                // One string per child node of the row, with all CR/LF/TAB/space characters removed.
                // The fields read below sit at the odd indexes of this list.
                List <String> itemlist = row.ChildNodes
                                         .Select(cell => cell.InnerText.Replace("\r", "").Replace("\n", "").Replace("\t", "").Replace(" ", ""))
                                         .ToList();

                using (var context = new jwContext())
                {
                    // Copied to a local first: using itemlist[1] directly inside the LINQ expression fails.
                    var thisLessonNum = itemlist[1];
                    var existing      = context.Courses.SingleOrDefault(c => c.StuID == stuid && c.LessonNum == thisLessonNum);

                    // Only insert when no record exists for this student and lesson number.
                    if (existing != null)
                    {
                        continue;
                    }

                    context.Courses.Add(new Course
                    {
                        StuID           = stuid,
                        LessonNum       = itemlist[1],
                        LessonName      = itemlist[3],
                        LessonType      = itemlist[5],
                        LearninType     = itemlist[7],
                        TeachingCollege = itemlist[9],
                        Teacher         = itemlist[11],
                        Specialty       = itemlist[13],
                        Credit          = itemlist[15],
                        LessonHours     = itemlist[17],
                        Time            = itemlist[19],
                        Note            = itemlist[21]
                    });
                    context.SaveChanges();
                }
            }
        }
        /// <summary>
        /// Extracts the fares from a price page: the third top-level table holds the
        /// individual fares, the fourth the group fares. In each row, cell 1 is read as the
        /// second-class and cell 2 as the first-class value.
        /// </summary>
        /// <param name="priceResponse">Raw HTML of the price page.</param>
        /// <returns>A <c>PriceDTO</c> filled with the trimmed cell texts.</returns>
        private static PriceDTO ParsePriceResponse(string priceResponse)
        {
            PriceDTO     price        = new PriceDTO();
            HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument();

            htmlDocument.LoadHtml(priceResponse);

            HtmlNodeCollection tables = htmlDocument.DocumentNode.SelectNodes("html/body/table");

            // --- individual fares (table index 2); the first row is dropped ---
            HtmlNodeCollection individualRows = tables[2].SelectNodes("tbody/tr");

            individualRows.RemoveAt(0);

            price.regularFeeSecondClass = ReadPriceCell(individualRows, 0, 1);
            price.regularFeeFirstClass  = ReadPriceCell(individualRows, 0, 2);

            price.decreasedFeeSecondClass = ReadPriceCell(individualRows, 1, 1);
            price.decreasedFeeFirstClass  = ReadPriceCell(individualRows, 1, 2);

            price.decreasedFeeWithFirstIncludedSecondClass = ReadPriceCell(individualRows, 2, 1);
            price.decreasedFeeWithFirstIncludedFirstClass  = ReadPriceCell(individualRows, 2, 2);

            // With exactly four rows, row 3 is the both-ways fare; otherwise row 3 is the
            // relational fare and the both-ways fare follows in row 4.
            int bothWaysRow = 3;

            if (individualRows.Count != 4)
            {
                price.relationalSecondClass = ReadPriceCell(individualRows, 3, 1);
                price.relationalFirstClass  = ReadPriceCell(individualRows, 3, 2);
                bothWaysRow = 4;
            }

            price.bothWaysSecondClass = ReadPriceCell(individualRows, bothWaysRow, 1);
            price.bothWaysFirstClass  = ReadPriceCell(individualRows, bothWaysRow, 2);

            // --- group fares (table index 3); the first row is dropped ---
            HtmlNodeCollection groupRows = tables[3].SelectNodes("tbody/tr");

            groupRows.RemoveAt(0);

            price.groupRegularSecondClass = ReadPriceCell(groupRows, 0, 1);
            price.groupRegularFirstClass  = ReadPriceCell(groupRows, 0, 2);

            price.groupDecreasedSecondClass = ReadPriceCell(groupRows, 1, 1);
            price.groupDecreasedFirstClass  = ReadPriceCell(groupRows, 1, 2);

            return(price);
        }

        /// <summary>
        /// Returns the trimmed inner text of the td at <paramref name="cellIndex"/> in row <paramref name="rowIndex"/>.
        /// </summary>
        private static string ReadPriceCell(HtmlNodeCollection rows, int rowIndex, int cellIndex)
        {
            return(rows[rowIndex].SelectNodes("td")[cellIndex].InnerText.Trim());
        }
        /// <summary>
        /// Parses a train page into a <c>TrainDTO</c>: one stop per remaining table row and one
        /// option per image title found in the last row.
        /// </summary>
        /// <param name="html">Raw HTML of the train page.</param>
        /// <returns>The train with its <c>stops</c> and <c>options</c> lists filled.</returns>
        private static TrainDTO ParseTrainHTML(string html)
        {
            HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument();

            htmlDocument.LoadHtml(html);
            HtmlNode infoTable = htmlDocument.DocumentNode.SelectSingleNode("html/body/table/tr[not (@align)]/td/table");

            HtmlNodeCollection stopRows = infoTable.SelectNodes("tr");

            // Strip the two leading rows and the second-to-last row.
            stopRows.RemoveAt(0);
            stopRows.RemoveAt(0);
            stopRows.RemoveAt(stopRows.Count - 2);

            // The last row holds the option icons; take it out of the stop rows.
            int      lastIndex  = stopRows.Count - 1;
            HtmlNode optionsRow = stopRows[lastIndex];

            stopRows.RemoveAt(lastIndex);

            TrainDTO train = new TrainDTO
            {
                stops   = new List <TrainSimpleStopDTO>(),
                options = new List <string>()
            };

            for (int rowIndex = 0; rowIndex < stopRows.Count; rowIndex++)
            {
                HtmlNodeCollection cells = stopRows[rowIndex].SelectNodes("td");
                var stop = new TrainSimpleStopDTO();
                stop.station = cells[0].InnerText.Trim();
                stop.arrives = cells[1].InnerText.Trim();
                stop.departs = cells[2].InnerText.Trim();
                train.stops.Add(stop);
            }

            // Each img inside the row's first td/span contributes its title attribute.
            HtmlNode optionsSpan = optionsRow.SelectSingleNode("td").SelectSingleNode("span");

            foreach (HtmlNode icon in optionsSpan.SelectNodes("img"))
            {
                train.options.Add(icon.Attributes["title"].Value);
            }

            return(train);
        }
示例#12
0
        /// <summary>
        /// Sets the range fields from the base number, downloads the market page and returns the
        /// rows of the <c>octable</c> table whose HTML contains any of the range values.
        /// </summary>
        /// <param name="marketURL">The market URL</param>
        /// <param name="openMarketBaseNumber">The open market base number</param>
        /// <returns>
        /// The matching rows; an empty collection when the table is missing or has fewer than
        /// three rows.
        /// </returns>
        private HtmlNodeCollection DownloadMarketData(string marketURL, int openMarketBaseNumber)
        {
            // Define the range.
            // NOTE(review): the field names do not match the offsets they receive (for example
            // baseNumberPlus50 holds base + 100 and baseNumberPlus200 holds base - 100). The
            // values are left untouched because code outside this method may depend on them.
            baseNumber         = Math.Round(Convert.ToDecimal(openMarketBaseNumber), 2);
            baseNumberPlus50   = baseNumber + 100;
            baseNumberPlus100  = baseNumber + 200;
            baseNumberPlus150  = baseNumber + 300;
            baseNumberPlus200  = baseNumber - 100;
            baseNumberMinus50  = baseNumber - 200;
            baseNumberMinus100 = baseNumber - 300;

            // Grab all rows
            var htmlWeb = new HtmlWeb();

            HtmlAgilityPack.HtmlDocument htmlDocument = htmlWeb.Load(marketURL);

            HtmlNodeCollection tableRows   = htmlDocument.DocumentNode.SelectNodes("//table[@id=\"octable\"]//tr");
            HtmlNodeCollection workSetRows = new HtmlNodeCollection(null);

            // SelectNodes returns null when nothing matches; three rows are removed below.
            if (tableRows == null || tableRows.Count < 3)
            {
                return(workSetRows);
            }

            // Drop the last row and the first two rows.
            tableRows.RemoveAt(tableRows.Count - 1);
            tableRows.RemoveAt(0);
            tableRows.RemoveAt(0);

            // The search strings do not change per row, so format them once.
            string[] rangeMarkers =
            {
                baseNumber.ToString(),
                baseNumberPlus50.ToString(),
                baseNumberPlus100.ToString(),
                baseNumberMinus50.ToString(),
                baseNumberMinus100.ToString(),
                baseNumberPlus150.ToString(),
                baseNumberPlus200.ToString()
            };

            // Get only those rows which contain a value of the defined range
            foreach (var currentTableRow in tableRows)
            {
                string rowHtml = currentTableRow.InnerHtml;

                foreach (string marker in rangeMarkers)
                {
                    if (rowHtml.Contains(marker))
                    {
                        workSetRows.Add(currentTableRow);
                        break;
                    }
                }
            }

            return(workSetRows);
        }
示例#13
0
        /// <summary>
        /// Crawls the novel page whose address is in <c>txtWebSite</c>: reads name and cover from
        /// the <c>fmimg</c> element, saves a new <c>Catalog</c>, collects the chapter <c>dd</c>
        /// nodes into <c>col</c> and starts <c>backgroundWorker1</c> behind a modal progress dialog.
        /// Nothing is saved when a novel with the same name already exists.
        /// </summary>
        /// <remarks>
        /// NOTE(review): a page without the <c>fmimg</c> element or without any <c>dd</c> node
        /// still fails with a null reference — confirm whether that needs a user-facing message.
        /// </remarks>
        private void CrawJpxsWebSite()
        {
            string       htmlContent = GetContent(txtWebSite.Text);
            HtmlDocument doc         = new HtmlDocument();

            doc.LoadHtml(htmlContent);
            Catalog  log    = new Catalog();
            HtmlNode navFM  = doc.GetElementbyId("fmimg");
            string   ImgUrl = navFM.ChildNodes[0].Attributes["src"].Value;

            log.NoteName = navFM.ChildNodes[0].Attributes["alt"].Value.UrlDecode();

            if (CheckIfExitNovelName(log.NoteName))
            {
                MessageBox.Show("数据库已存在该小说");
                return;
            }
            log.Img        = GetImage(JpxsAddress + ImgUrl);
            log.CreateTime = DateTime.Now;
            log.Url        = txtWebSite.Text;
            col            = doc.DocumentNode.SelectNodes("//dd");

            // The first and the last dd entry are dropped.
            col.RemoveAt(0);
            col.RemoveAt(col.Count - 1);
            CataId        = SaveCatalog(log);
            txtNovel.Text = log.NoteName;

            ProgressCount = col.Count;

            // Detach before attaching: this method can run more than once, and a plain "+="
            // on every call would stack handlers so the work runs several times per crawl.
            this.backgroundWorker1.DoWork             -= backgroundWorker1_DoWork;
            this.backgroundWorker1.DoWork             += backgroundWorker1_DoWork;
            this.backgroundWorker1.RunWorkerCompleted -= backgroundWorker1_RunWorkerCompleted;
            this.backgroundWorker1.RunWorkerCompleted += backgroundWorker1_RunWorkerCompleted;
            this.backgroundWorker1.RunWorkerAsync();                      // start the background worker
            ProgressForm form = new ProgressForm(this.backgroundWorker1); // progress dialog bound to the worker

            form.ShowDialog(this);
        }
示例#14
0
        /// <summary>
        /// Runs one search per id group produced by <c>seperateSearch</c> and passes each
        /// non-null result, minus its first node, to <c>extractNodes</c>.
        /// </summary>
        /// <param name="ids">Ids that are split into groups by <c>seperateSearch</c>.</param>
        /// <param name="limit">Limit forwarded unchanged to <c>extractNodes</c> for every group.</param>
        private void extract_seperateNodes(List <String> ids, int limit)
        {
            foreach (List <String> idGroup in seperateSearch(ids))
            {
                HtmlNodeCollection groupNodes = getSearchNode(idGroup);

                // Groups whose search produced nothing are skipped.
                if (groupNodes != null)
                {
                    groupNodes.RemoveAt(0);
                    extractNodes(groupNodes, limit);
                }
            }
        }
示例#15
0
        /// <summary>
        /// Posts an ISIN search for a company name and returns the result rows of the second
        /// table on the response page, without its first row.
        /// </summary>
        /// <param name="companyName">Company name to search for; it is URL-encoded as euc-kr before sending.</param>
        /// <param name="onlyEquity">Selects security scope "01" when true, "99" otherwise.</param>
        /// <param name="onlyNonListing">Restricts the listing scope; only honoured together with <paramref name="onlyEquity"/>.</param>
        /// <returns>
        /// The data rows, or <c>null</c> when the page is empty, has fewer than two tables, or
        /// the result table contains no row besides its first one.
        /// </returns>
        public static HtmlNodeCollection SearchISIN(string companyName, bool onlyEquity, bool onlyNonListing)
        {
            string securityScope = onlyEquity ? "01" : "99";

            onlyNonListing = onlyEquity && onlyNonListing; // no choice for all security scope
            string listScope = onlyNonListing ? "lst_yn2=N" : "lst_yn_all=on&lst_yn1=Y&lst_yn2=N&lst_yn3=D";

            companyName = HttpUtility.UrlEncode(companyName, Encoding.GetEncoding("euc-kr"));

            string postData = string.Format("kind=&ef_std_cd_grnt_dt_from=&ef_std_cd_grnt_dt_to=&secuGubun={0}"
                                            + "&{1}&els_dls_all=on&els_dls1=els&els_dls2=dls&so_gb_all=on&so_gb1=s&so_gb2=o&jp_gb_all=on"
                                            + "&jp_gb1=c&jp_gb2=t&jp_gb3=r&jp_gb4=i&hg_gb_all=on&hg_gb1=h&hg_gb2=g&tg_gb_all=on&tg_gb1=x&tg_gb2=z&df_gb_all=on&df_gb1=df1"
                                            + "&df_gb2=df2&df_gb3=df3&df_gb4=df4&df_gb5=df5&df_gb6=df6&df_gb7=df7&cb_search_column=co_nm&ef_key_word={2}"
                                            + "&ef_iss_inst_cd=&ef_isu_nm=&ef_iss_dt_from=&ef_iss_dt_to=&ef_lst_dt_from=&ef_lst_dt_to=",
                                            securityScope,
                                            listScope,
                                            companyName);

            AdvancedWebClient wc         = new AdvancedWebClient();
            string            pageSource = WebClientUtil.GetPageSource(wc, queryURL, 300000, postData, Encoding.GetEncoding("euc-kr"));

            if (string.IsNullOrEmpty(pageSource))
            {
                return(null);
            }

            HtmlDocument doc = new HtmlDocument();

            doc.LoadHtml(pageSource);

            // SelectNodes returns null (not an empty collection) when nothing matches, so the
            // result must be null-checked before Count is read. Evaluate the XPath only once.
            HtmlNodeCollection tables = doc.DocumentNode.SelectNodes("//table");

            if (tables == null || tables.Count < 2)
            {
                return(null);
            }

            HtmlNodeCollection records = tables[1].SelectNodes(".//tr");

            // A table with only its first row carries no search results.
            if (records == null || records.Count <= 1)
            {
                return(null);
            }

            records.RemoveAt(0);

            return(records);
        }
        /// <summary>
        /// Downloads the public shelter list, and when it contains rows beyond the first one,
        /// replaces all stored shelters with the scraped ones. Always redirects to "Index".
        /// </summary>
        /// <returns>A redirect to the <c>Index</c> action.</returns>
        public async Task <ActionResult> Synchronize()
        {
            const string listingUrl = "https://211colorado.communityos.org/z_eda/publicshelterassist.taf?function=list";

            var document = await new HtmlWeb().LoadFromWebAsync(listingUrl);

            HtmlNodeCollection rows = document.DocumentNode.SelectNodes("//table[3]/tr/td/table/tr");

            // Nothing to import unless there is at least one row after the first one.
            if (rows == null || rows.Count <= 1)
            {
                return(RedirectToAction("Index"));
            }

            // The first row is skipped.
            rows.RemoveAt(0);

            var shelters = new List <Shelter>();

            foreach (var row in rows)
            {
                List <string> cells = row.SelectNodes("td")
                                      .Select(td => td.InnerText.Replace("&nbsp;", " ").Trim())
                                      .ToList();

                shelters.Add(new Shelter
                {
                    Name                    = cells[0],
                    CurrentPopulation       = intParseDefault(cells[4], 0),
                    SingleSpacesAvailable   = intParseDefault(cells[5], 0),
                    FamilySpacesAvailable   = intParseDefault(cells[6], 0),
                    CurrentTotalSpaces      = intParseDefault(cells[7], 0),
                    ShelterID               = cells[12],
                    AvailabilityLastUpdated = DateTime.UtcNow
                });
            }

            // Replace the stored set: delete everything, save, then insert the fresh rows.
            _dbContext.Shelters.RemoveRange(_dbContext.Shelters);
            _dbContext.SaveChanges();
            _dbContext.Shelters.AddRange(shelters);

            _dbContext.SaveChanges();

            return(RedirectToAction("Index"));
        }
示例#17
0
        /// <summary>
        /// Searches the KRX ISIN site for a company and returns the rows of the third table on
        /// the result page, without its first row.
        /// </summary>
        /// <param name="companyName">Company name inserted into both placeholders of the post-data template.</param>
        /// <param name="onlyEquity">Chooses the equity template file when true, the KDR template otherwise.</param>
        /// <returns>
        /// The data rows, or <c>null</c> when the template file is missing, the page is empty,
        /// it has fewer than three tables, or the result table has no row besides its first one.
        /// </returns>
        public static HtmlNodeCollection SearchISIN(string companyName, bool onlyEquity)
        {
            string uri = @"http://isin.krx.co.kr/srch/srch.do?method=srchList";

            // Preliminary GET against the search page using the shared cookie container.
            // Its body is not used. NOTE(review): presumably this establishes the session
            // cookies needed by the POST below — confirm.
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);

            request.Timeout                    = 300000;
            request.Method                     = "GET";
            request.CookieContainer            = cookies;
            request.Host                       = @"isin.krx.co.kr";
            request.Accept                     = @"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
            request.Headers["Accept-Encoding"] = @"gzip,deflate,sdch";
            request.Headers["Accept-Language"] = @"en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4";
            request.KeepAlive                  = true;
            request.UserAgent                  = @"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36";

            // Dispose the response and reader so the connection is released.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (StreamReader sr = new StreamReader(response.GetResponseStream()))
            {
                sr.ReadToEnd();
            }

            string postDataPath = onlyEquity
                                  ? @"Config\Korea\EquityISINPostData.txt"
                                  : @"Config\Korea\KDRISINPostData.txt";

            if (!File.Exists(postDataPath))
            {
                System.Windows.Forms.MessageBox.Show(string.Format("The file {0} missing.", postDataPath));
                return(null);
            }

            // The template file is a format string with placeholders {0} and {1}; both receive the company name.
            string postData   = string.Format(File.ReadAllText(postDataPath, Encoding.UTF8), companyName, companyName);
            string pageSource = GetPageSource(uri, postData);

            if (string.IsNullOrEmpty(pageSource))
            {
                return(null);
            }

            HtmlDocument doc = new HtmlDocument();

            doc.LoadHtml(pageSource);

            // SelectNodes returns null when nothing matches. The result table is read at
            // index 2, so at least three tables are required.
            HtmlNodeCollection tables = doc.DocumentNode.SelectNodes("//table");

            if (tables == null || tables.Count < 3)
            {
                return(null);
            }

            HtmlNodeCollection records = tables[2].SelectNodes(".//tr");

            // A table with only its first row carries no search results.
            if (records == null || records.Count <= 1)
            {
                return(null);
            }

            records.RemoveAt(0);

            return(records);
        }
示例#18
0
        /// <summary>
        /// Moves every node from <paramref name="index"/> to the end of <paramref name="nodes"/>
        /// under <paramref name="newParent"/> and marks that element as explicitly terminated.
        /// </summary>
        /// <param name="nodes">Collection the nodes are taken from; the moved nodes are removed from it.</param>
        /// <param name="index">Position of the first node to move.</param>
        /// <param name="newParent">Element that receives the nodes.</param>
        private static void MoveNodesDown(ref HtmlNodeCollection nodes, int index, HtmlElement newParent)
        {
            // Size before anything is moved; both passes are bounded by it.
            int total = nodes.Count;

            // Pass 1: append each trailing node to the new parent and re-point its parent link.
            for (int position = index; position < total; position++)
            {
                var moved = nodes[position];
                newParent.Nodes.Add(moved);
                moved.SetParent(newParent);
            }

            // Pass 2: remove the moved nodes. Removing at the same index each time works
            // because the following nodes shift down after every removal.
            for (int remaining = total - index; remaining > 0; remaining--)
            {
                nodes.RemoveAt(index);
            }

            newParent.IsExplicitlyTerminated = true;
        }
        /// <summary>
        /// Reads the score table contained in <c>score_result</c> and stores every graded
        /// course that is not yet recorded for <c>stuid</c>.
        /// </summary>
        public void ProcessScore()
        {
            var htmlDoc = new HtmlDocument();

            htmlDoc.LoadHtml(score_result);
            HtmlNode           scoreTable = htmlDoc.DocumentNode.SelectSingleNode("//table[@class='table listTable']");
            HtmlNodeCollection tableNodes = scoreTable.ChildNodes;
            HtmlNodeCollection scoreRows  = new HtmlNodeCollection(scoreTable);

            // Keep only the direct tr children of the table.
            for (int i = 0; i < tableNodes.Count; i++)
            {
                HtmlNode candidate = tableNodes[i];
                if (candidate.Name == "tr")
                {
                    scoreRows.Add(candidate);
                }
            }

            // The first tr is not a course entry, so it is dropped.
            scoreRows.RemoveAt(0);

            // Characters stripped from every cell text, in this order.
            string[] stripped = { "\r", "\n", "\t", " " };

            foreach (var row in scoreRows)
            {
                // Keep only the td children of the row.
                HtmlNodeCollection cells = new HtmlNodeCollection(row);
                foreach (var child in row.ChildNodes)
                {
                    if (child.Name != "td")
                    {
                        continue;
                    }
                    cells.Add(child);
                }

                var itemlist = new List <String>();
                foreach (var cell in cells)
                {
                    string text = cell.InnerText;
                    foreach (string unwanted in stripped)
                    {
                        text = text.Replace(unwanted, "");
                    }
                    itemlist.Add(text);
                }

                using (var context = new jwContext())
                {
                    // Copied to a local first: using itemlist[0] directly inside the LINQ expression fails.
                    var thisLessonName = itemlist[0];
                    var existing       = context.Scores.SingleOrDefault(s => s.StuID == stuid && s.LessonName == thisLessonName);

                    // Skip courses that are already stored, and courses without a mark yet.
                    if (existing != null || itemlist[10] == "")
                    {
                        continue;
                    }

                    context.Scores.Add(new Score
                    {
                        StuID             = stuid,
                        LessonName        = itemlist[0],
                        LessonType        = itemlist[1],
                        GeneralLessonType = itemlist[2],
                        LessonAttribute   = itemlist[3],
                        Credit            = itemlist[4],
                        TeacherName       = itemlist[5],
                        TeachingCollege   = itemlist[6],
                        LearningType      = itemlist[7],
                        Year = itemlist[8],
                        Term = itemlist[9],
                        Mark = itemlist[10],
                    });
                    context.SaveChanges();
                }
            }
        }
示例#20
0
        /// <summary>
        /// Scrapes one Barnes &amp; Noble search-result page for <paramref name="bookTitle"/>,
        /// appends every matching entry to the <c>dataList</c> member (declared outside this
        /// method) and recurses onto the next page while a "next" pagination item is present.
        /// </summary>
        /// <param name="bookTitle">Title to search for; scraped titles must start with it (letters and apostrophes only, case-insensitive).</param>
        /// <param name="bookType">Forwarded to <c>GetUrl</c>; when it is <c>'N'</c>, entries whose format text contains "NOOK" are removed.</param>
        /// <param name="currPageNum">Result page to request.</param>
        /// <returns>The <c>dataList</c> member; each entry is { title, price, stock status, "Barnes &amp; Noble" }.</returns>
        /// <remarks>
        /// The accumulated list is rewritten to BarnesAndNobleData.txt on every call, including
        /// each recursion level. A <see cref="NullReferenceException"/> raised while reading the
        /// scraped nodes is printed to stderr and terminates the process with exit code 1.
        /// </remarks>
        public static List <string[]> GetBarnesAndNobleData(string bookTitle, char bookType, byte currPageNum)
        {
            // Initialize the html doc for crawling
            HtmlDocument doc = new HtmlDocument();

            EdgeOptions edgeOptions = new EdgeOptions();

            edgeOptions.UseChromium      = true;
            edgeOptions.PageLoadStrategy = PageLoadStrategy.Eager;
            edgeOptions.AddArgument("headless");
            edgeOptions.AddArgument("disable-gpu");
            edgeOptions.AddArgument("disable-extensions");
            edgeOptions.AddArgument("inprivate");
            EdgeDriver edgeDriver = new EdgeDriver(edgeOptions);

            try
            {
                edgeDriver.Navigate().GoToUrl(GetUrl(bookTitle, currPageNum, bookType));
                // Fixed wait before reading the page source (presumably to let scripts render).
                Thread.Sleep(2000);
                doc.LoadHtml(edgeDriver.PageSource);
            }
            finally
            {
                // The browser is only needed to obtain the page source. Quitting here means
                // every recursion level (and the Environment.Exit path below) releases its
                // own browser/driver process instead of only the last page doing so.
                edgeDriver.Quit();
            }

            HtmlNodeCollection titleData = doc.DocumentNode.SelectNodes("//a[@class=' ']");

            Console.WriteLine();
            HtmlNodeCollection priceData       = doc.DocumentNode.SelectNodes("//a[@class=' link']//span[last()]");
            HtmlNodeCollection stockStatusData = doc.DocumentNode.SelectNodes("//div[1][@class='availability-spacing flex']//p");
            HtmlNode           pageCheck       = doc.DocumentNode.SelectSingleNode("//li[@class='pagination__next ']");

            if (bookType == 'N')
            {
                HtmlNodeCollection formatTypeData = doc.DocumentNode.SelectNodes("//span[@class='format']");

                // SelectNodes returns null when nothing matches. Skip the filter in that case
                // and let the guarded section below report a missing title list.
                if (formatTypeData != null && titleData != null)
                {
                    // NOTE(review): only titleData is filtered here; priceData and
                    // stockStatusData keep their original indices - confirm they stay aligned.
                    for (int x = 0; x < formatTypeData.Count; x++)
                    {
                        if (formatTypeData[x].InnerText.IndexOf("NOOK") != -1)
                        {
                            titleData.RemoveAt(x);
                            formatTypeData.RemoveAt(x);
                            x--; // stay on the same index: the next item has shifted into it
                        }
                    }
                }
            }

            try
            {
                string stockStatus, currTitle;
                // Strips everything except lowercase letters and apostrophes before comparing titles.
                Regex  removeExtra = new Regex(@"[^a-z']");
                for (int x = 0; x < titleData.Count; x++)
                {
                    currTitle = titleData[x].InnerText;
                    if (removeExtra.Replace(currTitle.ToLower(), "").IndexOf(removeExtra.Replace(bookTitle.ToLower(), "")) == 0)
                    {
                        // Map the site's wording onto short codes; unknown wording is kept verbatim.
                        stockStatus = stockStatusData[x].InnerText;
                        if (stockStatus.IndexOf("Available Online") != -1)
                        {
                            stockStatus = "IS";
                        }
                        else if (stockStatus.IndexOf("Out of Stock Online") != -1)
                        {
                            stockStatus = "OOS";
                        }
                        else if (stockStatus.IndexOf("Pre-order Now") != -1)
                        {
                            stockStatus = "PO";
                        }

                        dataList.Add(new string[] { currTitle, priceData[x].InnerText.Trim(), stockStatus, "Barnes & Noble" });
                    }
                }

                if (pageCheck != null)
                {
                    // A "next" pagination item exists: continue with the following page.
                    currPageNum++;
                    GetBarnesAndNobleData(bookTitle, bookType, currPageNum);
                }
                else
                {
                    foreach (string link in links)
                    {
                        Console.WriteLine(link);
                    }
                }
            }
            catch (NullReferenceException ex)
            {
                Console.Error.WriteLine(ex);
                Environment.Exit(1);
            }

            using (StreamWriter outputFile = new StreamWriter(@"C:\MangaWebScrape\MangaWebScrape\Data_Files\BarnesAndNobleData.txt"))
            {
                foreach (string[] data in dataList)
                {
                    outputFile.WriteLine(data[0] + " " + data[1] + " " + data[2] + " " + data[3]);
                }
            }

            return(dataList);
        }
示例#21
0
        /// <summary>
        /// Downloads the price page for <paramref name="day"/> and, for each of the zone tabs
        /// (tab_fr, tab_de, tab_ch), writes an XML file containing one row per table row with
        /// the value found in the column belonging to that day.
        /// </summary>
        /// <param name="day">Day to load; only its date part is used to locate the table column.</param>
        /// <remarks>
        /// Best effort: failures are reported through <see cref="System.Diagnostics.Trace"/>
        /// and never propagate to the caller. Nothing is written when the requested date is
        /// not among the dates displayed on the page.
        /// </remarks>
        public void Run(DateTime day)
        {
            bool is25hours = (day.Month == 10 && isLastSunday(day));

            string URL = _baseURL + day.ToString("yyyy-MM-dd") + "/FR";

            try
            {
                _htmlDoc.LoadHtml(_webClient.DownloadString(URL));

                // Collect the dates displayed in the header row of the table.
                HtmlNode           dateRow     = _htmlDoc.DocumentNode.SelectSingleNode("//div[@id='tab_fr']//table[@class='list hours responsive']//tr");
                HtmlNodeCollection headerCells = dateRow == null ? null : dateRow.SelectNodes("th");
                if (headerCells == null)
                {
                    System.Diagnostics.Trace.TraceWarning("Run({0:yyyy-MM-dd}): date header row not found at {1}", day, URL);
                    return;
                }

                List <DateTime> days = new List <DateTime>();
                foreach (HtmlNode col in headerCells)
                {
                    DateTime d;
                    if (DateTime.TryParseExact(col.InnerText + " " + day.Year, "ddd, dd/MM yyyy", new CultureInfo("en-US"), DateTimeStyles.None, out d))
                    {
                        days.Add(d);
                    }
                }

                // Compare on the date part only: the parsed header dates carry no time of day,
                // so a 'day' with a time component would otherwise never match.
                int index = days.IndexOf(day.Date);
                if (index < 0)
                {
                    // Without a matching column every value would be read from the wrong cell
                    // and stored as 0, so skip the export instead of writing bogus files.
                    System.Diagnostics.Trace.TraceWarning("Run({0:yyyy-MM-dd}): date is not displayed at {1}", day, URL);
                    return;
                }

                KeyValuePair <string, int>[] tabIDs = new KeyValuePair <string, int>[]
                {
                    new KeyValuePair <string, int>("tab_fr", 987),
                    new KeyValuePair <string, int>("tab_de", 924),
                    new KeyValuePair <string, int>("tab_ch", 988)
                };

                // The decimal separator is normalised to '.' below, so thousands separators
                // must not be accepted (mirrors the default Number style minus grouping).
                NumberStyles valueStyle = NumberStyles.Number & ~NumberStyles.AllowThousands;

                foreach (KeyValuePair <string, int> tabID in tabIDs)
                {
                    HtmlNodeCollection tab = _htmlDoc.DocumentNode.SelectNodes("//div[@id='" + tabID.Key + "']//table[@class='list hours responsive']//tr[@class='no-border']");
                    if (tab == null)
                    {
                        // SelectNodes returns null when nothing matches; keep going with the other zones.
                        System.Diagnostics.Trace.TraceWarning("Run({0:yyyy-MM-dd}): no rows found for {1}", day, tabID.Key);
                        continue;
                    }

                    // The requested day has 24 hours but the table also contains the row of the 25th hour.
                    if (!is25hours && tab.Count == 25)
                    {
                        tab.RemoveAt(3);
                    }

                    DataTable dt = initTable();

                    int i = 0;
                    foreach (HtmlNode row in tab)
                    {
                        // index is 0-based and the first 2 columns are headers, hence the offset of 3
                        // (XPath positions are 1-based).
                        HtmlNode mgpVal = row.SelectSingleNode("td[" + (3 + index) + "]");
                        DataRow  newRow = dt.NewRow();

                        i++;
                        newRow["Zona"] = tabID.Value;
                        newRow["Data"] = day.ToString("yyyyMMdd") + i.ToString("00");
                        newRow["Mgp"]  = 0;

                        // Parse independently of the machine's culture: accept either ',' or '.'
                        // as the decimal separator. Missing or unparsable cells keep the default 0.
                        decimal tmp;
                        if (mgpVal != null &&
                            Decimal.TryParse(mgpVal.InnerText.Replace(',', '.'), valueStyle, CultureInfo.InvariantCulture, out tmp))
                        {
                            newRow["MGP"] = tmp;
                        }

                        dt.Rows.Add(newRow);
                    }

                    if (dt.Rows.Count > 0)
                    {
                        // Write the table where the loader picks it up.
                        string path = Path.Combine(_basePath, day.ToString("yyyyMMdd") + "_" + tabID.Value + ".xml");
                        dt.WriteXml(path);
                    }
                }
            }
            catch (Exception ex)
            {
                // Still best effort (callers never see an exception), but no longer silent.
                System.Diagnostics.Trace.TraceError("Run({0:yyyy-MM-dd}) failed for {1}: {2}", day, URL, ex);
            }
        }
示例#22
0
        /// <summary>
        /// Fetches the list of novels matching the current keyword (<c>_str_KeyWord</c>)
        /// from the search page at <c>_url_Search</c>.
        /// </summary>
        /// <returns>
        /// One entry per result row, or <c>null</c> when the keyword is empty, the page has no
        /// content, the result list contains nothing besides its header row, or loading or
        /// parsing the page fails.
        /// </returns>
        public List <tb_fiction_info> _o_Get_Fiction_Info_By_KeyWord()
        {
            // Nothing to search for without a keyword.
            if (string.IsNullOrEmpty(_str_KeyWord))
            {
                return(null);
            }

            List <tb_fiction_info> _ltfi_ret = new List <tb_fiction_info>();

            HtmlWeb _web_Main = new HtmlWeb();

            _web_Main.OverrideEncoding = Encoding.UTF8;
            try
            {
                HtmlAgilityPack.HtmlDocument _doc_Main = _web_Main.Load(_url_Search + _str_KeyWord);
                // Bail out when the page came back empty.
                if (string.IsNullOrEmpty(_doc_Main.Text))
                {
                    return(null);
                }

                // Result rows. SelectNodes returns null (not an empty collection) when nothing
                // matches, and the first row is the table header, so more than one row is
                // required for there to be any data.
                HtmlNodeCollection _hnc_Search_List = _doc_Main.DocumentNode.SelectNodes("//div[starts-with(@class,'search-list')]/ul/li");
                if (_hnc_Search_List == null || _hnc_Search_List.Count <= 1)
                {
                    return(null);
                }

                // Start at 1 to skip the header row.
                for (int _i = 1; _i < _hnc_Search_List.Count; _i++)
                {
                    // Each row is re-parsed as its own document so the "//span" queries below
                    // only see this row's cells.
                    HtmlAgilityPack.HtmlDocument _doc_One = new HtmlAgilityPack.HtmlDocument();
                    _doc_One.LoadHtml(_hnc_Search_List[_i].InnerHtml);
                    HtmlNode        _root = _doc_One.DocumentNode;
                    tb_fiction_info _tfi  = new tb_fiction_info();

                    // Novel category (the surrounding square brackets are stripped).
                    HtmlNode _hn_Fiction_Type = _root.SelectSingleNode("//span[starts-with(@class,'s1')]");
                    if (_hn_Fiction_Type != null)
                    {
                        _tfi.col_fiction_type = _hn_Fiction_Type.InnerText.Replace("[", "").Replace("]", "");
                    }

                    // Novel name and home page link.
                    HtmlNode _hn_Fiction_Name_URL = _root.SelectSingleNode("//span[starts-with(@class,'s2')]/a");
                    if (_hn_Fiction_Name_URL != null)
                    {
                        _tfi.col_fiction_name = _hn_Fiction_Name_URL.InnerText.Trim();
                        _tfi.col_url_homepage = _hn_Fiction_Name_URL.Attributes["href"].Value;
                    }

                    // Latest chapter and its link.
                    HtmlNode _hn_Update_Chapter_URL = _root.SelectSingleNode("//span[starts-with(@class,'s3')]/a");
                    if (_hn_Update_Chapter_URL != null)
                    {
                        _tfi.col_update_chapter     = _hn_Update_Chapter_URL.InnerText;
                        _tfi.col_update_chapter_url = _hn_Update_Chapter_URL.Attributes["href"].Value;
                    }

                    // Author.
                    HtmlNode _hn_Fiction_Author = _root.SelectSingleNode("//span[starts-with(@class,'s4')]");
                    if (_hn_Fiction_Author != null)
                    {
                        _tfi.col_fiction_author = _hn_Fiction_Author.InnerText;
                    }

                    // Click count.
                    HtmlNode _hn_Click_Count = _root.SelectSingleNode("//span[starts-with(@class,'s5')]");
                    if (_hn_Click_Count != null)
                    {
                        _tfi.col_click_count = _hn_Click_Count.InnerText;
                    }

                    // Last update time. A malformed timestamp leaves the field at its default
                    // instead of discarding every result through the catch-all below.
                    HtmlNode _hn_Update_Time = _root.SelectSingleNode("//span[starts-with(@class,'s6')]");
                    DateTime _dt_Update_Time;
                    if (_hn_Update_Time != null && DateTime.TryParse(_hn_Update_Time.InnerText, out _dt_Update_Time))
                    {
                        _tfi.col_update_time = _dt_Update_Time;
                    }

                    // Novel status.
                    HtmlNode _hn_Fiction_Stata = _root.SelectSingleNode("//span[starts-with(@class,'s7')]");
                    if (_hn_Fiction_Stata != null)
                    {
                        _tfi.col_fiction_stata = _hn_Fiction_Stata.InnerText;
                    }
                    _tfi.col_fiction_source = "笔趣阁";

                    _ltfi_ret.Add(_tfi);
                }
                return(_ltfi_ret);
            }
            catch
            {
                // Callers treat null as "no result"; network and parse failures map onto it.
                return(null);
            }
        }
示例#23
0
        /// <summary>
        /// Loads the chapter index page at <paramref name="address"/> and returns its chapters
        /// as title/address pairs.
        /// </summary>
        /// <param name="address">URL of the page that lists the chapters.</param>
        /// <returns>
        /// The chapters in page order (reversed when <c>isReverse</c> is set), or <c>null</c>
        /// when <c>GetNode</c> rejects the address or any error occurs; in the error case the
        /// exception message is shown in a message box.
        /// </returns>
        public List <Info> GetChapList(string address)
        {
            try
            {
                if (!GetNode(address))
                {
                    return(null);
                }
                HtmlWeb            web      = WebConfig(isEncodeGB2312);
                var                doc      = web.Load(address);
                HtmlNodeCollection chapList = doc.DocumentNode.SelectNodes(chapListNode);

                if (startNode > 0 || endNode > 0)
                {
                    chapList = RemoveSurplusChap(chapList, startNode, endNode);
                }

                // Cells laid out right-to-left arrive as (3, 2, 1, 6, 5, 4, ...); put them back
                // into reading order (1, 2, 3, 4, 5, 6, ...).
                List <Info> listInfo = new List <Info>();
                if (isRightToLeft)
                {
                    for (int i = 0; i < chapList.Count; i += 3)
                    {
                        // Reverse each row of three by swapping its outer cells. The index is
                        // clamped so a trailing incomplete row is reversed over the cells it
                        // actually has instead of indexing past the end of the collection.
                        int      last = Math.Min(i + 2, chapList.Count - 1);
                        HtmlNode temp = chapList[i];
                        chapList[i]    = chapList[last];
                        chapList[last] = temp;
                    }

                    // Collect the "&nbsp;" placeholder cells first. This is done sequentially:
                    // List<int>.Add is not safe to call from several threads at once, and the
                    // result is already in ascending order.
                    List <int> deleteChap = new List <int>();
                    for (int i = 0; i < chapList.Count; i++)
                    {
                        if (chapList[i].InnerHtml == "&nbsp;")
                        {
                            deleteChap.Add(i);
                        }
                    }

                    // Remove from the back so the remaining indices stay valid.
                    for (int i = deleteChap.Count - 1; i >= 0; i--)
                    {
                        chapList.RemoveAt(deleteChap[i]);
                    }
                }

                for (int i = 0; i < chapList.Count; i++)
                {
                    Info     info = new Info();
                    HtmlNode data = chapList[i];
                    if (isRightToLeft)
                    {
                        // In this layout the link is a child of the matched cell.
                        HtmlNode link = data.SelectSingleNode("a");
                        info.Title   = link.InnerText;
                        info.Address = chapAddressNode + link.Attributes["href"].Value;
                    }
                    else
                    {
                        // Otherwise the matched node itself carries the text and the href.
                        info.Title   = data.InnerText;
                        info.Address = chapAddressNode + data.Attributes["href"].Value;
                    }

                    listInfo.Add(info);
                }

                if (isReverse)
                {
                    listInfo.Reverse();
                }

                return(listInfo);
            }
            catch (Exception e)
            {
                MessageBox.Show(e.Message, "Lỗi", MessageBoxButtons.OK, MessageBoxIcon.Error);
                return(null);
            }
        }