Beispiel #1
0
        /// <summary>
        /// Reads a UTF-8 text file containing one record per line, with the fields of
        /// each record separated by tab characters, and converts every non-empty line
        /// into a <see cref="PracticeInfo"/>.
        /// </summary>
        /// <param name="file">Path of the file to read.</param>
        /// <returns>The parsed records, in the order they appear in the file.</returns>
        /// <remarks>
        /// The mapping from field position to <see cref="PracticeInfo"/> member is
        /// delegated to <c>SetPracticeInfoByString</c>.
        /// </remarks>
        public List <PracticeInfo> getPracticeInfoFromFile(string file)
        {
            string data;

            // 'using' guarantees the reader is released even when ReadToEnd throws.
            using (StreamReader sr = new StreamReader(file, Encoding.UTF8))
            {
                data = sr.ReadToEnd();
            }

            List <PracticeInfo> list = new List <PracticeInfo>();

            // Split into lines, then split each line into its tab-separated fields.
            foreach (string rawLine in data.Split(new char[] { '\n' }))
            {
                // Files written with Windows line endings leave a '\r' at the end of
                // every line; strip it so it does not end up in the last field.
                string line = rawLine.TrimEnd('\r');

                // Skip blank lines (typically the empty entry produced by the trailing
                // newline) instead of assuming the last entry is always empty, so a
                // file without a final newline does not lose its last record.
                if (line.Length == 0)
                {
                    continue;
                }

                PracticeInfo tempPrac = new PracticeInfo();
                string[]     fields   = line.Split(new char[] { '\t' });
                SetPracticeInfoByString(tempPrac, fields);
                list.Add(tempPrac);
            }

            return(list);
        }
Beispiel #2
0
 /// <summary>
 /// Copies the first seven entries of <paramref name="contentlist"/> into the
 /// members of <paramref name="prf"/>, in this order: name, request, duty,
 /// location, compname, compinfo, compweb.
 /// </summary>
 /// <param name="prf">The object whose members are overwritten.</param>
 /// <param name="contentlist">
 /// Field values; must contain at least seven entries. Extra entries are ignored.
 /// </param>
 /// <exception cref="ArgumentNullException">
 /// <paramref name="prf"/> or <paramref name="contentlist"/> is <c>null</c>.
 /// </exception>
 /// <exception cref="ArgumentException">
 /// <paramref name="contentlist"/> has fewer than seven entries.
 /// </exception>
 public void SetPracticeInfoByString(PracticeInfo prf, String[] contentlist)
 {
     // Validate everything up front so a bad record never leaves prf half-filled.
     if (prf == null)
     {
         throw new ArgumentNullException("prf");
     }
     if (contentlist == null)
     {
         throw new ArgumentNullException("contentlist");
     }

     const int requiredFields = 7;
     if (contentlist.Length < requiredFields)
     {
         throw new ArgumentException(
             "Expected at least " + requiredFields + " fields but got " + contentlist.Length + ".",
             "contentlist");
     }

     prf.name     = contentlist[0];
     prf.request  = contentlist[1];
     prf.duty     = contentlist[2];
     prf.location = contentlist[3];
     prf.compname = contentlist[4];
     prf.compinfo = contentlist[5];
     prf.compweb  = contentlist[6];
 }
Beispiel #3
0
        /// <summary>
        /// Fetches one page per job and returns a List&lt;PracticeInfo&gt; holding the
        /// fields extracted from each page.
        /// </summary>
        /// <param name="ID">Job ids; each one is sent as the <c>jobId</c> query value.</param>
        /// <param name="ComID">
        /// Ids placed in the URL path; entry <c>i</c> is paired with <c>ID[i]</c>, so
        /// this list needs at least as many entries as <paramref name="ID"/>.
        /// </param>
        /// <returns>One <see cref="PracticeInfo"/> per entry of <paramref name="ID"/>, in order.</returns>
        /// <remarks>
        /// Extraction is purely positional: it searches for fixed marker strings and
        /// skips fixed character counts after them. When a marker is absent the
        /// <c>Substring</c> calls may throw <c>ArgumentOutOfRangeException</c> or
        /// yield meaningless text; that behaviour is intentionally left as it was.
        /// NOTE(review): <c>GetContent</c> and <c>Insurance</c> are defined elsewhere;
        /// presumably they download the page and sanitise a field value — confirm.
        /// </remarks>
        public List <PracticeInfo> getPracticeInfo(List <int> ID, List <int> ComID)
        {
            List <PracticeInfo> practiceInfo = new List <PracticeInfo>();

            for (int i = 0; i < ID.Count; i++)
            {
                PracticeInfo tempInfo = new PracticeInfo();

                string url     = "https://www.nowcoder.com/recommend-intern/" + ComID[i] + "?jobId=" + ID[i];
                string content = GetContent(url);

                // Replace HTML non-breaking-space entities. The complete "&nbsp;" form
                // is handled first so its ';' is not left behind in the text; the bare
                // "&nbsp" form is still replaced afterwards, as before.
                content = content.Replace("&nbsp;", " ").Replace("&nbsp", " ");

                // Job title: the text that starts 14 characters after the start of
                // "rec-job" and runs up to the next tag.
                content       = content.Substring(content.IndexOf("rec-job", PracticeMarkerComparison) + 14);
                tempInfo.name = Insurance(content.Substring(0, content.IndexOf("<", PracticeMarkerComparison)));

                // Job duties: from the "岗位职责" heading through the closing </dl>.
                content       = content.Substring(content.IndexOf("岗位职责", PracticeMarkerComparison));
                tempInfo.duty = Insurance(ExtractTagText(SliceThrough(content, "</dl>")));

                // Job requirements: from the "岗位要求" heading through the closing </dl>.
                content          = content.Substring(content.IndexOf("岗位要求", PracticeMarkerComparison));
                tempInfo.request = Insurance(ExtractTagText(SliceThrough(content, "</dl>")));

                // Company name
                content           = content.Substring(content.IndexOf("teacher-name", PracticeMarkerComparison) + 14);
                tempInfo.compname = Insurance(content.Substring(0, content.IndexOf("<", PracticeMarkerComparison)));

                // Location
                content           = content.Substring(content.IndexOf("com-lbs", PracticeMarkerComparison) + 9);
                tempInfo.location = Insurance(content.Substring(0, content.IndexOf("<", PracticeMarkerComparison)));

                // Company description: start 12 characters before "com-detail"
                // (presumably to include the opening tag — confirm against the page
                // markup) and run through the closing </p>.
                content           = content.Substring(content.IndexOf("com-detail", PracticeMarkerComparison) - 12);
                tempInfo.compinfo = Insurance(ExtractTagText(SliceThrough(content, "</p>")));

                // Company website: from the next "http" up to the closing quote.
                content          = content.Substring(content.IndexOf("http", PracticeMarkerComparison));
                tempInfo.compweb = Insurance(content.Substring(0, content.IndexOf("\"", PracticeMarkerComparison)));

                practiceInfo.Add(tempInfo);
            }
            return(practiceInfo);
        }

        // Matches the text between a '>' and the following '<'. Built once and
        // reused instead of being re-created for every job.
        private static readonly Regex PracticeTagTextRegex = new Regex("(?<=>).*?(?=<)", RegexOptions.None);

        // The markers are literal page fragments, so they are matched ordinally
        // rather than with culture-sensitive comparison rules.
        private const System.StringComparison PracticeMarkerComparison = System.StringComparison.Ordinal;

        // Returns the start of 'content' up to and including the first occurrence of 'terminator'.
        private static string SliceThrough(string content, string terminator)
        {
            return content.Substring(0, content.IndexOf(terminator, PracticeMarkerComparison) + terminator.Length);
        }

        // Concatenates every piece of text found between a '>' and the next '<' in an HTML fragment.
        private static string ExtractTagText(string html)
        {
            // Newlines are removed so they do not interfere with the regex ('.' does
            // not match '\n'); tabs are removed because they would interfere with
            // splitting when the record is written out.
            string flattened = html.Replace("\n", "").Replace("\t", "");

            System.Text.StringBuilder text = new System.Text.StringBuilder();
            foreach (Match found in PracticeTagTextRegex.Matches(flattened))
            {
                text.Append(found.Value);
            }
            return text.ToString();
        }