Exemplo n.º 1
0
        /// <summary>
        /// Builds the subject list exports (one Excel workbook and one XML file) from the
        /// locally saved pages referenced by <paramref name="listSheet"/>.
        /// </summary>
        /// <remarks>
        /// Sheet rows are visited from last to first, and inside each page the matched
        /// table rows are visited from bottom to top, stopping before row 0 (presumably a
        /// header row; verify against the page markup). Every subject yields one Excel row
        /// plus one XML row per entry of the fixed comment-user list. Any exception raised
        /// while reading or parsing a page propagates to the caller unchanged.
        /// </remarks>
        /// <param name="listSheet">Sheet whose rows carry the page URL and the give-up flag.</param>
        /// <returns><c>true</c> whenever the method returns normally.</returns>
        private bool GenerateNewPage(IListSheet listSheet)
        {
            bool   succeed       = true;
            string exportDir     = this.RunPage.GetExportDir();
            string pageSourceDir = this.RunPage.GetDetailSourceFileDir();

            // Column layout of the Excel workbook.
            Dictionary<string, int> subjectColumnDic = CommonUtil.InitStringIndexDic(new string[] {
                "detailPageUrl",
                "detailPageName",
                "cookie",
                "grabStatus",
                "giveUpGrab",
                "subjectIndex",
                "id",
                "title",
                "googleUrl",
                "creator",
                "createDate",
                "messageCount",
                "authorCount"
            });
            string      subjectFileExcelPath = Path.Combine(exportDir, this.RunPage.Project.Name + "_List.xlsx");
            ExcelWriter subjectEW            = new ExcelWriter(subjectFileExcelPath, "List", subjectColumnDic);

            // Column layout of the XML file.
            Dictionary<string, int> subjectImportColumnDic = CommonUtil.InitStringIndexDic(new string[] {
                "index",
                "id",
                "creator",
                "createDate",
                "commentPublisher",
                "commentPublished",
                "googleUrl",
                "title",
                "url"
            });
            string    subjectFileXmlPath = Path.Combine(exportDir, this.RunPage.Project.Name + "_List.xml");
            XmlWriter subjectXW          = new XmlWriter(subjectFileXmlPath, subjectImportColumnDic);

            string[] commentUsers = new string[] { "sunhua", "shizhengzhong", "liyuzhu" };

            // Running 1-based counter shared by the Excel and XML rows of one subject.
            int subjectIndex = 1;

            for (int i = listSheet.RowCount - 1; i >= 0; i--)
            {
                Dictionary<string, string> row = listSheet.GetRow(i);
                if ("Y".Equals(row[SysConfig.GiveUpGrabFieldName]))
                {
                    continue;
                }

                string url           = row[SysConfig.DetailPageUrlFieldName];
                string localFilePath = this.RunPage.GetFilePath(url, pageSourceDir);

                // The using block releases the file handle on success as well as on failure;
                // exceptions are left to propagate so their original stack trace is kept.
                string webPageHtml;
                using (TextReader tr = new StreamReader(localFilePath))
                {
                    webPageHtml = tr.ReadToEnd();
                }

                HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
                htmlDoc.LoadHtml(webPageHtml);
                HtmlNodeCollection subjectNodes = htmlDoc.DocumentNode.SelectNodes("//body[1]/table[1]/tr");
                if (subjectNodes == null)
                {
                    continue;
                }

                for (int j = subjectNodes.Count - 1; j >= 1; j--)
                {
                    HtmlNode subjectNode = subjectNodes[j];

                    // NOTE: a row without a subject link (or without href) fails here with a
                    // NullReferenceException, exactly as before.
                    HtmlNode titleNode      = subjectNode.SelectSingleNode("./td[@class=\"subject\"]/a");
                    string   title          = CommonUtil.HtmlDecode(CommonUtil.ReplaceAsciiByString(titleNode.InnerText.Trim()));
                    string   detailPageUrl  = titleNode.Attributes["href"].Value.Trim();
                    string   detailPageName = detailPageUrl.Substring(detailPageUrl.LastIndexOf("/") + 1);

                    // Author and date cells are optional; missing cells become empty strings.
                    HtmlNode authorNode       = subjectNode.SelectSingleNode("./td[@class=\"author\"]");
                    string   author           = authorNode == null ? "" : CommonUtil.HtmlDecode(CommonUtil.ReplaceAsciiByString(authorNode.InnerText.Trim()));
                    HtmlNode lastPostDateNode = subjectNode.SelectSingleNode("./td[@class=\"lastPostDate\"]");
                    string   lastPostDate     = lastPostDateNode == null ? "" : lastPostDateNode.InnerText.Trim();

                    Dictionary<string, string> commentF2vs = new Dictionary<string, string>();
                    commentF2vs.Add("subjectIndex", subjectIndex.ToString());
                    commentF2vs.Add("detailPageUrl", detailPageUrl);
                    commentF2vs.Add("detailPageName", detailPageName);
                    commentF2vs.Add("id", detailPageName);
                    commentF2vs.Add("title", title);
                    commentF2vs.Add("googleUrl", detailPageUrl);
                    commentF2vs.Add("creator", author);
                    commentF2vs.Add("createDate", lastPostDate);
                    subjectEW.AddRow(commentF2vs);

                    // One XML row per comment user, all marked as not yet published.
                    foreach (string user in commentUsers)
                    {
                        Dictionary<string, string> commentXF2vs = new Dictionary<string, string>();
                        commentXF2vs.Add("index", subjectIndex.ToString());
                        commentXF2vs.Add("id", user + "_" + detailPageName);
                        commentXF2vs.Add("title", title);
                        commentXF2vs.Add("url", detailPageName + ".html");
                        commentXF2vs.Add("googleUrl", detailPageUrl);
                        commentXF2vs.Add("creator", author);
                        commentXF2vs.Add("createDate", lastPostDate);
                        commentXF2vs.Add("commentPublisher", user);
                        commentXF2vs.Add("commentPublished", "No");
                        subjectXW.AddRow(commentXF2vs);
                    }

                    subjectIndex++;
                }
            }

            subjectXW.SaveToDisk();
            subjectEW.SaveToDisk();
            return succeed;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds the per-subject exports from the locally saved detail pages referenced by
        /// <paramref name="listSheet"/>: a subject list (Excel and XML), an all-comments
        /// Excel workbook, a bordered copy of each page's HTML, and one title text file per
        /// subject.
        /// </summary>
        /// <remarks>
        /// Rows flagged as given up are skipped. A failure while processing one row is
        /// logged through <c>RunPage.InvokeAppendLogText</c> and the loop moves on to the
        /// next row; the failure does not change the return value.
        /// </remarks>
        /// <param name="listSheet">Sheet whose rows carry index, URL, title, creator and create date.</param>
        /// <returns><c>true</c> whenever the method returns normally.</returns>
        private bool GenerateNewPage(IListSheet listSheet)
        {
            bool   succeed            = true;
            string exportDir          = this.RunPage.GetExportDir();
            string pageSourceDir      = this.RunPage.GetDetailSourceFileDir();
            string outputTitleTextDir = Path.Combine(exportDir, "titleText");

            // Same column layout is used for the Excel workbook and the XML file.
            Dictionary<string, int> subjectColumnDic = CommonUtil.InitStringIndexDic(new string[] {
                "index",
                "id",
                "title",
                "url",
                "googleUrl",
                "creator",
                "createDate",
                "messageCount",
                "authorCount",
                "commentPublisher",
                "commentPublished"
            });
            string      subjectFileExcelPath = Path.Combine(exportDir, this.RunPage.Project.Name + "_List.xlsx");
            ExcelWriter subjectEW            = new ExcelWriter(subjectFileExcelPath, "List", subjectColumnDic);

            string    subjectFileXmlPath = Path.Combine(exportDir, this.RunPage.Project.Name + "_List.xml");
            XmlWriter subjectXW          = new XmlWriter(subjectFileXmlPath, subjectColumnDic);

            Dictionary<string, int> allCommentsColumnDic = new Dictionary<string, int>();
            allCommentsColumnDic.Add("subjectIndex", 0);
            allCommentsColumnDic.Add("googleUrl", 1);
            allCommentsColumnDic.Add("creator", 2);
            allCommentsColumnDic.Add("author", 3);
            allCommentsColumnDic.Add("lastPostDate", 4);
            string      allCommentsFilePath = Path.Combine(exportDir, this.RunPage.Project.Name + "_AllComments.xlsx");
            ExcelWriter allCommentsListEW   = new ExcelWriter(allCommentsFilePath, "List", allCommentsColumnDic);

            // Each subject is written to the XML file once per publisher listed here.
            string[] commentPublishers = new string[] { "sunhua", "shizhengzhong", "liyuzhu" };

            for (int i = 0; i < listSheet.RowCount; i++)
            {
                Dictionary<string, string> row = listSheet.GetRow(i);
                if ("Y".Equals(row[SysConfig.GiveUpGrabFieldName]))
                {
                    continue;
                }

                string index         = row[SysConfig.ListPageIndexFieldName].PadLeft(4, '0');
                string googleUrl     = row[SysConfig.DetailPageUrlFieldName];
                string title         = row["title"];
                string creator       = row["creator"];
                string createDate    = row["createDate"];
                string localFilePath = this.RunPage.GetFilePath(googleUrl, pageSourceDir);

                try
                {
                    // Scope the reader to the read itself so the file handle is released on
                    // success as well as on failure.
                    string webPageHtml;
                    using (TextReader tr = new StreamReader(localFilePath))
                    {
                        webPageHtml = tr.ReadToEnd();
                    }

                    HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
                    htmlDoc.LoadHtml(webPageHtml);

                    // The parsed value is not used further down, but the parse is kept: a page
                    // without the "... of N messages" marker fails here and is logged below.
                    string messagesNumStr = htmlDoc.DocumentNode.SelectSingleNode("//body/i").InnerText;
                    int    ofIndex        = messagesNumStr.IndexOf("of");
                    int    messagesIndex  = messagesNumStr.IndexOf("messages");
                    string messageCount   = messagesNumStr.Substring(ofIndex + 2, messagesIndex - ofIndex - 2).Trim();

                    List<string> authorNames = new List<string>();

                    HtmlNodeCollection messageNodes = htmlDoc.DocumentNode.SelectNodes("//body/table/tr");
                    // SelectNodes yields null when nothing matches; count that as zero rows.
                    int messageNodeCount = messageNodes == null ? 0 : messageNodes.Count;

                    for (int j = 0; j < messageNodeCount; j++)
                    {
                        HtmlNode messageNode      = messageNodes[j];
                        HtmlNode authorNode       = messageNode.SelectSingleNode("./td[2]");
                        HtmlNode lastPostDateNode = messageNode.SelectSingleNode("./td[3]");
                        if (authorNode == null)
                        {
                            continue;
                        }

                        string author = HttpUtility.HtmlDecode(authorNode.InnerText).Trim();
                        if (j == 0)
                        {
                            // The author of the first table row replaces the creator from the sheet.
                            creator = author;
                        }
                        if (!authorNames.Contains(author))
                        {
                            authorNames.Add(author);
                        }
                        string lastPostDate = lastPostDateNode == null ? "" : lastPostDateNode.InnerText;

                        Dictionary<string, string> commentF2vs = new Dictionary<string, string>();
                        commentF2vs.Add("subjectIndex", index);
                        commentF2vs.Add("googleUrl", googleUrl);
                        commentF2vs.Add("creator", creator);
                        commentF2vs.Add("author", author);
                        commentF2vs.Add("lastPostDate", lastPostDate);
                        allCommentsListEW.AddRow(commentF2vs);
                    }

                    // Modify the HTML content: give the table a visible border.
                    // SetAttributeValue also covers tables that have no border attribute yet.
                    HtmlNode tableNode = htmlDoc.DocumentNode.SelectSingleNode("//body/table");
                    tableNode.SetAttributeValue("border", "1");
                    int    localHtmlUrlStartIndex = googleUrl.IndexOf("/idempiere/") + "/idempiere/".Length;
                    string htmlLocalName          = CommonUtil.ProcessFileName(googleUrl.Substring(localHtmlUrlStartIndex), "_") + ".html";
                    string htmlLocalUrl           = Path.Combine(Path.GetDirectoryName(pageSourceDir), "export\\html\\" + htmlLocalName);
                    CommonUtil.CreateFileDirectory(htmlLocalUrl);
                    htmlDoc.Save(htmlLocalUrl);

                    Dictionary<string, string> f2vs = new Dictionary<string, string>();
                    f2vs.Add("index", index);
                    f2vs.Add("title", title);
                    f2vs.Add("googleUrl", googleUrl);
                    f2vs.Add("url", htmlLocalName);
                    f2vs.Add("creator", creator);
                    f2vs.Add("createDate", createDate);
                    f2vs.Add("messageCount", messageNodeCount.ToString());
                    f2vs.Add("authorCount", authorNames.Count.ToString());
                    f2vs.Add("commentPublisher", "");
                    f2vs.Add("commentPublished", "");
                    f2vs.Add("id", "");

                    // The Excel row keeps the publisher fields empty ...
                    IRow newPageListRow = subjectEW.AddRow(f2vs);

                    // ... while the XML file gets one unpublished row per publisher,
                    // reusing the same dictionary with the publisher fields overwritten.
                    f2vs["commentPublished"] = "No";
                    foreach (string publisher in commentPublishers)
                    {
                        f2vs["commentPublisher"] = publisher;
                        f2vs["id"] = publisher + index;
                        subjectXW.AddRow(f2vs);
                    }

                    // Link the Excel "url" cell to the saved HTML copy.
                    ICell      localUrlCell = subjectEW.GetCell(newPageListRow, "url", true);
                    IHyperlink hyperlink    = new XSSFHyperlink(HyperlinkType.File);
                    hyperlink.Address      = "html/" + htmlLocalName;
                    localUrlCell.Hyperlink = hyperlink;

                    // Save the message text. The title is round-tripped through ASCII first,
                    // so characters outside ASCII are substituted by the encoder's fallback.
                    string msg = Encoding.UTF8.GetString(Encoding.Convert(Encoding.ASCII, Encoding.UTF8, Encoding.ASCII.GetBytes(title))).Trim();

                    if (msg.Length > 0)
                    {
                        string titleTextFilePath = Path.Combine(outputTitleTextDir, index + ".txt");
                        CommonUtil.CreateFileDirectory(titleTextFilePath);

                        // UTF-8 without BOM; disposing the writer flushes and closes the file.
                        using (TextWriter tw = new StreamWriter(titleTextFilePath, false, new UTF8Encoding(false)))
                        {
                            tw.Write(msg);
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: report the failing page and carry on with the next row.
                    this.RunPage.InvokeAppendLogText("读取出错. " + ex.Message + " LocalPath = " + localFilePath, LogLevelType.Error, true);
                }
            }

            subjectXW.SaveToDisk();
            subjectEW.SaveToDisk();
            allCommentsListEW.SaveToDisk();
            return succeed;
        }