Пример #1
0
        /// <summary>
        /// Re-prices every row of <c>mydt</c> from the text in column 24 and exports the table as CSV.
        /// </summary>
        /// <remarks>
        /// For each row, column 24 is passed to <c>getPriceFromDesc</c>; any result other than
        /// <c>"0"</c> is written to column 7. The table is then saved to the file picked in
        /// <c>saveCsvFile</c>. The row-count message is shown whether or not the file was saved.
        /// </remarks>
        /// <param name="sender">Control that raised the click event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button9_Click(object sender, EventArgs e)
        {
            // Same guard the sibling export handlers use: nothing to do without imported data.
            if (mydt == null || mydt.Rows.Count == 0)
            {
                MessageBox.Show("请先导入基础csv数据文件!");
                return;
            }

            foreach (DataRow dr1 in mydt.Rows)
            {
                string price = getPriceFromDesc(dr1[24].ToString());
                // "0" is treated as "no price found"; the existing price is kept in that case.
                if (price != "0")
                {
                    dr1[7] = price;
                }
            }

            // The filter must be assigned before the dialog is shown, otherwise it has no
            // effect on the dialog the user is actually looking at.
            saveCsvFile.Filter = "csv file(*.csv)|*.csv";
            if (saveCsvFile.ShowDialog() == DialogResult.OK)
            {
                string filename = saveCsvFile.FileName;
                CsvStreamWriter csw = new CsvStreamWriter(filename, System.Text.Encoding.Unicode);
                csw.QuoteMark(TBCSV.getQuoteMark());
                csw.AddData(mydt, 1);
                csw.Save();
            }
            else
            {
                MessageBox.Show("没保存");
            }
            // Rows.Count - 1: presumably the first row is the CSV header - confirm against the importer.
            MessageBox.Show("共生成" + (mydt.Rows.Count - 1) + "条数据");
        }
Пример #2
0
        /// <summary>
        /// Overwrites columns 40 and 7 of <c>mydt</c> with values from <c>rewriteDT</c> for rows
        /// whose column 0 matches, then exports <c>mydt</c> as CSV.
        /// </summary>
        /// <remarks>
        /// Matching is an exact string comparison of column 0. When several <c>rewriteDT</c> rows
        /// match the same <c>mydt</c> row, the last one wins because each match overwrites the
        /// previous one. The row-count message is shown whether or not the file was saved.
        /// </remarks>
        /// <param name="sender">Control that raised the click event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button6_Click(object sender, EventArgs e)
        {
            // Same guard the sibling export handlers use: nothing to do without imported data.
            if (mydt == null || mydt.Rows.Count == 0)
            {
                MessageBox.Show("请先导入基础csv数据文件!");
                return;
            }

            foreach (DataRow dr1 in mydt.Rows)
            {
                foreach (DataRow dr2 in rewriteDT.Rows)
                {
                    if (dr1[0].ToString() == dr2[0].ToString())
                    {
                        // rewriteDT column 1 -> target column 40, rewriteDT column 2 -> target column 7.
                        dr1[40] = dr2[1];
                        dr1[7]  = dr2[2];
                    }
                }
            }

            // The filter must be assigned before the dialog is shown, otherwise it has no
            // effect on the dialog the user is actually looking at.
            saveCsvFile.Filter = "csv file(*.csv)|*.csv";
            if (saveCsvFile.ShowDialog() == DialogResult.OK)
            {
                string filename = saveCsvFile.FileName;
                CsvStreamWriter csw = new CsvStreamWriter(filename, System.Text.Encoding.Unicode);
                csw.QuoteMark(TBCSV.getQuoteMark());
                csw.AddData(mydt, 1);
                csw.Save();
            }
            else
            {
                MessageBox.Show("没保存");
            }
            // Rows.Count - 1: presumably the first row is the CSV header - confirm against the importer.
            MessageBox.Show("共生成" + (mydt.Rows.Count - 1) + "条数据");
        }
Пример #3
0
        /// <summary>
        /// Builds an export table shaped by <c>taobaoCSVHead</c> from the rows of <c>mydt</c> and
        /// saves it as CSV.
        /// </summary>
        /// <remarks>
        /// The export table gets one column per entry of <c>taobaoCSVHead</c> and a first row that
        /// repeats those header texts. Every source row becomes one export row pre-filled from
        /// <c>taobaoCSVDefault</c>, with column 0 and column 40 replaced by source columns 0 and 1.
        /// </remarks>
        /// <param name="sender">Control that raised the click event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button5_Click_1(object sender, EventArgs e)
        {
            bool hasSourceRows = mydt != null && mydt.Rows.Count != 0;
            if (!hasSourceRows)
            {
                MessageBox.Show("请先导入基础csv数据文件!");
                return;
            }

            int columnCount = taobaoCSVHead.Length;
            DataTable exportDT = new DataTable();

            // One column per header entry.
            for (int c = 0; c < columnCount; c++)
            {
                exportDT.Columns.Add(taobaoCSVHead[c]);
            }

            // The header texts are also stored as the first data row of the export.
            DataRow headerRow = exportDT.NewRow();
            int headerIndex = 0;
            foreach (string caption in taobaoCSVHead)
            {
                headerRow[headerIndex] = caption;
                headerIndex++;
            }
            exportDT.Rows.Add(headerRow);

            foreach (DataRow source in mydt.Rows)
            {
                DataRow target = exportDT.NewRow();

                // Start from the default value of every column and trace each cell.
                for (int col = 0; col < columnCount; col++)
                {
                    target[col] = taobaoCSVDefault[col];
                    Debug.WriteLine("dr[" + col + "]===" + target[col]);
                }

                // Only two cells come from the imported data.
                target[0]  = source[0].ToString();
                target[40] = source[1].ToString();
                exportDT.Rows.Add(target);
            }

            Debug.WriteLine("exportDT.Rows.Count==" + exportDT.Rows.Count);

            // Cancelling the dialog silently skips the export.
            if (saveCsvFile.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            CsvStreamWriter writer = new CsvStreamWriter(saveCsvFile.FileName, System.Text.Encoding.Unicode);
            writer.QuoteMark(TBCSV.getQuoteMark());
            writer.AddData(exportDT, 1);
            writer.Save();
        }
Пример #4
0
        /// <summary>
        /// Exports the first row of <c>mydt</c> plus every row whose column 24 is empty.
        /// </summary>
        /// <remarks>
        /// Rows are copied into a structural clone of <c>mydt</c> and written as CSV to the file
        /// chosen in <c>saveCsvFile</c>. A header list that used to be kept here as commented-out
        /// code named column 24 "宝贝描述" (item description) - treat that as a hint, not a guarantee.
        /// </remarks>
        /// <param name="sender">Control that raised the click event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button3_Click(object sender, EventArgs e)
        {
            bool nothingLoaded = mydt == null || mydt.Rows.Count == 0;
            if (nothingLoaded)
            {
                MessageBox.Show("请先导入需要抓取的内容");
                return;
            }

            // Clone() copies the schema only; matching rows are imported below.
            DataTable tempDt = mydt.Clone();

            int rowIndex = 0;
            foreach (DataRow candidate in mydt.Rows)
            {
                // Row 0 is always kept; other rows only when column 24 has no text.
                bool keep = candidate[24].ToString().Length == 0 || rowIndex == 0;
                if (keep)
                {
                    tempDt.ImportRow(candidate);
                }
                rowIndex++;
            }

            if (saveCsvFile.ShowDialog() != DialogResult.OK)
            {
                MessageBox.Show("没保存");
                return;
            }

            CsvStreamWriter writer = new CsvStreamWriter(saveCsvFile.FileName, System.Text.Encoding.Unicode);
            writer.QuoteMark(TBCSV.getQuoteMark());
            writer.AddData(tempDt, 1);
            writer.Save();

            int exportedCount = tempDt.Rows.Count - 1;
            MessageBox.Show("保存成功,生成" + exportedCount + "条数据");
        }
Пример #5
0
        /// <summary>
        /// Removes rows of <c>mydt</c> whose trimmed column-0 text repeats an earlier row, then
        /// exports the remaining rows as CSV.
        /// </summary>
        /// <remarks>
        /// The first occurrence of each value is kept. Comparison is case-sensitive on the trimmed
        /// text. The row-count message is shown whether or not the file was saved.
        /// </remarks>
        /// <param name="sender">Control that raised the click event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button10_Click(object sender, EventArgs e)
        {
            // Same guard the sibling export handlers use: nothing to do without imported data.
            if (mydt == null || mydt.Rows.Count == 0)
            {
                MessageBox.Show("请先导入基础csv数据文件!");
                return;
            }

            // Dictionary used as a set: constant-time membership test instead of List.Contains.
            Dictionary<string, bool> seenNames = new Dictionary<string, bool>();

            for (int i = 0; i < mydt.Rows.Count; i++)
            {
                // Convert.ToString tolerates DBNull/null cells, where a (String) cast would throw.
                string name = Convert.ToString(mydt.Rows[i][0]).Trim();

                if (seenNames.ContainsKey(name))
                {
                    // RemoveAt always takes the row out of the collection. DataRow.Delete() only
                    // marks rows that are not in the Added state, which would leave the row at
                    // index i and make the next read of it throw.
                    mydt.Rows.RemoveAt(i);
                    i--; // re-examine the row that moved into this slot
                }
                else
                {
                    seenNames.Add(name, true);
                }
            }

            // The filter must be assigned before the dialog is shown, otherwise it has no
            // effect on the dialog the user is actually looking at.
            saveCsvFile.Filter = "csv file(*.csv)|*.csv";
            if (saveCsvFile.ShowDialog() == DialogResult.OK)
            {
                string filename = saveCsvFile.FileName;
                CsvStreamWriter csw = new CsvStreamWriter(filename, System.Text.Encoding.Unicode);
                csw.QuoteMark(TBCSV.getQuoteMark());
                csw.AddData(mydt, 1);
                csw.Save();
            }
            else
            {
                MessageBox.Show("没保存");
            }
            MessageBox.Show("共生成" + (mydt.Rows.Count) + "条数据");
        }
Пример #6
0
        /// <summary>
        /// Parses the search form held in <c>wbHtml</c>, walks the requested result pages, fetches
        /// every linked detail page and writes one CSV row per book to <c>CsvFileName</c>.
        /// </summary>
        /// <remarks>
        /// Steps:
        /// 1. Cut the search form out of the HTML and read the total page count, the search query
        ///    and the order value from it. A missing marker is logged and aborts the run.
        /// 2. Restrict the page range with <c>pageRange.Text</c> ("N" or "N-M") when it is not empty.
        /// 3. Collect the <c>views.asp</c> links of each page via <c>getPageContent</c>.
        /// 4. Request each link, fill a row from the helper extractors and add it to the table.
        ///    A failure on one book is logged and does not stop the remaining books.
        /// 5. Save the table and enable <c>button3</c>.
        /// All marker searches are ordinal so that match positions and lengths are exact.
        /// </remarks>
        private void downloadTask()
        {
            string htmlString = wbHtml;
            string formStart  = "<form method=\"POST\" action=\"searchfull.asp?plu_title=";

            // ---- 1. isolate the search form -------------------------------------------------
            int formStartIndex = string.IsNullOrEmpty(htmlString)
                ? -1
                : htmlString.IndexOf(formStart, StringComparison.Ordinal);
            if (formStartIndex == -1)
            {
                logger("没有找到Form起点!");
                return;
            }
            string formString   = htmlString.Substring(formStartIndex);
            int    formEndIndex = formString.IndexOf("</form>", StringComparison.Ordinal);
            if (formEndIndex == -1)
            {
                logger("没有找到Form终点!");
                return;
            }
            formString = formString.Substring(0, formEndIndex);
            Console.WriteLine("formString========" + formString);

            // Total page count: the text between "共" and the next "页".
            int totalStartIndex = formString.IndexOf("共", StringComparison.Ordinal);
            if (totalStartIndex == -1)
            {
                logger("没有找到总页数!");
                return;
            }
            string totalPageString = formString.Substring(totalStartIndex + 1);
            // The end marker has to be looked up in the tail that is being cut, not in the whole
            // form: a "页" that only occurs before "共" would otherwise yield index -1 here.
            int totalEndIndex = totalPageString.IndexOf("页", StringComparison.Ordinal);
            if (totalEndIndex == -1)
            {
                logger("没有找到总页数!");
                return;
            }
            totalPageString = totalPageString.Substring(0, totalEndIndex).Trim();
            loggerLine("共 " + totalPageString + "页 ");
            int totalPage;
            if (!int.TryParse(totalPageString, out totalPage))
            {
                // Scraped text was not a number; report it the same way as a missing marker.
                logger("没有找到总页数!");
                return;
            }
            Console.WriteLine("totalPage====" + totalPage);

            // Search query: from "searchfull.asp" up to (not including) the page parameter.
            int searchStartIndex = formString.IndexOf("searchfull.asp", StringComparison.Ordinal);
            if (searchStartIndex == -1)
            {
                logger("关键字没找到");
                return;
            }
            string searchString   = formString.Substring(searchStartIndex);
            int    searchEndIndex = searchString.IndexOf(QUERY_SEPARATE + "page=", StringComparison.Ordinal);
            if (searchEndIndex == -1)
            {
                logger("关键字没找到!");
                return;
            }
            searchString = searchString.Substring(0, searchEndIndex);
            Console.WriteLine("searchString====" + searchString);

            // Order value: the text between "order=" and the next "\">".
            string orderMarker     = "order=";
            int    orderStartIndex = formString.IndexOf(orderMarker, StringComparison.Ordinal);
            if (orderStartIndex == -1)
            {
                logger("Order没有找到!");
                return;
            }
            string orderString   = formString.Substring(orderStartIndex + orderMarker.Length);
            int    orderEndIndex = orderString.IndexOf("\">", StringComparison.Ordinal);
            if (orderEndIndex == -1)
            {
                logger("关键字没找到!");
                return;
            }
            orderString = orderString.Substring(0, orderEndIndex);
            Console.WriteLine("orderString====" + orderString);

            // ---- 2. requested page range ----------------------------------------------------
            int    firstPage  = 1;
            int    lastPage   = totalPage;
            string pageNumber = pageRange.Text;

            if (pageNumber != "")
            {
                // "N" selects a single page, "N-M" a range.
                string[] tempArray = pageNumber.Split('-');
                string   firstText = tempArray[0];
                string   lastText  = tempArray.Length == 1 ? tempArray[0] : tempArray[1];
                if (!int.TryParse(firstText, out firstPage) || !int.TryParse(lastText, out lastPage))
                {
                    // User input that is not numeric is reported instead of throwing.
                    logger("页码范围格式不正确!");
                    return;
                }
            }

            // ---- 3. collect detail links ----------------------------------------------------
            List <string> urlList    = new List <string>();
            string        detailPage = "href=\"views.asp";
            string        linkEnd    = "\" target=";

            // The range is clamped to [1, totalPage].
            for (int i = Math.Max(firstPage, 1); i <= Math.Min(lastPage, totalPage); i++)
            {
                logger("查找第" + i + "页的图书...");
                string pageHtml = getPageContent(searchString, i, orderString);

                int linkIndex;
                while ((linkIndex = pageHtml.IndexOf(detailPage, StringComparison.Ordinal)) != -1)
                {
                    pageHtml = pageHtml.Substring(linkIndex + detailPage.Length);
                    int urlEndIndex = pageHtml.IndexOf(linkEnd, StringComparison.Ordinal);
                    if (urlEndIndex == -1)
                    {
                        // No terminator in the rest of the page: no further link can be cut out.
                        break;
                    }
                    urlList.Add(webViewUrl + pageHtml.Substring(0, urlEndIndex));
                }

                loggerLine("完成!");
            }

            // ---- 4. download every book -----------------------------------------------------
            TBCSV csv = new TBCSV();
            // NOTE(review): this table is local to the method. If the class also has a field
            // named mydt, that field is not updated here - confirm this is intended.
            DataTable bookTable = csv.getDefaultDataTable();

            string[] values = TBCSV.getTaobaoDefaultValue();

            string htmlTemp = "";
            int    m        = 1; // 1-based counter shown in the log; advanced only after a successful fetch

            foreach (string urlstring in urlList)
            {
                logger(urlstring + "第[" + m + "]本书..");
                try {
                    htmlTemp = HttpRequestUtil.requestURL(urlstring, "utf-8");
                    if (string.IsNullOrEmpty(htmlTemp))
                    {
                        logger("出错,请手动下载!!");
                        continue;
                    }
                    m++;
                    DataRow dr = bookTable.NewRow();
                    // Start from the default value of every column.
                    for (int j = 0; j < values.Length; j++)
                    {
                        dr[j] = values[j];
                    }
                    string bookName = getBookNameFromHtml(htmlTemp); // book title
                    dr["title"] = bookName;

                    dr["price"] = getBookPriceFromHtml(htmlTemp);

                    string author = getAuthorFromHtml(htmlTemp);
                    string ISBN   = getBookISBNFromHtml(htmlTemp);
                    dr["outer_id"] = ISBN;                           // merchant code = ISBN
                    string publishHouse = getPublishHouse(htmlTemp); // publisher
                    // The order of the next two assignments must not be changed.
                    dr["inputPids"]   = "1636953,2043183,2043189,122216620";
                    dr["inputValues"] = ISBN.Replace("-", "") + "," + bookName + "," + author + "," + publishHouse;

                    string desc         = getBookInfo(htmlTemp); // description
                    string bookNameHtml = "<table width=\"100%\" border=\"0\" cellspacing=\"0\" cellpadding=\"0\" align=\"center\" bgcolor=\"#ccccff\">\r\n<tr bgcolor=#ccccff>\r\n<td>" + bookName + "</td></tr></table>";
                    dr["description"] = bookNameHtml + desc;     // title banner followed by the description

                    string bookImage = getBookImageFromHtml(htmlTemp);
                    if (bookImage != "")
                    {
                        // Drop the last four characters (presumably the file extension - confirm)
                        // and append the picture suffix.
                        bookImage = bookImage.Substring(0, bookImage.Length - 4) + ":1:0:|;";
                        logger("图片下载成功...");
                    }
                    else
                    {
                        logger("图片下载失败...");
                    }
                    dr["picture_status"] = "1;";
                    dr["picture"]        = bookImage;
                    bookTable.Rows.Add(dr);
                } catch (Exception ex) {
                    // One failing book must not stop the remaining downloads.
                    loggerLine(ex.ToString());
                }
                loggerLine("下载完成!");
            }

            // ---- 5. save --------------------------------------------------------------------
            CsvStreamWriter csw = new CsvStreamWriter(CsvFileName, System.Text.Encoding.Unicode);

            csw.QuoteMark(TBCSV.getQuoteMark());
            csw.AddData(bookTable, 1);
            csw.FirstRowQuote = 2;
            csw.Save();
            MessageBox.Show("保存成功,生成" + (bookTable.Rows.Count - 1) + "条数据");
            button3.Enabled = true;
        }
Пример #7
0
        /// <summary>
        /// Exports the rows of <c>duplicateDt</c> that are not already present in <c>mydt</c>,
        /// preceded by a copy of the first row of <c>duplicateDt</c>.
        /// </summary>
        /// <remarks>
        /// "Already present" depends on the check boxes:
        /// name and price - columns 0 and 7 via <c>isSameRecord</c>;
        /// name only - a linear scan comparing column 0;
        /// price only - column 7 via <c>isSameRecord</c>.
        /// With neither box checked no data row is exported. Any exception is shown in a message box.
        /// </remarks>
        /// <param name="sender">Control that raised the click event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button8_Click(object sender, EventArgs e)
        {
            try
            {
                if (mydt == null || mydt.Rows.Count == 0)
                {
                    MessageBox.Show("请先导入基础csv数据文件!");
                    return;
                }
                if (duplicateDt == null || duplicateDt.Rows.Count == 0)
                {
                    MessageBox.Show("请导入需要去除的重复的csv文件!");
                    // Stop here: everything below dereferences duplicateDt and its first row.
                    return;
                }

                // Clone() copies the schema only.
                DataTable tempDt = duplicateDt.Clone();

                // The first source row is always written as the first output row.
                DataRow drHead   = tempDt.NewRow();
                DataRow firstRow = duplicateDt.Rows[0];
                for (int i = 0; i < duplicateDt.Columns.Count; i++)
                {
                    drHead[i] = firstRow[i];
                }
                tempDt.Rows.Add(drHead);

                bool byName  = chkName.Checked;
                bool byPrice = chkPrice.Checked;

                // The key depends only on the check boxes, so it is set once instead of per row.
                if (byName && byPrice)
                {
                    mydt.PrimaryKey = new DataColumn[] { mydt.Columns[0], mydt.Columns[7] };
                }
                else if (!byName && byPrice)
                {
                    mydt.PrimaryKey = new DataColumn[1] { mydt.Columns[7] };
                }

                for (int i = 0; i < duplicateDt.Rows.Count; i++)
                {
                    DataRow dr = duplicateDt.Rows[i];

                    if (byName && byPrice)
                    {
                        // The key is (column 0, column 7), so the lookup values must come from the
                        // same two columns; the price-only branch below already reads column 7.
                        // Assumes isSameRecord matches the values against the primary key - confirm.
                        if (!isSameRecord(mydt, new Object[] { dr[0].ToString(), dr[7].ToString() }))
                        {
                            tempDt.ImportRow(dr);
                        }
                    }
                    else if (byName)
                    {
                        // Linear scan instead of a primary key on column 0.
                        bool isFind = false;
                        foreach (DataRow mydr in mydt.Rows)
                        {
                            if (dr[0].ToString() == mydr[0].ToString())
                            {
                                isFind = true;
                                break;
                            }
                        }
                        if (!isFind)
                        {
                            tempDt.ImportRow(dr);
                        }
                    }
                    else if (byPrice)
                    {
                        if (!isSameRecord(mydt, new Object[] { dr[7].ToString() }))
                        {
                            tempDt.ImportRow(dr);
                        }
                    }
                }

                if (saveCsvFile.ShowDialog() == DialogResult.OK)
                {
                    string filename = saveCsvFile.FileName;
                    CsvStreamWriter csw = new CsvStreamWriter(filename, System.Text.Encoding.Unicode);
                    csw.QuoteMark(TBCSV.getQuoteMark());
                    csw.AddData(tempDt, 1);
                    csw.FirstRowQuote = 2;
                    csw.Save();
                }
                else
                {
                    MessageBox.Show("没保存");
                }
                // Rows.Count - 1 excludes the copied first row from the reported total.
                MessageBox.Show("共生成" + (tempDt.Rows.Count - 1) + "条数据");
            }
            catch (Exception e1)
            {
                MessageBox.Show(e1.Message);
            }
        }
Пример #8
0
        /// <summary>
        /// Exports the rows of <c>duplicateDt</c> that are not already present in <c>mydt</c>.
        /// </summary>
        /// <remarks>
        /// "Already present" is decided by <c>isSameRecord</c> against a primary key chosen from
        /// the check boxes: name and price - columns 0 and 7; name only - column 0; price only -
        /// column 7. With neither box checked no row is exported.
        /// </remarks>
        /// <param name="sender">Control that raised the click event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button4_Click(object sender, EventArgs e)
        {
            if (mydt == null || mydt.Rows.Count == 0)
            {
                MessageBox.Show("请先导入基础csv数据文件!");
                return;
            }
            if (duplicateDt == null || duplicateDt.Rows.Count == 0)
            {
                MessageBox.Show("请导入需要去除的重复的csv文件!");
                // Stop here: the next statement dereferences duplicateDt.
                return;
            }

            // Clone() copies the schema only.
            DataTable tempDt = duplicateDt.Clone();

            bool byName  = chkName.Checked;
            bool byPrice = chkPrice.Checked;

            // The key depends only on the check boxes, so it is set once instead of per row.
            if (byName && byPrice)
            {
                mydt.PrimaryKey = new DataColumn[] { mydt.Columns[0], mydt.Columns[7] };
            }
            else if (byName)
            {
                mydt.PrimaryKey = new DataColumn[1] { mydt.Columns[0] };
            }
            else if (byPrice)
            {
                mydt.PrimaryKey = new DataColumn[1] { mydt.Columns[7] };
            }

            for (int i = 0; i < duplicateDt.Rows.Count; i++)
            {
                DataRow dr = duplicateDt.Rows[i];

                if (byName && byPrice)
                {
                    // The key is (column 0, column 7), so the lookup values must come from the
                    // same two columns; the price-only branch below already reads column 7.
                    // Assumes isSameRecord matches the values against the primary key - confirm.
                    if (!isSameRecord(mydt, new Object[] { dr[0].ToString(), dr[7].ToString() }))
                    {
                        tempDt.ImportRow(dr);
                    }
                }
                else if (byName)
                {
                    if (!isSameRecord(mydt, new Object[] { dr[0].ToString() }))
                    {
                        tempDt.ImportRow(dr);
                    }
                }
                else if (byPrice)
                {
                    if (!isSameRecord(mydt, new Object[] { dr[7].ToString() }))
                    {
                        tempDt.ImportRow(dr);
                    }
                }
            }

            if (saveCsvFile.ShowDialog() == DialogResult.OK)
            {
                string filename = saveCsvFile.FileName;
                CsvStreamWriter csw = new CsvStreamWriter(filename, System.Text.Encoding.Unicode);
                csw.QuoteMark(TBCSV.getQuoteMark());
                csw.AddData(tempDt, 1);
                csw.FirstRowQuote = 2;
                csw.Save();
            }
            else
            {
                MessageBox.Show("没保存");
            }
            // Rows.Count - 1: presumably one of the exported rows is the CSV header - confirm.
            MessageBox.Show("共生成" + (tempDt.Rows.Count - 1) + "条数据");
        }