/// <summary>
/// Click handler: recomputes column 7 of every row in <c>mydt</c> from the text in
/// column 24 (via <c>getPriceFromDesc</c>) and then offers to save the table as CSV.
/// </summary>
/// <remarks>
/// The closing summary message is shown whether or not the user saved the file.
/// </remarks>
/// <param name="sender">Control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button9_Click(object sender, EventArgs e)
{
    // Same precondition the other export handlers enforce; without it a missing
    // table surfaced as a NullReferenceException.
    if (mydt == null || mydt.Rows.Count == 0)
    {
        MessageBox.Show("请先导入基础csv数据文件!");
        return;
    }

    foreach (DataRow row in mydt.Rows)
    {
        string price = getPriceFromDesc(row[24].ToString());

        // "0" leaves the existing value untouched
        // (presumably it means "no price found" - confirm against getPriceFromDesc).
        if (price != "0")
        {
            row[7] = price;
        }
    }

    // The filter has to be set BEFORE ShowDialog(); assigning it afterwards (as the
    // code used to do) cannot influence the dialog that was just displayed.
    saveCsvFile.Filter = "csv file(*.csv)|*.csv";

    if (saveCsvFile.ShowDialog() == DialogResult.OK)
    {
        string filename = saveCsvFile.FileName;
        CsvStreamWriter csw = new CsvStreamWriter(filename, System.Text.Encoding.Unicode);
        csw.QuoteMark(TBCSV.getQuoteMark());
        csw.AddData(mydt, 1);
        csw.Save();
    }
    else
    {
        MessageBox.Show("没保存");
    }

    // One row is subtracted from the count (presumably the header row - confirm).
    MessageBox.Show("共生成" + (mydt.Rows.Count - 1) + "条数据");
}
/// <summary>
/// Click handler: for every row of <c>mydt</c> whose column 0 matches column 0 of a
/// row in <c>rewriteDT</c>, copies rewrite column 1 into column 40 and rewrite
/// column 2 into column 7, then offers to save <c>mydt</c> as CSV.
/// </summary>
/// <remarks>
/// When several rewrite rows share the same key, the last one wins (this matches
/// the outcome of the previous nested-loop implementation). The closing summary
/// message is shown whether or not the user saved the file.
/// </remarks>
/// <param name="sender">Control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button6_Click(object sender, EventArgs e)
{
    // Same precondition the other export handlers enforce.
    if (mydt == null || mydt.Rows.Count == 0)
    {
        MessageBox.Show("请先导入基础csv数据文件!");
        return;
    }

    // Previously a missing rewrite table crashed with a NullReferenceException.
    if (rewriteDT == null)
    {
        MessageBox.Show("没有可用的改写数据!");
        return;
    }

    // Index the rewrite rows by key once instead of rescanning them for every
    // target row. Later duplicates overwrite earlier ones on purpose.
    Dictionary<string, DataRow> rewriteByKey = new Dictionary<string, DataRow>();
    foreach (DataRow rewriteRow in rewriteDT.Rows)
    {
        rewriteByKey[rewriteRow[0].ToString()] = rewriteRow;
    }

    foreach (DataRow targetRow in mydt.Rows)
    {
        DataRow rewriteRow;
        if (rewriteByKey.TryGetValue(targetRow[0].ToString(), out rewriteRow))
        {
            targetRow[40] = rewriteRow[1];
            targetRow[7] = rewriteRow[2];
        }
    }

    // The filter has to be set BEFORE ShowDialog(); assigning it afterwards cannot
    // influence the dialog that was just displayed.
    saveCsvFile.Filter = "csv file(*.csv)|*.csv";

    if (saveCsvFile.ShowDialog() == DialogResult.OK)
    {
        string filename = saveCsvFile.FileName;
        CsvStreamWriter csw = new CsvStreamWriter(filename, System.Text.Encoding.Unicode);
        csw.QuoteMark(TBCSV.getQuoteMark());
        csw.AddData(mydt, 1);
        csw.Save();
    }
    else
    {
        MessageBox.Show("没保存");
    }

    // One row is subtracted from the count (presumably the header row - confirm).
    MessageBox.Show("共生成" + (mydt.Rows.Count - 1) + "条数据");
}
/// <summary>
/// Click handler: builds an export table from <c>mydt</c> and, if the user confirms
/// the save dialog, writes it out as a Unicode CSV file.
/// </summary>
/// <remarks>
/// The export table has one column per entry of <c>taobaoCSVHead</c> and starts with
/// a row that repeats those entries. Each source row then yields one export row that
/// is pre-filled from <c>taobaoCSVDefault</c>, with column 0 taken from source
/// column 0 and column 40 taken from source column 1.
/// </remarks>
/// <param name="sender">Control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button5_Click_1(object sender, EventArgs e)
{
    bool hasSourceRows = mydt != null && mydt.Rows.Count != 0;
    if (!hasSourceRows)
    {
        MessageBox.Show("请先导入基础csv数据文件!");
        return;
    }

    int columnCount = taobaoCSVHead.Length;

    // Columns are named after the header entries.
    DataTable output = new DataTable();
    for (int c = 0; c < columnCount; c++)
    {
        output.Columns.Add(taobaoCSVHead[c]);
    }

    // First row repeats the header entries as data.
    DataRow headerRow = output.NewRow();
    int headerIndex = 0;
    foreach (string caption in taobaoCSVHead)
    {
        headerRow[headerIndex] = caption;
        headerIndex++;
    }
    output.Rows.Add(headerRow);

    foreach (DataRow sourceRow in mydt.Rows)
    {
        DataRow outputRow = output.NewRow();

        // Start from the defaults, tracing each value as it is applied.
        for (int c = 0; c < columnCount; c++)
        {
            outputRow[c] = taobaoCSVDefault[c];
            Debug.WriteLine("dr[" + c + "]===" + outputRow[c]);
        }

        // Overlay the two values that come from the imported data.
        outputRow[0] = sourceRow[0].ToString();
        outputRow[40] = sourceRow[1].ToString();
        output.Rows.Add(outputRow);
    }

    Debug.WriteLine("exportDT.Rows.Count==" + output.Rows.Count);

    if (saveCsvFile.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    CsvStreamWriter writer = new CsvStreamWriter(saveCsvFile.FileName, System.Text.Encoding.Unicode);
    writer.QuoteMark(TBCSV.getQuoteMark());
    writer.AddData(output, 1);
    writer.Save();
}
/// <summary>
/// Click handler: exports the rows of <c>mydt</c> whose column 24 is empty, together
/// with the very first row, to a CSV file chosen by the user.
/// </summary>
/// <remarks>
/// The success message reports the exported row count minus one
/// (presumably excluding the header row - confirm).
/// </remarks>
/// <param name="sender">Control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button3_Click(object sender, EventArgs e)
{
    if (mydt == null || mydt.Rows.Count == 0)
    {
        MessageBox.Show("请先导入需要抓取的内容");
        return;
    }

    // Same schema as the source table, no rows yet.
    DataTable pending = mydt.Clone();

    int rowIndex = 0;
    foreach (DataRow row in mydt.Rows)
    {
        // Column 24 is read for every row, including the first one.
        string cell = row[24].ToString();
        if (cell == string.Empty || rowIndex == 0)
        {
            pending.ImportRow(row);
        }
        rowIndex++;
    }

    if (saveCsvFile.ShowDialog() != DialogResult.OK)
    {
        MessageBox.Show("没保存");
        return;
    }

    CsvStreamWriter writer = new CsvStreamWriter(saveCsvFile.FileName, System.Text.Encoding.Unicode);
    writer.QuoteMark(TBCSV.getQuoteMark());
    writer.AddData(pending, 1);
    writer.Save();

    MessageBox.Show("保存成功,生成" + (pending.Rows.Count - 1) + "条数据");
}
/// <summary>
/// Click handler: removes rows of <c>mydt</c> whose trimmed column-0 value has
/// already been seen (the first occurrence is kept), then offers to save the
/// table as CSV.
/// </summary>
/// <remarks>
/// The closing summary message is shown whether or not the user saved the file.
/// </remarks>
/// <param name="sender">Control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button10_Click(object sender, EventArgs e)
{
    // Same precondition the other export handlers enforce.
    if (mydt == null || mydt.Rows.Count == 0)
    {
        MessageBox.Show("请先导入基础csv数据文件!");
        return;
    }

    List<string> seenNames = new List<string>();
    for (int i = 0; i < mydt.Rows.Count; i++)
    {
        // ToString() instead of a (String) cast: the cast threw
        // InvalidCastException for DBNull or non-string cells.
        string name = mydt.Rows[i][0].ToString().Trim();

        if (seenNames.Contains(name))
        {
            // RemoveAt always takes the row out of the collection. DataRow.Delete()
            // only does that for rows in the Added state; otherwise the row stays in
            // place marked as deleted, and stepping the index back revisited it and
            // failed when its value was read.
            mydt.Rows.RemoveAt(i);
            i--;
        }
        else
        {
            seenNames.Add(name);
        }
    }

    // The filter has to be set BEFORE ShowDialog(); assigning it afterwards cannot
    // influence the dialog that was just displayed.
    saveCsvFile.Filter = "csv file(*.csv)|*.csv";

    if (saveCsvFile.ShowDialog() == DialogResult.OK)
    {
        string filename = saveCsvFile.FileName;
        CsvStreamWriter csw = new CsvStreamWriter(filename, System.Text.Encoding.Unicode);
        csw.QuoteMark(TBCSV.getQuoteMark());
        csw.AddData(mydt, 1);
        csw.Save();
    }
    else
    {
        MessageBox.Show("没保存");
    }

    // NOTE(review): unlike the sibling handlers this count does not subtract one;
    // kept as-is - confirm whether that is intended.
    MessageBox.Show("共生成" + (mydt.Rows.Count) + "条数据");
}
/// <summary>
/// Parses the search-result page held in <c>wbHtml</c>, collects the detail-page
/// links of the requested result pages, downloads every detail page, turns each
/// one into a table row and writes the table to <c>CsvFileName</c>.
/// </summary>
/// <remarks>
/// Steps:
/// 1. Cut the paging form out of the page and read the total page count, the
///    search query and the order value from it.
/// 2. Optionally narrow the page range using <c>pageRange.Text</c>
///    ("N" or "N-M"); the range is clamped to 1..total.
/// 3. Fetch each result page and collect every <c>views.asp</c> link.
/// 4. Fetch each link, fill one row per book, and save the CSV.
/// Any parsing failure is reported through <c>logger</c> and aborts the task.
/// </remarks>
private void downloadTask()
{
    string htmlString = wbHtml;
    string formStart = "<form method=\"POST\" action=\"searchfull.asp?plu_title=";

    // ---- 1. isolate the paging form -------------------------------------------
    // A missing page is reported the same way as a page without the form.
    int formStartIndex = string.IsNullOrEmpty(htmlString) ? -1 : htmlString.IndexOf(formStart);
    if (formStartIndex == -1)
    {
        logger("没有找到Form起点!");
        return;
    }
    string formString = htmlString.Substring(formStartIndex);

    int formEndIndex = formString.IndexOf("</form>");
    if (formEndIndex == -1)
    {
        logger("没有找到Form终点!");
        return;
    }
    formString = formString.Substring(0, formEndIndex);
    Console.WriteLine("formString========" + formString);

    // Total page count: the text between "共" and the NEXT "页".
    int totalStartIndex = formString.IndexOf("共");
    if (totalStartIndex == -1)
    {
        logger("没有找到总页数!");
        return;
    }
    string totalPageString = formString.Substring(totalStartIndex + 1);

    // The terminator must be looked up in the remainder that is being cut. The old
    // code tested formString instead, so a "页" that only occurred before "共"
    // passed the check and Substring(0, -1) threw.
    int totalEndIndex = totalPageString.IndexOf("页");
    if (totalEndIndex == -1)
    {
        logger("没有找到总页数!");
        return;
    }
    totalPageString = totalPageString.Substring(0, totalEndIndex).Trim();
    loggerLine("共 " + totalPageString + "页 ");

    // Scraped text: do not let a non-numeric value throw.
    int totalPage;
    if (!int.TryParse(totalPageString, out totalPage))
    {
        logger("没有找到总页数!");
        return;
    }
    Console.WriteLine("totalPage====" + totalPage);

    // Search query: from "searchfull.asp" up to the page parameter.
    int searchStartIndex = formString.IndexOf("searchfull.asp");
    if (searchStartIndex == -1)
    {
        logger("关键字没找到");
        return;
    }
    string searchString = formString.Substring(searchStartIndex);

    int searchEndIndex = searchString.IndexOf(QUERY_SEPARATE + "page=");
    if (searchEndIndex == -1)
    {
        logger("关键字没找到!");
        return;
    }
    searchString = searchString.Substring(0, searchEndIndex);
    Console.WriteLine("searchString====" + searchString);

    // Order value: the text after "order=" up to the closing quote of the tag.
    int orderStartIndex = formString.IndexOf("order=");
    if (orderStartIndex == -1)
    {
        logger("Order没有找到!");
        return;
    }
    string orderString = formString.Substring(orderStartIndex + 6);

    int orderEndIndex = orderString.IndexOf("\">");
    if (orderEndIndex == -1)
    {
        logger("关键字没找到!");
        return;
    }
    orderString = orderString.Substring(0, orderEndIndex);
    Console.WriteLine("orderString====" + orderString);

    // ---- 2. requested page range ----------------------------------------------
    int firstPage = 1;
    int lastPage = totalPage;
    string pageNumber = pageRange.Text;
    if (pageNumber != "")
    {
        // "N" selects a single page, "N-M" a range.
        string[] rangeParts = pageNumber.Split('-');
        string lastPart = rangeParts.Length == 1 ? rangeParts[0] : rangeParts[1];

        // User input: report a malformed range instead of throwing FormatException.
        if (!int.TryParse(rangeParts[0], out firstPage) || !int.TryParse(lastPart, out lastPage))
        {
            logger("页码范围格式错误!");
            return;
        }
    }

    // ---- 3. collect the detail-page links -------------------------------------
    List<string> urlList = new List<string>();
    string detailPage = "href=\"views.asp";
    for (int i = Math.Max(firstPage, 1); i <= Math.Min(lastPage, totalPage); i++)
    {
        logger("查找第" + i + "页的图书...");
        string pageHtml = getPageContent(searchString, i, orderString);
        if (pageHtml == null)
        {
            // Treat a failed fetch as a page without links.
            pageHtml = "";
        }

        int linkStartIndex = pageHtml.IndexOf(detailPage);
        while (linkStartIndex != -1)
        {
            pageHtml = pageHtml.Substring(linkStartIndex + detailPage.Length);

            int linkEndIndex = pageHtml.IndexOf("\" target=");
            if (linkEndIndex == -1)
            {
                // Malformed trailing link: stop scanning this page instead of
                // letting Substring(0, -1) throw.
                break;
            }

            urlList.Add(webViewUrl + pageHtml.Substring(0, linkEndIndex));
            linkStartIndex = pageHtml.IndexOf(detailPage);
        }
        loggerLine("完成!");
    }

    // ---- 4. download every book and build the table ---------------------------
    TBCSV csv = new TBCSV();

    // NOTE(review): the original declared this local as "mydt", which hid the
    // field of the same name; the field is therefore NOT updated by this method.
    // Confirm that this is intended.
    DataTable bookTable = csv.getDefaultDataTable();
    string[] values = TBCSV.getTaobaoDefaultValue();

    string htmlTemp = "";
    int m = 1; // running number of the book being processed (advances on success only)
    foreach (string urlstring in urlList)
    {
        logger(urlstring + "第[" + m + "]本书..");
        try
        {
            htmlTemp = HttpRequestUtil.requestURL(urlstring, "utf-8");
            if (string.IsNullOrEmpty(htmlTemp))
            {
                logger("出错,请手动下载!!");
                continue;
            }
            m++;

            // Start from the default values, then overlay the scraped fields.
            DataRow dr = bookTable.NewRow();
            for (int j = 0; j < values.Length; j++)
            {
                dr[j] = values[j];
            }

            string bookName = getBookNameFromHtml(htmlTemp); // book title
            dr["title"] = bookName;
            dr["price"] = getBookPriceFromHtml(htmlTemp);
            string author = getAuthorFromHtml(htmlTemp);
            string ISBN = getBookISBNFromHtml(htmlTemp);
            dr["outer_id"] = ISBN; // merchant code (ISBN)
            string publishHouse = getPublishHouse(htmlTemp); // publisher

            // The order of the following two assignments must not be changed.
            dr["inputPids"] = "1636953,2043183,2043189,122216620";
            dr["inputValues"] = ISBN.Replace("-", "") + "," + bookName + "," + author + "," + publishHouse;

            string desc = getBookInfo(htmlTemp); // description
            string bookNameHtml = "<table width=\"100%\" border=\"0\" cellspacing=\"0\" cellpadding=\"0\" align=\"center\" bgcolor=\"#ccccff\">\r\n<tr bgcolor=#ccccff>\r\n<td>" + bookName + "</td></tr></table>";
            dr["description"] = bookNameHtml + desc;

            string bookImage = getBookImageFromHtml(htmlTemp);
            if (bookImage != "")
            {
                // Drops the last four characters (presumably the file extension -
                // confirm) and appends the picture suffix.
                bookImage = bookImage.Substring(0, bookImage.Length - 4) + ":1:0:|;";
                logger("图片下载成功...");
            }
            else
            {
                logger("图片下载失败...");
            }
            dr["picture_status"] = "1;";
            dr["picture"] = bookImage;
            bookTable.Rows.Add(dr);
        }
        catch (Exception e)
        {
            // One broken book must not abort the whole run.
            loggerLine(e.ToString());
        }
        loggerLine("下载完成!");
    }

    CsvStreamWriter csw = new CsvStreamWriter(CsvFileName, System.Text.Encoding.Unicode);
    csw.QuoteMark(TBCSV.getQuoteMark());
    csw.AddData(bookTable, 1);
    csw.FirstRowQuote = 2;
    csw.Save();

    MessageBox.Show("保存成功,生成" + (bookTable.Rows.Count - 1) + "条数据");
    button3.Enabled = true;
}
/// <summary>
/// Click handler: exports the rows of <c>duplicateDt</c> that have no counterpart in
/// <c>mydt</c>, comparing by name (column 0), by price (column 7) or by both,
/// depending on <c>chkName</c> and <c>chkPrice</c>.
/// </summary>
/// <remarks>
/// The first row of <c>duplicateDt</c> is always copied to the top of the output.
/// When neither check box is ticked no further rows are exported. Any exception is
/// reported in a message box. The closing summary message is shown whether or not
/// the user saved the file.
/// </remarks>
/// <param name="sender">Control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button8_Click(object sender, EventArgs e)
{
    try
    {
        if (mydt == null || mydt.Rows.Count == 0)
        {
            MessageBox.Show("请先导入基础csv数据文件!");
            return;
        }

        if (duplicateDt == null || duplicateDt.Rows.Count == 0)
        {
            MessageBox.Show("请导入需要去除的重复的csv文件!");
            // This return was missing: execution continued and dereferenced the
            // null/empty table right below.
            return;
        }

        DataTable tempDt = duplicateDt.Clone();

        // Copy the first row verbatim as the first output row.
        DataRow firstRow = duplicateDt.Rows[0];
        DataRow drHead = tempDt.NewRow();
        for (int i = 0; i < duplicateDt.Columns.Count; i++)
        {
            drHead[i] = firstRow[i];
        }
        tempDt.Rows.Add(drHead);

        bool byName = chkName.Checked;
        bool byPrice = chkPrice.Checked;

        // The lookup key does not change between rows, so configure it once.
        // NOTE(review): assigning PrimaryKey throws when the key columns of mydt
        // are not unique; that ends up in the catch block below.
        if (byName && byPrice)
        {
            mydt.PrimaryKey = new DataColumn[] { mydt.Columns[0], mydt.Columns[7] };
        }
        else if (!byName && byPrice)
        {
            mydt.PrimaryKey = new DataColumn[] { mydt.Columns[7] };
        }

        foreach (DataRow dr in duplicateDt.Rows)
        {
            bool isFind;
            if (byName && byPrice)
            {
                // The key is (column 0, column 7), so the lookup values must come
                // from the same columns; the old code passed dr[1] as second value.
                isFind = isSameRecord(mydt, new object[] { dr[0].ToString(), dr[7].ToString() });
            }
            else if (byName)
            {
                // Plain scan rather than a primary-key lookup, as in the original.
                isFind = false;
                foreach (DataRow mydr in mydt.Rows)
                {
                    if (dr[0].ToString() == mydr[0].ToString())
                    {
                        isFind = true;
                        break;
                    }
                }
            }
            else if (byPrice)
            {
                isFind = isSameRecord(mydt, new object[] { dr[7].ToString() });
            }
            else
            {
                // No criterion selected: nothing is exported.
                continue;
            }

            if (!isFind)
            {
                tempDt.ImportRow(dr);
            }
        }

        if (saveCsvFile.ShowDialog() == DialogResult.OK)
        {
            string filename = saveCsvFile.FileName;
            CsvStreamWriter csw = new CsvStreamWriter(filename, System.Text.Encoding.Unicode);
            csw.QuoteMark(TBCSV.getQuoteMark());
            csw.AddData(tempDt, 1);
            csw.FirstRowQuote = 2;
            csw.Save();
        }
        else
        {
            MessageBox.Show("没保存");
        }

        // The copied first row is excluded from the reported count.
        MessageBox.Show("共生成" + (tempDt.Rows.Count - 1) + "条数据");
    }
    catch (Exception e1)
    {
        MessageBox.Show(e1.Message);
    }
}
/// <summary>
/// Click handler: exports the rows of <c>duplicateDt</c> for which
/// <c>isSameRecord</c> finds no counterpart in <c>mydt</c>, keyed by name
/// (column 0), by price (column 7) or by both, depending on <c>chkName</c> and
/// <c>chkPrice</c>.
/// </summary>
/// <remarks>
/// When neither check box is ticked no rows are exported. Exceptions are not caught
/// here. The closing summary message is shown whether or not the user saved the file.
/// </remarks>
/// <param name="sender">Control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button4_Click(object sender, EventArgs e)
{
    if (mydt == null || mydt.Rows.Count == 0)
    {
        MessageBox.Show("请先导入基础csv数据文件!");
        return;
    }

    if (duplicateDt == null || duplicateDt.Rows.Count == 0)
    {
        MessageBox.Show("请导入需要去除的重复的csv文件!");
        // This return was missing: a null table crashed the handler on the next line.
        return;
    }

    DataTable tempDt = duplicateDt.Clone();

    bool byName = chkName.Checked;
    bool byPrice = chkPrice.Checked;

    // The lookup key does not change between rows, so configure it once.
    // NOTE(review): assigning PrimaryKey throws when the key columns of mydt are
    // not unique - confirm the imported data guarantees that.
    if (byName && byPrice)
    {
        mydt.PrimaryKey = new DataColumn[] { mydt.Columns[0], mydt.Columns[7] };
    }
    else if (byName)
    {
        mydt.PrimaryKey = new DataColumn[] { mydt.Columns[0] };
    }
    else if (byPrice)
    {
        mydt.PrimaryKey = new DataColumn[] { mydt.Columns[7] };
    }

    foreach (DataRow dr in duplicateDt.Rows)
    {
        object[] key;
        if (byName && byPrice)
        {
            // The key is (column 0, column 7), so the lookup values must come from
            // the same columns; the old code passed dr[1] as the second value.
            key = new object[] { dr[0].ToString(), dr[7].ToString() };
        }
        else if (byName)
        {
            key = new object[] { dr[0].ToString() };
        }
        else if (byPrice)
        {
            key = new object[] { dr[7].ToString() };
        }
        else
        {
            // No criterion selected: nothing is exported.
            continue;
        }

        if (!isSameRecord(mydt, key))
        {
            tempDt.ImportRow(dr);
        }
    }

    if (saveCsvFile.ShowDialog() == DialogResult.OK)
    {
        string filename = saveCsvFile.FileName;
        CsvStreamWriter csw = new CsvStreamWriter(filename, System.Text.Encoding.Unicode);
        csw.QuoteMark(TBCSV.getQuoteMark());
        csw.AddData(tempDt, 1);
        csw.FirstRowQuote = 2;
        csw.Save();
    }
    else
    {
        MessageBox.Show("没保存");
    }

    // One row is subtracted from the count (presumably the header row - confirm).
    MessageBox.Show("共生成" + (tempDt.Rows.Count - 1) + "条数据");
}