/// <summary>
/// Extracts every segment stored in the HTML document.
/// </summary>
/// <remarks>
/// Each <c>csoftph</c> placeholder element is first replaced by a text node holding the
/// value of its <c>text</c> attribute. Afterwards every <c>//body/div</c> element is
/// converted into a <see cref="CellSegment"/> built from its inner HTML and its
/// <c>ShtName</c>, <c>CellPosition</c>, <c>EndWhiteSpaceSign</c> and <c>SegmentIndex</c> attributes.
/// </remarks>
/// <returns>
/// One <see cref="CellSegment"/> per matched <c>//body/div</c> element, in the order the
/// nodes are returned by the query; an empty list when no element matches.
/// </returns>
public List<CellSegment> GetAllData()
{
    // Restore the original content of the csoftph placeholder tags first.
    HtmlNodeCollection placeholders = hdoc.DocumentNode.SelectNodes("//csoftph");
    if (placeholders != null)
    {
        foreach (HtmlNode placeholder in placeholders)
        {
            // A placeholder without a "text" attribute is restored as empty text instead of
            // failing with a NullReferenceException.
            string restoredText = placeholder.GetAttributeValue("text", "");
            HtmlTextNode textNode = hdoc.CreateTextNode(restoredText);
            placeholder.ParentNode.ReplaceChild(textNode, placeholder);
        }
    }

    List<CellSegment> csList = new List<CellSegment>();

    // SelectNodes yields null (not an empty collection) when nothing matches,
    // so a document without any body/div must be handled explicitly.
    HtmlNodeCollection divNodes = hdoc.DocumentNode.SelectNodes(@"//body/div");
    if (divNodes == null)
    {
        return csList;
    }

    // Extract the data of each div.
    foreach (HtmlNode node in divNodes)
    {
        CellSegment cs = new CellSegment(
            node.InnerHtml,
            node.GetAttributeValue("ShtName", ""),
            node.GetAttributeValue("CellPosition", ""));
        cs.EndWhiteSpaceSign = node.GetAttributeValue("EndWhiteSpaceSign", "");
        // A missing attribute falls back to "0"; a non-numeric value still throws FormatException.
        cs.SegmentIndex = Convert.ToInt32(node.GetAttributeValue("SegmentIndex", "0"));
        csList.Add(cs);
    }

    return csList;
}
/// <summary>
/// "Manual segmentation" button: replaces the selected list entry with one segment per
/// line of the edit box.
/// </summary>
/// <remarks>
/// Nothing happens when the edit box holds at most one line or when no list entry is
/// selected. The end-of-segment sign of the replaced entry is carried over to the last
/// of the new segments.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btn_ManalSegment_Click(object sender, EventArgs e)
{
    // Snapshot the lines once so the count used for selection below always matches the
    // number of segments that were inserted.
    string[] lines = rtb_SelText.Lines;
    if (lines.Length <= 1)
    {
        return;
    }

    // Indexing SelectedIndices[0] throws when nothing is selected.
    if (lv_data.SelectedIndices.Count == 0)
    {
        return;
    }

    int idx = lv_data.SelectedIndices[0];
    var selectedItem = lv_data.SelectedItems[0];
    string shtName = selectedItem.SubItems[0].Text;
    string cellPosition = selectedItem.SubItems[1].Text;
    string endSign = selectedItem.SubItems[3].Text;

    List<CellSegment> repList = new List<CellSegment>(lines.Length);
    foreach (string line in lines)
    {
        repList.Add(new CellSegment(line, shtName, cellPosition));
    }

    // Only the last replacement keeps the end sign of the original entry.
    repList[repList.Count - 1].EndWhiteSpaceSign = endSign;

    this.DataList.RemoveAt(idx);
    this.DataList.InsertRange(idx, repList);
    LoadDataList();

    // Select the last of the new segments.
    // NOTE(review): assumes LoadDataList() rebuilds lv_data from DataList - confirm.
    int lastNewIndex = idx + lines.Length - 1;
    if (lastNewIndex < lv_data.Items.Count)
    {
        lv_data.Items[lastNewIndex].Selected = true;
    }

    // Scroll a little past the new segments, but never past the end of the list:
    // ListView.EnsureVisible throws ArgumentOutOfRangeException for an index >= Items.Count.
    int revealIndex = Math.Min(idx + lines.Length + 1, lv_data.Items.Count - 1);
    if (revealIndex >= 0)
    {
        lv_data.EnsureVisible(revealIndex);
    }

    lv_data.Focus();
}
/// <summary>
/// Splits a segment using a regular expression as the separator.
/// </summary>
/// <param name="mainSeg">The segment whose text is split.</param>
/// <param name="ptn">Regular expression pattern that marks the separators.</param>
/// <returns>
/// A list holding a clone of <paramref name="mainSeg"/> when the pattern does not match its
/// text; otherwise one new segment per piece returned by <c>Regex.Split</c>, with the
/// original end sign restored on the last piece.
/// </returns>
public static List<CellSegment> SegmentSplitByRegExp(CellSegment mainSeg, string ptn)
{
    string trailingSign = mainSeg.EndWhiteSpaceSign;
    Regex separator = new Regex(ptn);
    List<CellSegment> pieces = new List<CellSegment>();

    if (separator.IsMatch(mainSeg.Text))
    {
        foreach (string part in separator.Split(mainSeg.Text))
        {
            pieces.Add(new CellSegment(part, mainSeg.ShtName, mainSeg.CellPosition));
        }

        // Restore the end sign of the original segment on the final piece.
        pieces[pieces.Count - 1].EndWhiteSpaceSign = trailingSign;
    }
    else
    {
        // Nothing to split: hand back an independent copy of the input.
        pieces.Add(mainSeg.Clone());
    }

    return pieces;
}
/// <summary>
/// Creates a copy of this <see cref="CellSegment"/>.
/// </summary>
/// <returns>
/// A new instance whose <c>Text</c>, <c>ShtName</c>, <c>CellPosition</c>,
/// <c>EndWhiteSpaceSign</c> and <c>SegmentIndex</c> equal those of this instance.
/// </returns>
public CellSegment Clone()
{
    return new CellSegment
    {
        Text = this.Text,
        ShtName = this.ShtName,
        CellPosition = this.CellPosition,
        EndWhiteSpaceSign = this.EndWhiteSpaceSign,
        SegmentIndex = this.SegmentIndex
    };
}
/// <summary>
/// "Merge segments" button: joins the selected list entries into a single segment.
/// </summary>
/// <remarks>
/// Requires at least two selected entries that belong to the same cell and have
/// consecutive indices; otherwise a message is shown (or nothing happens) and the data is
/// left untouched. The text of every selected entry is concatenated together with its end
/// sign, where the textual signs <c>\n</c> and <c>\t</c> are turned into the real characters.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btn_MergeText_Click(object sender, EventArgs e)
{
    if (lv_data.SelectedItems.Count <= 1)
    {
        return;
    }

    if (!InSameCell())
    {
        MessageBox.Show("选择的项目不在同一个单元格内,无法合并。");
        return;
    }

    if (!IsContinuosIndices())
    {
        MessageBox.Show("不是连续索引,无法合并。");
        return;
    }

    int idx = lv_data.SelectedIndices[0];
    int selectedCount = lv_data.SelectedItems.Count;
    string shtName = lv_data.SelectedItems[0].SubItems[0].Text;
    string cellPosition = lv_data.SelectedItems[0].SubItems[1].Text;

    string newTxt = "";
    foreach (ListViewItem lvi in lv_data.SelectedItems)
    {
        string endSign = lvi.SubItems[3].Text;

        // The list shows line breaks and tabs in escaped form; convert them back.
        if (endSign == "\\n")
        {
            endSign = "\n";
        }
        else if (endSign == "\\t")
        {
            endSign = "\t";
        }

        newTxt += lvi.SubItems[4].Text + endSign;
    }

    CellSegment cs = new CellSegment(newTxt, shtName, cellPosition);

    // Replace the selected entries with the merged one.
    this.DataList.RemoveRange(idx, selectedCount);
    this.DataList.Insert(idx, cs);
    LoadDataList();

    // NOTE(review): assumes LoadDataList() rebuilds lv_data from DataList - confirm.
    if (idx < lv_data.Items.Count)
    {
        lv_data.Items[idx].Selected = true;

        // Reveal the row after the merged one when it exists. Clamp to the last row because
        // ListView.EnsureVisible throws ArgumentOutOfRangeException for an index >= Items.Count,
        // which happened whenever the merged entry became the last row.
        lv_data.EnsureVisible(Math.Min(idx + 1, lv_data.Items.Count - 1));
    }

    lv_data.Focus();
}
/// <summary>
/// Merges segments by sheet name and cell position so that each cell ends up as exactly
/// one segment.
/// </summary>
/// <param name="dataList">The segments to merge.</param>
/// <param name="addTail">
/// String appended after every non-blank segment while concatenating. Nothing is appended
/// by default; for English text a space can be passed.
/// </param>
/// <returns>
/// One merged <see cref="CellSegment"/> per distinct sheet/cell pair. Its text is the
/// concatenation of the segment texts (each followed by its end sign converted through
/// <c>StaticValues.WSText2Symbol</c>), ordered by <c>SegmentIndex</c>; its end sign is
/// that of the last segment in that order.
/// </returns>
public List<CellSegment> DataMergeByCellPosition(List<CellSegment> dataList, string addTail = "")
{
    // Group the segments by "sheet/cell".
    Dictionary<string, List<CellSegment>> csDic = new Dictionary<string, List<CellSegment>>();
    foreach (CellSegment cs in dataList)
    {
        string keyText = cs.ShtName + "/" + cs.CellPosition;

        List<CellSegment> bucket;
        if (!csDic.TryGetValue(keyText, out bucket))
        {
            bucket = new List<CellSegment>();
            csDic.Add(keyText, bucket);
        }

        bucket.Add(cs);
    }

    List<CellSegment> csList = new List<CellSegment>();
    foreach (KeyValuePair<string, List<CellSegment>> kvp in csDic)
    {
        // Order by SegmentIndex with a STABLE sort: List<T>.Sort is unstable, so segments
        // sharing the same SegmentIndex could be reordered and the merged text scrambled.
        // OrderBy keeps equal elements in their original (input) order.
        List<CellSegment> subList = kvp.Value
            .OrderBy(seg => seg, new CellSegmentComparer(CellSegmentComapreBy.SegmentIndex))
            .ToList();

        string shtName = subList[0].ShtName;
        string cellPos = subList[0].CellPosition;
        string endSign = subList[subList.Count - 1].EndWhiteSpaceSign;

        string texts = "";
        foreach (CellSegment cs in subList)
        {
            texts += cs.Text + StaticValues.WSText2Symbol(cs.EndWhiteSpaceSign);

            // The tail is only added after segments that carry visible text; a null text
            // counts as blank instead of raising a NullReferenceException.
            if (!string.IsNullOrWhiteSpace(cs.Text))
            {
                texts += addTail;
            }
        }

        CellSegment mergedCS = new CellSegment(texts, shtName, cellPos);
        mergedCS.EndWhiteSpaceSign = endSign;
        csList.Add(mergedCS);
    }

    return csList;
}
/// <summary>
/// "Automatic segmentation" button: splits every segment of the data list by the signs
/// entered in the sign text box and reloads the list view.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btn_AutoSegment_Click(object sender, EventArgs e)
{
    string signList = tb_Signs.Text;
    List<CellSegment> splitResult = new List<CellSegment>();

    // Split each existing segment and collect the pieces in order.
    for (int i = 0; i < this.DataList.Count; i++)
    {
        List<CellSegment> pieces = CellSegment.SegmentSplitBySigns(this.DataList[i], signList);
        splitResult.AddRange(pieces);
    }

    // Swap the old data for a cloned copy of the split result.
    this.DataList.Clear();
    this.DataList = CellSegment.ListClone(splitResult);
    splitResult.Clear();

    LoadDataList();
}
/// <summary>
/// Splits a segment by several signs, applied one after another.
/// </summary>
/// <param name="seg">The segment to split.</param>
/// <param name="signText">Comma-separated list of signs to split by.</param>
/// <returns>
/// The segments that remain after the pieces have been split by every sign in turn,
/// starting from a clone of <paramref name="seg"/>.
/// </returns>
public static List<CellSegment> SegmentSplitBySigns(CellSegment seg, string signText)
{
    List<CellSegment> current = new List<CellSegment>();
    current.Add(seg.Clone());

    List<CellSegment> buffer = new List<CellSegment>();

    foreach (string sign in signText.Split(','))
    {
        // Split every piece produced so far by the current sign.
        foreach (CellSegment piece in current)
        {
            buffer.AddRange(SegmentSplitBySingleSign(piece, sign));
        }

        current = ListClone(buffer);
        buffer.Clear();
    }

    return current;
}
/// <summary>
/// "Regular expression segmentation" button: splits every segment of the data list by the
/// pattern entered in the regular expression text box and reloads the list view.
/// </summary>
/// <remarks>
/// Nothing happens when the pattern is blank. When the pattern is rejected with an
/// <see cref="ArgumentException"/> (as the Regex constructor does for an invalid pattern),
/// a message is shown and the data list is left unchanged.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btn_RegxSegment_Click(object sender, EventArgs e)
{
    string ptn = tb_RegExpText.Text;
    if (string.IsNullOrEmpty(ptn.Trim()))
    {
        return;
    }

    List<CellSegment> tmpList = new List<CellSegment>();
    try
    {
        foreach (CellSegment cs in this.DataList)
        {
            tmpList.AddRange(CellSegment.SegmentSplitByRegExp(cs, ptn));
        }
    }
    catch (ArgumentException ex)
    {
        // The pattern is typed by the user; an invalid one must not crash the handler.
        // DataList has not been touched yet, so simply report and bail out.
        MessageBox.Show("正则表达式无效:" + ex.Message);
        return;
    }

    this.DataList.Clear();
    this.DataList = CellSegment.ListClone(tmpList);
    tmpList.Clear();

    LoadDataList();
}
/// <summary>
/// Splits a segment by a single sign.
/// </summary>
/// <remarks>
/// The sign is first converted with <c>StaticValues.WSText2Symbol</c>. When the converted
/// sign is a line break or a tab, it is stored as the end sign of every piece except the
/// last; for any other sign, <paramref name="orgSign"/> is appended to the text of those
/// pieces instead. The last piece always receives the end sign of the original segment.
/// </remarks>
/// <param name="mainSeg">The segment whose text is split.</param>
/// <param name="orgSign">The sign to split by, in its textual form.</param>
/// <returns>
/// The resulting pieces; a list holding only a clone of <paramref name="mainSeg"/> when
/// the sign is empty, the segment has no text, or the text does not contain the sign.
/// </returns>
public static List<CellSegment> SegmentSplitBySingleSign(CellSegment mainSeg, string orgSign)
{
    string lastSign = mainSeg.EndWhiteSpaceSign;
    List<CellSegment> csList = new List<CellSegment>();
    string sign = StaticValues.WSText2Symbol(orgSign);

    // An empty sign can never split anything, yet string.Contains("") is always true, so
    // the segment used to be rebuilt from scratch (dropping the fields only Clone copies,
    // such as SegmentIndex). A null sign or null text used to throw. All these cases now
    // return an untouched copy, just like the "sign not found" case.
    if (string.IsNullOrEmpty(sign) || mainSeg.Text == null || !mainSeg.Text.Contains(sign))
    {
        csList.Add(mainSeg.Clone());
        return csList;
    }

    string[] splitTexts = mainSeg.Text.Split(new string[] { sign }, StringSplitOptions.None);

    // Line breaks and tabs are kept as end signs; any other sign stays part of the text.
    bool signIsWhiteSpace = sign == "\n" || sign == "\t";

    for (int i = 0; i < splitTexts.Length - 1; i++)
    {
        CellSegment cs;
        if (signIsWhiteSpace)
        {
            cs = new CellSegment(splitTexts[i], mainSeg.ShtName, mainSeg.CellPosition);
            cs.EndWhiteSpaceSign = orgSign;
        }
        else
        {
            cs = new CellSegment(splitTexts[i] + orgSign, mainSeg.ShtName, mainSeg.CellPosition);
        }

        csList.Add(cs);
    }

    // The final piece inherits the end sign of the original segment.
    CellSegment lastCS = new CellSegment(splitTexts[splitTexts.Length - 1], mainSeg.ShtName, mainSeg.CellPosition);
    lastCS.EndWhiteSpaceSign = lastSign;
    csList.Add(lastCS);

    return csList;
}