Exemple #1
0
 /// <summary>
 /// Runs every configured extraction strategy over the document. Results are
 /// gathered in <c>CandidateWord</c>, which is emptied before anything runs.
 /// A strategy is skipped when its feature list is empty.
 /// </summary>
 /// <param name="root">Root node of the parsed document.</param>
 public void Extract(MyRootHtmlNode root)
 {
     // Drop whatever a previous run collected.
     CandidateWord.Clear();

     // Leading keyword list.
     if (LeadingColonKeyWordList.Length != 0)
     {
         ExtractByColonKeyWord(root);
     }

     // Trailing keyword list.
     if (TrailingWordList.Length != 0)
     {
         ExtractByTrailingKeyWord(root);
     }

     // Text wrapped in a pair of marks.
     if (MarkFeature.Length != 0)
     {
         ExtractByMarkFeature(root);
     }

     // Text between a start string and an end string.
     if (StartEndFeature.Length != 0)
     {
         ExtractByStartEndStringFeature(root);
     }

     // Regular-expression search.
     if (RegularExpressFeature.Length != 0)
     {
         ExtractByRegularExpressFeature(root);
     }
 }
Exemple #2
0
 /// <summary>
 /// Mark-wrapped extraction: for every configured mark feature, takes the
 /// values <c>RegularTool.GetMultiValueBetweenMark</c> returns for the feature's
 /// start/end marks and keeps those that satisfy the optional inner
 /// prefix/suffix constraints.
 /// </summary>
 /// <param name="root">Root node of the parsed document.</param>
 void ExtractByMarkFeature(MyRootHtmlNode root)
 {
     foreach (var feature in MarkFeature)
     {
         Func<String, List<String>> extractor = content =>
         {
             var accepted = new List<String>();
             foreach (var inner in RegularTool.GetMultiValueBetweenMark(content, feature.MarkStartWith, feature.MarkEndWith))
             {
                 // A null prefix or suffix means "no constraint on that side".
                 bool matches =
                     (feature.InnerStartWith == null || inner.StartsWith(feature.InnerStartWith)) &&
                     (feature.InnerEndWith == null || inner.EndsWith(feature.InnerEndWith));
                 if (matches)
                 {
                     accepted.Add(inner);
                 }
             }
             return accepted;
         };
         SearchNormalContent(root, extractor);
     }
 }
Exemple #3
0
    /// <summary>
    /// Finds where a keyword occurs in the non-table content of a document.
    /// </summary>
    /// <param name="KeyWord">Text to look for.</param>
    /// <param name="root">Root node of the parsed document.</param>
    /// <returns>
    /// One entry per content node that contains the keyword, holding the keyword,
    /// the node's <c>PositionId</c> and the index of the first occurrence inside
    /// the node's content.
    /// </returns>
    public static List<LocAndValue<String>> FindWordLoc(string KeyWord, MyRootHtmlNode root)
    {
        var hits = new List<LocAndValue<String>>();

        foreach (var section in root.Children)
        {
            // A paragraph's content nodes already include embedded list items,
            // so there is no separate pass over them.
            foreach (var node in section.Children)
            {
                // TableId == -1 marks a node that is not part of a table.
                if (node.TableId != -1)
                {
                    continue;
                }

                int firstIndex = node.Content.IndexOf(KeyWord);
                if (firstIndex == -1)
                {
                    continue;
                }

                var hit = new LocAndValue<String>();
                hit.Value    = KeyWord;
                hit.Loc      = node.PositionId;
                hit.StartIdx = firstIndex;
                hits.Add(hit);
            }
        }
        return hits;
    }
Exemple #4
0
    /// <summary>
    /// Repairs placeholder cells in the document's tables, in two passes.
    /// Each table entry is a string of the form <c>"tableId,row,col|value"</c>.
    /// Pass 1: when a cell holds a known company full or short name, every cell of
    /// the same table and column whose value equals <c>strNullValue</c> is given
    /// that name. Pass 2: a null cell outside row 1 becomes a row-span placeholder
    /// (<c>strRowSpanValue</c>) when the cell directly above or below it in the
    /// same column already is one.
    /// </summary>
    /// <param name="root">Document whose <c>TableList</c> is updated in place.</param>
    /// <param name="doc">Announcement supplying the company name list.</param>
    public static void FixNullValue(MyRootHtmlNode root, AnnouceDocument doc)
    {
        var fullNames  = doc.companynamelist.Select(c => c.secFullName).Distinct().ToList();
        var shortNames = doc.companynamelist.Select(c => c.secShortName).Distinct().ToList();

        // Pass 1: spread company names down their column.
        // Tables are looked up by the keys 1..Count.
        for (int tableId = 1; tableId <= root.TableList.Count; tableId++)
        {
            var cells = root.TableList[tableId];
            for (int srcIdx = 0; srcIdx < cells.Count; srcIdx++)
            {
                var srcParts = cells[srcIdx].Split("|");
                var srcPos   = srcParts[0].Split(",");
                var name     = srcParts[1].Replace(" ", "");
                var srcCol   = int.Parse(srcPos[2]);
                if (!fullNames.Contains(name) && !shortNames.Contains(name))
                {
                    continue;
                }

                for (int dstIdx = 0; dstIdx < cells.Count; dstIdx++)
                {
                    var dstParts = cells[dstIdx].Split("|");
                    var dstValue = dstParts[1];
                    var dstCol   = int.Parse(dstParts[0].Split(",")[2]);
                    if (dstValue.Equals(strNullValue) && srcCol == dstCol)
                    {
                        cells[dstIdx] = dstParts[0] + "|" + name;
                    }
                }
            }
        }

        // Pass 2: propagate row-span placeholders to adjacent null cells.
        for (int tableId = 1; tableId <= root.TableList.Count; tableId++)
        {
            var cells = root.TableList[tableId];
            for (int idx = 0; idx < cells.Count; idx++)
            {
                var parts = cells[idx].Split("|");
                var pos   = parts[0].Split(",");
                var value = parts[1].Replace(" ", "");
                var row   = int.Parse(pos[1]);
                var col   = int.Parse(pos[2]);
                if (value != strNullValue || row == 1)
                {
                    continue;
                }

                // If the row above, or failing that the row below, is a row span,
                // this cell is treated as part of the row span too.
                bool besideRowSpan =
                    cells.Contains($"{tableId},{row - 1},{col}|{strRowSpanValue}") ||
                    cells.Contains($"{tableId},{row + 1},{col}|{strRowSpanValue}");
                if (besideRowSpan)
                {
                    cells[idx] = parts[0] + "|" + strRowSpanValue;
                }
            }
        }
    }
Exemple #5
0
 /// <summary>
 /// Regular-expression extraction: for every configured feature, passes each
 /// content string to <c>RegularExFinder</c> and collects the <c>Value</c> of
 /// every result as a candidate.
 /// </summary>
 /// <param name="root">Root node of the parsed document.</param>
 void ExtractByRegularExpressFeature(MyRootHtmlNode root)
 {
     foreach (var pattern in RegularExpressFeature)
     {
         // Extraction callback: content text in, candidate strings out.
         List<String> FindMatches(String content)
         {
             var values = new List<String>();
             foreach (var found in RegularExFinder(0, content, pattern))
             {
                 values.Add(found.Value);
             }
             return values;
         }

         SearchNormalContent(root, FindMatches);
     }
 }
Exemple #6
0
    /// <summary>
    /// Determines the project name of a contract announcement. Tries, in order:
    /// text following one of the leading keywords, quoted text ending in
    /// "标段"/"标", and finally <c>BussinessLogic.GetProjectName</c>.
    /// Candidates from the first two steps are skipped when their
    /// <c>TrimEnglish</c> form is longer than
    /// <c>ContractTraning.MaxContractNameLength</c>.
    /// </summary>
    /// <param name="root">Root node of the parsed document.</param>
    /// <returns>The first acceptable candidate, or an empty string.</returns>
    static string GetProjectName(MyRootHtmlNode root)
    {
        // Step 1: text that follows one of these keywords.
        var Extractor = new EntityProperty
        {
            LeadingWordList = new string[] { "项目名称:", "工程名称:", "中标项目:", "合同标的:", "工程内容:" }
        };
        Extractor.Extract(root);
        foreach (var candidate in Extractor.CandidateWord)
        {
            var trimmed = candidate.Trim();
            if (EntityWordAnlayzeTool.TrimEnglish(trimmed).Length <= ContractTraning.MaxContractNameLength)
            {
                Program.Logger.WriteLine("项目名称候补词(关键字):[" + candidate + "]");
                return trimmed;
            }
        }

        // Step 2: quoted text ending in a bid-section suffix. The same extractor
        // is reused, so its leading keyword list is still set.
        var sectionFeature = new EntityProperty.struMarkFeature
        {
            MarkStartWith = "“",
            MarkEndWith   = "”",
            InnerEndWith  = "标段"
        };
        var bidFeature = new EntityProperty.struMarkFeature
        {
            MarkStartWith = "“",
            MarkEndWith   = "”",
            InnerEndWith  = "标"
        };
        Extractor.MarkFeature = new EntityProperty.struMarkFeature[] { sectionFeature, bidFeature };
        Extractor.Extract(root);
        foreach (var candidate in Extractor.CandidateWord)
        {
            var trimmed = candidate.Trim();
            if (EntityWordAnlayzeTool.TrimEnglish(trimmed).Length <= ContractTraning.MaxContractNameLength)
            {
                Program.Logger.WriteLine("工程名称候补词(《XXX》):[" + candidate + "]");
                return trimmed;
            }
        }

        // Step 3: fall back to the shared business-logic lookup.
        var fallback = BussinessLogic.GetProjectName(root);
        return fallback.Count > 0 ? fallback[0] : "";
    }
Exemple #7
0
    /// <summary>
    /// Builds one contract record from a parsed announcement: both parties, the
    /// contract amount, the contract name and the project name.
    /// </summary>
    /// <param name="node">Root node of the parsed document.</param>
    /// <param name="Id">Announcement id stored on the record.</param>
    /// <returns>The populated contract record.</returns>
    static struContract ExtractSingle(MyRootHtmlNode node, String Id)
    {
        var trailingwords = new string[] { "(以下简称", "(下称", "(简称", "(以下简称", "(下称", "(简称" };

        // Cuts a party name at the first "hereinafter referred to as" style marker
        // and strips spaces. Brackets are not normalised for now.
        string CleanPartyName(string name)
        {
            foreach (var marker in trailingwords)
            {
                if (name.Contains(marker))
                {
                    name = Utility.GetStringBefore(name, marker);
                }
            }
            return name.Replace(" ", "");
        }

        var contract = new struContract();

        // Announcement id.
        contract.id = Id;

        // Party A, then party B.
        contract.JiaFang = CleanPartyName(GetJiaFang(node));
        contract.YiFang  = CleanPartyName(GetYiFang(node));

        // Amount: the lower limit is set to the same value as the upper limit.
        contract.ContractMoneyUpLimit   = Normalizer.NormalizerMoney(GetMoney(node), "");
        contract.ContractMoneyDownLimit = contract.ContractMoneyUpLimit;

        // Contract name.
        contract.ContractName = GetContractName(node).Replace(" ", "").ToLower();

        // Project name. When none was found and the contract name ends in
        // "项目合同", the contract name minus its last two characters is used.
        var projectName = GetProjectName(node);
        if (projectName == "" && contract.ContractName.EndsWith("项目合同"))
        {
            projectName = contract.ContractName.Substring(0, contract.ContractName.Length - 2);
        }
        contract.ProjectName = projectName.Replace(" ", "").ToLower();

        return contract;
    }
Exemple #8
0
    /// <summary>
    /// Collects post-change shareholding records from the document's tables.
    /// Every cell equal to "合计持有股份" yields one record built from the same
    /// row: column 1 is the holder name, column 5 the share count and column 6
    /// the percentage.
    /// </summary>
    /// <param name="root">Root node of the parsed document.</param>
    /// <returns>
    /// One record per matching cell. <c>Count</c> and <c>Percent</c> are empty
    /// strings when no number is found; the percentage is multiplied by 0.01.
    /// </returns>
    static List <struHoldAfter> GetHolderAfter(MyRootHtmlNode root)
    {
        var HoldList = new List <struHoldAfter>();

        // The pattern only accepts '.' as decimal separator, so numbers are parsed
        // and formatted with the invariant culture instead of the current one.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        // First unsigned decimal number in a cell, e.g. "123" or "12.5".
        var numberPattern = new Regex(@"\d+\.?\d*");

        foreach (var table in root.TableList)
        {
            var mt = new HTMLTable(table.Value);
            for (int RowIdx = 0; RowIdx < mt.RowCount; RowIdx++)
            {
                for (int ColIdx = 0; ColIdx < mt.ColumnCount; ColIdx++)
                {
                    // CellValue is called with 1-based row and column numbers.
                    if (mt.CellValue(RowIdx + 1, ColIdx + 1) != "合计持有股份")
                    {
                        continue;
                    }

                    var HolderName = mt.CellValue(RowIdx + 1, 1);

                    // Share count (column 5).
                    var strHolderCnt = Normalizer.NormalizeNumberResult(mt.CellValue(RowIdx + 1, 5));
                    var countMatch   = numberPattern.Match(strHolderCnt);
                    var HolderCnt    = "";
                    if (countMatch.Success)
                    {
                        if (mt.CellValue(2, 5).Contains("万"))
                        {
                            // Cell (2, 5) contains "万" (ten thousand), so scale
                            // the figure up to single shares.
                            HolderCnt = (double.Parse(countMatch.Value, invariant) * 10_000).ToString(invariant);
                        }
                        else
                        {
                            HolderCnt = countMatch.Value;
                        }
                    }

                    // Percentage (column 6), multiplied by 0.01.
                    var percentMatch  = numberPattern.Match(mt.CellValue(RowIdx + 1, 6));
                    var HolderPercent = "";
                    if (percentMatch.Success)
                    {
                        HolderPercent = (double.Parse(percentMatch.Value, invariant) * 0.01).ToString(invariant);
                    }

                    HoldList.Add(new struHoldAfter()
                    {
                        Name = HolderName, Count = HolderCnt, Percent = HolderPercent, Used = false
                    });
                }
            }
        }
        return(HoldList);
    }
Exemple #9
0
    /// <summary>
    /// Determines the contract name of an announcement. Tries, in order: a title
    /// in 《》 ending in "合同" or "确认书", text following "合同名称:", and
    /// text between "签署了" and "合同". The first candidate of the first
    /// step that yields any is returned.
    /// </summary>
    /// <param name="root">Root node of the parsed document.</param>
    /// <returns>
    /// The chosen candidate (spaces removed only in the last step), or an empty
    /// string when nothing was found.
    /// </returns>
    static string GetContractName(MyRootHtmlNode root)
    {
        // Step 1: a title wrapped in 《》.
        var contractTitle = new ExtractProperty.struMarkFeature
        {
            MarkStartWith = "《",
            MarkEndWith   = "》",
            InnerEndWith  = "合同"
        };
        var confirmationTitle = new ExtractProperty.struMarkFeature
        {
            MarkStartWith = "《",
            MarkEndWith   = "》",
            InnerEndWith  = "确认书"
        };
        var Extractor = new ExtractProperty
        {
            MarkFeature = new ExtractProperty.struMarkFeature[] { contractTitle, confirmationTitle }
        };
        Extractor.Extract(root);
        foreach (var candidate in Extractor.CandidateWord)
        {
            Program.Logger.WriteLine("合同名称候补词(《XXX》):[" + candidate + "]");
            return candidate;
        }

        // Step 2: text following the keyword.
        Extractor = new ExtractProperty
        {
            LeadingWordList = new string[] { "合同名称:" }
        };
        Extractor.Extract(root);
        foreach (var candidate in Extractor.CandidateWord)
        {
            Program.Logger.WriteLine("合同名称候补词(关键字):[" + candidate + "]");
            return candidate;
        }

        // Step 3: text between a start string and an end string.
        var starts = new string[] { "签署了" };
        var ends   = new string[] { "合同" };
        Extractor = new ExtractProperty
        {
            StartEndFeature = Utility.GetStartEndStringArray(starts, ends)
        };
        Extractor.Extract(root);
        foreach (var candidate in Extractor.CandidateWord)
        {
            Program.Logger.WriteLine("合同候补词(合同):[" + candidate + "]");
            return candidate.Replace(" ", "");
        }

        return "";
    }
Exemple #10
0
 /// <summary>
 /// Start/end-string extraction: for every configured feature, collects the
 /// values <c>RegularTool.GetMultiValueBetweenString</c> returns for the
 /// feature's start and end strings. Besides feeding the shared candidate list,
 /// every non-empty result is recorded in <c>StartEndResultList</c> together
 /// with the feature that produced it.
 /// </summary>
 /// <param name="root">Root node of the parsed document.</param>
 void ExtractByStartEndStringFeature(MyRootHtmlNode root)
 {
     StartEndResultList.Clear();
     foreach (var word in StartEndFeature)
     {
         Func <String, List <String> > ExtractMethod = (x) =>
         {
             var list = RegularTool.GetMultiValueBetweenString(x, word.StartWith, word.EndWith);
             if (list != null && list.Count > 0)
             {
                 // Store the per-feature detail; without this the list cleared
                 // above would always stay empty.
                 // NOTE(review): assumes StartEndResultList holds
                 // struStartEndResultDetail items — confirm against the declaration.
                 var detail = new struStartEndResultDetail();
                 detail.Feature       = word;
                 detail.CandidateWord = list;
                 StartEndResultList.Add(detail);
             }
             return(list);
         };
         SearchNormalContent(root, ExtractMethod);
     }
 }
Exemple #11
0
 /// <summary>
 /// Trailing-keyword extraction: for every configured trailing word, takes what
 /// <c>Utility.GetStringBefore</c> returns for each content string and keeps it
 /// as a candidate unless it is the empty string.
 /// </summary>
 /// <param name="root">Root node of the parsed document.</param>
 void ExtractByTrailingKeyWord(MyRootHtmlNode root)
 {
     foreach (var word in TrailingWordList)
     {
         Func <String, List <String> > ExtractMethod = (x) =>
         {
             var strlist = new List <String>();
             // Compute the prefix once and reuse it for both the check and the result.
             var before = Utility.GetStringBefore(x, word);
             if (before != String.Empty)
             {
                 strlist.Add(before);
             }
             return(strlist);
         };
         SearchNormalContent(root, ExtractMethod);
     }
 }
Exemple #12
0
    /// <summary>
    /// Builds one contract record from a parsed announcement: both parties, the
    /// contract and project names, the contract amount and the union members.
    /// </summary>
    /// <param name="root">Root node of the parsed document.</param>
    /// <param name="Id">Announcement id stored on the record.</param>
    /// <returns>The populated contract record.</returns>
    static struContract ExtractSingle(MyRootHtmlNode root, String Id)
    {
        var contract = new struContract();

        // Announcement id.
        contract.id = Id;

        // Party A.
        contract.JiaFang = AfterProcessJiaFang(GetJiaFang(root)).NormalizeTextResult();

        // Party B: cut at each trailing marker found in the name.
        // Brackets are not normalised for now.
        var partyB = GetYiFang(root);
        foreach (var marker in StockChange.CompanyNameTrailingwords)
        {
            if (partyB.Contains(marker))
            {
                partyB = Utility.GetStringBefore(partyB, marker);
            }
        }
        contract.YiFang = partyB.NormalizeTextResult();

        // Contract name.
        contract.ContractName = GetContractName(root).NormalizeTextResult();

        // Project name. When none was found and the contract name ends in
        // "项目合同", the contract name minus its last two characters is used.
        var projectName = GetProjectName(root);
        if (projectName == "" && contract.ContractName.EndsWith("项目合同"))
        {
            projectName = contract.ContractName.Substring(0, contract.ContractName.Length - 2);
        }
        contract.ProjectName = projectName.NormalizeTextResult();

        // Amount: the lower limit is set to the same value as the upper limit.
        contract.ContractMoneyUpLimit   = Normalizer.NormalizerMoney(GetMoney(root), "");
        contract.ContractMoneyDownLimit = contract.ContractMoneyUpLimit;

        // Union (consortium) members.
        contract.UnionMember = GetUnionMember(root, contract.YiFang);
        return contract;
    }
Exemple #13
0
 /// <summary>
 /// Applies an extraction callback to the content of every non-table node and
 /// appends whatever it returns to <c>CandidateWord</c>.
 /// </summary>
 /// <param name="root">Root node of the parsed document.</param>
 /// <param name="ExtractMethod">Callback: content text in, candidate strings out.</param>
 void SearchNormalContent(MyRootHtmlNode root, Func<String, List<String>> ExtractMethod)
 {
     foreach (var section in root.Children)
     {
         // A paragraph's content nodes already include embedded list items,
         // so there is no separate pass over them.
         foreach (var node in section.Children)
         {
             // TableId == -1 marks a node that is not part of a table.
             if (node.TableId != -1)
             {
                 continue;
             }

             var found = ExtractMethod(node.Content);
             if (found.Count == 0)
             {
                 continue;
             }
             CandidateWord.AddRange(found);
         }
     }
 }
Exemple #14
0
    /// <summary>
    /// Tells whether a keyword occurs in any non-table content node of the document.
    /// </summary>
    /// <param name="KeyWord">Text to look for.</param>
    /// <param name="root">Root node of the parsed document.</param>
    /// <returns><c>true</c> as soon as one node contains the keyword; otherwise <c>false</c>.</returns>
    public static bool HasWord(string KeyWord, MyRootHtmlNode root)
    {
        foreach (var paragrah in root.Children)
        {
            // A paragraph's content nodes already include embedded list items,
            // so there is no separate pass over them.
            foreach (var contentNode in paragrah.Children)
            {
                // TableId == -1 marks a node that is not part of a table.
                if (contentNode.TableId == -1 && contentNode.Content.IndexOf(KeyWord) != -1)
                {
                    return(true);
                }
            }
        }
        return(false);
    }
Exemple #15
0
    /// <summary>
    /// Lists the non-table content nodes in which a keyword occurs.
    /// </summary>
    /// <param name="KeyWord">Text to look for.</param>
    /// <param name="root">Root node of the parsed document.</param>
    /// <returns>
    /// The <c>PositionId</c> of every matching node, in document order. A node is
    /// listed once no matter how often the keyword occurs in it.
    /// </returns>
    public static List<int> FindWordCnt(string KeyWord, MyRootHtmlNode root)
    {
        var positions = new List<int>();

        foreach (var section in root.Children)
        {
            // A paragraph's content nodes already include embedded list items,
            // so there is no separate pass over them.
            foreach (var node in section.Children)
            {
                // TableId == -1 marks a node that is not part of a table.
                if (node.TableId != -1)
                {
                    continue;
                }
                if (node.Content.IndexOf(KeyWord) == -1)
                {
                    continue;
                }
                positions.Add(node.PositionId);
            }
        }
        return positions;
    }
Exemple #16
0
    /// <summary>
    /// Counts the non-table content nodes that contain a keyword.
    /// </summary>
    /// <param name="KeyWord">Text to look for.</param>
    /// <param name="root">Root node of the parsed document.</param>
    /// <returns>
    /// The number of matching nodes; a node counts once no matter how often the
    /// keyword occurs in it.
    /// </returns>
    public int FindWordCnt(string KeyWord, MyRootHtmlNode root)
    {
        int matches = 0;

        foreach (var section in root.Children)
        {
            // A paragraph's content nodes already include embedded list items,
            // so there is no separate pass over them.
            foreach (var node in section.Children)
            {
                // TableId == -1 marks a node that is not part of a table.
                bool isPlainText = node.TableId == -1;
                if (isPlainText && node.Content.IndexOf(KeyWord) != -1)
                {
                    matches += 1;
                }
            }
        }
        return matches;
    }
Exemple #17
0
    /// <summary>
    /// Determines party A of a contract announcement. Tries, in order: text
    /// following one of the leading keywords, text between a "received from"
    /// style start and a "sent / bid award" style end, and text between "与" and
    /// "签署"/"签订". The first candidate of the first step that yields any is
    /// returned.
    /// </summary>
    /// <param name="root">Root node of the parsed document.</param>
    /// <returns>The chosen candidate, or an empty string when nothing was found.</returns>
    static string GetJiaFang(MyRootHtmlNode root)
    {
        var Extractor = new ExtractProperty();

        // Step 1: text following one of these keywords.
        Extractor.LeadingWordList = new string[] { "发包人:", "招标人:", "业主方:", "业主:", "甲方:", "采购人:", "采购人名称:" };
        Extractor.Extract(root);
        foreach (var item in Extractor.CandidateWord)
        {
            Program.Logger.WriteLine("甲方候补词(关键字):[" + item + "]");
            return(item);
        }

        // Step 2: tender wording.
        Extractor = new ExtractProperty();
        var StartArray = new string[] { "业主", "收到", "接到" };
        var EndArray   = new string[] { "发来", "发出", "的中标" };

        Extractor.StartEndFeature = Utility.GetStartEndStringArray(StartArray, EndArray);
        Extractor.Extract(root);
        foreach (var item in Extractor.CandidateWord)
        {
            // Strip "业主" (owner) from the candidate and return the cleaned
            // value rather than the raw candidate.
            var JiaFang = item.Replace("业主", "");
            Program.Logger.WriteLine("甲方候补词(招标):[" + JiaFang + "]");
            return(JiaFang);
        }

        // Step 3: contract wording.
        Extractor  = new ExtractProperty();
        StartArray = new string[] { "与", "与业主" };
        EndArray   = new string[] { "签署", "签订" };
        Extractor.StartEndFeature = Utility.GetStartEndStringArray(StartArray, EndArray);
        Extractor.Extract(root);
        foreach (var item in Extractor.CandidateWord)
        {
            Program.Logger.WriteLine("甲方候补词(合同):[" + item + "]");
            return(item);
        }
        return("");
    }
Exemple #18
0
    /// <summary>
    /// Determines the project name of a contract announcement. Tries, in order:
    /// text following "项目名称:", quoted text ending in "标段"/"标", and
    /// finally <c>BussinessLogic.GetProjectName</c>.
    /// </summary>
    /// <param name="root">Root node of the parsed document.</param>
    /// <returns>
    /// The first candidate found (spaces removed only for the keyword step), or
    /// an empty string.
    /// </returns>
    static string GetProjectName(MyRootHtmlNode root)
    {
        // Step 1: text following the keyword.
        var Extractor = new ExtractProperty
        {
            LeadingWordList = new string[] { "项目名称:" }
        };
        Extractor.Extract(root);
        foreach (var candidate in Extractor.CandidateWord)
        {
            Program.Logger.WriteLine("项目名称候补词(关键字):[" + candidate + "]");
            return candidate.Replace(" ", "");
        }

        // Step 2: quoted text ending in a bid-section suffix. The same extractor
        // is reused, so its leading keyword list is still set.
        var sectionFeature = new ExtractProperty.struMarkFeature
        {
            MarkStartWith = "“",
            MarkEndWith   = "”",
            InnerEndWith  = "标段"
        };
        var bidFeature = new ExtractProperty.struMarkFeature
        {
            MarkStartWith = "“",
            MarkEndWith   = "”",
            InnerEndWith  = "标"
        };
        Extractor.MarkFeature = new ExtractProperty.struMarkFeature[] { sectionFeature, bidFeature };
        Extractor.Extract(root);
        foreach (var candidate in Extractor.CandidateWord)
        {
            Program.Logger.WriteLine("工程名称候补词(《XXX》):[" + candidate + "]");
            return candidate;
        }

        // Step 3: fall back to the shared business-logic lookup.
        var fallback = BussinessLogic.GetProjectName(root);
        return fallback.Count > 0 ? fallback[0] : "";
    }
Exemple #19
0
 /// <summary>
 /// Searches every table of the document for cells whose text contains a
 /// keyword and appends one <c>CellInfo</c> per hit to <c>CandidateCell</c>.
 /// </summary>
 /// <param name="root">Root node of the parsed document.</param>
 /// <param name="keyword">Text a cell must contain.</param>
 /// <param name="exclude">
 /// Optional text; cells containing it are ignored. Empty (the default) or
 /// null disables the filter.
 /// </param>
 public void searchKeyWordAtTable(MyRootHtmlNode root, string keyword, string exclude = "")
 {
     foreach (var content in root.TableList)
     {
         // NOTE(review): assumes each table is a list of
         // "tableId,row,column|value" records — confirm against the
         // TableList declaration.
         foreach (var record in content.Value)
         {
             if (record == null)
             {
                 continue;
             }

             // Split at the first '|' only, so a '|' inside the value survives.
             var separator = record.IndexOf('|');
             if (separator < 0)
             {
                 continue;
             }
             var pos   = record.Substring(0, separator);
             var value = record.Substring(separator + 1);

             if (value.IndexOf(keyword) == -1)
             {
                 continue;
             }
             if (!String.IsNullOrEmpty(exclude) && value.IndexOf(exclude) != -1)
             {
                 continue;
             }

             var coords = pos.Split(',');
             if (coords.Length < 3)
             {
                 continue;   // malformed position, cannot locate the cell
             }

             // coords[0] (the table id) is not stored: only RawData, Row and
             // Column are visible on CellInfo here.
             // NOTE(review): if CellInfo has a table-id member, set it here.
             var cellInfo = new CellInfo();
             cellInfo.RawData = value;
             cellInfo.Row     = int.Parse(coords[1]);
             cellInfo.Column  = int.Parse(coords[2]);
             CandidateCell.Add(cellInfo);
         }
     }
 }
Exemple #20
0
    /// <summary>
    /// Line adjustment: splits an HTML content node that is really two text-file
    /// lines joined together.
    /// </summary>
    /// <remarks>
    /// Example: the text file has the lines "招标人:国家电网公司" and
    /// "注册资本:2000亿元" while the HTML node holds both joined. When a node
    /// equals line i + line i+1 and both lines contain ":", the node is reduced
    /// to line i and a new node holding line i+1 is appended to the paragraph.
    /// </remarks>
    /// <param name="root">Document whose content nodes are adjusted in place.</param>
    /// <param name="txtfilename">Path of the text file for the same document.</param>
    static void AdjustTwoLine(MyRootHtmlNode root, string txtfilename)
    {
        var TxtList = new List <String>();

        // Read the whole file first; the using block closes the reader even when
        // reading or normalising throws.
        using (var SR = new StreamReader(txtfilename))
        {
            string rawLine;
            while ((rawLine = SR.ReadLine()) != null)
            {
                string TxtLine = Normalizer.NormalizeItemListNumber(rawLine.Trim());
                // The HTML content has no spaces whereas the PDF text does.
                TxtLine = TxtLine.Replace(" ", String.Empty);
                if (!String.IsNullOrEmpty(TxtLine))
                {
                    TxtList.Add(TxtLine);
                }
            }
        }

        // NOTE(review): the scan starts at index 1, so the first text line is never
        // the first half of a pair — confirm this is intended.
        for (int i = 1; i < TxtList.Count - 1; i++)
        {
            var firstLine  = TxtList[i];
            var secondLine = TxtList[i + 1];

            // Only "key:value" style lines are candidates; checking once here avoids
            // walking the document for pairs that can never match.
            if (!firstLine.Contains(":") || !secondLine.Contains(":"))
            {
                continue;
            }

            var CombineLine = firstLine + secondLine;
            foreach (var paragrah in root.Children)
            {
                // A paragraph's content nodes already include embedded list items,
                // so there is no separate pass over them.
                for (int pid = 0; pid < paragrah.Children.Count; pid++)
                {
                    var contentNode = paragrah.Children[pid];
                    if (contentNode.Content.Equals(CombineLine))
                    {
                        contentNode.Content = firstLine;
                        // NOTE(review): the second half is appended at the end of the
                        // paragraph, not inserted right after the node — confirm.
                        paragrah.Children.Add(new MyHtmlNode()
                        {
                            Content = secondLine
                        });
                    }
                }
            }
        }
    }
Exemple #21
0
 /// <summary>
 /// Search driver: applies an extraction callback to the content of every
 /// non-table node and stores each returned string in <c>CandidateWord</c>
 /// together with the <c>PositionId</c> of the node it came from.
 /// </summary>
 /// <param name="root">Root node of the parsed document.</param>
 /// <param name="ExtractMethod">Callback: content text in, candidate strings out.</param>
 void SearchNormalContent(MyRootHtmlNode root, Func<String, List<String>> ExtractMethod)
 {
     foreach (var section in root.Children)
     {
         // A paragraph's content nodes already include embedded list items,
         // so there is no separate pass over them.
         foreach (var node in section.Children)
         {
             // TableId == -1 marks a node that is not part of a table.
             if (node.TableId != -1)
             {
                 continue;
             }

             foreach (var text in ExtractMethod(node.Content))
             {
                 var located = new LocAndValue<String>();
                 located.Loc   = node.PositionId;
                 located.Value = text;
                 CandidateWord.Add(located);
             }
         }
     }
 }
Exemple #22
0
    /// <summary>
    /// Parses an announcement HTML file into a <c>MyRootHtmlNode</c> tree: one
    /// child per second-level <c>div</c> (a paragraph), each with its own content
    /// nodes. Sibling links and position ids are assigned, and the collected
    /// table and detail-item dictionaries are attached to the root.
    /// </summary>
    /// <param name="htmlfile">Path of the HTML file to load.</param>
    /// <param name="TextFileName">
    /// Path of the text file for the same document; when it exists it is used to
    /// adjust item lists and joined lines.
    /// </param>
    /// <returns>
    /// The populated root node. When the file has no <c>div[@type='pdf']</c>
    /// element, a new root node is returned with nothing assigned to it.
    /// </returns>
    public MyRootHtmlNode Anlayze(string htmlfile, string TextFileName)
    {
        TableId        = 0;
        DetailItemId   = 0;
        TableList      = new Dictionary <int, List <String> >();
        DetailItemList = new Dictionary <int, List <String> >();
        // Normally the first element is a DIV such as <div title="..." type="pdf">.
        var doc = new HtmlDocument();

        doc.Load(htmlfile);
        var node = doc.DocumentNode.SelectNodes("//div[@type='pdf']");
        var root = new MyRootHtmlNode();

        if (node == null)
        {
            return(root);
        }
        // A missing title attribute yields an empty title instead of a null dereference.
        root.Content = node[0].Attributes.Contains("title")
                       ? node[0].Attributes["title"].Value
                       : String.Empty;

        // Every second-level element is expected to be a paragraph.
        foreach (var SecondLayerNode in node[0].ChildNodes)
        {
            // Only div children are processed; #text nodes and the like are skipped.
            if (SecondLayerNode.Name == "div")
            {
                var title = String.Empty;
                if (SecondLayerNode.Attributes.Contains("title"))
                {
                    title = SecondLayerNode.Attributes["title"].Value;
                }
                else
                {
                    title = SecondLayerNode.InnerText;
                }
                var secondNode = new MyHtmlNode();
                secondNode.Content = title;
                AnlayzeParagraph(SecondLayerNode, secondNode);
                FindContentWithList(secondNode.Children);

                // Link neighbouring content nodes in both directions.
                for (int i = 0; i < secondNode.Children.Count - 1; i++)
                {
                    secondNode.Children[i].NextBrother = secondNode.Children[i + 1];
                }

                for (int i = 1; i < secondNode.Children.Count; i++)
                {
                    secondNode.Children[i].PreviewBrother = secondNode.Children[i - 1];
                }
                root.Children.Add(secondNode);
            }
        }

        // Correct special characters in paragraph titles and contents.
        foreach (var x1 in root.Children)
        {
            x1.Content = CorrectHTML(x1.Content);
            foreach (var x2 in x1.Children)
            {
                x2.Content = CorrectHTML(x2.Content);
            }
        }

        // Inspect the last paragraph: when its final sentence ends with a date that
        // is preceded by other text, split it into a text node and a date node.
        // A document without paragraphs has nothing to inspect.
        if (root.Children.Count > 0)
        {
            var LastParagrah = root.Children.Last();

            if (LastParagrah.Children.Count > 0)
            {
                // Sample document: major contract 1232951.
                var LastSentence = LastParagrah.Children.Last().Content;
                var sentence     = DateUtility.ConvertUpperToLower(LastSentence);
                var dateList     = DateUtility.GetDate(sentence);
                if (dateList.Count > 0)
                {
                    var strDate = dateList.Last();
                    if (!String.IsNullOrEmpty(strDate))
                    {
                        var strBefore = Utility.GetStringBefore(sentence, strDate);
                        if (!String.IsNullOrEmpty(strBefore))
                        {
                            // The text part is cut four characters before the last
                            // "年" (year) of the original sentence. Check the index
                            // before touching the node list so a short or year-less
                            // sentence is left unchanged instead of throwing.
                            var yearIdx = LastSentence.LastIndexOf("年");
                            if (yearIdx >= 4)
                            {
                                // Replace the tail node by its two halves.
                                LastParagrah.Children.RemoveAt(LastParagrah.Children.Count - 1);
                                strBefore = LastSentence.Substring(0, yearIdx - 4);
                                LastParagrah.Children.Add(new MyHtmlNode()
                                {
                                    Content = strBefore
                                });
                                LastParagrah.Children.Add(new MyHtmlNode()
                                {
                                    Content = strDate
                                });
                            }
                        }
                    }
                }
            }
        }

        // Adjust the tree using the text file's content.
        if (File.Exists(TextFileName))
        {
            // Not actually needed for anything other than major contracts.
            AdjustItemList(root, TextFileName);
            AdjustTwoLine(root, TextFileName);
        }

        // Link neighbouring paragraphs in both directions.
        for (int i = 0; i < root.Children.Count - 1; i++)
        {
            root.Children[i].NextBrother = root.Children[i + 1];
        }
        for (int i = 1; i < root.Children.Count; i++)
        {
            root.Children[i].PreviewBrother = root.Children[i - 1];
        }

        // Position ids: paragraph i gets i + 1, its j-th child (i + 1) * 100 + j + 1.
        for (int i = 0; i < root.Children.Count; i++)
        {
            root.Children[i].PositionId = i + 1;
            for (int j = 0; j < root.Children[i].Children.Count; j++)
            {
                root.Children[i].Children[j].PositionId = (i + 1) * 100 + j + 1;
            }
        }
        root.TableList      = TableList;
        root.DetailItemList = DetailItemList;
        return(root);
    }
Exemple #23
0
    /// <summary>
    /// Determines party A of a contract announcement. Tries, in order: text
    /// following one of the leading keywords, tender wording (text between a
    /// "received from" style start and a "sent / bid award" style end), and
    /// contract wording (text between "与" and "签署"/"签订"). Every candidate
    /// is trimmed and post-processed; it is skipped when its <c>TrimEnglish</c>
    /// form exceeds <c>ContractTraning.MaxJiaFangLength</c> or when it has fewer
    /// than three characters.
    /// </summary>
    /// <param name="root">Root node of the parsed document.</param>
    /// <returns>The first acceptable candidate, or an empty string.</returns>
    static string GetJiaFang(MyRootHtmlNode root)
    {
        // Length filter shared by all three steps. The second test uses the actual
        // length in order to rule out all-English candidates.
        bool HasPlausibleLength(string name)
        {
            if (EntityWordAnlayzeTool.TrimEnglish(name).Length > ContractTraning.MaxJiaFangLength)
            {
                return false;
            }
            return name.Length >= 3;
        }

        // Step 1: text following one of these keywords.
        var Extractor = new EntityProperty
        {
            LeadingWordList = new string[] {
                "甲方:",
                "发包人:", "发包单位:", "发包方:", "发包机构:", "发包人名称:",
                "招标人:", "招标单位:", "招标方:", "招标机构:", "招标人名称:",
                "业主:", "业主单位:", "业主方:", "业主机构:", "业主名称:",
                "采购单位:", "采购人:", "采购人名称:", "采购方:"
            }
        };
        Extractor.Extract(root);
        foreach (var candidate in Extractor.CandidateWord)
        {
            var partyA = AfterProcessJiaFang(candidate.Trim());
            if (HasPlausibleLength(partyA))
            {
                Program.Logger.WriteLine("甲方候补词(关键字):[" + partyA + "]");
                return partyA;
            }
        }

        // Step 2: tender wording.
        var starts = new string[] { "招标单位", "业主", "收到", "接到" };
        var ends   = new string[] { "发来", "发出", "的中标" };
        Extractor = new EntityProperty
        {
            StartEndFeature = Utility.GetStartEndStringArray(starts, ends)
        };
        Extractor.Extract(root);
        foreach (var candidate in Extractor.CandidateWord)
        {
            var partyA = AfterProcessJiaFang(candidate.Trim()).Replace("业主", "").Trim();
            if (HasPlausibleLength(partyA))
            {
                Program.Logger.WriteLine("甲方候补词(招标):[" + partyA + "]");
                return partyA;
            }
        }

        // Step 3: contract wording.
        starts = new string[] { "与", "与业主" };
        ends   = new string[] { "签署", "签订" };
        Extractor = new EntityProperty
        {
            StartEndFeature = Utility.GetStartEndStringArray(starts, ends)
        };
        Extractor.Extract(root);
        foreach (var candidate in Extractor.CandidateWord)
        {
            var partyA = AfterProcessJiaFang(candidate.Trim()).Replace("业主", "").Trim();
            if (HasPlausibleLength(partyA))
            {
                Program.Logger.WriteLine("甲方候补词(合同):[" + partyA + "]");
                return partyA;
            }
        }

        return "";
    }
Exemple #24
0
    /// <summary>
    /// Picks a contract name from the document by trying three extraction strategies in order
    /// and returning the first candidate that is short enough.
    /// </summary>
    /// <param name="root">Parsed document handed to each extractor.</param>
    /// <returns>
    /// The trimmed first candidate whose length (after <c>EntityWordAnlayzeTool.TrimEnglish</c>)
    /// does not exceed <c>ContractTraning.MaxContractNameLength</c>; an empty string when no strategy yields one.
    /// </returns>
    static string GetContractName(MyRootHtmlNode root)
    {
        // Strategy 1: text wrapped in book-title marks 《…》 that ends with "合同" or "确认书".
        var endsWithContract = new EntityProperty.struMarkFeature
        {
            MarkStartWith = "《",
            MarkEndWith   = "》",
            InnerEndWith  = "合同"
        };
        var endsWithConfirmation = new EntityProperty.struMarkFeature
        {
            MarkStartWith = "《",
            MarkEndWith   = "》",
            InnerEndWith  = "确认书"
        };
        var markExtractor = new EntityProperty();
        markExtractor.MarkFeature = new[] { endsWithContract, endsWithConfirmation };
        markExtractor.Extract(root);
        foreach (var candidate in markExtractor.CandidateWord)
        {
            var name = candidate.Trim();
            if (EntityWordAnlayzeTool.TrimEnglish(name).Length <= ContractTraning.MaxContractNameLength)
            {
                // The log shows the untrimmed candidate; the trimmed value is what gets returned.
                Program.Logger.WriteLine("合同名称候补词(《XXX》):[" + candidate + "]");
                return name;
            }
        }

        // Strategy 2: text following the leading keyword "合同名称:".
        var keywordExtractor = new EntityProperty();
        keywordExtractor.LeadingWordList = new[] { "合同名称:" };
        keywordExtractor.Extract(root);
        foreach (var candidate in keywordExtractor.CandidateWord)
        {
            var name = candidate.Trim();
            if (EntityWordAnlayzeTool.TrimEnglish(name).Length <= ContractTraning.MaxContractNameLength)
            {
                Program.Logger.WriteLine("合同名称候补词(关键字):[" + candidate + "]");
                return name;
            }
        }

        // Strategy 3: start/end string feature built from "签署了" and "合同".
        var startWords = new[] { "签署了" };
        var endWords   = new[] { "合同" };
        var rangeExtractor = new EntityProperty();
        rangeExtractor.StartEndFeature = Utility.GetStartEndStringArray(startWords, endWords);
        rangeExtractor.Extract(root);
        foreach (var candidate in rangeExtractor.CandidateWord)
        {
            var name = candidate.Trim();
            if (EntityWordAnlayzeTool.TrimEnglish(name).Length <= ContractTraning.MaxContractNameLength)
            {
                Program.Logger.WriteLine("合同候补词(合同):[" + candidate + "]");
                return name;
            }
        }

        // Nothing acceptable was found.
        return "";
    }
Exemple #25
0
    /// <summary>
    /// Loads an announcement HTML file and converts it into a <c>MyRootHtmlNode</c> tree:
    /// the root carries the title of the first <c>div[@type='pdf']</c>, and each child
    /// <c>div</c> of that element becomes one paragraph node.
    /// </summary>
    /// <remarks>
    /// Resets the static <c>TableId</c>, <c>DetailItemId</c>, <c>TableList</c> and
    /// <c>DetailItemList</c> before parsing and attaches the two lists to the returned root.
    /// When a text file exists at the path obtained by replacing "html" with "txt",
    /// <c>Adjust</c> is applied to the tree.
    /// </remarks>
    /// <param name="htmlfile">Path of the HTML file to load.</param>
    /// <returns>
    /// The parsed tree. When the file has no <c>div[@type='pdf']</c> element, an empty root
    /// (no content, no children) is returned instead of throwing.
    /// </returns>
    public static MyRootHtmlNode Anlayze(string htmlfile)
    {
        // Reset the per-document static accumulators.
        TableId        = 0;
        DetailItemId   = 0;
        TableList      = new Dictionary <int, List <String> >();
        DetailItemList = new Dictionary <int, List <String> >();
        // The first element is normally a DIV such as <div title="关于重大合同中标的公告" type="pdf">
        var doc = new HtmlDocument();

        doc.Load(htmlfile);
        var node = doc.DocumentNode.SelectNodes("//div[@type='pdf']");
        var root = new MyRootHtmlNode();

        // SelectNodes yields null when nothing matches; indexing it would throw.
        if (node == null || node.Count == 0)
        {
            root.TableList      = TableList;
            root.DetailItemList = DetailItemList;
            return(root);
        }

        root.Content = node[0].Attributes["title"].Value;
        // Every second-level element of interest is a paragraph div.
        foreach (var SecondLayerNode in node[0].ChildNodes)
        {
            // Skip #text and any other non-div nodes.
            if (SecondLayerNode.Name != "div")
            {
                continue;
            }
            // Prefer the title attribute; fall back to the element's text.
            var title = SecondLayerNode.Attributes.Contains("title")
                        ? SecondLayerNode.Attributes["title"].Value
                        : SecondLayerNode.InnerText;
            var secondNode = new MyHtmlNode();
            secondNode.Content = title;
            AnlayzeParagraph(SecondLayerNode, secondNode);
            FindContentWithList(secondNode.Children);
            // Link each sentence to its neighbours in both directions.
            for (int i = 0; i < secondNode.Children.Count - 1; i++)
            {
                secondNode.Children[i].NextBrother        = secondNode.Children[i + 1];
                secondNode.Children[i + 1].PreviewBrother = secondNode.Children[i];
            }
            root.Children.Add(secondNode);
        }

        // Inspect the last sentence of the last paragraph: when it is "<text><date>",
        // split it into two nodes (example announcement: 重大合同 1232951).
        // Guarded because Last() throws on a document without any paragraph div.
        if (root.Children.Count > 0)
        {
            var LastParagrah = root.Children.Last();
            if (LastParagrah.Children.Count > 0)
            {
                var LastSentence = LastParagrah.Children.Last().Content;
                var sentence     = Utility.ConvertUpperDateToLittle(LastSentence);
                var strDate      = RegularTool.GetDate(sentence);
                if (!String.IsNullOrEmpty(strDate))
                {
                    var strBefore = Utility.GetStringBefore(sentence, strDate);
                    // The cut is made on the ORIGINAL sentence: four characters before the
                    // last "年" (the year digits). Only split when that position exists;
                    // otherwise Substring would throw after the node had already been removed.
                    var yearPos = LastSentence.LastIndexOf("年", StringComparison.Ordinal);
                    if (!String.IsNullOrEmpty(strBefore) && yearPos >= 4)
                    {
                        // Replace the trailing node by "text before the date" + "date".
                        // NOTE(review): the two new nodes are not linked through
                        // NextBrother/PreviewBrother, and the preceding sibling still points
                        // at the removed node — confirm whether consumers rely on those links here.
                        LastParagrah.Children.RemoveAt(LastParagrah.Children.Count - 1);
                        LastParagrah.Children.Add(new MyHtmlNode()
                        {
                            Content = LastSentence.Substring(0, yearPos - 4)
                        });
                        LastParagrah.Children.Add(new MyHtmlNode()
                        {
                            Content = strDate
                        });
                    }
                }
            }
        }

        // Link the paragraphs to their neighbours in both directions.
        for (int i = 0; i < root.Children.Count - 1; i++)
        {
            root.Children[i].NextBrother        = root.Children[i + 1];
            root.Children[i + 1].PreviewBrother = root.Children[i];
        }
        root.TableList      = TableList;
        root.DetailItemList = DetailItemList;

        // Replace() rewrites every "html" occurrence in the path, not only the extension
        // (presumably relying on parallel html/ and txt/ directories — confirm).
        var txtfilename = htmlfile.Replace("html", "txt");

        if (File.Exists(txtfilename))
        {
            Adjust(root, txtfilename);
        }
        return(root);
    }
Exemple #26
0
    /// <summary>
    /// Runs <c>RegularExFinder</c> over every content node of every paragraph and gathers the hits.
    /// </summary>
    /// <param name="KeyWord">Regular-expression feature passed unchanged to <c>RegularExFinder</c>.</param>
    /// <param name="root">Document whose paragraphs are scanned in order.</param>
    /// <returns>All results, in paragraph order and then content-node order.</returns>
    public static List <LocAndValue <String> > FindRegularExpressLoc(struRegularExpressFeature KeyWord, MyRootHtmlNode root)
    {
        var hits = new List <LocAndValue <String> >();

        for (int p = 0; p < root.Children.Count; p++)
        {
            // Only the paragraph's direct content nodes are visited: their content
            // already includes embedded lists, so those are not scanned a second time.
            var sentences = root.Children[p].Children;
            for (int s = 0; s < sentences.Count; s++)
            {
                var sentence = sentences[s];
                var found    = RegularExFinder(sentence.PositionId, sentence.Content, KeyWord);
                hits.AddRange(found);
            }
        }
        return hits;
    }
Exemple #27
0
    /// <summary>
    /// Repairs item-list sentences that were split in the HTML tree, using the
    /// companion text file as the reference for what each item line should contain.
    /// </summary>
    /// <remarks>
    /// Each text line is normalised with <c>Normalizer.NormalizeItemListNumber</c> and stripped
    /// of spaces; only lines that then begin with an opening angle bracket are used.
    /// A non-table node whose content is a proper prefix of such a line is overwritten with the line.
    /// If node + next sibling together start with the line and the sibling has no colon,
    /// the concatenation is used instead and the sibling is blanked.
    /// </remarks>
    /// <param name="root">Document tree whose content nodes are updated in place.</param>
    /// <param name="txtfilename">Path of the text file to read.</param>
    static void AdjustItemList(MyRootHtmlNode root, string txtfilename)
    {
        // using closes the reader even when a helper throws part-way through the file.
        using (var SR = new StreamReader(txtfilename))
        {
            while (!SR.EndOfStream)
            {
                string TxtLine = Normalizer.NormalizeItemListNumber(SR.ReadLine().Trim());
                TxtLine = TxtLine.Replace(" ", String.Empty);    // the HTML has no spaces, the PDF text does
                // Only item lines are used to compensate for split list entries.
                if (!TxtLine.StartsWith("<"))
                {
                    continue;
                }

                foreach (var paragrah in root.Children)
                {
                    // Paragraph content already includes embedded lists, so one level is enough.
                    foreach (var contentNode in paragrah.Children)
                    {
                        // Table nodes are left alone.
                        if (contentNode.TableId != -1)
                        {
                            continue;
                        }
                        var content = contentNode.Content;
                        // An empty string is a prefix of every line: without this guard the
                        // sibling blanked below would immediately be refilled with the merged
                        // line, duplicating it. Null content is skipped for the same reason.
                        if (String.IsNullOrEmpty(content))
                        {
                            continue;
                        }
                        // Only a proper prefix of the text line needs repairing (example: 重大合同 401597).
                        if (!TxtLine.StartsWith(content) || content.Equals(TxtLine))
                        {
                            continue;
                        }

                        // Typical cases:
                        //   Line:         <1>合同名称:天津市公安局南开分局南开区 2016 年视频监控网建设运维服
                        //   Content:      <1>合同名称:
                        //   Next content: 天津市公安局南开分局南开区2016年视频监控网建设运维服务项目建设运维服务项目合同
                        //
                        //   Line:         <1>甲方:山东省临朐县人民政府
                        //   Content:      <1>甲方:
                        //   Next content: 山东省临朐县人民政府地址:临朐县民主路102号
                        var next = contentNode.NextBrother;
                        if (next != null &&
                            !String.IsNullOrEmpty(next.Content) &&
                            !next.Content.StartsWith("<"))
                        {
                            string NextContent = next.Content;
                            var    CombineLine = content + NextContent;
                            // Use the concatenation when it extends the text line and the
                            // sibling holds no further "key:value" pair; otherwise keep the text line.
                            if (CombineLine.StartsWith(TxtLine) && !NextContent.Contains(":"))
                            {
                                // TxtLine stays replaced for the remaining nodes of this pass,
                                // exactly as before.
                                TxtLine      = CombineLine;
                                next.Content = String.Empty;
                            }
                        }
                        contentNode.Content = TxtLine;
                    }
                }
            }
        }
    }
Exemple #28
0
    /// <summary>
    /// Builds the contract record for one announcement: classifies it as a bid award ("中标")
    /// or a contract ("合同"), then fills party A, party B, project name, contract name,
    /// amount and consortium members from the corresponding Get* helpers.
    /// </summary>
    /// <param name="root">Parsed document scanned for the type keywords.</param>
    /// <param name="Id">Announcement id stored in the result and used in the diagnostic message.</param>
    /// <returns>The populated <c>struContract</c>.</returns>
    struContract ExtractSingle(MyRootHtmlNode root, String Id)
    {
        // The first sentence containing either keyword decides the type;
        // within a sentence "中标" is tested before "合同".
        contractType = String.Empty;
        foreach (var paragrah in root.Children)
        {
            foreach (var sentence in paragrah.Children)
            {
                if (sentence.Content.Contains("中标"))
                {
                    contractType = "中标";
                }
                else if (sentence.Content.Contains("合同"))
                {
                    contractType = "合同";
                }
                if (contractType != String.Empty)
                {
                    break;
                }
            }
            if (contractType != String.Empty)
            {
                break;
            }
        }

        if (contractType == String.Empty)
        {
            Console.WriteLine("contractType Null:" + Id);
        }

        // Announcement id.
        var contract = new struContract();
        contract.id = Id;

        // Party A: post-process the full name, then normalise.
        var partyA = CompanyNameLogic.AfterProcessFullName(GetJiaFang()).secFullName;
        partyA = partyA.NormalizeTextResult();
        // When the result is not in Nerlist but 国家电网公司 is, use 国家电网公司
        // (treated as a special organisation that is usually party A).
        if (!Nerlist.Contains(partyA) && Nerlist.Contains("国家电网公司"))
        {
            partyA = "国家电网公司";
        }
        contract.JiaFang = partyA;

        // Party B: same pipeline, then brackets are stripped as the rules require.
        var partyB = CompanyNameLogic.AfterProcessFullName(GetYiFang()).secFullName;
        partyB = partyB.NormalizeTextResult();
        contract.YiFang = RegularTool.TrimBrackets(partyB);

        // Project name.
        var projectName = GetProjectName();
        if (projectName.StartsWith("“") && projectName.EndsWith("”"))
        {
            projectName = projectName.TrimStart('“').TrimEnd('”');
        }
        // NOTE(review): both branches below call GetStringAfter although the marker sits at
        // the end of / inside the name; GetStringBefore looks more plausible — confirm
        // against Utility before changing. Behaviour is kept as-is.
        if (projectName.EndsWith(",签约双方"))
        {
            projectName = Utility.GetStringAfter(projectName, ",签约双方");
        }
        if (projectName.Contains("(以下简称"))
        {
            projectName = Utility.GetStringAfter(projectName, "(以下简称");
        }
        contract.ProjectName = projectName.NormalizeTextResult();

        // Contract name: per the data analysis, the project name is filled for bid awards
        // and the contract name only for contracts.
        if (contractType != "中标")
        {
            var contractName = GetContractName();
            if (contractName.StartsWith("“") && contractName.EndsWith("”"))
            {
                contractName = contractName.TrimStart('“').TrimEnd('”');
            }
            // Drop the book-title marks.
            contractName = contractName.Replace("《", String.Empty);
            contractName = contractName.Replace("》", String.Empty);
            // NOTE(review): same GetStringAfter question as for the project name.
            if (contractName.Contains("(以下简称"))
            {
                contractName = Utility.GetStringAfter(contractName, "(以下简称");
            }
            contract.ContractName = contractName.NormalizeTextResult();
        }
        else
        {
            contract.ContractName = String.Empty;
        }

        // Amount: upper and lower limits receive the same formatted value.
        var money           = GetMoney();
        var formattedAmount = MoneyUtility.Format(money.MoneyAmount, String.Empty);
        contract.ContractMoneyUpLimit   = formattedAmount;
        contract.ContractMoneyDownLimit = formattedAmount;

        // Consortium members: normalised, then brackets stripped as the rules require.
        var unionMember = GetUnionMember(contract.JiaFang, contract.YiFang);
        unionMember = unionMember.NormalizeTextResult();
        contract.UnionMember = RegularTool.TrimBrackets(unionMember);
        return contract;
    }
Exemple #29
0
    /// <summary>
    /// Repairs tables that were split into two (e.g. by a page break) by appending the
    /// records of the second table to the first one and detaching the second table's nodes.
    /// </summary>
    /// <remarks>
    /// Works in two passes. Pass 1 walks table ids 2..N and merges a table into its predecessor
    /// when a first-row record holds the null marker and both tables have the same column count.
    /// Pass 2 looks at content nodes whose next sibling is also a table and merges them when
    /// the second table's first row has a null cell, a cell pair concatenates to a known
    /// company full name, or (for <c>StockChange</c> documents) its first row holds a trade method.
    /// Table records are strings of the form "tableId,row,third|value"
    /// (the third component is presumably the column — confirm against HTMLTable).
    /// </remarks>
    /// <param name="root">Tree whose <c>TableList</c> and node <c>TableId</c>s are modified in place.</param>
    /// <param name="doc">
    /// Owning document; supplies <c>doc.root.TableList</c> for reading, <c>companynamelist</c>
    /// and the runtime type check.
    /// NOTE(review): reads go through <c>doc.root</c> while writes go through <c>root</c> —
    /// presumably the same object; confirm.
    /// </param>
    public static void FixSpiltTable(MyRootHtmlNode root, AnnouceDocument doc)
    {
        // ---- Pass 1: merge table NextTableId into NextTableId - 1 when its first row contains a null cell ----
        for (int NextTableId = 2; NextTableId <= doc.root.TableList.Count; NextTableId++)
        {
            foreach (var item in doc.root.TableList[NextTableId])
            {
                // Position ids of the nodes carrying the two tables; when several nodes
                // share a table id, the last one visited wins. Recomputed for every record.
                var FirstTablePos  = -1;
                var SecondTablePos = -1;
                foreach (var p in root.Children)
                {
                    foreach (var s in p.Children)
                    {
                        if (s.TableId == NextTableId - 1)
                        {
                            FirstTablePos = s.PositionId;
                        }
                        if (s.TableId == NextTableId)
                        {
                            SecondTablePos = s.PositionId;
                        }
                    }
                }

                // Tables more than 200 position units apart are not treated as one split table.
                if (SecondTablePos - FirstTablePos > 200)
                {
                    continue;
                }

                // Parse "tableId,row,third|value".
                var tablerec = item.Split("|");
                var pos      = tablerec[0].Split(",");
                var value    = tablerec[1];
                var row      = int.Parse(pos[1]);
                // A null marker in the first row of the second table signals a continuation.
                if (row == 1 && value == strNullValue)
                {
                    var table     = new HTMLTable(doc.root.TableList[NextTableId - 1]);
                    var nexttable = new HTMLTable(doc.root.TableList[NextTableId]);
                    if (table.ColumnCount != nexttable.ColumnCount)
                    {
                        continue;
                    }
                    // Merge: the second table's rows are renumbered to follow the first table's rows.
                    var offset = table.RowCount;
                    // Re-key every record of the second table and append it to the first.
                    foreach (var Nextitem in root.TableList[NextTableId])
                    {
                        tablerec = Nextitem.Split("|");
                        pos      = tablerec[0].Split(",");
                        value    = tablerec[1];
                        var newtablerec = (NextTableId - 1) + "," + (offset + int.Parse(pos[1])) + "," + pos[2] + "|" + value;
                        root.TableList[NextTableId - 1].Add(newtablerec);
                    }
                    // Empty the second table and detach every node that referenced it.
                    root.TableList[NextTableId].Clear();
                    for (int i = 0; i < root.Children.Count; i++)
                    {
                        for (int j = 0; j < root.Children[i].Children.Count; j++)
                        {
                            var node = root.Children[i].Children[j];
                            if (node.TableId == NextTableId)
                            {
                                node.TableId = -1;
                            }
                        }
                    }
                    // The list being enumerated was just cleared (if doc.root and root share it),
                    // so the enumeration must stop here.
                    break;
                }
            }
        }

        // ---- Pass 2: adjacent table nodes ----
        // 1. Is there a pair of consecutive tables (node and its NextBrother both carry a table)?
        for (int i = 0; i < root.Children.Count; i++)
        {
            for (int j = 0; j < root.Children[i].Children.Count; j++)
            {
                var node = root.Children[i].Children[j];
                if (node.TableId != -1)
                {
                    if (node.NextBrother != null)
                    {
                        if (node.NextBrother.TableId != -1)
                        {
                            var nextnode  = node.NextBrother;
                            var table     = new HTMLTable(root.TableList[node.TableId]);
                            var nexttable = new HTMLTable(root.TableList[nextnode.TableId]);
                            //Console.WriteLine("First  Table:" + table.RowCount + "X" + table.ColumnCount);
                            //Console.WriteLine("Second Table:" + nexttable.RowCount + "X" + nexttable.ColumnCount);
                            // Only tables with the same number of columns can be halves of one table.
                            if (table.ColumnCount != nexttable.ColumnCount)
                            {
                                continue;
                            }
                            //Console.WriteLine("Two Tables Has Same Column Count!");
                            // 2. The second of two consecutive tables often has a null cell in its first row.
                            bool hasnull = false;
                            for (int nullcell = 1; nullcell <= table.ColumnCount; nullcell++)
                            {
                                if (nexttable.CellValue(1, nullcell) == HTMLTable.strNullValue)
                                {
                                    hasnull = true;
                                    break;
                                }
                            }

                            var ComboCompanyName         = "";
                            var ComboCompanyNameColumnNo = -1;
                            // Distinct full names of the companies known for this document.
                            var CompanyFullNameList      = doc.companynamelist.Select((x) => { return(x.secFullName); }).Distinct().ToList();
                            // Can a cell of table A and a cell of table B in the same column be
                            // concatenated into a company full name? Spaces are removed first.
                            int MaxColumn = table.ColumnCount;
                            for (int col = 1; col <= MaxColumn; col++)
                            {
                                int TableAMaxRow = table.RowCount;
                                int TableBMaxRow = nexttable.RowCount;
                                // NOTE(review): rows use '<' while columns use '<=' — if rows are
                                // 1-based up to RowCount, the last row of each table is never
                                // examined; confirm against HTMLTable.RowCount.
                                for (int RowCntA = 1; RowCntA < TableAMaxRow; RowCntA++)
                                {
                                    for (int RowCntB = 1; RowCntB < TableBMaxRow; RowCntB++)
                                    {
                                        var valueA = table.CellValue(RowCntA, col).Replace(" ", "");
                                        var valueB = nexttable.CellValue(RowCntB, col).Replace(" ", "");
                                        if (valueA != "" && valueB != "")
                                        {
                                            var value = valueA + valueB;
                                            if (CompanyFullNameList.Contains(value))
                                            {
                                                ComboCompanyName         = value;
                                                ComboCompanyNameColumnNo = col;
                                                //Console.WriteLine("Found FullName:" + value);
                                                break;
                                            }
                                        }
                                    }
                                    // Propagate the "found" state out of the nested loops.
                                    if (ComboCompanyNameColumnNo != -1)
                                    {
                                        break;
                                    }
                                }
                                if (ComboCompanyNameColumnNo != -1)
                                {
                                    break;
                                }
                            }
                            if (ComboCompanyNameColumnNo != -1)
                            {
                                // Complete the split name. Not every cell may be patched: only
                                // table-A values that are a prefix of the name and table-B values
                                // that are a suffix of it.
                                // NOTE(review): StartsWith/EndsWith also hold for an empty value, so
                                // blank cells would be overwritten too — confirm whether intended.
                                for (int k = 0; k < root.TableList[node.TableId].Count; k++)
                                {
                                    var tablerec = root.TableList[node.TableId][k].Split("|");
                                    var value    = tablerec[1].Replace(" ", "");
                                    // Table A: the cell is the beginning of the company name.
                                    if (ComboCompanyName.StartsWith(value))
                                    {
                                        root.TableList[node.TableId][k] = tablerec[0] + "|" + ComboCompanyName;
                                    }
                                }
                                for (int k = 0; k < root.TableList[nextnode.TableId].Count; k++)
                                {
                                    var tablerec = root.TableList[nextnode.TableId][k].Split("|");
                                    var value    = tablerec[1].Replace(" ", "");
                                    // Table B: the cell is the end of the company name.
                                    if (ComboCompanyName.EndsWith(value))
                                    {
                                        root.TableList[nextnode.TableId][k] = tablerec[0] + "|" + ComboCompanyName;
                                    }
                                }
                            }


                            // Special business rule: share increase/decrease (StockChange) announcements.
                            bool specaillogic = false;
                            var  BuyMethod = new string[] { "集中竞价交易", "竞价交易", "大宗交易", "约定式购回" }.ToList();
                            if (doc.GetType() == typeof(StockChange))
                            {
                                // A second table without a header row: its first row already
                                // contains a trade method.
                                for (int spCell = 1; spCell <= table.ColumnCount; spCell++)
                                {
                                    if (BuyMethod.Contains(nexttable.CellValue(1, spCell)))
                                    {
                                        specaillogic = true;
                                        break;
                                    }
                                }
                            }

                            // Any of the three signals triggers the merge.
                            if (hasnull || ComboCompanyNameColumnNo != -1 || specaillogic)
                            {
                                var offset = table.RowCount;
                                // Re-key every record of the second table (table id of the first,
                                // row shifted by the first table's row count) and append it.
                                foreach (var item in root.TableList[nextnode.TableId])
                                {
                                    var tablerec    = item.Split("|");
                                    var pos         = tablerec[0].Split(",");
                                    var value       = tablerec[1];
                                    var newtablerec = node.TableId + "," + (offset + int.Parse(pos[1])) + "," + pos[2] + "|" + value;
                                    root.TableList[node.TableId].Add(newtablerec);
                                }
                                // Empty the second table and detach its node.
                                root.TableList[nextnode.TableId].Clear();
                                nextnode.TableId = -1;
                                //Console.WriteLine("Found Split Tables!!");
                            }
                        }
                    }
                }
            }
        }
    }