Example #1
0
 /// <summary>
 /// Tallies which column headers sit above cells whose text equals a keyword.
 /// </summary>
 /// <remarks>
 /// Row 1 of every table is read as the header row and rows 2 and below as data.
 /// Each data cell is passed through <c>Transform</c> when one is set; if its
 /// normalized text equals the normalized <paramref name="KeyWord"/>, the counter kept
 /// in <c>TrainingTitleResult</c> under that column's header (spaces removed) is
 /// incremented. Columns whose header is empty are ignored.
 /// </remarks>
 /// <param name="root">Parsed document whose <c>TableList</c> is scanned.</param>
 /// <param name="KeyWord">Cell text to look for.</param>
 public void PutTitleTrainingItem(HTMLEngine.MyRootHtmlNode root, string KeyWord)
 {
     // A document without a table list contributes nothing.
     if (root.TableList == null)
     {
         return;
     }

     // The keyword does not change, so normalize it once instead of once per cell.
     var normalizedKeyWord = KeyWord.NormalizeTextResult();

     foreach (var Table in root.TableList)
     {
         var t = new HTMLTable(Table.Value);
         // Both bounds are inclusive: cells are addressed from 1 and other HTMLTable
         // callers read the cell at index RowCount / ColumnCount. The former "<" bounds
         // left the last row and the last column out of the statistics.
         // NOTE(review): confirm RowCount/ColumnCount are the highest valid indexes.
         for (int RowNo = 2; RowNo <= t.RowCount; RowNo++)
         {
             for (int ColNo = 1; ColNo <= t.ColumnCount; ColNo++)
             {
                 var title = t.CellValue(1, ColNo).Replace(" ", "");
                 if (String.IsNullOrEmpty(title))
                 {
                     continue;
                 }

                 var value = t.CellValue(RowNo, ColNo);
                 if (Transform != null)
                 {
                     value = Transform(value, title);
                 }
                 if (!value.NormalizeTextResult().Equals(normalizedKeyWord))
                 {
                     continue;
                 }

                 // Single lookup: a missing key leaves hits at 0, so the first match stores 1.
                 TrainingTitleResult.TryGetValue(title, out var hits);
                 TrainingTitleResult[title] = hits + 1;
             }
         }
     }
 }
Example #2
0
 /// <summary>
 /// Tallies which column headers sit above cells whose normalized key equals a keyword.
 /// </summary>
 /// <remarks>
 /// Row 1 of every table is read as the header row and rows 2 and below as data.
 /// When <c>NormalizeKey</c> of a data cell equals <c>NormalizeKey</c> of
 /// <paramref name="KeyWord"/>, the counter kept in <c>TrainingTitleResult</c> under
 /// that column's raw header text is incremented.
 /// </remarks>
 /// <param name="root">Parsed document whose <c>TableList</c> is scanned.</param>
 /// <param name="KeyWord">Cell text to look for.</param>
 public static void PutTrainingItem(HTMLEngine.MyRootHtmlNode root, string KeyWord)
 {
     // A document without a table list contributes nothing.
     if (root.TableList == null)
     {
         return;
     }

     // The keyword does not change, so normalize it once instead of once per cell.
     var normalizedKeyWord = KeyWord.NormalizeKey();

     foreach (var Table in root.TableList)
     {
         var t = new HTMLTable(Table.Value);
         // Both bounds are inclusive: cells are addressed from 1 and other HTMLTable
         // callers read the cell at index RowCount / ColumnCount. The former "<" bounds
         // left the last row and the last column out of the statistics.
         // NOTE(review): confirm RowCount/ColumnCount are the highest valid indexes.
         for (int RowNo = 2; RowNo <= t.RowCount; RowNo++)
         {
             for (int ColNo = 1; ColNo <= t.ColumnCount; ColNo++)
             {
                 if (!t.CellValue(RowNo, ColNo).NormalizeKey().Equals(normalizedKeyWord))
                 {
                     continue;
                 }

                 var title = t.CellValue(1, ColNo);
                 // Single lookup: a missing key leaves hits at 0, so the first match stores 1.
                 TrainingTitleResult.TryGetValue(title, out var hits);
                 TrainingTitleResult[title] = hits + 1;
             }
         }
     }
 }
Example #3
0
    /// <summary>
    /// Extracts post-change holdings from every table row that contains a cell equal to "合计持有股份".
    /// </summary>
    /// <remarks>
    /// For such a row, column 1 is taken as the holder name, column 5 as the share count and
    /// column 6 as the percentage. The first number found in the count cell is multiplied by
    /// 10,000 when the cell at row 2, column 5 contains "万"; the first number found in the
    /// percentage cell is multiplied by 0.01. A cell without a number yields an empty string.
    /// </remarks>
    /// <param name="root">Parsed document whose <c>TableList</c> is scanned.</param>
    /// <returns>One entry per matching cell, in table/row/column order; empty when nothing matches.</returns>
    static List <struHoldAfter> GetHolderAfter(MyRootHtmlNode root)
    {
        var HoldList = new List <struHoldAfter>();

        // Built once for the whole scan rather than once per matching cell.
        var numberPattern = new Regex(@"\d+\.?\d*");
        // The pattern only captures '.'-separated decimals, so parsing and formatting must not
        // follow the current culture (a ',' decimal culture would misread or reformat the value).
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        foreach (var table in root.TableList)
        {
            var mt = new HTMLTable(table.Value);
            for (int RowIdx = 0; RowIdx < mt.RowCount; RowIdx++)
            {
                for (int ColIdx = 0; ColIdx < mt.ColumnCount; ColIdx++)
                {
                    if (mt.CellValue(RowIdx + 1, ColIdx + 1) != "合计持有股份")
                    {
                        continue;
                    }

                    var HolderName = mt.CellValue(RowIdx + 1, 1);

                    var strHolderCnt = Normalizer.NormalizeNumberResult(mt.CellValue(RowIdx + 1, 5));
                    var countText    = numberPattern.Match(strHolderCnt).Value;
                    var HolderCnt    = "";
                    if (!String.IsNullOrEmpty(countText))
                    {
                        // A "万" in the header cell means the figures are in units of ten thousand.
                        HolderCnt = mt.CellValue(2, 5).Contains("万")
                            ? (double.Parse(countText, invariant) * 10_000).ToString(invariant)
                            : countText;
                    }

                    var percentText   = numberPattern.Match(mt.CellValue(RowIdx + 1, 6)).Value;
                    var HodlerPercent = "";
                    if (!String.IsNullOrEmpty(percentText))
                    {
                        // Stored as a fraction: 12.5 becomes 0.125.
                        HodlerPercent = (double.Parse(percentText, invariant) * 0.01).ToString(invariant);
                    }

                    HoldList.Add(new struHoldAfter()
                    {
                        Name = HolderName, Count = HolderCnt, Percent = HodlerPercent, Used = false
                    });
                }
            }
        }
        return(HoldList);
    }
Example #4
0
 /// <summary>
 /// Tallies the cell values that appear under a given set of column headers.
 /// </summary>
 /// <remarks>
 /// Row 1 of every table is read as the header row and rows 2 and below as data.
 /// For each data cell whose header (spaces removed) equals an entry of
 /// <paramref name="TitleKeyWord"/>, the counter kept in <c>TrainingValueResult</c> under
 /// the cell's normalized text is incremented once per equal entry. Empty headers and
 /// empty normalized values are ignored.
 /// </remarks>
 /// <param name="root">Parsed document whose <c>TableList</c> is scanned.</param>
 /// <param name="TitleKeyWord">Header texts whose columns are collected.</param>
 public void PutValueTrainingItem(HTMLEngine.MyRootHtmlNode root, List <string> TitleKeyWord)
 {
     // A document without a table list contributes nothing.
     if (root.TableList == null)
     {
         return;
     }

     foreach (var Table in root.TableList)
     {
         var t = new HTMLTable(Table.Value);
         // Both bounds are inclusive: cells are addressed from 1 and other HTMLTable
         // callers read the cell at index RowCount / ColumnCount. The former "<" bounds
         // left the last row and the last column out of the statistics.
         // NOTE(review): confirm RowCount/ColumnCount are the highest valid indexes.
         for (int RowNo = 2; RowNo <= t.RowCount; RowNo++)
         {
             for (int ColNo = 1; ColNo <= t.ColumnCount; ColNo++)
             {
                 var title = t.CellValue(1, ColNo).Replace(" ", "");
                 if (String.IsNullOrEmpty(title))
                 {
                     continue;
                 }

                 var value = t.CellValue(RowNo, ColNo).NormalizeTextResult();
                 if (string.IsNullOrEmpty(value))
                 {
                     continue;
                 }

                 foreach (var key in TitleKeyWord)
                 {
                     if (!title.Equals(key))
                     {
                         continue;
                     }

                     // Single lookup: a missing key leaves hits at 0, so the first match stores 1.
                     TrainingValueResult.TryGetValue(value, out var hits);
                     TrainingValueResult[value] = hits + 1;
                 }
             }
         }
     }
 }
Example #5
0
    /// <summary>
    /// Collects post-change holdings from table rows that contain a cell equal to
    /// "合计持有股份" or "合计持股", falling back to the second and then the third
    /// extraction step when nothing is found.
    /// </summary>
    /// <returns>The holdings found by the first step that produced any result.</returns>
    List <struHoldAfter> GetHolderAfter()
    {
        var found = new List <struHoldAfter>();

        foreach (var entry in root.TableList)
        {
            var grid = new HTMLTable(entry.Value);
            // Cells are addressed from 1, so walk 1..RowCount and 1..ColumnCount directly.
            for (int rowNo = 1; rowNo <= grid.RowCount; rowNo++)
            {
                for (int colNo = 1; colNo <= grid.ColumnCount; colNo++)
                {
                    var marker = grid.CellValue(rowNo, colNo);
                    if (marker != "合计持有股份" && marker != "合计持股")
                    {
                        continue;
                    }

                    // Name comes from the first column of the matching row.
                    var owner = grid.CellValue(rowNo, 1);

                    // Share count sits in the second-to-last column; the header cell at
                    // row 2, column 5 is handed to getAfterstock together with it.
                    var rawCount  = Normalizer.NormalizeNumberResult(grid.CellValue(rowNo, grid.ColumnCount - 1));
                    var unitTitle = grid.CellValue(2, 5);
                    string shares = getAfterstock(unitTitle, rawCount);

                    // Percentage sits in the last column.
                    var ratio = getAfterpercent(grid.CellValue(rowNo, grid.ColumnCount));

                    found.Add(new struHoldAfter()
                    {
                        Name    = owner,
                        Count   = shares,
                        Percent = ratio,
                        Used    = false
                    });
                }
            }
        }

        // Fall back to the looser extraction passes only while nothing has been found.
        if (found.Count == 0)
        {
            found = GetHolderAfter2ndStep();
        }
        if (found.Count == 0)
        {
            found = GetHolderAfter3rdStep();
        }
        return found;
    }
Example #6
0
    /// <summary>
    /// Repairs tables that were split in two by a page break, merging the second part into the first.
    /// </summary>
    /// <remarks>
    /// Pass 1 walks table ids from 2 upwards. When a first-row record of a table holds
    /// <c>strNullValue</c>, the previous table has the same column count, and the nodes of
    /// the two tables are no more than 200 <c>PositionId</c> units apart, the table's records
    /// are re-keyed and appended to the previous table, the table is cleared and its nodes
    /// get <c>TableId = -1</c>.
    /// Pass 2 looks at every table node directly followed by another table node with the
    /// same column count and merges the pair when the second table's first row contains
    /// <c>HTMLTable.strNullValue</c>, when two same-column cells concatenate to a known company
    /// full name, or (for <c>StockChange</c> documents) when the second table's first row
    /// contains one of the listed trading methods.
    /// NOTE(review): pass 1 reads from <c>doc.root.TableList</c> but writes to
    /// <c>root.TableList</c>; this is only coherent if <paramref name="root"/> is
    /// <c>doc.root</c> — confirm against callers.
    /// </remarks>
    /// <param name="root">Node tree whose <c>Children</c> and <c>TableList</c> are updated in place.</param>
    /// <param name="doc">Document supplying <c>doc.root.TableList</c>, <c>companynamelist</c> and the runtime type tested in pass 2.</param>
    public static void FixSpiltTable(MyRootHtmlNode root, AnnouceDocument doc)
    {
        // Pass 1: table ids are looked up from 2 to Count inclusive, each compared with its predecessor.
        for (int NextTableId = 2; NextTableId <= doc.root.TableList.Count; NextTableId++)
        {
            foreach (var item in doc.root.TableList[NextTableId])
            {
                // Locate the nodes of the previous and the current table.
                // NOTE(review): this search does not depend on `item`, yet it is repeated for every record.
                var FirstTablePos  = -1;
                var SecondTablePos = -1;
                foreach (var p in root.Children)
                {
                    foreach (var s in p.Children)
                    {
                        if (s.TableId == NextTableId - 1)
                        {
                            FirstTablePos = s.PositionId;
                        }
                        if (s.TableId == NextTableId)
                        {
                            SecondTablePos = s.PositionId;
                        }
                    }
                }

                // Tables more than 200 position units apart are not treated as one split table.
                if (SecondTablePos - FirstTablePos > 200)
                {
                    continue;
                }

                // A record is "<position>|<value>"; the position is comma-separated and its
                // second field is the row number (the merge below writes it as "table,row,third field").
                var tablerec = item.Split("|");
                var pos      = tablerec[0].Split(",");
                var value    = tablerec[1];
                var row      = int.Parse(pos[1]);
                // The second table has a NULL marker in its first row.
                if (row == 1 && value == strNullValue)
                {
                    var table     = new HTMLTable(doc.root.TableList[NextTableId - 1]);
                    var nexttable = new HTMLTable(doc.root.TableList[NextTableId]);
                    if (table.ColumnCount != nexttable.ColumnCount)
                    {
                        continue;
                    }
                    // Merge the tables: rows of the second table are shifted below the first table's rows.
                    var offset = table.RowCount;
                    // Re-key the second table's records so they belong to the previous table.
                    foreach (var Nextitem in root.TableList[NextTableId])
                    {
                        tablerec = Nextitem.Split("|");
                        pos      = tablerec[0].Split(",");
                        value    = tablerec[1];
                        var newtablerec = (NextTableId - 1) + "," + (offset + int.Parse(pos[1])) + "," + pos[2] + "|" + value;
                        root.TableList[NextTableId - 1].Add(newtablerec);
                    }
                    root.TableList[NextTableId].Clear();
                    // Detach every node that still points at the table that was just emptied.
                    for (int i = 0; i < root.Children.Count; i++)
                    {
                        for (int j = 0; j < root.Children[i].Children.Count; j++)
                        {
                            var node = root.Children[i].Children[j];
                            if (node.TableId == NextTableId)
                            {
                                node.TableId = -1;
                            }
                        }
                    }
                    // Stop enumerating this table's records: the list was cleared above.
                    break;
                }
            }
        }

        // Pass 2, step 1: look for consecutive tables (a table node whose NextBrother is also a table).
        for (int i = 0; i < root.Children.Count; i++)
        {
            for (int j = 0; j < root.Children[i].Children.Count; j++)
            {
                var node = root.Children[i].Children[j];
                if (node.TableId != -1)
                {
                    if (node.NextBrother != null)
                    {
                        if (node.NextBrother.TableId != -1)
                        {
                            var nextnode  = node.NextBrother;
                            var table     = new HTMLTable(root.TableList[node.TableId]);
                            var nexttable = new HTMLTable(root.TableList[nextnode.TableId]);
                            //Console.WriteLine("First  Table:" + table.RowCount + "X" + table.ColumnCount);
                            //Console.WriteLine("Second Table:" + nexttable.RowCount + "X" + nexttable.ColumnCount);
                            if (table.ColumnCount != nexttable.ColumnCount)
                            {
                                continue;
                            }
                            //Console.WriteLine("Two Tables Has Same Column Count!");
                            // Step 2: the second of two consecutive tables often has a <NULL> cell in its first row.
                            bool hasnull = false;
                            for (int nullcell = 1; nullcell <= table.ColumnCount; nullcell++)
                            {
                                if (nexttable.CellValue(1, nullcell) == HTMLTable.strNullValue)
                                {
                                    hasnull = true;
                                    break;
                                }
                            }

                            var ComboCompanyName         = "";
                            var ComboCompanyNameColumnNo = -1;
                            var CompanyFullNameList      = doc.companynamelist.Select((x) => { return(x.secFullName); }).Distinct().ToList();
                            // Can two cells in the same column of the two tables be joined into a
                            // company full name? Spaces must be removed before comparing.
                            // NOTE(review): the row loops use "<" from 1, so the last row of each table
                            // is never examined although other code reads row RowCount — confirm intent.
                            int MaxColumn = table.ColumnCount;
                            for (int col = 1; col <= MaxColumn; col++)
                            {
                                int TableAMaxRow = table.RowCount;
                                int TableBMaxRow = nexttable.RowCount;
                                for (int RowCntA = 1; RowCntA < TableAMaxRow; RowCntA++)
                                {
                                    for (int RowCntB = 1; RowCntB < TableBMaxRow; RowCntB++)
                                    {
                                        var valueA = table.CellValue(RowCntA, col).Replace(" ", "");
                                        var valueB = nexttable.CellValue(RowCntB, col).Replace(" ", "");
                                        if (valueA != "" && valueB != "")
                                        {
                                            var value = valueA + valueB;
                                            if (CompanyFullNameList.Contains(value))
                                            {
                                                ComboCompanyName         = value;
                                                ComboCompanyNameColumnNo = col;
                                                //Console.WriteLine("Found FullName:" + value);
                                                break;
                                            }
                                        }
                                    }
                                    if (ComboCompanyNameColumnNo != -1)
                                    {
                                        break;
                                    }
                                }
                                if (ComboCompanyNameColumnNo != -1)
                                {
                                    break;
                                }
                            }
                            if (ComboCompanyNameColumnNo != -1)
                            {
                                // Complete the name, but not everywhere: table A holds the beginning
                                // of the company name, table B holds its end.
                                // NOTE(review): StartsWith/EndsWith with an empty value is always true, so
                                // records whose value is empty after removing spaces are overwritten too;
                                // the match is also not limited to ComboCompanyNameColumnNo — confirm intent.
                                for (int k = 0; k < root.TableList[node.TableId].Count; k++)
                                {
                                    var tablerec = root.TableList[node.TableId][k].Split("|");
                                    var value    = tablerec[1].Replace(" ", "");
                                    // Table A starts with the company name.
                                    if (ComboCompanyName.StartsWith(value))
                                    {
                                        root.TableList[node.TableId][k] = tablerec[0] + "|" + ComboCompanyName;
                                    }
                                }
                                for (int k = 0; k < root.TableList[nextnode.TableId].Count; k++)
                                {
                                    var tablerec = root.TableList[nextnode.TableId][k].Split("|");
                                    var value    = tablerec[1].Replace(" ", "");
                                    // Table B ends with the company name.
                                    if (ComboCompanyName.EndsWith(value))
                                    {
                                        root.TableList[nextnode.TableId][k] = tablerec[0] + "|" + ComboCompanyName;
                                    }
                                }
                            }


                            // Special business case: shareholding increase/decrease documents.
                            bool specaillogic = false;
                            var  BuyMethod = new string[] { "集中竞价交易", "竞价交易", "大宗交易", "约定式购回" }.ToList();
                            if (doc.GetType() == typeof(StockChange))
                            {
                                // Continuation table without a header row: its first row already
                                // holds one of the BuyMethod values.
                                for (int spCell = 1; spCell <= table.ColumnCount; spCell++)
                                {
                                    if (BuyMethod.Contains(nexttable.CellValue(1, spCell)))
                                    {
                                        specaillogic = true;
                                        break;
                                    }
                                }
                            }

                            if (hasnull || ComboCompanyNameColumnNo != -1 || specaillogic)
                            {
                                // Rows of the second table are shifted below the first table's rows.
                                var offset = table.RowCount;
                                // Re-key the second table's records so they belong to the first table.
                                foreach (var item in root.TableList[nextnode.TableId])
                                {
                                    var tablerec    = item.Split("|");
                                    var pos         = tablerec[0].Split(",");
                                    var value       = tablerec[1];
                                    var newtablerec = node.TableId + "," + (offset + int.Parse(pos[1])) + "," + pos[2] + "|" + value;
                                    root.TableList[node.TableId].Add(newtablerec);
                                }
                                // Empty the second table and mark its node as no longer being a table.
                                root.TableList[nextnode.TableId].Clear();
                                nextnode.TableId = -1;
                                //Console.WriteLine("Found Split Tables!!");
                            }
                        }
                    }
                }
            }
        }
    }
Example #7
0
    /// <summary>
    /// Second-pass extraction of post-change holdings from five-column tables.
    /// </summary>
    /// <remarks>
    /// A table qualifies when it has exactly five columns and some cell contains
    /// "增持后持股" or "减持后持股"; the first row holding such a cell is used as the header.
    /// For every row below the header, column 1 is the holder name, column 4 the share count
    /// and column 5 the percentage. A row is kept only when column 4 passes
    /// <c>RegularTool.IsNumeric</c> and column 5 passes <c>RegularTool.IsPercent</c>.
    /// </remarks>
    /// <returns>The holdings found; empty when no table qualifies.</returns>
    List <struHoldAfter> GetHolderAfter2ndStep()
    {
        var HoldList = new List <struHoldAfter>();
        var keyword  = new string[] { "增持后持股", "减持后持股" };

        // Returns the first row (1-based) with a cell containing one of the keywords, or -1.
        int FindHeaderRow(HTMLTable candidate)
        {
            for (int rowNo = 1; rowNo <= candidate.RowCount; rowNo++)
            {
                // Inclusive bound: the former "<" never looked at the last column, although
                // the same column is read as column 5 further down.
                for (int colNo = 1; colNo <= candidate.ColumnCount; colNo++)
                {
                    var cell = candidate.CellValue(rowNo, colNo);
                    foreach (var key in keyword)
                    {
                        if (cell.Contains(key))
                        {
                            return rowNo;
                        }
                    }
                }
            }
            return -1;
        }

        // Removes thousands separators and surrounding whitespace. The original applied the
        // ASCII-comma replacement twice, which made the second call a no-op; the full-width
        // comma is stripped as well so such figures still reach the numeric checks.
        string StripSeparators(string raw)
        {
            return raw.Replace(",", String.Empty).Replace(",", String.Empty).Trim();
        }

        foreach (var table in root.TableList)
        {
            var mt = new HTMLTable(table.Value);
            // Only five-column tables are handled, so skip the header scan for all others.
            if (mt.ColumnCount != 5)
            {
                continue;
            }

            var HeaderRowNo = FindHeaderRow(mt);
            if (HeaderRowNo == -1)
            {
                continue;
            }

            // Header-derived values are the same for every data row, so read them once.
            var Title4 = mt.CellValue(HeaderRowNo, 4);
            var Title5 = mt.CellValue(HeaderRowNo, 5).Replace(" ", "");
            // When the header already states the unit, bare numbers in column 5 are percentages.
            bool headerStatesPercent = Title5.Contains("增持后持股比例(%)") || Title5.Contains("减持后持股比例(%)");

            for (int rowno = HeaderRowNo + 1; rowno <= mt.RowCount; rowno++)
            {
                var value1 = mt.CellValue(rowno, 1);
                var value4 = StripSeparators(mt.CellValue(rowno, 4));
                var value5 = StripSeparators(mt.CellValue(rowno, 5));

                if (headerStatesPercent && !value5.Contains("%"))
                {
                    value5 += "%";
                }
                if (RegularTool.IsNumeric(value4) && RegularTool.IsPercent(value5))
                {
                    HoldList.Add(new struHoldAfter()
                    {
                        Name    = value1,
                        Count   = getAfterstock(Title4, value4),
                        Percent = getAfterpercent(value5),
                        Used    = false
                    });
                }
            }
        }
        return(HoldList);
    }
Example #8
0
    /// <summary>
    /// Repairs tables that were split in two by a page break, merging the second part into the first.
    /// </summary>
    /// <remarks>
    /// After <c>FirstRowNullFix</c> and <c>OneRowFix</c> have run, every table node that is
    /// directly followed by another table node with the same column count is examined. The
    /// pair is merged through <c>MergeTable</c> when the first row of the second table
    /// contains <c>HTMLTable.strNullValue</c>, or when a cell of the first table joined with
    /// a same-column cell of the second table (spaces removed) equals a company full name
    /// from <c>doc.companynamelist</c>. In the second case the two name fragments are first
    /// replaced by the full name.
    /// </remarks>
    /// <param name="doc">Document whose <c>root</c> node tree and table list are repaired in place.</param>
    public static void FixSpiltTable(AnnouceDocument doc)
    {
        // Merge tables whose first row holds the NULL marker.
        FirstRowNullFix(doc);

        OneRowFix(doc);

        for (int i = 0; i < doc.root.Children.Count; i++)
        {
            for (int j = 0; j < doc.root.Children[i].Children.Count; j++)
            {
                var node = doc.root.Children[i].Children[j];
                if (node.TableId == -1)
                {
                    continue;
                }
                var nextnode = node.NextBrother;
                if (nextnode == null || nextnode.TableId == -1)
                {
                    continue;
                }

                // 1. Two tables directly follow each other.
                var firstRecords  = doc.root.TableList[node.TableId];
                var secondRecords = doc.root.TableList[nextnode.TableId];
                var table         = new HTMLTable(firstRecords);
                var nexttable     = new HTMLTable(secondRecords);
                if (table.ColumnCount != nexttable.ColumnCount)
                {
                    continue;
                }

                // 2. The second of two consecutive tables often has a <NULL> cell in its first row.
                bool hasnull = false;
                for (int nullcell = 1; nullcell <= table.ColumnCount; nullcell++)
                {
                    if (nexttable.CellValue(1, nullcell) == HTMLTable.strNullValue)
                    {
                        hasnull = true;
                        break;
                    }
                }

                // A set gives constant-time membership tests; the previous list was scanned
                // once for every (column, row, row) combination.
                var companyFullNames = new System.Collections.Generic.HashSet<string>(
                    doc.companynamelist.Select(x => x.secFullName));
                var ComboCompanyName = FindSplitCompanyName(table, nexttable, companyFullNames);
                bool nameWasSplit    = ComboCompanyName != "";

                if (nameWasSplit)
                {
                    // Complete the name, but not everywhere: table A holds the beginning of the
                    // company name, table B holds its end. Empty values are skipped because
                    // StartsWith("") / EndsWith("") are always true and would overwrite every
                    // blank cell with the company name.
                    for (int k = 0; k < firstRecords.Count; k++)
                    {
                        var tablerec = firstRecords[k].Split("|");
                        var value    = tablerec[1].Replace(" ", "");
                        if (value != "" && ComboCompanyName.StartsWith(value, System.StringComparison.Ordinal))
                        {
                            firstRecords[k] = tablerec[0] + "|" + ComboCompanyName;
                        }
                    }
                    for (int k = 0; k < secondRecords.Count; k++)
                    {
                        var tablerec = secondRecords[k].Split("|");
                        var value    = tablerec[1].Replace(" ", "");
                        if (value != "" && ComboCompanyName.EndsWith(value, System.StringComparison.Ordinal))
                        {
                            secondRecords[k] = tablerec[0] + "|" + ComboCompanyName;
                        }
                    }
                }

                if (hasnull || nameWasSplit)
                {
                    MergeTable(doc, nextnode.TableId);
                }
            }
        }

        // Returns the first company full name obtained by joining a cell of `first` with a
        // same-column cell of `second` (spaces removed, both parts non-empty), scanning by
        // column, then row of `first`, then row of `second`; returns "" when there is none.
        string FindSplitCompanyName(HTMLTable first, HTMLTable second, System.Collections.Generic.HashSet<string> names)
        {
            for (int col = 1; col <= first.ColumnCount; col++)
            {
                // Row bounds are inclusive: rows are addressed from 1 and the halves of a
                // split name sit in the last row of the first table, which the former "<"
                // bound never reached.
                // NOTE(review): confirm RowCount is the highest valid row index.
                for (int rowA = 1; rowA <= first.RowCount; rowA++)
                {
                    var valueA = first.CellValue(rowA, col).Replace(" ", "");
                    if (valueA == "")
                    {
                        continue;
                    }
                    for (int rowB = 1; rowB <= second.RowCount; rowB++)
                    {
                        var valueB = second.CellValue(rowB, col).Replace(" ", "");
                        if (valueB == "")
                        {
                            continue;
                        }
                        var joined = valueA + valueB;
                        if (names.Contains(joined))
                        {
                            return joined;
                        }
                    }
                }
            }
            return "";
        }
    }
Example #9
0
 /// <summary>
 /// Tallies which column headers sit above cells equal to a keyword, restricted to rows that
 /// also contain a condition text.
 /// </summary>
 /// <remarks>
 /// Row 1 of every table is read as the header row and rows 2 and below as data. A data row
 /// is considered only when one of its cells (under a non-empty header) contains the
 /// normalized <paramref name="ConditionKey"/>; the header of the first such cell is the
 /// condition title. Within such a row, every cell whose normalized text (after the optional
 /// <c>Transform</c>) equals the normalized <paramref name="KeyWord"/> increments the counter
 /// of its header in <c>TrainingTitleResult</c> and the counter of the condition title in
 /// <c>TrainingTitleCondition</c>.
 /// </remarks>
 /// <param name="root">Parsed document whose <c>TableList</c> is scanned; nothing happens when it is null.</param>
 /// <param name="KeyWord">Cell text to look for.</param>
 /// <param name="ConditionKey">Text that must occur somewhere in the same row.</param>
 public void PutTitleTrainingItemWithCodition(HTMLEngine.MyRootHtmlNode root, string KeyWord, string ConditionKey)
 {
     if (root.TableList == null)
     {
         return;
     }

     // Neither key changes during the scan, so normalize each once instead of once per cell.
     var normalizedKeyWord   = KeyWord.NormalizeTextResult();
     var normalizedCondition = ConditionKey.NormalizeTextResult();

     foreach (var Table in root.TableList)
     {
         var t = new HTMLTable(Table.Value);
         // All bounds below are inclusive: cells are addressed from 1 and other HTMLTable
         // callers read the cell at index RowCount / ColumnCount. The former "<" bounds
         // left the last row and the last column out of the statistics.
         // NOTE(review): confirm RowCount/ColumnCount are the highest valid indexes.
         for (int RowNo = 2; RowNo <= t.RowCount; RowNo++)
         {
             // Step 1: the row must contain the condition text under some titled column.
             var IsConditionOK  = false;
             var ConditionTitle = "";
             for (int ColNo = 1; ColNo <= t.ColumnCount; ColNo++)
             {
                 var title = t.CellValue(1, ColNo).Replace(" ", "");
                 if (String.IsNullOrEmpty(title))
                 {
                     continue;
                 }
                 if (t.CellValue(RowNo, ColNo).NormalizeTextResult().Contains(normalizedCondition))
                 {
                     ConditionTitle = title;
                     IsConditionOK  = true;
                     break;
                 }
             }
             if (!IsConditionOK)
             {
                 continue;
             }

             // Step 2: count every titled cell of this row that equals the keyword.
             for (int ColNo = 1; ColNo <= t.ColumnCount; ColNo++)
             {
                 var title = t.CellValue(1, ColNo).Replace(" ", "");
                 if (String.IsNullOrEmpty(title))
                 {
                     continue;
                 }

                 var value = t.CellValue(RowNo, ColNo);
                 if (Transform != null)
                 {
                     value = Transform(value, title);
                 }
                 if (!value.NormalizeTextResult().Equals(normalizedKeyWord))
                 {
                     continue;
                 }

                 // Single lookups: a missing key leaves the count at 0, so the first match stores 1.
                 TrainingTitleResult.TryGetValue(title, out var titleHits);
                 TrainingTitleResult[title] = titleHits + 1;

                 TrainingTitleCondition.TryGetValue(ConditionTitle, out var conditionHits);
                 TrainingTitleCondition[ConditionTitle] = conditionHits + 1;
             }
         }
     }
 }
Example #10
0
    /// <summary>
    /// 标题优先度
    /// </summary>
    /// <param name="root"></param>
    /// <param name="Rules"></param>
    /// <param name="IsMeger"></param>
    /// <returns></returns>
    public static List <CellInfo[]> GetMultiInfoByTitleRules(HTMLEngine.MyRootHtmlNode root, List <TableSearchTitleRule> Rules, bool IsMeger)
    {
        var Container = new List <CellInfo[]>();

        for (int tableIndex = 0; tableIndex < root.TableList.Count; tableIndex++)
        {
            var      table = new HTMLTable(root.TableList[tableIndex + 1]);
            var      checkResultColumnNo = new int[Rules.Count];
            var      checkResultTitle    = new string[Rules.Count];
            var      HeaderRowNo         = -1;
            String[] HeaderRow           = null;
            var      IsFirstRowOneCell   = false; //第一行是否为整行合并
            for (int TestRowHeader = 1; TestRowHeader < table.RowCount; TestRowHeader++)
            {
                checkResultColumnNo = new int[Rules.Count];
                var IsOneColumnRow = true;  //是否整行合并
                for (int i = 2; i <= table.ColumnCount; i++)
                {
                    if (table.CellValue(TestRowHeader, i) != (table.CellValue(TestRowHeader, 1)))
                    {
                        IsOneColumnRow = false;
                        break;
                    }
                }
                if (IsOneColumnRow)
                {
                    if (TestRowHeader == 1)
                    {
                        IsFirstRowOneCell = true;
                    }
                    continue;
                }
                HeaderRow = table.GetRow(TestRowHeader);
                for (int checkItemIdx = 0; checkItemIdx < Rules.Count; checkItemIdx++)
                {
                    foreach (var EvaluateTitle in Rules[checkItemIdx].Title)
                    {
                        //根据标题优先度检索,对每个标题单独检索
                        for (int ColIndex = 0; ColIndex < HeaderRow.Length; ColIndex++)
                        {
                            //在每个行首单元格检索
                            //标题的处理
                            if (Rules[checkItemIdx].IsTitleEq)
                            {
                                //相等模式
                                if (!EvaluateTitle.Equals(HeaderRow[ColIndex].Replace(" ", "")))
                                {
                                    continue;
                                }
                                if (Rules[checkItemIdx].ExcludeTitle != null)
                                {
                                    var isOK = true;
                                    foreach (var word in Rules[checkItemIdx].ExcludeTitle)
                                    {
                                        if (HeaderRow[ColIndex].Contains(word))
                                        {
                                            isOK = false;
                                            break;
                                        }
                                    }
                                    if (!isOK)
                                    {
                                        continue;
                                    }
                                }
                            }
                            else
                            {
                                //包含模式
                                if (!HeaderRow[ColIndex].Replace(" ", "").Contains(EvaluateTitle))
                                {
                                    continue;
                                }
                                if (Rules[checkItemIdx].ExcludeTitle != null)
                                {
                                    var isOK = true;
                                    foreach (var word in Rules[checkItemIdx].ExcludeTitle)
                                    {
                                        if (HeaderRow[ColIndex].Contains(word))
                                        {
                                            isOK = false;
                                            break;
                                        }
                                    }
                                    if (!isOK)
                                    {
                                        continue;
                                    }
                                }
                            }

                            //父标题的处理
                            if (Rules[checkItemIdx].SuperTitle != null && Rules[checkItemIdx].SuperTitle.Count != 0)
                            {
                                //具有父标题的情况
                                var IsFoundSuperTitle = false;
                                for (int superRowNo = 1; superRowNo < TestRowHeader; superRowNo++)
                                {
                                    var value = table.CellValue(superRowNo, ColIndex + 1).Replace(" ", "");
                                    if (Rules[checkItemIdx].IsSuperTitleEq)
                                    {
                                        //等于
                                        if (Rules[checkItemIdx].SuperTitle.Contains(value))
                                        {
                                            IsFoundSuperTitle = true;
                                            break;
                                        }
                                    }
                                    else
                                    {
                                        //包含
                                        foreach (var supertitle in Rules[checkItemIdx].SuperTitle)
                                        {
                                            if (value.Contains(supertitle))
                                            {
                                                IsFoundSuperTitle = true;
                                                break;
                                            }
                                        }
                                    }
                                    if (IsFoundSuperTitle)
                                    {
                                        break;
                                    }
                                }
                                if (!IsFoundSuperTitle)
                                {
                                    continue;
                                }
                            }
                            checkResultTitle[checkItemIdx]    = HeaderRow[ColIndex];
                            checkResultColumnNo[checkItemIdx] = ColIndex + 1;
                            break;
                        }
                        if (!String.IsNullOrEmpty(checkResultTitle[checkItemIdx]))
                        {
                            break;
                        }
                    }
                    //主字段没有找到,其他不用找了
                    if (checkResultColumnNo[0] == 0)
                    {
                        break;
                    }
                }

                bool IsAllRequiredItemOK = true;
                for (int checkItemIdx = 0; checkItemIdx < checkResultColumnNo.Length; checkItemIdx++)
                {
                    if (checkResultColumnNo[checkItemIdx] == 0 && Rules[checkItemIdx].IsRequire)
                    {
                        IsAllRequiredItemOK = false;
                        break;
                    }
                }

                if (IsAllRequiredItemOK)
                {
                    if (TestRowHeader == 1 || IsFirstRowOneCell)
                    {
                        HeaderRowNo = TestRowHeader;
                        break;
                    }
                    else
                    {
                        //对于标题栏非首行的情况,如果不是首行是一个大的整行合并单元格,则做严格检查
                        //进行严格的检查,暂时要求全匹配
                        var IsOK = true;
                        for (int i = 0; i < Rules.Count; i++)
                        {
                            if (checkResultColumnNo[i] == 0)
                            {
                                IsOK = false;
                                break;
                            }
                        }
                        if (IsOK)
                        {
                            HeaderRowNo = TestRowHeader;
                            break;
                        }
                    }
                }
            }

            //主字段没有找到,下一张表
            if (HeaderRowNo == -1)
            {
                continue;
            }

            for (int RowNo = HeaderRowNo; RowNo <= table.RowCount; RowNo++)
            {
                if (RowNo == HeaderRowNo)
                {
                    continue;
                }
                if (table.IsTotalRow(RowNo))
                {
                    continue;                                                //非合计行
                }
                var target = table.CellValue(RowNo, checkResultColumnNo[0]); //主字段非空
                if (target == String.Empty || target == strRowSpanValue || target == strColSpanValue || target == strNullValue)
                {
                    continue;
                }
                if (Rules[0].Title.Contains(target))
                {
                    continue;
                }

                var RowData = new CellInfo[Rules.Count];
                for (int checkItemIdx = 0; checkItemIdx < Rules.Count; checkItemIdx++)
                {
                    if (checkResultColumnNo[checkItemIdx] == 0)
                    {
                        continue;
                    }
                    var ColNo = checkResultColumnNo[checkItemIdx];
                    RowData[checkItemIdx].TableId = tableIndex + 1;
                    RowData[checkItemIdx].Row     = RowNo;
                    RowData[checkItemIdx].Column  = ColNo;
                    RowData[checkItemIdx].Title   = checkResultTitle[checkItemIdx];
                    if (table.CellValue(RowNo, ColNo).Equals(strNullValue))
                    {
                        continue;
                    }
                    RowData[checkItemIdx].RawData = table.CellValue(RowNo, ColNo);
                    if (Rules[checkItemIdx].Normalize != null)
                    {
                        RowData[checkItemIdx].RawData = Rules[checkItemIdx].Normalize(RowData[checkItemIdx].RawData, HeaderRow[ColNo - 1]);
                    }
                }

                var HasSame = false;
                foreach (var existRow in Container)
                {
                    if (IsSameContent(existRow, RowData))
                    {
                        HasSame = true;
                        break;
                    }
                }
                if (!HasSame)
                {
                    Container.Add(RowData);
                }
            }
        }
        if (IsMeger)
        {
            Container = MergerMultiInfo(Container);
        }
        return(Container);
    }
Example #11
0
    /// <summary>
    /// Walks every table of <paramref name="root"/>, locates the columns whose header cell matches
    /// the search rules, and collects the matching cells of each data row.
    /// </summary>
    /// <param name="root">Parsed document whose <c>TableList</c> is scanned.</param>
    /// <param name="Rules">
    /// Column search rules. The first rule is the primary field: a table is skipped when no header
    /// matches it, and a row is skipped when its primary cell is empty or a placeholder marker.
    /// A null or empty list yields an empty result.
    /// </param>
    /// <param name="IsMeger">When true, the collected rows are passed through <c>MergerMultiInfo</c> before being returned.</param>
    /// <returns>
    /// One <c>CellInfo</c> array per collected row, with one slot per rule (same order as
    /// <paramref name="Rules"/>). Rows that <c>IsSameContent</c> reports as already present are not added twice.
    /// </returns>
    public static List<CellInfo[]> GetMultiInfo(HTMLEngine.MyRootHtmlNode root, List<TableSearchRule> Rules, bool IsMeger)
    {
        var Container = new List<CellInfo[]>();

        // Without rules there is no primary field to search for. Previously an empty rule list
        // led to checkResult[0] being read from a zero-length array (IndexOutOfRangeException).
        if (Rules == null || Rules.Count == 0)
        {
            return Container;
        }

        for (int tableIndex = 0; tableIndex < root.TableList.Count; tableIndex++)
        {
            // Tables are looked up with tableIndex + 1, the same id that is stored in CellInfo.TableId below.
            var table = new HTMLTable(root.TableList[tableIndex + 1]);
            var HeaderRow = table.GetHeaderRow();

            // checkResult[i] is the 1-based column number matched by Rules[i]; 0 means "not found".
            var checkResult = new int[Rules.Count];
            for (int checkItemIdx = 0; checkItemIdx < Rules.Count; checkItemIdx++)
            {
                // Search each header cell for this rule and stop at the first matching column.
                for (int ColIndex = 0; ColIndex < HeaderRow.Length; ColIndex++)
                {
                    bool IsMatch = false;
                    if (Rules[checkItemIdx].IsEq)
                    {
                        // Equality mode: the header text must be one of the rule's words.
                        IsMatch = Rules[checkItemIdx].Rule.Contains(HeaderRow[ColIndex]);
                    }
                    else
                    {
                        // Containment mode: the header text must contain one of the rule's words.
                        foreach (var r in Rules[checkItemIdx].Rule)
                        {
                            if (HeaderRow[ColIndex].Contains(r))
                            {
                                IsMatch = true;
                                break;
                            }
                        }
                    }

                    if (!IsMatch)
                    {
                        continue;
                    }

                    // Column found for this rule.
                    checkResult[checkItemIdx] = ColIndex + 1;
                    break;
                }

                // The primary field was not found, so the remaining rules need not be searched.
                if (checkResult[0] == 0)
                {
                    break;
                }
            }

            // The primary field was not found: move on to the next table.
            if (checkResult[0] == 0)
            {
                continue;
            }

            // Data rows start at row 2 (the loop never visits row 1).
            for (int RowNo = 2; RowNo <= table.RowCount; RowNo++)
            {
                // Skip total (summary) rows.
                if (table.IsTotalRow(RowNo))
                {
                    continue;
                }

                // The primary cell must hold a real value, not an empty string or a span/null marker.
                var target = table.CellValue(RowNo, checkResult[0]);
                if (target == "" || target == "<rowspan>" || target == "<colspan>" || target == "<null>")
                {
                    continue;
                }

                // Skip rows whose primary cell is itself one of the primary rule's title words.
                if (Rules[0].Rule.Contains(target))
                {
                    continue;
                }

                var RowData = new CellInfo[Rules.Count];
                for (int checkItemIdx = 0; checkItemIdx < Rules.Count; checkItemIdx++)
                {
                    var ColNo = checkResult[checkItemIdx];
                    if (ColNo == 0)
                    {
                        // No column matched this rule; leave the slot at its default value.
                        continue;
                    }

                    RowData[checkItemIdx].TableId = tableIndex + 1;
                    RowData[checkItemIdx].Row = RowNo;
                    RowData[checkItemIdx].Column = ColNo;

                    // Read the cell once; the position is recorded even when the value is the null marker.
                    var cellValue = table.CellValue(RowNo, ColNo);
                    if (cellValue.Equals("<null>"))
                    {
                        continue;
                    }

                    RowData[checkItemIdx].RawData = cellValue;
                    if (Rules[checkItemIdx].Normalize != null)
                    {
                        RowData[checkItemIdx].RawData = Rules[checkItemIdx].Normalize(RowData[checkItemIdx].RawData, HeaderRow[ColNo - 1]);
                    }
                }

                // Add the row only if no already-collected row has the same content.
                if (!Container.Exists(existRow => IsSameContent(existRow, RowData)))
                {
                    Container.Add(RowData);
                }
            }
        }

        if (IsMeger)
        {
            Container = MergerMultiInfo(Container);
        }
        return Container;
    }