Example #1
0
    /// <summary>
    /// Finds where a keyword occurs in the content nodes of a document tree.
    /// </summary>
    /// <param name="KeyWord">Text to search for (matched ordinally, i.e. character by character).</param>
    /// <param name="root">Root node; the nodes scanned are the children of each of its children.</param>
    /// <returns>
    /// One entry per matching content node (first occurrence in that node only), carrying the
    /// keyword as Value, the node's PositionId as Loc and the match index as StartIdx.
    /// </returns>
    public static List <LocAndValue <String> > FindWordLoc(string KeyWord, MyRootHtmlNode root)
    {
        var paragrahIdList = new List <LocAndValue <String> >();

        foreach (var paragrah in root.Children)
        {
            // Taken from each paragraph's content: the content already includes the
            // embedded lists, so they are not processed a second time here.
            foreach (var contentNode in paragrah.Children)
            {
                // Only nodes with TableId == -1 are searched
                // (presumably nodes that do not belong to a table — TODO confirm).
                if (contentNode.TableId != -1)
                {
                    continue;
                }

                // A node without text cannot contain the keyword; skip it instead of throwing.
                if (contentNode.Content == null)
                {
                    continue;
                }

                // Ordinal search: the culture-sensitive default may ignore some characters,
                // which would make StartIdx inconsistent with KeyWord.Length.
                var Idx = contentNode.Content.IndexOf(KeyWord, StringComparison.Ordinal);
                if (Idx == -1)
                {
                    continue;
                }

                var Loc = new LocAndValue <String>()
                {
                    Value    = KeyWord,
                    Loc      = contentNode.PositionId,
                    StartIdx = Idx,
                };
                paragrahIdList.Add(Loc);
            }
        }
        return(paragrahIdList);
    }
Example #2
0
 // Validator for a candidate item: the item is accepted (true) unless its Value
 // contains the rejected keyword checked below.
 bool JiaFangValidator(LocAndValue <string> x)
 {
     bool containsRejectedKeyword = x.Value.Contains("招标");

     return(!containsRejectedKeyword);
 }
Example #3
0
 // Checks an item against a rule. A filter list on the rule that is null or empty
 // is ignored; otherwise the item's Description (for rule.Description) or the text
 // of its Value (for rule.Word) must be contained in that list.
 static bool IsMatch <T>(WordRule rule, LocAndValue <T> evaluate)
 {
     bool filtersByDescription = rule.Description != null && rule.Description.Count != 0;
     if (filtersByDescription && !rule.Description.Contains(evaluate.Description))
     {
         return(false);
     }

     bool filtersByWord = rule.Word != null && rule.Word.Count != 0;

     // Without a word filter the item passes; with one, its value text must be listed.
     return(!filtersByWord || rule.Word.Contains(evaluate.Value.ToString()));
 }
Example #4
0
        /// <summary>
        /// Signed distance from this item to another item
        /// (positive when the other item comes after this one).
        /// </summary>
        /// <param name="other">The item to measure the distance to.</param>
        /// <returns>
        /// For string values: the difference of the combined positions, reduced by this
        /// value's length when the other item is later, or increased by the other value's
        /// length otherwise. For non-string values: the plain position difference.
        /// </returns>
        public int Distance(LocAndValue <T> other)
        {
            // Loc and StartIdx are folded into one comparable position.
            // NOTE(review): this assumes StartIdx stays below 1000 within one Loc — confirm.
            const int LocWeight = 1000;
            int mypos    = Loc * LocWeight + StartIdx;
            int otherpos = other.Loc * LocWeight + other.StartIdx;

            if (!(Value is string))
            {
                // Positive when the other item is later.
                return(otherpos - mypos);
            }

            // Decide "later" on the combined position rather than on StartIdx alone:
            // an item in a later Loc may have a smaller StartIdx and would otherwise
            // get the wrong length adjustment.
            if (otherpos > mypos)
            {
                // Other item is later: measure from the end of this value.
                return(otherpos - mypos - Value.ToString().Length);
            }

            // Other item is not later: measure from the end of the other value.
            return(otherpos + other.Value.ToString().Length - mypos);
        }
Example #5
0
    /// <summary>
    /// Regular-expression search with optional leading-word and trailing-word filters.
    /// </summary>
    /// <param name="loc">Value stored in the Loc field of every result.</param>
    /// <param name="OrgString">Text that is searched.</param>
    /// <param name="regularfeature">
    /// Supplies the pattern (RegularExpress) and the optional LeadingWordList /
    /// TrailingWordList; a null list means that side is not filtered.
    /// </param>
    /// <param name="SplitChar">Separator placed between leading word, match text and trailing word.</param>
    /// <returns>
    /// One entry per accepted match. Value is leading word + separator + match text +
    /// separator + trailing word; StartIdx is the index where the leading word begins.
    /// </returns>
    public static List <LocAndValue <String> > RegularExFinder(int loc, string OrgString, struRegularExpressFeature regularfeature, string SplitChar = "")
    {
        var results = new List <LocAndValue <String> >();

        foreach (var match in RegularTool.GetRegular(OrgString, regularfeature.RegularExpress))
        {
            // Leading filter: one of the words must sit directly before the match.
            string prefix = "";
            if (regularfeature.LeadingWordList != null)
            {
                bool prefixFound = false;
                foreach (var candidate in regularfeature.LeadingWordList)
                {
                    int start = match.Index - candidate.Length;
                    if (start < 0)
                    {
                        // Not enough text before the match for this word.
                        continue;
                    }
                    if (OrgString.Substring(start, candidate.Length).Equals(candidate))
                    {
                        prefix      = candidate;
                        prefixFound = true;
                        break;
                    }
                }
                if (!prefixFound)
                {
                    continue;
                }
            }

            // Trailing filter: one of the words must follow the match immediately.
            string suffix = "";
            if (regularfeature.TrailingWordList != null)
            {
                bool suffixFound = false;
                int  matchEnd    = match.Index + match.Length;
                foreach (var candidate in regularfeature.TrailingWordList)
                {
                    if (matchEnd + candidate.Length > OrgString.Length)
                    {
                        // Not enough text after the match for this word.
                        continue;
                    }
                    if (OrgString.Substring(matchEnd, candidate.Length).Equals(candidate))
                    {
                        suffix      = candidate;
                        suffixFound = true;
                        break;
                    }
                }
                if (!suffixFound)
                {
                    continue;
                }
            }

            results.Add(new LocAndValue <String>()
            {
                Value    = prefix + SplitChar + match.RawData + SplitChar + suffix,
                StartIdx = match.Index - prefix.Length,
                Loc      = loc
            });
        }
        return(results);
    }