예제 #1
0
    /// <summary>
    /// Collects the regular-expression feature hits from every content node
    /// found two levels below <paramref name="root"/> (root -> paragraph -> content node).
    /// </summary>
    /// <param name="KeyWord">Regular-expression feature forwarded unchanged to <c>RegularExFinder</c>.</param>
    /// <param name="root">Root node whose <c>Children</c> are treated as paragraphs.</param>
    /// <returns>All hits returned by <c>RegularExFinder</c>, in traversal order.</returns>
    public static List <LocAndValue <String> > FindRegularExpressLoc(struRegularExpressFeature KeyWord, MyRootHtmlNode root)
    {
        var matches = new List <LocAndValue <String> >();

        foreach (var paragraph in root.Children)
        {
            // Original author's note: the content of each paragraph already
            // includes its embedded lists, so those are not searched again here.
            foreach (var node in paragraph.Children)
            {
                var found = RegularExFinder(node.PositionId, node.Content, KeyWord);
                matches.AddRange(found);
            }
        }

        return matches;
    }
예제 #2
0
    /// <summary>
    /// Regular-expression search with optional leading-word and trailing-word filters.
    /// Each match of <c>regularfeature.RegularExpress</c> in <paramref name="OrgString"/> is kept only if
    /// (when <c>LeadingWordList</c> is not null) one of its words sits immediately before the match, and
    /// (when <c>TrailingWordList</c> is not null) one of its words sits immediately after the match.
    /// The first word that fits in each list wins.
    /// </summary>
    /// <param name="loc">Value copied into the <c>Loc</c> property of every result.</param>
    /// <param name="OrgString">Text that is searched.</param>
    /// <param name="regularfeature">Supplies the pattern and the optional leading/trailing word lists.</param>
    /// <param name="SplitChar">
    /// Separator placed between the leading word, the matched text and the trailing word in the
    /// result's <c>Value</c>. It is inserted on both sides even when no leading/trailing word applies.
    /// </param>
    /// <returns>
    /// One entry per accepted match; <c>StartIdx</c> is the match index moved back by the length
    /// of the leading word that was found (if any).
    /// </returns>
    public static List <LocAndValue <String> > RegularExFinder(int loc, string OrgString, struRegularExpressFeature regularfeature, string SplitChar = "")
    {
        var results = new List <LocAndValue <String> >();

        foreach (var match in RegularTool.GetRegular(OrgString, regularfeature.RegularExpress))
        {
            // Leading-word filter: the text right before the match must equal one of the listed words.
            string prefix = "";
            if (regularfeature.LeadingWordList != null)
            {
                bool prefixFound = false;
                foreach (var candidate in regularfeature.LeadingWordList)
                {
                    var start = match.Index - candidate.Length;
                    if (start < 0)
                    {
                        // Not enough characters before the match to hold this word.
                        continue;
                    }
                    if (OrgString.Substring(start, candidate.Length).Equals(candidate))
                    {
                        prefix      = candidate;
                        prefixFound = true;
                        break;
                    }
                }
                if (!prefixFound)
                {
                    continue;
                }
            }

            // Trailing-word filter: the text right after the match must equal one of the listed words.
            string suffix = "";
            if (regularfeature.TrailingWordList != null)
            {
                bool suffixFound = false;
                foreach (var candidate in regularfeature.TrailingWordList)
                {
                    if (match.Index + match.Length + candidate.Length > OrgString.Length)
                    {
                        // Not enough characters after the match to hold this word.
                        continue;
                    }
                    if (OrgString.Substring(match.Index + match.Length, candidate.Length).Equals(candidate))
                    {
                        suffix      = candidate;
                        suffixFound = true;
                        break;
                    }
                }
                if (!suffixFound)
                {
                    continue;
                }
            }

            results.Add(new LocAndValue <String>()
            {
                Value    = prefix + SplitChar + match.RawData + SplitChar + suffix,
                StartIdx = match.Index - prefix.Length,
                Loc      = loc
            });
        }

        return results;
    }