Example #1
0
 /// <summary>
 /// Collects URLs from the region that GetRegion returns for the rule's
 /// start/end tags, optionally keeping only those that contain the rule's feature text.
 /// </summary>
 /// <param name="pRule">
 /// Rule supplying DrillType, StartTag, EndTag, FeatureType and Feature.
 /// FeatureType 0 routes to GetLinks; any other value routes to GetImgs.
 /// </param>
 /// <param name="pSourceUrl">URL handed to GetUrlBase to obtain the base URL and domain.</param>
 /// <returns>
 /// The extracted URLs (filtered case-insensitively by Feature when it is non-empty),
 /// or null when DrillType is not 0 or the region is null or empty.
 /// </returns>
 public string[] GetUrls(DrillRegularRule pRule, string pSourceUrl)
 {
     // This method only serves rules whose DrillType is 0.
     if (pRule.DrillType != 0)
     {
         return null;
     }

     string regionHtml = GetRegion(pRule.StartTag, pRule.EndTag);
     if (string.IsNullOrEmpty(regionHtml))
     {
         return null;
     }

     string domain;
     string urlBase = GetUrlBase(pSourceUrl, out domain);

     string[] candidates = pRule.FeatureType == 0
         ? GetLinks(regionHtml, domain, urlBase)
         : GetImgs(regionHtml, domain, urlBase);

     // No feature text means no filtering: hand back the extractor's result untouched.
     if (string.IsNullOrEmpty(pRule.Feature))
     {
         return candidates;
     }

     List<string> matches = new List<string>();
     foreach (string url in candidates)
     {
         if (url.IndexOf(pRule.Feature, StringComparison.OrdinalIgnoreCase) >= 0)
         {
             matches.Add(url);
         }
     }
     return matches.ToArray();
 }
Example #2
0
 /// <summary>
 /// Builds a new DrillRegularRule populated from this object's matching properties.
 /// </summary>
 /// <returns>
 /// A fresh DrillRegularRule; properties not listed below keep whatever
 /// value new DrillRegularRule() gives them.
 /// </returns>
 public DrillRegularRule ToDrillRegularRule()
 {
     // Left-hand names are members of the new rule; right-hand values come from this instance.
     DrillRegularRule rule = new DrillRegularRule
     {
         DbID = this.DbID,
         DbName = this.DbName,
         DrillType = this.DrillType,
         EndTag = this.EndTag,
         Feature = this.Feature,
         FeatureType = this.FeatureType,
         Feilds = this.Feilds,
         MetaModalID = this.MetaModalID,
         Name = this.Name,
         Splitter = this.Splitter,
         StartTag = this.StartTag
     };
     return rule;
 }
Example #3
0
        /// <summary>
        /// Converts this object into a DrillRegularRule by copying its rule-related properties.
        /// </summary>
        /// <returns>
        /// A newly created DrillRegularRule. Only the properties assigned below are copied;
        /// everything else stays at the value set by new DrillRegularRule().
        /// </returns>
        public DrillRegularRule ToDrillRegularRule()
        {
            // In each pair the target member belongs to the new rule and the value is read from this instance.
            return new DrillRegularRule
            {
                DbID = this.DbID,
                DbName = this.DbName,
                DrillType = this.DrillType,
                EndTag = this.EndTag,
                Feature = this.Feature,
                FeatureType = this.FeatureType,
                Feilds = this.Feilds,
                MetaModalID = this.MetaModalID,
                Name = this.Name,
                Splitter = this.Splitter,
                StartTag = this.StartTag
            };
        }
Example #4
0
 /// <summary>
 /// Gets the HTML fragments of the record region: the region returned by GetRegion
 /// for the rule's start/end tags, split into records on the rule's splitter.
 /// </summary>
 /// <param name="pRule">Rule supplying DrillType, StartTag, EndTag and Splitter.</param>
 /// <returns>
 /// The non-empty pieces of the region split on Splitter; a one-element array holding
 /// the whole region when Splitter is null or empty; or null when DrillType is not 2
 /// or the region is null or empty.
 /// </returns>
 public string[] GetRecordHtmls(DrillRegularRule pRule)
 {
     if (pRule.DrillType == 2)
     {
         string sRegionHtml = GetRegion(pRule.StartTag, pRule.EndTag);
         if (!string.IsNullOrEmpty(sRegionHtml))
         {
             // A rule whose Splitter was never assigned may carry null here; treat it
             // the same as an empty splitter instead of throwing NullReferenceException.
             if (string.IsNullOrEmpty(pRule.Splitter))
             {
                 return new string[] { sRegionHtml };
             }
             return sRegionHtml.Split(new string[] { pRule.Splitter }, StringSplitOptions.RemoveEmptyEntries);
         }
     }
     return null;   // rule does not meet the requirements
 }
Example #5
0
 /// <summary>
 /// Decides whether a rule's condition allows it to run against the original HTML (mOriHtml).
 /// </summary>
 /// <param name="pDrillRule">Rule supplying ConditionType and ConditionTag.</param>
 /// <returns>
 /// ConditionType 0: always true.
 /// ConditionType 1: true when ConditionTag is null/empty or occurs in mOriHtml (case-insensitive).
 /// ConditionType 2: true when ConditionTag is non-empty and does not occur in mOriHtml.
 /// Any other ConditionType: false.
 /// </returns>
 public bool CanExe(DrillRegularRule pDrillRule)
 {
     switch (pDrillRule.ConditionType)
     {
         case 0:
             return true;

         case 1:
             // An absent tag imposes no requirement, so the rule may run.
             return string.IsNullOrEmpty(pDrillRule.ConditionTag)
                 || mOriHtml.IndexOf(pDrillRule.ConditionTag, StringComparison.OrdinalIgnoreCase) >= 0;

         case 2:
             // Here an absent tag fails the check: a concrete tag must be given and must be missing.
             return !string.IsNullOrEmpty(pDrillRule.ConditionTag)
                 && mOriHtml.IndexOf(pDrillRule.ConditionTag, StringComparison.OrdinalIgnoreCase) == -1;

         default:
             return false;
     }
 }
Example #6
0
 /// <summary>
 /// Checks the rule's condition against the original HTML (mOriHtml) to decide
 /// whether the rule may be executed.
 /// </summary>
 /// <param name="pDrillRule">Rule supplying ConditionType and ConditionTag.</param>
 /// <returns>
 /// True for ConditionType 0; for type 1 when ConditionTag is null/empty or is found in
 /// mOriHtml (ignoring case); for type 2 when ConditionTag is non-empty and is not found.
 /// False in every other case.
 /// </returns>
 public bool CanExe(DrillRegularRule pDrillRule)
 {
     // Type 0: unconditional.
     if (pDrillRule.ConditionType == 0)
     {
         return true;
     }

     // Type 1: the tag must be present in the page, or no tag was specified.
     if (pDrillRule.ConditionType == 1)
     {
         string requiredTag = pDrillRule.ConditionTag;
         bool noTag = requiredTag == null || requiredTag.Length == 0;
         return noTag || mOriHtml.IndexOf(requiredTag, StringComparison.OrdinalIgnoreCase) >= 0;
     }

     // Type 2: a tag must be specified and must be absent from the page.
     if (pDrillRule.ConditionType == 2)
     {
         string forbiddenTag = pDrillRule.ConditionTag;
         bool hasTag = forbiddenTag != null && forbiddenTag.Length > 0;
         return hasTag && mOriHtml.IndexOf(forbiddenTag, StringComparison.OrdinalIgnoreCase) == -1;
     }

     return false;
 }
Example #7
0
 /// <summary>
 /// Splits the current HTML (mHtml) into records using the start tag, end tag and
 /// splitter typed into the form, stores them in mRecordHtmls, and refreshes the
 /// total count and the displayed record.
 /// </summary>
 /// <param name="sender">Event source; not used by this method.</param>
 /// <param name="e">Event data; not used by this method.</param>
 private void SplitRecord(object sender, System.EventArgs e)
 {
     try
     {
         // Build a throw-away record-mode rule (DrillType 2) from the current form values.
         DrillRegularRule rule = new DrillRegularRule
         {
             StartTag = TxtStartTag.Text.Trim(),
             EndTag = TxtEndTag.Text.Trim(),
             DrillType = 2,
             Splitter = TxtSpliter.Text.Trim()
         };

         RegScriptTransactor transactor = new RegScriptTransactor(mHtml);
         mRecordHtmls = transactor.GetRecordHtmls(rule);

         bool hasRecords = mRecordHtmls != null && mRecordHtmls.Length > 0;
         TxtTotal.Text = hasRecords ? mRecordHtmls.Length.ToString() : "0";

         // Show the first record when there is one; -1 is passed when there is nothing to show.
         ShowRecord(hasRecords ? 0 : -1);
     }
     catch (Exception ex)
     {
         // Any failure is reported to the user rather than propagated.
         MessageBox.Show(ex.Message, "提示信息");
     }
 }
Example #8
0
        /// <summary>
        /// Loads an extraction rule into the form: stores it in mDrillRule (a new
        /// DrillRegularRule when <paramref name="pRule"/> is null) and copies its values
        /// into the condition, tag, link-mode and advanced-mode controls.
        /// </summary>
        /// <param name="pRule">Rule to display, or null to start from a new rule.</param>
        private void LoadDrillRule(DrillRegularRule pRule)
        {
            mDrillRule = pRule ?? new DrillRegularRule();

            // Condition settings.
            CmbConditionType.SelectedIndex = mDrillRule.ConditionType;
            TxtConditionTag.Text = mDrillRule.ConditionTag;

            // Region delimiters.
            TxtStartTag.Text = mDrillRule.StartTag;
            TxtEndTag.Text = mDrillRule.EndTag;

            // Extraction type: DrillType 0 selects the first tab, anything else the second.
            tabControl3.SelectedIndex = mDrillRule.DrillType == 0 ? 0 : 1;

            // Link extraction mode.
            CmbLinkType.SelectedIndex = mDrillRule.FeatureType;
            TxtUrlFeatrue.Text = mDrillRule.Feature;

            // Advanced extraction mode.
            // NOTE(review): assumes GetModalDbByID never returns null - confirm.
            CDbDefine modalDb = CommonDbServer.GetModalDbByID(mDrillRule.MetaModalID);
            TxtMeta.Text = modalDb.DbName;
            TxtMeta.Tag = modalDb;
            TxtSpliter.Text = mDrillRule.Splitter;

            // Re-split using the values just placed in the controls; SplitRecord ignores its arguments.
            SplitRecord(null, null);
        }
Example #9
0
 /// <summary>
 /// Runs a test extraction of URLs from the current page (mTree.Html) using the
 /// start/end tags, link type and URL feature entered in the form, and lists the
 /// numbered results in TxtTestResult0.
 /// </summary>
 /// <param name="sender">Event source; not used by this method.</param>
 /// <param name="e">Event data; not used by this method.</param>
 private void btnTest_Click(object sender, EventArgs e)
 {
     RegScriptTransactor sRegScriptTransactor = new RegScriptTransactor(mTree.Html);

     // Build a temporary link-mode rule (DrillType 0) from the form's current values.
     DrillRegularRule sDrillRule = new DrillRegularRule();
     sDrillRule.StartTag = TxtStartTag.Text.Trim();
     sDrillRule.EndTag = TxtEndTag.Text.Trim();
     sDrillRule.DrillType = 0;
     sDrillRule.MetaModalID = SysDbDefines.下载链接.DbModelID;
     sDrillRule.FeatureType = CmbLinkType.SelectedIndex;
     sDrillRule.Feature = TxtUrlFeatrue.Text;

     string[] sUrls = sRegScriptTransactor.GetUrls(sDrillRule, mTree.URL);

     // GetUrls can return null (for example when the tags select no region).
     // Show an empty result instead of crashing the click handler.
     if (sUrls == null)
     {
         TxtTestResult0.Lines = new string[0];
         return;
     }

     // Prefix each URL with its 1-based position: "1.http://...".
     List<string> sList = new List<string>(sUrls.Length);
     for (int i = 0; i < sUrls.Length; i++)
     {
         sList.Add((i + 1).ToString() + "." + sUrls[i]);
     }
     TxtTestResult0.Lines = sList.ToArray();
 }
Example #10
0
 /// <summary>
 /// Extracts link or image URLs from the region GetRegion yields for the rule's
 /// start/end tags, and narrows them to those containing the rule's feature text
 /// when one is set.
 /// </summary>
 /// <param name="pRule">
 /// Rule supplying DrillType, StartTag, EndTag, FeatureType and Feature.
 /// GetLinks is used when FeatureType is 0, GetImgs otherwise.
 /// </param>
 /// <param name="pSourceUrl">URL passed to GetUrlBase, which yields the base URL and the domain.</param>
 /// <returns>
 /// The URL array from the extractor, filtered by a case-insensitive substring match
 /// on Feature when Feature is non-empty; null when DrillType is not 0 or the region
 /// is null or empty.
 /// </returns>
 public string[] GetUrls(DrillRegularRule pRule, string pSourceUrl)
 {
     if (pRule.DrillType != 0)
     {
         return null;
     }

     string region = GetRegion(pRule.StartTag, pRule.EndTag);
     if (region == null || region.Length == 0)
     {
         return null;
     }

     string domain;
     string urlBase = GetUrlBase(pSourceUrl, out domain);

     string[] found;
     if (pRule.FeatureType == 0)
     {
         found = GetLinks(region, domain, urlBase);
     }
     else
     {
         found = GetImgs(region, domain, urlBase);
     }

     // Without feature text every extracted URL is returned as-is.
     bool filterByFeature = pRule.Feature != null && pRule.Feature.Length > 0;
     if (!filterByFeature)
     {
         return found;
     }

     List<string> kept = new List<string>();
     foreach (string candidate in found)
     {
         if (candidate.IndexOf(pRule.Feature, StringComparison.OrdinalIgnoreCase) >= 0)
         {
             kept.Add(candidate);
         }
     }
     return kept.ToArray();
 }
Example #11
0
 /// <summary>
 /// Gets the HTML fragments of the record region: takes the region GetRegion returns
 /// for the rule's start/end tags and cuts it into records on the rule's splitter.
 /// </summary>
 /// <param name="pRule">Rule supplying DrillType, StartTag, EndTag and Splitter.</param>
 /// <returns>
 /// The non-empty pieces produced by splitting the region on Splitter; a single-element
 /// array containing the whole region when Splitter is null or empty; or null when
 /// DrillType is not 2 or the region is null or empty.
 /// </returns>
 public string[] GetRecordHtmls(DrillRegularRule pRule)
 {
     if (pRule.DrillType != 2)
     {
         return null;   // rule does not meet the requirements
     }

     string sRegionHtml = GetRegion(pRule.StartTag, pRule.EndTag);
     if (string.IsNullOrEmpty(sRegionHtml))
     {
         return null;   // nothing was selected by the start/end tags
     }

     // Splitter may be null on a rule that never had one assigned; handle it like an
     // empty splitter (whole region = one record) rather than dereferencing null.
     if (string.IsNullOrEmpty(pRule.Splitter))
     {
         return new string[] { sRegionHtml };
     }

     return sRegionHtml.Split(new string[] { pRule.Splitter }, StringSplitOptions.RemoveEmptyEntries);
 }