/// <summary>
/// Collects the URLs found in the page region selected by a link-extraction rule.
/// </summary>
/// <param name="pRule">
/// Rule to apply. Only rules with <c>DrillType == 0</c> are processed. <c>StartTag</c>/<c>EndTag</c>
/// are passed to <c>GetRegion</c>, <c>FeatureType</c> selects between <c>GetLinks</c> (0) and
/// <c>GetImgs</c> (any other value), and a non-empty <c>Feature</c> keeps only URLs containing it.
/// </param>
/// <param name="pSourceUrl">URL of the page; passed to <c>GetUrlBase</c> to obtain the base URL and domain.</param>
/// <returns>
/// The extracted (and optionally filtered) URLs, or <c>null</c> when the rule's
/// <c>DrillType</c> is not 0 or the region is null or empty.
/// </returns>
public string[] GetUrls(DrillRegularRule pRule, string pSourceUrl)
{
    if (pRule.DrillType != 0)
    {
        return null;
    }

    string region = GetRegion(pRule.StartTag, pRule.EndTag);
    if (string.IsNullOrEmpty(region))
    {
        return null;
    }

    string domain;
    string urlBase = GetUrlBase(pSourceUrl, out domain);

    // FeatureType 0 extracts links; every other value extracts images.
    string[] candidates = pRule.FeatureType == 0
        ? GetLinks(region, domain, urlBase)
        : GetImgs(region, domain, urlBase);

    // Without a feature string there is nothing to filter on.
    if (string.IsNullOrEmpty(pRule.Feature))
    {
        return candidates;
    }

    // Keep only the URLs that contain the feature text (ordinal, case-insensitive).
    List<string> matches = new List<string>();
    foreach (string url in candidates)
    {
        if (url.IndexOf(pRule.Feature, StringComparison.OrdinalIgnoreCase) >= 0)
        {
            matches.Add(url);
        }
    }

    return matches.ToArray();
}
/// <summary>
/// Creates a new <c>DrillRegularRule</c> populated from this object's properties.
/// </summary>
/// <remarks>
/// Copies DbID, DbName, DrillType, EndTag, Feature, FeatureType, Feilds, MetaModalID,
/// Name, Splitter and StartTag.
/// NOTE(review): ConditionType and ConditionTag are not copied here - confirm that is intended.
/// </remarks>
/// <returns>The newly created rule.</returns>
public DrillRegularRule ToDrillRegularRule()
{
    DrillRegularRule rule = new DrillRegularRule
    {
        DbID = DbID,
        DbName = DbName,
        DrillType = DrillType,
        EndTag = EndTag,
        Feature = Feature,
        FeatureType = FeatureType,
        Feilds = Feilds,
        MetaModalID = MetaModalID,
        Name = Name,
        Splitter = Splitter,
        StartTag = StartTag
    };

    return rule;
}
/// <summary>
/// Builds a <c>DrillRegularRule</c> whose values mirror this object's properties.
/// </summary>
/// <remarks>
/// NOTE(review): ConditionType and ConditionTag are not transferred - confirm that is intended.
/// </remarks>
/// <returns>A new rule instance carrying the copied values.</returns>
public DrillRegularRule ToDrillRegularRule()
{
    DrillRegularRule copy = new DrillRegularRule();

    // Storage identification.
    copy.DbID = DbID;
    copy.DbName = DbName;

    // Extraction settings.
    copy.DrillType = DrillType;
    copy.EndTag = EndTag;
    copy.Feature = Feature;
    copy.FeatureType = FeatureType;
    copy.Feilds = Feilds;
    copy.MetaModalID = MetaModalID;
    copy.Name = Name;
    copy.Splitter = Splitter;
    copy.StartTag = StartTag;

    return copy;
}
/// <summary>
/// Gets the HTML fragments of the record region selected by a record-extraction rule.
/// </summary>
/// <param name="pRule">
/// Rule to apply. Only rules with <c>DrillType == 2</c> are processed. <c>StartTag</c>/<c>EndTag</c>
/// are passed to <c>GetRegion</c>; <c>Splitter</c>, when non-empty, is the separator between records.
/// </param>
/// <returns>
/// The region split on <c>Splitter</c> with empty entries removed; a single-element array holding
/// the whole region when <c>Splitter</c> is null or empty; or <c>null</c> when the rule's
/// <c>DrillType</c> is not 2 or the region is null or empty.
/// </returns>
public string[] GetRecordHtmls(DrillRegularRule pRule)
{
    if (pRule.DrillType != 2)
    {
        return null; // Not a record-extraction rule.
    }

    string sRegionHtml = GetRegion(pRule.StartTag, pRule.EndTag);
    if (string.IsNullOrEmpty(sRegionHtml))
    {
        return null; // Nothing was found for the region.
    }

    // A null Splitter is treated like an empty one instead of throwing a
    // NullReferenceException, mirroring how GetUrls null-checks Feature.
    if (string.IsNullOrEmpty(pRule.Splitter))
    {
        return new string[] { sRegionHtml };
    }

    return sRegionHtml.Split(new string[] { pRule.Splitter }, StringSplitOptions.RemoveEmptyEntries);
}
/// <summary>
/// Decides whether a rule may run, based on its condition settings and the content of <c>mOriHtml</c>.
/// </summary>
/// <param name="pDrillRule">Rule whose <c>ConditionType</c> and <c>ConditionTag</c> are evaluated.</param>
/// <returns>
/// <c>true</c> when <c>ConditionType</c> is 0; when it is 1 and <c>ConditionTag</c> is null/empty or
/// occurs in <c>mOriHtml</c>; or when it is 2 and <c>ConditionTag</c> is non-empty and does not occur
/// in <c>mOriHtml</c>. Otherwise <c>false</c>. Matching is ordinal and case-insensitive.
/// </returns>
public bool CanExe(DrillRegularRule pDrillRule)
{
    switch (pDrillRule.ConditionType)
    {
        case 0:
            // Unconditional.
            return true;

        case 1:
            // Run when no tag is given or the tag is present.
            return string.IsNullOrEmpty(pDrillRule.ConditionTag)
                || mOriHtml.IndexOf(pDrillRule.ConditionTag, StringComparison.OrdinalIgnoreCase) >= 0;

        case 2:
            // Run only when a tag is given and it is absent.
            return !string.IsNullOrEmpty(pDrillRule.ConditionTag)
                && mOriHtml.IndexOf(pDrillRule.ConditionTag, StringComparison.OrdinalIgnoreCase) == -1;

        default:
            return false;
    }
}
/// <summary>
/// Checks a rule's execution condition against <c>mOriHtml</c>.
/// </summary>
/// <param name="pDrillRule">Rule providing <c>ConditionType</c> and <c>ConditionTag</c>.</param>
/// <returns>
/// <c>true</c> for condition type 0; for type 1 when the tag is null/empty or found in
/// <c>mOriHtml</c>; for type 2 when the tag is non-empty and not found. <c>false</c> in every
/// other case. The search is ordinal and ignores case.
/// </returns>
public bool CanExe(DrillRegularRule pDrillRule)
{
    if (pDrillRule.ConditionType == 0)
    {
        return true;
    }

    if (pDrillRule.ConditionType == 1)
    {
        // "Must contain": a missing tag counts as satisfied.
        string requiredTag = pDrillRule.ConditionTag;
        return string.IsNullOrEmpty(requiredTag)
            || mOriHtml.IndexOf(requiredTag, StringComparison.OrdinalIgnoreCase) >= 0;
    }

    if (pDrillRule.ConditionType == 2)
    {
        // "Must not contain": a missing tag counts as not satisfied.
        string forbiddenTag = pDrillRule.ConditionTag;
        return !string.IsNullOrEmpty(forbiddenTag)
            && mOriHtml.IndexOf(forbiddenTag, StringComparison.OrdinalIgnoreCase) == -1;
    }

    return false;
}
/// <summary>
/// Splits the current HTML into records using the tags and splitter entered in the form,
/// then updates the record count and shows the first record.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event arguments; not used.</param>
private void SplitRecord(object sender, System.EventArgs e)
{
    try
    {
        // Build a record-extraction rule (DrillType 2) from the text boxes.
        DrillRegularRule rule = new DrillRegularRule
        {
            StartTag = TxtStartTag.Text.Trim(),
            EndTag = TxtEndTag.Text.Trim(),
            DrillType = 2,
            Splitter = TxtSpliter.Text.Trim()
        };

        mRecordHtmls = new RegScriptTransactor(mHtml).GetRecordHtmls(rule);

        bool hasRecords = mRecordHtmls != null && mRecordHtmls.Length > 0;
        TxtTotal.Text = hasRecords ? mRecordHtmls.Length.ToString() : "0";

        // Index -1 is passed to ShowRecord when there is nothing to display.
        ShowRecord(hasRecords ? 0 : -1);
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message, "提示信息");
    }
}
/// <summary>
/// Loads an extraction rule into the editor controls.
/// </summary>
/// <param name="pRule">Rule to display; when <c>null</c>, a new <c>DrillRegularRule</c> is used instead.</param>
private void LoadDrillRule(DrillRegularRule pRule)
{
    mDrillRule = pRule ?? new DrillRegularRule();

    // Condition and region tags.
    CmbConditionType.SelectedIndex = mDrillRule.ConditionType;
    TxtConditionTag.Text = mDrillRule.ConditionTag;
    TxtStartTag.Text = mDrillRule.StartTag;
    TxtEndTag.Text = mDrillRule.EndTag;

    // Extraction type: DrillType 0 selects the first tab, anything else the second.
    tabControl3.SelectedIndex = (mDrillRule.DrillType == 0) ? 0 : 1;

    #region Link extraction mode
    CmbLinkType.SelectedIndex = mDrillRule.FeatureType;
    TxtUrlFeatrue.Text = mDrillRule.Feature;
    #endregion

    #region Advanced extraction mode
    // NOTE(review): assumes GetModalDbByID never returns null for this MetaModalID - confirm.
    CDbDefine metaDefine = CommonDbServer.GetModalDbByID(mDrillRule.MetaModalID);
    TxtMeta.Text = metaDefine.DbName;
    TxtMeta.Tag = metaDefine;
    TxtSpliter.Text = mDrillRule.Splitter;

    // Must run after the tag and splitter text boxes are filled, because SplitRecord reads them.
    SplitRecord(null, null);
    #endregion
}
/// <summary>
/// Runs the link-extraction settings entered in the form against the tree's HTML and
/// shows the resulting URLs as a numbered list in <c>TxtTestResult0</c>.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event arguments; not used.</param>
private void btnTest_Click(object sender, EventArgs e)
{
    RegScriptTransactor sRegScriptTransactor = new RegScriptTransactor(mTree.Html);

    // Build a link-extraction rule (DrillType 0) from the form controls.
    DrillRegularRule sDrillRule = new DrillRegularRule();
    sDrillRule.StartTag = TxtStartTag.Text.Trim();
    sDrillRule.EndTag = TxtEndTag.Text.Trim();
    sDrillRule.DrillType = 0;
    sDrillRule.MetaModalID = SysDbDefines.下载链接.DbModelID;
    sDrillRule.FeatureType = CmbLinkType.SelectedIndex;
    sDrillRule.Feature = TxtUrlFeatrue.Text;

    string[] sUrls = sRegScriptTransactor.GetUrls(sDrillRule, mTree.URL);

    List<string> sList = new List<string>();

    // GetUrls may return null (for example when no region is matched); in that case
    // show an empty result instead of failing with a NullReferenceException.
    if (sUrls != null)
    {
        for (int i = 0; i < sUrls.Length; i++)
        {
            // Prefix each URL with its 1-based position.
            sList.Add((i + 1).ToString() + "." + sUrls[i]);
        }
    }

    TxtTestResult0.Lines = sList.ToArray();
}
/// <summary>
/// Extracts link or image URLs from the region selected by a rule, optionally filtered by a feature string.
/// </summary>
/// <param name="pRule">
/// Rule to apply; handled only when <c>DrillType == 0</c>. <c>FeatureType == 0</c> uses
/// <c>GetLinks</c>, any other value uses <c>GetImgs</c>. A non-empty <c>Feature</c> restricts
/// the result to URLs containing it (ordinal, case-insensitive).
/// </param>
/// <param name="pSourceUrl">Page URL handed to <c>GetUrlBase</c> to derive the base URL and domain.</param>
/// <returns>
/// The resulting URLs, or <c>null</c> when <c>DrillType</c> is not 0 or the region returned by
/// <c>GetRegion</c> is null or empty.
/// </returns>
public string[] GetUrls(DrillRegularRule pRule, string pSourceUrl)
{
    string[] result = null;

    if (pRule.DrillType == 0)
    {
        string html = GetRegion(pRule.StartTag, pRule.EndTag);
        if (!string.IsNullOrEmpty(html))
        {
            string host;
            string baseUrl = GetUrlBase(pSourceUrl, out host);

            string[] found;
            if (pRule.FeatureType == 0)
            {
                found = GetLinks(html, host, baseUrl);
            }
            else
            {
                found = GetImgs(html, host, baseUrl);
            }

            if (string.IsNullOrEmpty(pRule.Feature))
            {
                // No filter requested: hand back everything that was found.
                result = found;
            }
            else
            {
                List<string> kept = new List<string>();
                for (int index = 0; index < found.Length; index++)
                {
                    string candidate = found[index];
                    if (candidate.IndexOf(pRule.Feature, StringComparison.OrdinalIgnoreCase) >= 0)
                    {
                        kept.Add(candidate);
                    }
                }
                result = kept.ToArray();
            }
        }
    }

    return result;
}
/// <summary>
/// Gets the HTML fragments of the record region selected by a record-extraction rule.
/// </summary>
/// <param name="pRule">
/// Rule to apply; handled only when <c>DrillType == 2</c>. <c>StartTag</c>/<c>EndTag</c> are
/// passed to <c>GetRegion</c>, and a non-empty <c>Splitter</c> separates individual records.
/// </param>
/// <returns>
/// The region split on <c>Splitter</c> (empty entries removed); a one-element array containing
/// the whole region when <c>Splitter</c> is null or empty; or <c>null</c> when <c>DrillType</c>
/// is not 2 or the region is null or empty.
/// </returns>
public string[] GetRecordHtmls(DrillRegularRule pRule)
{
    if (pRule.DrillType == 2)
    {
        string sRegionHtml = GetRegion(pRule.StartTag, pRule.EndTag);
        if (!string.IsNullOrEmpty(sRegionHtml))
        {
            // Guard against a null Splitter (previously a NullReferenceException) by
            // treating it the same as an empty one: the region is returned unsplit.
            if (!string.IsNullOrEmpty(pRule.Splitter))
            {
                return sRegionHtml.Split(new string[] { pRule.Splitter }, StringSplitOptions.RemoveEmptyEntries);
            }

            return new string[] { sRegionHtml };
        }
    }

    return null; // Rule type does not apply or no region was found.
}