/// <summary>
/// Creates a crawler with its default state: a fresh <c>HttpItem</c> whose URL is
/// "http://www.cnblogs.com/", empty <c>CrawlItems</c> and <c>Documents</c> collections,
/// a new <c>HttpHelper</c>, and <c>IsMultiData</c> set to <c>ListType.List</c>.
/// </summary>
public SmartCrawler()
{
    // Request description and the collection of crawl items.
    Http = new HttpItem();
    CrawlItems = new ObservableCollection<CrawlItem>();

    // Default start page; assigned only after Http itself has been set.
    Http.URL = "http://www.cnblogs.com/";

    helper = new HttpHelper();
    IsMultiData = ListType.List;
    Documents = new ObservableCollection<HttpItem>();
}
/// <summary>
/// Creates the translation transformer. The source and target selectors are both
/// populated from <c>language.Keys</c> and both start on "自动检测" (Chinese for
/// "auto-detect"). Changing the target selection afterwards clears <c>buffHelper</c>.
/// </summary>
public TransTF()
{
    // Same initial selection for both selectors; the literal must match a key in `language`
    // for the selection to be meaningful (presumably it does - verify against the table).
    const string initialSelection = "自动检测";

    Source = new ExtendSelector<string>(language.Keys);
    Target = new ExtendSelector<string>(language.Keys);
    Source.SelectItem = initialSelection;
    Target.SelectItem = initialSelection;

    // NOTE(review): client id is hard-coded in source; consider moving it to configuration.
    ClientID = "0CupOSsCC4YaDozfkC9gE5EO";
    helper = new HttpHelper();

    // Subscribed only after the initial selection above, so that assignment does not
    // run this handler.
    Target.SelectChanged += (sender, args) => buffHelper.Clear();
}
/// <summary>
/// Resolves the <c>SmartCrawler</c> named by <c>CrawlerSelector</c> and prepares the HTTP helper
/// before delegating to the base initialisation.
/// </summary>
/// <param name="datas">Documents forwarded unchanged to <c>base.Init</c>.</param>
/// <returns>
/// <c>false</c> when no crawler is currently loaded under that name and the project has no task
/// with that name either; otherwise the result of <c>base.Init(datas)</c>.
/// </returns>
public override bool Init(IEnumerable<IFreeDocument> datas)
{
    // First look among the processes that are already loaded.
    crawler = processManager.CurrentProcessCollections.FirstOrDefault(p => p.Name == CrawlerSelector) as SmartCrawler;

    if (crawler == null)
    {
        // Not loaded yet: find the project task with the same name and load it.
        var task = processManager.CurrentProject.Tasks.FirstOrDefault(t => t.Name == CrawlerSelector);
        if (task == null)
        {
            return false;
        }

        ControlExtended.UIInvoke(() => { task.Load(false); });

        // Look again after loading. The result may still be null; no further check is made here.
        crawler = processManager.CurrentProcessCollections.FirstOrDefault(p => p.Name == CrawlerSelector) as SmartCrawler;
    }

    helper = new HttpHelper();
    return base.Init(datas);
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and yields every data list that
/// <c>GetDataFromHtml</c> extracts from it.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>
/// A lazily evaluated sequence (the request is issued when enumeration starts):
/// empty when the response status is not <c>HttpStatusCode.OK</c>; a single empty list when
/// the status is OK but no HTML was returned; otherwise the lists extracted from the page.
/// </returns>
public static IEnumerable<List<FreeDocument>> GetMultiDataFromURL(string url)
{
    var httpitem = new HttpItem {URL = url};
    var helper = new HttpHelper();
    HttpStatusCode statusCode;
    var html = helper.GetHtml(httpitem, out statusCode);

    if (statusCode != HttpStatusCode.OK)
    {
        yield break;
    }

    if (html == null)
    {
        yield return new List<FreeDocument>();
        // Stop here: without this the iterator fell through and passed null to LoadHtml,
        // which throws as soon as the caller asks for the next element.
        yield break;
    }

    var htmldoc = new HtmlDocument();
    htmldoc.LoadHtml(html);
    foreach (var item in htmldoc.GetDataFromHtml())
    {
        yield return item;
    }
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and parses it into an <c>HtmlDocument</c>.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>
/// The parsed document, or <c>null</c> when the response status is not
/// <c>HttpStatusCode.OK</c> or no HTML was returned.
/// </returns>
public static HtmlDocument GetHtmlDocument(string url)
{
    var httpitem = new HttpItem {URL = url};
    var helper = new HttpHelper();
    HttpStatusCode statusCode;
    var html = helper.GetHtml(httpitem, out statusCode);

    // A null body is treated like a failed request: LoadHtml would otherwise be handed null
    // and throw, whereas callers already have to handle a null return.
    if (statusCode != HttpStatusCode.OK || html == null)
    {
        return null;
    }

    var htmldoc = new HtmlDocument();
    htmldoc.LoadHtml(html);
    return htmldoc;
}
/// <summary>
/// Downloads the page at <paramref name="url"/> using the given encoding hint and parses it
/// into an <c>HtmlDocument</c>.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="encoding">Encoding assigned to the request item; defaults to <c>EncodingType.Unknown</c>.</param>
/// <returns>
/// The parsed document. When <c>HttpHelper.IsSuccess</c> rejects the status code, or no HTML
/// was returned, a new <c>HtmlDocument</c> with nothing loaded is returned instead (never <c>null</c>).
/// </returns>
public static HtmlDocument GetDocumentFromURL(string url, EncodingType encoding = EncodingType.Unknown)
{
    var httpitem = new HttpItem();
    httpitem.URL = url;
    httpitem.Encoding = encoding;

    var helper = new HttpHelper();
    HttpStatusCode code;
    var doc = new HtmlDocument();
    var result = helper.GetHtml(httpitem, out code);

    // A null body is treated like a failed request: return the unloaded document rather than
    // passing null to LoadHtml, which would throw.
    if (!HttpHelper.IsSuccess(code) || result == null)
    {
        return doc;
    }

    doc.LoadHtml(result);
    return doc;
}