Esempio n. 1
0
        /// <summary>
        /// Builds a <see cref="VideoInfo"/> for a Youku play page. The play URL, flash URL and
        /// flash id are derived from <paramref name="url"/> alone; the title and picture URL are
        /// scraped from the downloaded page.
        /// </summary>
        /// <param name="url">Play page URL (the code expects the "http://v.youku.com/v_show/id_" prefix and ".html" suffix).</param>
        /// <returns>
        /// The populated info. If downloading or parsing throws, the error is logged and the
        /// info is returned with only the URL-derived fields set.
        /// </returns>
        public VideoInfo GetInfo(String url)
        {
            // Video id = play URL after strUtil.TrimStart / strUtil.TrimEnd were given the fixed prefix and suffix.
            String videoId = strUtil.TrimEnd(strUtil.TrimStart(url, "http://v.youku.com/v_show/id_"), ".html");
            String playerUrl = string.Format("http://player.youku.com/player.php/sid/{0}/v.swf", videoId);

            VideoInfo info = new VideoInfo {
                PlayUrl  = url,
                FlashUrl = playerUrl,
                FlashId  = videoId
            };

            try {
                String html = PageLoader.Download(url);

                // Group 1 is empty when a pattern does not match, so both values may be "".
                String rawTitle  = Regex.Match(html, "<title>([^<]+?)</title>").Groups[1].Value;
                String title     = VideoHelper.GetTitle(rawTitle);
                String thumbnail = Regex.Match(html, "pic=(http://[^:]+?.ykimg.com.+?)\"").Groups[1].Value;

                info.Title  = title;
                info.PicUrl = thumbnail;
            }
            catch (Exception ex) {
                // Best effort: log and fall through to return the partially filled info.
                logger.Error("getUrl=" + url);
                logger.Error(ex.Message);
            }

            return(info);
        }
Esempio n. 2
0
        /// <summary>
        /// Downloads the detail page at <paramref name="detailUrl"/> and returns its body.
        /// Also records the site URL of the detail page on <paramref name="template"/>.
        /// </summary>
        /// <param name="detailUrl">URL of the detail page to fetch.</param>
        /// <param name="template">Spider template supplying the optional page encoding; its SiteUrl is overwritten.</param>
        /// <param name="sb">Progress log that receives one line per fetch.</param>
        /// <returns>The downloaded page (possibly empty), or null when an exception was caught.</returns>
        protected string getDetailPageBody(string detailUrl, SpiderTemplate template, StringBuilder sb)
        {
            try {
                sb.AppendLine("抓取详细页..." + detailUrl);

                // Use the template encoding only when one is configured; otherwise pass an empty encoding.
                String encoding = strUtil.HasText(template.DetailEncoding) ? template.DetailEncoding : "";
                String body     = PageLoader.Download(detailUrl, SpiderConfig.UserAgent, encoding);

                template.SiteUrl = new UrlInfo(detailUrl).SiteUrl;

                // An empty page is logged but still returned to the caller.
                if (strUtil.IsNullOrEmpty(body))
                {
                    logInfo("error=原始页面没有内容:" + detailUrl, detailUrl, template, sb);
                }

                return(body);
            }
            catch (Exception ex) {
                logInfo("error=抓取" + detailUrl + "发生错误:" + ex.Message, detailUrl, template, sb);
                return(null);
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Opens the local file addressed by <paramref name="url"/> as a document in the
        /// browsing context obtained from <c>PageLoader.GetBrowsingContext()</c>.
        /// </summary>
        /// <remarks>
        /// When <c>StateSelector</c> is set, a URL equal to a comic's State1 archive or page URL
        /// is first swapped for the corresponding URL of the state chosen by the selector.
        /// </remarks>
        /// <param name="url">File URL of the document; its <c>LocalPath</c> is opened from disk.</param>
        /// <returns>The task produced by <c>OpenAsync</c> for the loaded document.</returns>
        public Task <IDocument> OpenDocument(string url)
        {
            if (StateSelector != null)
            {
                // Flip out requested url for new state (assumes previous state was State1)
                foreach (var comic in comics)
                {
                    if (url == comic.State1.ArchiveUrl)
                    {
                        url = StateSelector(comic).ArchiveUrl;
                        break;
                    }
                    else if (url == comic.State1.PageUrl)
                    {
                        url = StateSelector(comic).PageUrl;
                        break;
                    }
                }
            }

            return(PageLoader.GetBrowsingContext().OpenAsync(res =>
            {
                // Open read-only. FileStream(path, FileMode.Open) asks for read/write access,
                // which fails on read-only files even though the content is only read here.
                res.Content(File.OpenRead(new Uri(url).LocalPath), shouldDispose: true)
                .Address(url);
            }));
        }
Esempio n. 4
0
        /// <summary>
        /// Downloads the list page of template <paramref name="s"/> and returns the part of it
        /// matched by the template's list-body regular expression.
        /// </summary>
        /// <param name="s">Spider template supplying the list URL, optional encoding and body pattern.</param>
        /// <param name="sb">Log buffer handed on to <c>logInfo</c>.</param>
        /// <returns>
        /// The trimmed match; the empty/null download result unchanged when nothing was
        /// downloaded; or an empty string when the pattern does not match.
        /// </returns>
        private static string downloadListPageBody(SpiderTemplate s, StringBuilder sb)
        {
            // Use the template encoding only when one is configured; otherwise pass an empty encoding.
            String encoding = strUtil.HasText(s.ListEncoding) ? s.ListEncoding : "";
            String html     = PageLoader.Download(s.ListUrl, SpiderConfig.UserAgent, encoding);

            if (strUtil.IsNullOrEmpty(html))
            {
                logInfo("error=原始页面没有内容: " + s.ListUrl, s, sb);
                return(html);
            }

            Match listBody = Regex.Match(html, s.GetListBodyPattern(), RegexOptions.Singleline);

            if (!listBody.Success)
            {
                logInfo("error=没有匹配的页面内容:" + s.ListUrl, s, sb);
                return("");
            }

            return(listBody.Value.Trim());
        }
 /// <summary>
 /// Creates the provider and stores its collaborators. Option wrappers are unwrapped once
 /// here, and the value provider factories are copied into an array.
 /// </summary>
 /// <param name="actionContextAccessor">
 /// Optional accessor; <c>ActionContextAccessor.Null</c> is used when none is supplied.
 /// </param>
 public PageActionInvokerProvider(
     PageLoader pageLoader,
     PageActionInvokerCache pageActionInvokerCache,
     IModelMetadataProvider modelMetadataProvider,
     ITempDataDictionaryFactory tempDataFactory,
     IOptions<MvcOptions> mvcOptions,
     IOptions<MvcViewOptions> mvcViewOptions,
     IPageHandlerMethodSelector selector,
     DiagnosticListener diagnosticListener,
     ILoggerFactory loggerFactory,
     IActionResultTypeMapper mapper,
     IActionContextAccessor? actionContextAccessor = null)
 {
     _pageLoader = pageLoader;
     _pageActionInvokerCache = pageActionInvokerCache;

     // Snapshot of the configured factories taken at construction time.
     _valueProviderFactories = mvcOptions.Value.ValueProviderFactories.ToArray();

     _modelMetadataProvider = modelMetadataProvider;
     _tempDataFactory = tempDataFactory;
     _mvcViewOptions = mvcViewOptions.Value;
     _selector = selector;
     _diagnosticListener = diagnosticListener;

     // The logger is created under the PageActionInvoker category.
     _logger = loggerFactory.CreateLogger<PageActionInvoker>();

     _mapper = mapper;
     _actionContextAccessor = actionContextAccessor ?? ActionContextAccessor.Null;
 }
Esempio n. 6
0
        /// <summary>
        /// Builds a <see cref="VideoInfo"/> for an ifeng play page. The flash id is the last
        /// '/'-separated segment of the play URL; picture and title come from the
        /// <c>videoinfo</c> JSON object embedded in the downloaded page.
        /// </summary>
        /// <param name="playUrl">Play page URL (the code expects a ".shtml" suffix).</param>
        /// <returns>
        /// The populated info. If downloading or parsing throws, the error is logged and the
        /// info is returned with whatever fields were set before the failure.
        /// </returns>
        public VideoInfo GetInfo(string playUrl)
        {
            String[] segments = strUtil.TrimEnd(playUrl, ".shtml").Split('/');
            String   guid     = segments[segments.Length - 1];

            VideoInfo info = new VideoInfo {
                PlayUrl  = playUrl,
                FlashId  = guid,
                FlashUrl = string.Format("http://v.ifeng.com/include/exterior.swf?guid={0}&AutoPlay=false", guid)
            };

            try {
                String html = PageLoader.Download(playUrl, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)", "utf-8");
                String json = Regex.Match(html, "var videoinfo=({.+?});").Groups[1].Value;

                Dictionary <String, Object> fields = JSON.ToDictionary(json);

                // A missing key yields ""; a present key is converted with ToString().
                Object value;
                info.PicUrl = fields.TryGetValue("img", out value) ? value.ToString() : "";
                info.Title  = fields.TryGetValue("name", out value) ? value.ToString() : "";
            }
            catch (Exception ex) {
                // Best effort: log and fall through to return the partially filled info.
                logger.Error("getUrl=" + playUrl);
                logger.Error(ex.Message);
            }

            return(info);
        }
Esempio n. 7
0
 /// <summary>
 /// Builds "user.names.dat" and "user.icons.dat" from "records.txt". When the records file
 /// does not exist yet, it is first generated by scraping user entries from the loaded pages.
 /// </summary>
 public static void Main() {
     // Step 1: create the "icon name" records file only when it is missing.
     if (!File.Exists("records.txt")) {
         var userBlocks = new PageLoader().Get()
             .SelectMany(page => MatchUsers(page.Document).OfType<Match>())
             .Select(match => match.Groups[1].Value);

         // Entries without an icon id are dropped.
         var records =
             from block in userBlocks
             let name = ClearName(Regex.Match(block, "href=\"/user/[^\"]+\">(.+?)</a>").Groups[1].Value)
             let icon = Regex.Match(block, "<img src=\"[^\"]+/user/(\\d+)\"").Groups[1].Value
             where !string.IsNullOrEmpty(icon)
             select icon + " " + name;

         File.WriteAllLines("records.txt", records);
     }

     // Step 2: one entry per name hash (the first icon wins), ordered by hash.
     var info = File.ReadAllLines("records.txt")
         .Select(line => line.Split(new char[] { ' ' }, 2))
         .Select(parts => new { icon = parts[0], name = parts[1] })
         .GroupBy(record => HashCode(record.name))
         .Select(group => new { nameHash = group.Key, icon = int.Parse(group.First().icon) })
         .OrderBy(entry => entry.nameHash)
         .ToList();

     // Step 3: two parallel binary files, hashes in one and icon ids in the other.
     byte[] hashBytes = info.SelectMany(entry => BitConverter.GetBytes(entry.nameHash)).ToArray();
     File.WriteAllBytes("user.names.dat", hashBytes);

     byte[] iconBytes = info.SelectMany(entry => BitConverter.GetBytes(entry.icon)).ToArray();
     File.WriteAllBytes("user.icons.dat", iconBytes);
 }
Esempio n. 8
0
        /// <summary>
        /// Builds a <see cref="VideoInfo"/> for a Tudou play page. The id, play URL and flash URL
        /// are derived from <paramref name="url"/>; the title and thumbnail are scraped from
        /// the downloaded page.
        /// </summary>
        /// <param name="url">Play page URL (the code expects the "http://www.tudou.com/programs/view/" prefix).</param>
        /// <returns>
        /// The populated info. If downloading or parsing throws, the error is logged and the
        /// info is returned with only the URL-derived fields set.
        /// </returns>
        public VideoInfo GetInfo(String url)
        {
            // Id = URL after strUtil.TrimStart was given the fixed prefix, without trailing '/'.
            String videoId = strUtil.TrimStart(url, "http://www.tudou.com/programs/view/").TrimEnd('/');

            VideoInfo info = new VideoInfo();
            info.PlayUrl  = url;
            info.FlashId  = videoId;
            info.FlashUrl = string.Format("http://www.tudou.com/v/{0}/v.swf", videoId);

            try {
                String html = PageLoader.Download(url);

                // Group 1 is empty when a pattern does not match, so both values may be "".
                String rawTitle  = Regex.Match(html, "<title>([^<]+?)</title>").Groups[1].Value;
                String title     = VideoHelper.GetTitle(rawTitle);
                String thumbnail = Regex.Match(html, "thumbnail[^']+?'([^']+?)'").Groups[1].Value;

                info.Title  = title;
                info.PicUrl = thumbnail;
            }
            catch (Exception ex) {
                // Best effort: log and fall through to return the partially filled info.
                logger.Error("getUrl=" + url);
                logger.Error(ex.Message);
            }

            return(info);
        }
Esempio n. 9
0
        /// <summary>
        /// Builds a <see cref="VideoInfo"/> from the <c>video : { ... }</c> object embedded in the
        /// downloaded play page, reading its "pic", "swfOutsideUrl" and "title" entries.
        /// </summary>
        /// <param name="playUrl">URL of the play page to download.</param>
        /// <returns>
        /// The populated info. If downloading or parsing throws, the error is logged and the
        /// info is returned with whatever fields were set before the failure.
        /// </returns>
        public VideoInfo GetInfo(string playUrl)
        {
            VideoInfo info = new VideoInfo { PlayUrl = playUrl };

            try {
                String html = PageLoader.Download(playUrl);

                // Capture the inside of the object literal, then restore the braces around it.
                Match  videoBlock = Regex.Match(html, "video : {(.+?)\\}[^,]", RegexOptions.Singleline);
                String json       = "{" + videoBlock.Groups[1].Value + "}";

                Dictionary <String, Object> fields = JSON.ToDictionary(json);

                // A missing key yields ""; a present key is converted with ToString().
                Object value;
                info.PicUrl   = fields.TryGetValue("pic", out value) ? value.ToString() : "";
                info.FlashUrl = fields.TryGetValue("swfOutsideUrl", out value) ? value.ToString() : "";
                info.Title    = fields.TryGetValue("title", out value) ? value.ToString() : "";
            }
            catch (Exception ex) {
                // Best effort: log and fall through to return the partially filled info.
                logger.Error("getUrl=" + playUrl);
                logger.Error(ex.Message);
            }

            return(info);
        }
Esempio n. 10
0
    /// <summary>
    /// Generates "records.txt" by scraping user entries when the file is missing, then turns
    /// its lines into the binary lookup files "user.names.dat" and "user.icons.dat".
    /// </summary>
    public static void Main()
    {
        if (!File.Exists("records.txt"))
        {
            // Raw HTML fragment (capture group 1) of every user match on every loaded page.
            var userHtml = new PageLoader().Get()
                           .SelectMany(page => MatchUsers(page.Document).OfType <Match>())
                           .Select(userMatch => userMatch.Groups[1].Value);

            // One "icon name" line per user that has an icon id.
            var lines = userHtml
                        .Select(html => new {
                            name = ClearName(Regex.Match(html, "href=\"/user/[^\"]+\">(.+?)</a>").Groups[1].Value),
                            icon = Regex.Match(html, "<img src=\"[^\"]+/user/(\\d+)\"").Groups[1].Value
                        })
                        .Where(user => !string.IsNullOrEmpty(user.icon))
                        .Select(user => user.icon + " " + user.name);

            File.WriteAllLines("records.txt", lines);
        }

        // Each line is "icon name", split on the first space only.
        // Names are grouped by hash; the first icon of each group is kept.
        var info = (from line in File.ReadAllLines("records.txt")
                    let fields = line.Split(new char[] { ' ' }, 2)
                    let icon = fields[0]
                    let name = fields[1]
                    group icon by HashCode(name) into sameHash
                    select new { nameHash = sameHash.Key, icon = int.Parse(sameHash.First()) } into entry
                    orderby entry.nameHash
                    select entry)
                   .ToList();

        var hashBytes = info.SelectMany(entry => BitConverter.GetBytes(entry.nameHash)).ToArray();
        File.WriteAllBytes("user.names.dat", hashBytes);

        var iconBytes = info.SelectMany(entry => BitConverter.GetBytes(entry.icon)).ToArray();
        File.WriteAllBytes("user.icons.dat", iconBytes);
    }
Esempio n. 11
0
        /// <summary>
        /// Downloads the detail page at <paramref name="detailUrl"/> and loads it into an
        /// HtmlAgilityPack <see cref="HtmlDocument"/>.
        /// </summary>
        /// <param name="detailUrl">URL of the detail page to fetch.</param>
        /// <param name="template">Spider template supplying the optional page encoding.</param>
        /// <param name="sb">Progress log that receives one line per fetch.</param>
        /// <returns>The parsed document, or null when an exception was caught.</returns>
        protected HtmlDocument getDetailPageBodyHtmlDocument(string detailUrl, SpiderTemplate template, StringBuilder sb)
        {
            try {
                sb.AppendLine("抓取详细页..." + detailUrl);

                HtmlDocument document = new HtmlDocument();
                document.OptionAddDebuggingAttributes = false;
                document.OptionAutoCloseOnEnd         = true;
                document.OptionFixNestedTags          = true;
                document.OptionReadEncoding           = true;

                // Use the template encoding only when one is configured; otherwise pass an empty encoding.
                String encoding = strUtil.HasText(template.DetailEncoding) ? template.DetailEncoding : "";
                String html     = PageLoader.Download(detailUrl, SpiderConfig.UserAgent, encoding);

                document.LoadHtml(html);

                return(document);
            }
            catch (Exception ex) {
                logInfo("error=抓取" + detailUrl + "发生错误:" + ex.Message, detailUrl, template, sb);
                return(null);
            }
        }
Esempio n. 12
0
        /// <summary>
        /// Downloads the ifeng daily ranking page with an explicit user agent and "utf-8"
        /// encoding, then checks that the page is non-empty and contains the ranking heading.
        /// </summary>
        public void testPage()
        {
            const String userAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)";

            String html = PageLoader.Download("http://news.ifeng.com/toprank/day/", userAgent, "utf-8");
            Assert.IsNotEmpty(html);

            // The heading must be found at a position after the start of the page.
            int headingIndex = html.IndexOf("点击排行榜");
            Assert.Greater(headingIndex, 0);
        }
Esempio n. 13
0
        /// <summary>
        /// Builds a <see cref="VideoInfo"/> for a Ku6 play page. The id, play URL and flash URL
        /// are derived from <paramref name="url"/>; the title and picture URL are scraped
        /// from the downloaded page.
        /// </summary>
        /// <param name="url">Play page URL (the code expects the "http://v.ku6.com/show/" prefix and ".html" suffix).</param>
        /// <returns>
        /// The populated info. If downloading or parsing throws, the error is logged and the
        /// info is returned with only the URL-derived fields set.
        /// </returns>
        public VideoInfo GetInfo(String url)
        {
            // Video id = play URL after strUtil.TrimStart / strUtil.TrimEnd were given the fixed prefix and suffix.
            String withoutPrefix = strUtil.TrimStart(url, "http://v.ku6.com/show/");
            String videoId       = strUtil.TrimEnd(withoutPrefix, ".html");
            String playerUrl     = string.Format("http://player.ku6.com/refer/{0}/v.swf", videoId);

            VideoInfo info = new VideoInfo {
                PlayUrl  = url,
                FlashUrl = playerUrl,
                FlashId  = videoId
            };

            try {
                String html = PageLoader.Download(url);

                // Group 1 is empty when a pattern does not match, so both values may be "".
                String rawTitle = Regex.Match(html, "<title>([^<]+?)</title>").Groups[1].Value;
                String title    = VideoHelper.GetTitle(rawTitle);
                String picture  = Regex.Match(html, "<span class=\"s_pic\">([^<]+?)</span>").Groups[1].Value;

                info.Title  = title;
                info.PicUrl = picture;
            }
            catch (Exception ex) {
                // Best effort: log and fall through to return the partially filled info.
                logger.Error("getUrl=" + url);
                logger.Error(ex.Message);
            }

            return(info);
        }
        /// <summary>
        /// Searches Google Images for <paramref name="phrase"/>, then downloads the found images
        /// into <c>CacheDirectory</c> in parallel, adding every completed result to
        /// <c>SearchResults</c> under <c>searchResultsLock</c>.
        /// </summary>
        /// <remarks>
        /// The method is <c>async void</c>, so callers cannot await it or observe its exceptions;
        /// the signature is kept for compatibility. The search page is requested up to five
        /// times until the parser yields at least one result. Each download is given ten seconds.
        /// </remarks>
        /// <param name="phrase">Search phrase; it is URL-escaped before being placed in the query string.</param>
        public async void SearchPhrase(string phrase)
        {
            const int MaxAttempts       = 5;
            const int DownloadTimeoutMs = 10000;

            PageLoader pageLoader = PageLoader.GetPageLoader();
            List <Func <string, Task <ImageSearchResult> > > imageSearchResultsFunc = null;

            // Escape the phrase: characters such as '&', '#', '+' or spaces would otherwise
            // change or truncate the query string.
            string searchUrl = $@"https://www.google.com/search?q={Uri.EscapeDataString(phrase ?? string.Empty)}&source=lnms&tbm=isch";

            for (int attempt = 0; attempt < MaxAttempts; attempt++)
            {
                var page = await pageLoader.LoadPageAsync(searchUrl);

                imageSearchResultsFunc = GoogleImageDOMParser.GetUrls(page).ToList();
                if (imageSearchResultsFunc.Count > 0)
                {
                    break;
                }
            }

            // The list is assigned on every attempt, so the former null check could never fire
            // and an empty result set was reported as a successful download. Report it instead.
            // Nothing is thrown here because an exception escaping an async void method cannot
            // be caught by the caller.
            if (imageSearchResultsFunc == null || imageSearchResultsFunc.Count == 0)
            {
                Debug.WriteLine($"After {MaxAttempts} attempts no search results was found!", "INFO");
                return;
            }

            // Each factory is invoked with the cache directory and yields a not-yet-started task.
            var imageSearchResultTasks = imageSearchResultsFunc
                                         .AsParallel()
                                         .Select(o => o(CacheDirectory))
                                         .ToList();

            await Task.Factory.StartNew(() =>
            {
                Parallel.ForEach(imageSearchResultTasks, (imageResultTask) =>
                {
                    Stopwatch watch = new Stopwatch();
                    watch.Start();
                    try
                    {
                        imageResultTask.Start();

                        // Wait returns false on timeout. Result is read only after completion,
                        // because reading it earlier would block and defeat the timeout.
                        bool finished = imageResultTask.Wait(DownloadTimeoutMs);
                        var added     = finished ? imageResultTask.Result : null;

                        if (added != null)
                        {
                            lock (searchResultsLock)
                            {
                                SearchResults.Add(added);
                            }
                            Debug.WriteLine($"Finished downloading {added.Name} in {watch.ElapsedMilliseconds}ms", "INFO");
                        }
                        else
                        {
                            Debug.WriteLine($"Download timed out or produced no result after {watch.ElapsedMilliseconds}ms", "INFO");
                        }
                    }
                    catch (Exception ex)
                    {
                        // One failed download must not stop the others; keep the cause visible.
                        Debug.WriteLine("Exception occured: " + ex);
                    }
                    finally
                    {
                        watch.Stop();
                    }
                });
            });

            Debug.WriteLine("All images were downloaded succesfully", "INFO");
        }
Esempio n. 15
0
        //#region 判断是否为土豆视频链接
        ///// <summary>
        ///// 判断是否为土豆视频链接
        ///// </summary>
        ///// <returns></returns>
        //public static bool IsTuDouVideoUrl(string url)
        //{
        //    if (url.StartsWith("http://www.tudou.com/programs/view/"))
        //    {
        //        return true;
        //    }
        //    else
        //    {
        //        return false;
        //    }
        //}
        //#endregion

        //#region 得到土豆视频的视频编码
        ///// <summary>
        ///// 得到土豆视频的视频编码
        ///// </summary>
        //public static string GetTuDouVideoItemCode(string url)
        //{
        //    String vid = "";
        //    //if (StringPlus.IsNullOrEmpty(url) || !IsTuDouVideoUrl(url))
        //    //{
        //    //    return "";
        //    //}
        //    if (StringPlus.IsNullOrEmpty(url))
        //    {
        //        return "";
        //    }
        //    vid = StringPlus.TrimStart(url, "http://www.tudou.com/programs/view/");
        //    if (StringPlus.IsNullOrEmpty(vid))
        //    {
        //        return "";
        //    }
        //    int FirstPoint = vid.IndexOf("/");
        //    if (FirstPoint > 0)
        //    {
        //        vid = vid.Substring(0, FirstPoint);
        //    }
        //    return vid;
        //}
        //#endregion

        //#region 得到土豆视频信息

        ///// <summary>
        ///// 得到土豆视频信息
        ///// </summary>
        ///// <param name="url"></param>
        ///// <returns></returns>
        //public static TuDouInfo GetTuDouInfo(string url)
        //{
        //    string itemCode = GetTuDouVideoItemCode(url);
        //    if (StringPlus.IsNullOrEmpty(itemCode))
        //    {
        //        return null;
        //    }

        //    string jsonData = PageLoader.Download(string.Format(tuDouJsonDataApiUrl, tuDouAppKey, itemCode));
        //    if (StringPlus.IsNullOrEmpty(jsonData))
        //    {
        //        return null;
        //    }

        //    string strStart = "{\"multiResult\":{\"results\":[";
        //    if (jsonData.StartsWith(strStart))
        //    {
        //        jsonData = jsonData.Replace(strStart, "");
        //    }

        //    string strEnd = "]}}";
        //    if (jsonData.EndsWith(strEnd))
        //    {
        //        int FirstPoint = jsonData.LastIndexOf(strEnd);
        //        if (FirstPoint > 0)
        //        {
        //            jsonData = jsonData.Substring(0, FirstPoint);
        //        }
        //    }

        //    TuDouInfo info = (TuDouInfo)Jayrock.Json.Conversion.JsonConvert.Import(typeof(TuDouInfo), jsonData);

        //    return info;
        //}
        //#endregion

        #region 判断是否是酷6视频链接
        /// <summary>
        /// Determines whether <paramref name="url"/> is a Ku6 video link by querying the Ku6 XML
        /// data API and inspecting the "type" attribute of the "root/result" node.
        /// </summary>
        /// <param name="url">URL to check; it is formatted into <c>ku6XmlDataApiUrl</c>.</param>
        /// <returns>
        /// True when the response has a "root/result" node whose "type" attribute is an integer
        /// other than -1; false when the response is empty, the node or attribute is missing,
        /// or the type is -1.
        /// </returns>
        public static bool IsKu6VideoUrl(string url)
        {
            string response = PageLoader.Download(string.Format(ku6XmlDataApiUrl, url));

            if (string.IsNullOrEmpty(response))
            {
                return(false);
            }

            XmlDocument document = new XmlDocument();
            document.LoadXml(response);

            XmlNode resultNode = document.SelectSingleNode("root/result");
            if (null == resultNode)
            {
                return(false);
            }

            XmlNode typeAttribute = resultNode.Attributes.GetNamedItem("type");
            if (null == typeAttribute)
            {
                return(false);
            }

            // -1 is treated as "not a Ku6 video"; any other integer counts as a match.
            return(Convert.ToInt32(typeAttribute.Value) != -1);
        }
Esempio n. 16
0
 /// <summary>
 /// Stores the injected services and creates the two private lock objects.
 /// </summary>
 public RequestHandlerInfo(
     IActionDescriptorCollectionProvider actionDescriptorCollectionProvider,
     PageLoader pageLoader,
     LinkGenerator linkGenerator)
 {
     // Injected services.
     _actionDescriptorCollectionProvider = actionDescriptorCollectionProvider;
     _pageLoader = pageLoader;
     _linkGenerator = linkGenerator;

     // Two separate lock objects owned by this instance.
     _locker = new object();
     _locker2 = new object();
 }
Esempio n. 17
0
        /// <summary>
        /// Compiles the requested page and returns it.
        /// </summary>
        /// <param name="gameName">Name of the game.</param>
        /// <param name="fileType">Type of the file.</param>
        /// <param name="fileName">Name of the file.</param>
        /// <returns>The page to send back.</returns>
        private string compilePageByRequest(string gameName, string fileType, string fileName)
        {
            // A fresh page loader is created for each request and asked to load the page.
            return(new PageLoader().loadPage(gameName, fileType, fileName));
        }
Esempio n. 18
0
        /// <summary>
        /// Creates the paging window around <paramref name="loader"/> and shows
        /// <paramref name="current"/> as its initial content.
        /// </summary>
        /// <param name="loader">Page loader kept in <c>Loader</c> and used as the window's data context.</param>
        /// <param name="current">Page placed into <c>View.Content</c> on construction.</param>
        public PaginalView(PageLoader loader, Page current)
        {
            // Must run first so that the named element View exists.
            InitializeComponent();

            Loader       = loader;
            View.Content = current;
            // The loader itself is the data context.
            DataContext  = loader;
        }
Esempio n. 19
0
        /// <summary>
        /// Creates a new <see cref="ArticleDetailPage"/>, loads <paramref name="article"/> into it
        /// and navigates to it through <c>PageLoader.Navigate</c>.
        /// </summary>
        /// <param name="article">Article to display on the detail page.</param>
        public void NavigateToArticle(Article article)
        {
            ArticleDetailPage page = new ArticleDetailPage();

            // The article is loaded before navigation so the page is populated when shown.
            page.LoadArticle(article);
            PageLoader.Navigate(page);
            // NOTE(review): the flag is cleared after navigating; its meaning is defined elsewhere — confirm.
            _onNewArticle = false;
        }
Esempio n. 20
0
    /// <summary>
    /// Creates the policy around the given page loader.
    /// </summary>
    /// <param name="loader">Loader stored for later use; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="loader"/> is null.</exception>
    public PageLoaderMatcherPolicy(PageLoader loader)
    {
        _loader = loader ?? throw new ArgumentNullException(nameof(loader));
    }
Esempio n. 21
0
        private int _furthestHorseDistance;      // Distance of the furthest horse (can be another horse)

        /// <summary>
        ///     Creates a horse with the given name that pings the given URL. The horse
        ///     subscribes to its ping's events, creates its own page loader and starts
        ///     at distance 0.
        /// </summary>
        /// <param name="name">Value assigned to <c>Name</c>.</param>
        /// <param name="url">URL stored in <c>_url</c> and passed to the <c>Ping</c> instance.</param>
        public Horse(string name, string url)
        {
            Name  = name;
            _url  = url;
            _ping = new Ping(url);
            // Subscribe to the ping events right after the ping object is created.
            _ping.PingReceived   += PingReceived;
            _ping.ThreadFinished += PingFinished;
            _pageLoader           = new PageLoader();
            Distance              = 0;
        }
        /// <summary>
        /// Initializes the view, creates the game controller and binds the view's
        /// data context to the controller's horses.
        /// </summary>
        public HomeView()
        {
            this.InitializeComponent();

            _gameController = new GameController();
            DataContext     = _gameController.Horses;


            // NOTE(review): this local is never used afterwards. Unless the PageLoader
            // constructor has a required side effect, the line can probably be removed — confirm.
            PageLoader pageLoader = new PageLoader();
        }
Esempio n. 23
0
        /// <summary>
        /// Opens a modal paging window over all publishers, positioned at the publisher
        /// currently selected in <c>PublisherGrid</c>.
        /// </summary>
        private void Button_Click_6(object sender, RoutedEventArgs e)
        {
            // The loader pages over every publisher and starts at the selected row.
            var loader = new PageLoader(AppCur.LibraryData.Publishers.ToList <object>(), typeof(PublisherOverview))
            {
                Position = PublisherGrid.SelectedIndex
            };

            var dialog = new PaginalView(loader, new PublisherOverview(PublisherGrid.SelectedValue));
            dialog.ShowDialog();
        }
Esempio n. 24
0
        //private void Button_Click_6(object sender, RoutedEventArgs e)
        //{
        //    BookView.Content = BookLoader.First;
        //    BookLoader.Position = 0;
        //}

        //private void Button_Click_7(object sender, RoutedEventArgs e)
        //{
        //    BookView.Content = BookLoader.Prev;
        //    BookLoader.Position--;
        //}

        //private void Button_Click_8(object sender, RoutedEventArgs e)
        //{
        //    BookView.Content = BookLoader.Next;
        //    BookLoader.Position++;
        //}

        //private void Button_Click_9(object sender, RoutedEventArgs e)
        //{
        //    BookView.Content = BookLoader.Last;
        //    BookLoader.Position = AppCur.LibraryData.Books.Count;
        //}

        /// <summary>
        /// Opens a modal paging window over all books, positioned at the book currently
        /// selected in <c>BookGrid</c>.
        /// </summary>
        private void Button_Click_10(object sender, RoutedEventArgs e)
        {
            // The loader pages over every book and starts at the selected row.
            var loader = new PageLoader(AppCur.LibraryData.Books.ToList <object>(), typeof(BookOverview))
            {
                Position = BookGrid.SelectedIndex
            };

            var dialog = new PaginalView(loader, new BookOverview(BookGrid.SelectedValue));
            dialog.ShowDialog();
        }
Esempio n. 25
0
        /// <summary>
        /// Opens a modal paging window over all authors, positioned at the author currently
        /// selected in <c>AuthorGrid</c>.
        /// </summary>
        private void Button_Click_7(object sender, RoutedEventArgs e)
        {
            // The loader pages over every author and starts at the selected row.
            var loader = new PageLoader(AppCur.LibraryData.Authors.ToList <object>(), typeof(AuthorOverview))
            {
                Position = AuthorGrid.SelectedIndex
            };

            var dialog = new PaginalView(loader, new AuthorOverview(AuthorGrid.SelectedValue));
            dialog.ShowDialog();
        }
Esempio n. 26
0
        /// <summary>
        /// Downloads the list page of template <paramref name="s"/> and, when the template
        /// defines a list-body pattern, narrows the page to the first element matching that
        /// pattern (used here as a CSS selector).
        /// </summary>
        /// <param name="s">Spider template supplying the list URL, optional encoding and body pattern.</param>
        /// <param name="sb">Log buffer handed on to <c>logInfo</c>.</param>
        /// <returns>
        /// The trimmed outer HTML of the first matching element; the whole trimmed page when no
        /// pattern is configured; the empty/null download result unchanged when nothing was
        /// downloaded; or null when the pattern matches nothing.
        /// </returns>
        private static string downloadListPageBody(SpiderTemplate s, StringBuilder sb)
        {
            // Use the template encoding only when one is configured; otherwise pass an empty encoding.
            String encoding = strUtil.HasText(s.ListEncoding) ? s.ListEncoding : "";
            String target   = PageLoader.Download(s.ListUrl, SpiderConfig.UserAgent, encoding);

            if (strUtil.IsNullOrEmpty(target))
            {
                logInfo("error=原始页面没有内容: " + s.ListUrl, s, sb);

                return(target);
            }

            // Read the pattern once; it is needed for both the check and the query.
            String selector = s.GetListBodyPattern();
            if (strUtil.IsNullOrEmpty(selector))
            {
                // No pattern configured: the whole page is the list body.
                return(target.Trim());
            }

            HtmlDocument htmlDoc = new HtmlDocument {
                OptionAddDebuggingAttributes = false,
                OptionAutoCloseOnEnd         = true,
                OptionFixNestedTags          = true,
                OptionReadEncoding           = true
            };
            htmlDoc.LoadHtml(target);

            // Take the first match directly instead of counting the query result and then
            // copying it to an array, which evaluated the selector query more than once.
            HtmlNode firstMatch = htmlDoc.DocumentNode.QuerySelectorAll(selector).FirstOrDefault();
            if (firstMatch == null)
            {
                logInfo("error=没有匹配的页面内容:" + s.ListUrl, s, sb);
                return(null);
            }

            return(firstMatch.OuterHtml.Trim());
        }
Esempio n. 27
0
        /// <summary>
        /// Loads the HTML nodes of http://yandex.ru, prints one line per element node and
        /// then waits for a key press.
        /// </summary>
        static void Main(string[] args)
        {
            var nodesLoader = new PageLoader();

            // Materialize the element nodes first, then print them.
            var elements = nodesLoader.GetHtmlNodes("http://yandex.ru")
                           .Where(node => node.NodeType == HtmlNodeType.Element)
                           .ToList();

            foreach (var node in elements)
            {
                Console.WriteLine($"{node}: id={node.Id}, name={node.Name}, type={node.NodeType}, XPath={node.XPath}");
            }

            Console.ReadKey();
        }
Esempio n. 28
0
 /// <summary>
 /// Creates the middleware. Every dependency is required.
 /// </summary>
 /// <exception cref="ArgumentNullException">
 /// Any argument is null, or <paramref name="routeOptions"/> wraps a null value.
 /// </exception>
 public EndpointRedirectingMiddleware(
     ILogger<EndpointRedirectingMiddleware> logger,
     RequestDelegate next,
     IOptions<RouteOptions> routeOptions,
     PageLoader pageLoader)
 {
     if (logger == null)
     {
         throw new ArgumentNullException(nameof(logger));
     }

     if (next == null)
     {
         throw new ArgumentNullException(nameof(next));
     }

     // A null wrapper and a null wrapped value are both reported against routeOptions.
     var options = routeOptions?.Value;
     if (options == null)
     {
         throw new ArgumentNullException(nameof(routeOptions));
     }

     if (pageLoader == null)
     {
         throw new ArgumentNullException(nameof(pageLoader));
     }

     _logger = logger;
     _next = next;
     _routeOptions = options;
     _pageLoader = pageLoader;
 }
 /// <summary>
 /// Creates the model: obtains the page loader, makes sure the cache directory exists,
 /// and sets up the results collection for synchronized access.
 /// </summary>
 /// <param name="cacheDirectory">Directory for downloaded images; it is created when missing.</param>
 public SearchImageModel(string cacheDirectory)
 {
     pageLoader = PageLoader.GetPageLoader();

     // CreateDirectory does nothing when the directory already exists, so no Exists
     // pre-check is needed. The pre-check could also race with a concurrent create or delete.
     Directory.CreateDirectory(cacheDirectory);

     CacheDirectory     = cacheDirectory;
     CollectionChanged += DebugNotify;
     SearchResults      = new ObservableCollection <ImageSearchResult>();

     // Register the lock object that guards SearchResults for cross-thread access.
     BindingOperations.EnableCollectionSynchronization(SearchResults, searchResultsLock);
 }
Esempio n. 30
0
        /// <summary>
        /// Downloads two pages without an explicit encoding and checks that each is non-empty
        /// and contains its expected Chinese text.
        /// </summary>
        public void testEncoding()
        {
            // An encoding may be supplied; when it is not, it is set according to the
            // ContentType of the WebResponse.

            // Case 1: URL-only overload.
            String baiduHtml = PageLoader.Download("http://www.baidu.com/");
            Assert.IsNotEmpty(baiduHtml);
            int baiduIndex = baiduHtml.IndexOf("百度");
            Assert.Greater(baiduIndex, 0);

            // Case 2: explicit user agent with an empty encoding argument.
            String neteaseHtml = PageLoader.Download("http://news.163.com/", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)", "");
            Assert.IsNotEmpty(neteaseHtml);
            int neteaseIndex = neteaseHtml.IndexOf("网易新闻");
            Assert.Greater(neteaseIndex, 0);
        }
Esempio n. 31
0
 /// <summary>
 /// Creates a processor for one page and stores all of its collaborators.
 /// </summary>
 public PageProcessor(
     int pageIndex,
     TargetRecord jobInfo,
     PageLoader pageLoader,
     FileDb db,
     MaxiPageMgr maxiPageMgr,
     Speaker speaker)
 {
     // Fields are assigned in parameter order.
     this.pageIndex   = pageIndex;
     this.jobInfo     = jobInfo;
     this.pageLoader  = pageLoader;
     this.db          = db;
     this.maxiPageMgr = maxiPageMgr;
     this.speaker     = speaker;
 }
Esempio n. 32
0
        /// <summary>
        /// Downloads the remote images listed in the posted "upfile" field and echoes a
        /// JSON-like result with one entry per requested URL.
        /// </summary>
        /// <remarks>
        /// The field holds URLs separated by "ue_separate_ue". A URL is downloaded only when it
        /// starts with "http://" and ends in one of the allowed image extensions. Every other
        /// URL, and every failed download, is reported as "error!" in the same position, so
        /// one bad URL no longer stops the remaining ones from being processed.
        /// </remarks>
        public virtual void GetRemotePic()
        {
            string uri = ctx.Post("upfile");

            uri = uri.Replace("&amp;", "&");
            string[] imgUrls = strUtil.Split(uri, "ue_separate_ue");

            // Allowed file extensions.
            // NOTE(review): the original also declared a 3000 kb size limit but never enforced it.
            string[] filetype = { ".gif", ".png", ".jpg", ".jpeg", ".bmp" };

            ArrayList tmpNames = new ArrayList();

            foreach (String imgUrl in imgUrls)
            {
                // Scheme check without Substring, which threw on URLs shorter than 7 characters.
                if (imgUrl == null || !imgUrl.StartsWith("http://", StringComparison.Ordinal))
                {
                    tmpNames.Add("error!");
                    continue;
                }

                // Extension check. A URL without any '.' is rejected instead of throwing.
                // ToLowerInvariant keeps the comparison independent of the current culture.
                int dotIndex = imgUrl.LastIndexOf('.');
                if (dotIndex < 0 || Array.IndexOf(filetype, imgUrl.Substring(dotIndex).ToLowerInvariant()) == -1)
                {
                    tmpNames.Add("error!");
                    continue;
                }

                try {
                    tmpNames.Add(PageLoader.DownloadPic(imgUrl));
                }
                catch (Exception) {
                    // A failed download is reported for this URL only.
                    tmpNames.Add("error!");
                }
            }

            // NOTE(review): uri comes from the request and is embedded unescaped between single
            // quotes; a quote in it breaks the response. Escape it once the client's parsing
            // of this payload is confirmed.
            echoJson("{url:'" + converToString(tmpNames) + "',tip:'远程图片抓取成功!',srcUrl:'" + uri + "'}");
        }
Esempio n. 33
0
        /// <summary>
        /// Reads "HabraMiner.cfg", creates one download task per post number in the configured
        /// range, and runs them through a page loader that saves every article to MongoDB.
        /// </summary>
        private static void Main(string[] args)
        {
            var config    = Configuration.ReadConfiguration("HabraMiner.cfg");
            var userAgent = "";

            var saver = new MongoArticleSaver<HabrArticle>(
                config.DbServer, config.DbPort, config.DbName, config.CollectionName, config.BatchSave);

            // One lazily created download task per post number (original note: 30466).
            var downloadTasks = Enumerable
                .Range(config.Start, config.Count)
                .Select(postNumber => PageDownloadTaskFactory.CreateDownloadTask<HabrArticle>(
                    new Uri($"http://www.habrahabr.ru/post/{postNumber}"), Encoding.UTF8, userAgent));

            var loader = new PageLoader<HabrArticle>(downloadTasks, article => saver.Save(article));
            loader.RunAllDellayedTasks(1, config.TaskCount);

            // The main thread joins itself, which never returns — presumably to keep the
            // process alive while the loader works.
            Thread.CurrentThread.Join();
        }
Esempio n. 34
0
 /// <summary>
 /// Initializes the view, installs a page loader bound to the transition panel as the
 /// menu's content loader, and registers for <c>ActionEvent</c> messages.
 /// </summary>
 public Home()
 {
     InitializeComponent();

     // The same loader instance is kept in the field and handed to the menu.
     loader = new PageLoader(transionPanel);
     menu.ContentLoader = loader;

     Messenger.Default.Register<ActionEvent>(this, handleEvent);
 }
 /// <summary>
 /// Initializes the view and installs a page loader bound to the transition panel as the
 /// menu's content loader.
 /// </summary>
 public MostConsulted()
 {
     InitializeComponent();

     // The same loader instance is kept in the field and handed to the menu.
     loader = new PageLoader(transionPanel);
     menu.ContentLoader = loader;
 }