Ejemplo n.º 1
0
 /// <summary>
 /// Creates the crawled page for the shared uri, attaches a web request built
 /// for that same uri, and creates the unit under test via GetInstance(false, false).
 /// </summary>
 public void Setup()
 {
     // Configure the page on a local, then publish the finished object to the field.
     var page = new CrawledPage(_uri);
     page.HttpWebRequest = (HttpWebRequest)WebRequest.Create(_uri);
     _crawledPage = page;

     _unitUnderTest = GetInstance(false, false);
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Performs a real page request, then rewrites the resulting page so it looks
        /// like it came from the fake uri, and creates the unit under test.
        /// </summary>
        public void Setup()
        {
            var requester = new PageRequester(new CrawlConfiguration());
            var realUri   = new Uri("http://localhost.fiddler:1111/");

            _crawledPage = requester.MakeRequest(realUri);

            // Disguise the real request above as one made against the fake uri.
            _crawledPage.ParentUri = _uri;
            _crawledPage.HttpWebRequest = (HttpWebRequest)WebRequest.Create(_uri);

            _unitUnderTest = GetInstance(false, false, null, false, false);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// An upper-case robots meta tag with CONTENT="NONE" is expected to produce
        /// no links when the instance is created with GetInstance(true, false).
        /// </summary>
        public void GetLinks_MetaNoIndexNoFollowUsingNoneUpperCase_ReturnsEmptyList()
        {
            // Arrange
            const string html = "<META NAME=\"ROBOTS\" CONTENT=\"NONE\" /><a href=\"/aaa/a.html\" ></a><a href=\"/bbb/b.html\" /></a>";
            _unitUnderTest = GetInstance(true, false);
            _crawledPage.Content.Text = html;

            // Act
            var links = _unitUnderTest.GetLinks(_crawledPage);

            // Assert
            Assert.IsNotNull(links);
            Assert.AreEqual(0, links.Count());
        }
Ejemplo n.º 4
0
        /// <summary>
        /// A robots meta tag with content="noindex" is expected to leave both
        /// anchors in the result.
        /// </summary>
        public void GetLinks_MetaNoIndex_ReturnsLinks()
        {
            // Arrange
            const string html = "<meta name=\"robots\" content=\"noindex\" /><a href=\"/aaa/a.html\" ></a><a href=\"/bbb/b.html\" /></a>";
            _unitUnderTest = GetInstance(true, false);
            _crawledPage.Content.Text = html;

            // Act
            var links = _unitUnderTest.GetLinks(_crawledPage);

            // Assert
            Assert.IsNotNull(links);
            Assert.AreEqual(2, links.Count());
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Anchors carrying rel="nofollow" are expected to be excluded when the
        /// instance is created with GetInstance(false, true, null, false, false).
        /// </summary>
        public void GetLinks_RelNoFollow_NotReturned()
        {
            // Arrange
            const string html = "<a href=\"/aaa/a.html\" rel=\"nofollow\"></a><a href=\"/bbb/b.html\" rel=\"nofollow\" /></a>";
            _unitUnderTest = GetInstance(false, true, null, false, false);
            _crawledPage.Content.Text = html;

            // Act
            var links = _unitUnderTest.GetLinks(_crawledPage);

            // Assert
            Assert.IsNotNull(links);
            Assert.AreEqual(0, links.Count());
        }
Ejemplo n.º 6
0
        /// <summary>
        /// A robots meta tag with content="nofollow" is expected to produce no links
        /// when the instance is created with GetInstance(true, false, null, false, false).
        /// </summary>
        public void GetLinks_MetaNoFollow_ReturnsEmptyList()
        {
            // Arrange
            const string html = "<meta name=\"robots\" content=\"nofollow\" /><a href=\"/aaa/a.html\" ></a><a href=\"/bbb/b.html\" /></a>";
            _unitUnderTest = GetInstance(true, false, null, false, false);
            _crawledPage.Content.Text = html;

            // Act
            var links = _unitUnderTest.GetLinks(_crawledPage);

            // Assert
            Assert.IsNotNull(links);
            Assert.AreEqual(0, links.Count());
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Creates the crawled page for the shared uri with a GET request message and
        /// an empty response message attached, then creates the unit under test.
        /// </summary>
        public void Init()
        {
            var page = new CrawledPage(_uri);
            _crawledPage = page;

            // Parent and request both point at the same shared uri.
            page.ParentUri = _uri;
            page.HttpRequestMessage = new HttpRequestMessage(HttpMethod.Get, _uri);
            page.HttpResponseMessage = new HttpResponseMessage();

            _unitUnderTest = GetInstance(false, false, null, false, false);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Anchors carrying an upper-case REL="NOFOLLOW" attribute are expected to be
        /// excluded when the instance is created with GetInstance(false, true).
        /// </summary>
        public void GetLinks_RelNoFollowUpperCase_NotReturned()
        {
            // Arrange
            const string html = "<a href=\"/aaa/a.html\" REL=\"NOFOLLOW\"></a><a href=\"/bbb/b.html\" REL=\"NOFOLLOW\" /></a>";
            _unitUnderTest = GetInstance(false, true);
            _crawledPage.Content.Text = html;

            // Act
            var links = _unitUnderTest.GetLinks(_crawledPage);

            // Assert
            Assert.IsNotNull(links);
            Assert.AreEqual(0, links.Count());
        }
Ejemplo n.º 9
0
        /// <summary>
        /// An "X-Robots-Tag: noindex" response header is expected to leave both
        /// anchors in the result.
        /// </summary>
        public void GetLinks_HttpXRobotsTagHeaderNoIndex_ReturnsLinks()
        {
            // Arrange: the header is added before the instance is created, as in the other header tests.
            const string html = "<a href=\"/aaa/a.html\" ></a><a href=\"/bbb/b.html\" /></a>";
            _crawledPage.HttpWebResponse.AddResponseHeader("X-Robots-Tag", "noindex");
            _unitUnderTest = GetInstance(false, false, null, false, true);
            _crawledPage.Content.Text = html;

            // Act
            var links = _unitUnderTest.GetLinks(_crawledPage);

            // Assert
            Assert.IsNotNull(links);
            Assert.AreEqual(2, links.Count());
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Requests the site simulator's base address, then rewrites the resulting
        /// page so it looks like it came from the fake uri, and creates the unit under test.
        /// </summary>
        public async Task Setup()
        {
            var config    = new UnitTestConfig();
            var requester = new PageRequester(new CrawlConfiguration());
            var siteUri   = new Uri(config.SiteSimulatorBaseAddress);

            _crawledPage = await requester.MakeRequestAsync(siteUri);

            // Disguise the real request above as one made against the fake uri.
            _crawledPage.ParentUri = _uri;
            _crawledPage.HttpRequestMessage = new System.Net.Http.HttpRequestMessage(System.Net.Http.HttpMethod.Get, _uri);

            _unitUnderTest = GetInstance(false, false, null, false, false);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// When a url-cleaning delegate is supplied, the returned links are expected
        /// to reflect the delegate's replacements.
        /// </summary>
        public void GetLinks_CleanUrlDelegateSet_ReturnsCleanLinks()
        {
            _unitUnderTest            = GetInstance(false, false, (x) => x.Replace("a", "x").Replace("b", "y"));
            _crawledPage.Content.Text = "<a href=\"/aaa/a.html\" ></a><a href=\"/bbb/b.html\" /></a>";

            IEnumerable <Uri> result = _unitUnderTest.GetLinks(_crawledPage);

            Assert.IsNotNull(result);

            // Materialize once: Count() followed by repeated ElementAt() would otherwise
            // re-enumerate the sequence for every assertion.
            var links = result.ToList();

            Assert.AreEqual(2, links.Count);
            Assert.AreEqual("http://a.com/xxx/x.html", links[0].AbsoluteUri);
            Assert.AreEqual("http://a.com/yyy/y.html", links[1].AbsoluteUri);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// With the instance created via GetInstance(false, false, null, true, false),
        /// links that differ only by fragment are expected to be returned as
        /// separate entries with their fragments preserved.
        /// </summary>
        public void GetLinks_NamedAnchorsOrHashbangs_Enabled_ReturnsLinks()
        {
            _unitUnderTest            = GetInstance(false, false, null, true, false);
            _crawledPage.Content.Text = "<a href=\"/aaa/a.html\" ></a><a href=\"/aaa/a.html#top\" ></a><a href=\"/aaa/a.html#bottom\" /></a><a href=\"/aaa/a.html/#someaction/someid\" /></a>";

            IEnumerable <Uri> result = _unitUnderTest.GetLinks(_crawledPage);

            // Fail with a clear assertion instead of an exception when nothing is returned.
            Assert.IsNotNull(result);

            // Materialize once: Count() followed by repeated ElementAt() would otherwise
            // re-enumerate the sequence for every assertion.
            var links = result.ToList();

            Assert.AreEqual(4, links.Count);
            Assert.AreEqual("http://a.com/aaa/a.html", links[0].AbsoluteUri);
            Assert.AreEqual("http://a.com/aaa/a.html#top", links[1].AbsoluteUri);
            Assert.AreEqual("http://a.com/aaa/a.html#bottom", links[2].AbsoluteUri);
            Assert.AreEqual("http://a.com/aaa/a.html/#someaction/someid", links[3].AbsoluteUri);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// An "X-Robots-Tag: nofollow" response header is expected to produce no links
        /// when the instance is created with GetInstance(false, false, null, false, true).
        /// </summary>
        public void GetLinks_HttpXRobotsNoFollow_ReturnsEmptyList()
        {
            // Arrange: merge a single-entry header collection into the response headers.
            var responseHeaders = _crawledPage.HttpWebResponse.Headers;
            var robotsHeader    = new NameValueCollection();
            robotsHeader.Add("X-Robots-Tag", "nofollow");
            responseHeaders.Add(robotsHeader);

            _unitUnderTest = GetInstance(false, false, null, false, true);
            _crawledPage.Content.Text = "<a href=\"/aaa/a.html\" ></a><a href=\"/bbb/b.html\" /></a>";

            // Act
            var links = _unitUnderTest.GetLinks(_crawledPage);

            // Assert
            Assert.IsNotNull(links);
            Assert.AreEqual(0, links.Count());
        }
Ejemplo n.º 14
0
        /// <summary>
        /// An "X-Robots-Tag: noindex" header on the response message is expected to
        /// leave both anchors in the result.
        /// </summary>
        public void GetLinks_HttpXRobotsTagHeaderNoIndex_ReturnsLinks()
        {
            // Arrange: attach the header with a single value.
            var responseHeaders = _crawledPage.HttpResponseMessage.Headers;
            var headerValues    = new List <string>();
            headerValues.Add("noindex");
            responseHeaders.Add("X-Robots-Tag", headerValues);

            _unitUnderTest = GetInstance(false, false, null, false, true);
            _crawledPage.Content.Text = "<a href=\"/aaa/a.html\" ></a><a href=\"/bbb/b.html\" /></a>";

            // Act
            var links = _unitUnderTest.GetLinks(_crawledPage);

            // Assert
            Assert.IsNotNull(links);
            Assert.AreEqual(2, links.Count());
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Downloads the user page and scrapes the profile fields (icon, name, id,
        /// gender, birthday, region, description) into a new entry.
        /// </summary>
        /// <returns>The populated user entry.</returns>
        /// <remarks>
        /// Owner.Status shows a progress message while the method runs and is cleared
        /// on every exit path, including when an exception propagates.
        /// NOTE(review): apart from the description node, the SelectSingleNode results
        /// are dereferenced without null checks, so a page missing any of those nodes
        /// will throw; confirm that is acceptable to callers.
        /// </remarks>
        public NicoNicoUserEntry GetUserInfo()
        {
            Owner.Status = "ユーザー情報取得中";

            try
            {
                var ret = new NicoNicoUserEntry();

                // Fetch the html of the user page (blocks on the async call).
                var a = NicoNicoWrapperMain.Session.GetAsync(UserPage).Result;

                // Load the html.
                HtmlDocument doc = new HtmlDocument();

                doc.LoadHtml2(a);

                // User profile nodes.
                HtmlNode detail  = doc.DocumentNode.SelectSingleNode("//div[@class='userDetail']");
                HtmlNode profile = detail.SelectSingleNode("child::div[@class='profile']");
                HtmlNode account = profile.SelectSingleNode("child::div[@class='account']");

                ret.UserIconUrl = detail.SelectSingleNode("child::div[@class='avatar']/img").Attributes["src"].Value;
                ret.UserName    = profile.SelectSingleNode("child::h2").InnerText.Trim();
                ret.Id          = account.SelectSingleNode("child::p[@class='accountNumber']").InnerText.Trim();
                ret.Gender      = account.SelectSingleNode("child::p[2]").InnerText.Trim();
                ret.BirthDay    = account.SelectSingleNode("child::p[3]").InnerText.Trim();
                ret.Region      = account.SelectSingleNode("child::p[4]").InnerText.Trim();

                // The description is optional; fall back to an empty string when the node is absent.
                var temp = profile.SelectSingleNode("child::ul[@class='userDetailComment channel_open_mt0']/li/p/span");

                ret.Description = temp == null ? "" : temp.InnerHtml;

                ret.UserPage = UserPage;

                // Turn URLs in the description into hyperlinks.
                ret.Description = HyperLinkParser.Parse(ret.Description);

                return(ret);
            }
            finally
            {
                // Never leave the "fetching" message behind, even when scraping fails.
                Owner.Status = "";
            }
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Downloads the public mylist page and extracts the owner, mylist name,
        /// description and item list from the inline "Jarty.globals(" script block.
        /// </summary>
        /// <returns>
        /// The populated mylist entry, or null when the request times out or the
        /// page does not contain the expected script data.
        /// </returns>
        public NicoNicoPublicMylistEntry GetMylist()
        {
            try {
                // NOTE(review): if GetAsync returns a Task, exceptions raised inside it surface
                // from .Result wrapped in AggregateException, which the catch below would not
                // match; confirm where RequestTimeout is actually thrown.
                var a = NicoNicoWrapperMain.Session.GetAsync(MylistUrl).Result;

                // Locate the inline JavaScript that carries the mylist metadata.
                int globalsIndex = a.IndexOf("Jarty.globals(");

                if (globalsIndex < 0)
                {
                    // Unexpected page layout: nothing to parse.
                    return(null);
                }

                var globals = a.Substring(globalsIndex);

                // Split into lines.
                var splitted = globals.Split('\n');

                // Captures the text between the first and last double quote on a line.
                var regex = new Regex("\"(.*)\"");

                // Extracts the numeric user id; built once instead of once per line.
                var digits = new Regex(@"\d+");

                string nickname    = null;
                string userid      = null;
                string mylistname  = null;
                string description = null;

                string json = null;

                // The fields are expected in a fixed order; each one is only looked for
                // once the previous one has been found.
                foreach (var text in splitted)
                {
                    // Mylist owner line.
                    if (text.Contains("mylist_owner:"))
                    {
                        int nicknameIndex = text.IndexOf("nickname: ");

                        if (nicknameIndex < 0)
                        {
                            // Owner line without a nickname: keep scanning rather than throwing.
                            continue;
                        }

                        // Take the quoted nickname from the capture group.
                        nickname = regex.Match(text.Substring(nicknameIndex)).Groups[1].Value;
                        continue;
                    }

                    if (nickname != null && text.Contains("user_id:"))
                    {
                        userid = digits.Match(text).Value;
                        continue;
                    }

                    if (userid != null && text.Contains("name:"))
                    {
                        mylistname = regex.Match(text).Groups[1].Value;
                        continue;
                    }
                    if (mylistname != null && text.Contains("description:"))
                    {
                        description = regex.Match(text).Groups[1].Value;
                        continue;
                    }
                    if (description != null && text.Contains("Mylist.preload("))
                    {
                        // The JSON payload runs from just after the first comma up to
                        // two characters before the end of the line.
                        int comma  = text.IndexOf(",");
                        int length = text.Length - comma - 3;

                        if (length >= 0)
                        {
                            json = text.Substring(comma + 1, length);
                        }
                        break;
                    }
                }

                if (json == null)
                {
                    // The payload is only captured after all metadata fields were found,
                    // so a missing payload means the page could not be parsed.
                    return(null);
                }

                var ret = new NicoNicoPublicMylistEntry();

                ret.NickName    = @"<a href=""http://www.nicovideo.jp/user/" + userid + @""">" + nickname + "</a> さんの公開マイリスト";
                ret.MylistName  = mylistname;
                ret.Description = description;

                // Replace escaped \n with a line break and drop escaped \r.
                ret.Description = ret.Description.Replace("\\n", "<br>").Replace("\\r", "");

                ret.Description = HyperLinkParser.Parse(ret.Description);

                var list = new List <MylistListEntryViewModel>();

                var data = DynamicJson.Parse(json);

                StoreItem(data, list);

                ret.Data = list;

                return(ret);
            } catch (RequestTimeout) {
                return(null);
            }
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Fetches a video watch page and extracts the watch API data embedded in it.
        /// </summary>
        /// <param name="videoPage">Address of the video page to fetch.</param>
        /// <returns>
        /// The populated <see cref="WatchApiData"/>, or null when the response is
        /// 404 or 503, or the page has no #watchAPIDataContainer element.
        /// </returns>
        public static WatchApiData GetWatchApiData(string videoPage)
        {
            // Fetch the html of the video page.
            var response = NicoNicoWrapperMain.GetSession().GetResponseAsync(videoPage).Result;

            // Channel / official videos answer with a permanent redirect; follow it once.
            // A redirect without a Location header cannot be followed and is left as is.
            if (response.StatusCode == HttpStatusCode.MovedPermanently && response.Headers.Location != null)
            {
                response = NicoNicoWrapperMain.GetSession().GetResponseAsync(response.Headers.Location.OriginalString).Result;
            }
            // Deleted video.
            if (response.StatusCode == HttpStatusCode.NotFound)
            {
                return(null);
            }
            // Service is congested.
            if (response.StatusCode == HttpStatusCode.ServiceUnavailable)
            {
                return(null);
            }

            string html = response.Content.ReadAsStringAsync().Result;

            HtmlDocument doc = new HtmlDocument();

            doc.LoadHtml2(html);

            // Pull just the API data container out of the html.
            var container = doc.DocumentNode.QuerySelector("#watchAPIDataContainer");

            if (container == null)
            {
                return(null);
            }

            var data = container.InnerHtml;

            // Decode html entities.
            data = HttpUtility.HtmlDecode(data);

            // Expand the API data as json.
            var json = DynamicJson.Parse(data);

            // Result of GetFlv.
            string flv = json.flashvars.flvInfo;

            // The value is url-encoded twice, so decode twice.
            flv = HttpUtility.UrlDecode(flv);
            flv = HttpUtility.UrlDecode(flv);


            WatchApiData ret = new WatchApiData();

            // The pairs are joined with '&'; split them into key/value entries.
            // Fragments without '=' are skipped, since Substring(0, -1) would throw on them.
            var getFlv = flv.Split(new char[] { '&' })
                         .Where(source => source.IndexOf('=') >= 0)
                         .ToDictionary(source => source.Substring(0, source.IndexOf('=')),
                                       source => Uri.UnescapeDataString(source.Substring(source.IndexOf('=') + 1)));

            ret.GetFlv = new NicoNicoGetFlvData(getFlv);

            // Video details.
            var videoDetail = json.videoDetail;

            //--- fill in the result ---
            ret.Cmsid          = videoDetail.id;
            ret.MovieType      = json.flashvars.movie_type;
            ret.Title          = HttpUtility.HtmlDecode(videoDetail.title); // decode html entities
            ret.Thumbnail      = videoDetail.thumbnail;
            ret.Description    = videoDetail.description;
            ret.PostedAt       = videoDetail.postedAt;
            ret.Length         = (int)videoDetail.length;
            ret.ViewCounter    = (int)videoDetail.viewCount;
            ret.CommentCounter = (int)videoDetail.commentCount;
            ret.MylistCounter  = (int)videoDetail.mylistCount;
            ret.YesterdayRank  = videoDetail.yesterday_rank == null ? "圏外" : videoDetail.yesterday_rank + "位";
            ret.HighestRank    = videoDetail.highest_rank == null ? "圏外" : videoDetail.highest_rank + "位";
            ret.Token          = json.flashvars.csrfToken;

            if (json.uploaderInfo())
            {
                // Uploader information (user upload).
                var uploaderInfo = json.uploaderInfo;

                ret.UploaderId          = uploaderInfo.id;
                ret.UploaderIconUrl     = uploaderInfo.icon_url;
                ret.UploaderName        = uploaderInfo.nickname;
                ret.UploaderIsFavorited = uploaderInfo.is_favorited;
            }
            else if (json.channelInfo())
            {
                // Uploader information (channel upload).
                var channelInfo = json.channelInfo;

                ret.UploaderId          = channelInfo.id;
                ret.UploaderIconUrl     = channelInfo.icon_url;
                ret.UploaderName        = channelInfo.name;
                ret.UploaderIsFavorited = channelInfo.is_favorited == 1 ? true : false;

                ret.IsChannelVideo = true;
            }


            ret.Description = HyperLinkParser.Parse(ret.Description);

            ret.TagList = new ObservableCollection <VideoTagViewModel>();

            foreach (var tag in videoDetail.tagList)
            {
                NicoNicoTag entry = new NicoNicoTag()
                {
                    Id  = tag.id,
                    Tag = tag.tag,
                    Dic = tag.dic(),
                    Lck = tag.lck == "1" ? true : false,
                    Cat = tag.cat()
                };
                ret.TagList.Add(new VideoTagViewModel(entry));
            }
            //------

            // Paid video: no video url is available.
            if (ret.GetFlv.VideoUrl == null || ret.GetFlv.VideoUrl.Count() == 0)
            {
                ret.IsPaidVideo = true;
                return(ret);
            }

            // Set cookies just in case. GetValues throws when the header is absent,
            // so only read it when the response actually carries Set-Cookie.
            if (response.Headers.Contains("Set-Cookie"))
            {
                foreach (string s in response.Headers.GetValues("Set-Cookie"))
                {
                    foreach (string ss in s.Split(';'))
                    {
                        App.SetCookie(new Uri("http://nicovideo.jp/"), ss);
                    }
                }
            }
            return(ret);
        }
Ejemplo n.º 18
0
 /// <summary>
 /// Creates a finder whose link parser is an <see cref="AngleSharpHyperlinkParser"/>.
 /// </summary>
 public SiteMapFinder()
 {
     var parser = new AngleSharpHyperlinkParser();
     this._linkParser = parser;
 }
Ejemplo n.º 19
0
        /// <summary>
        /// Downloads the user's mylist page and scrapes one entry per mylist block
        /// (url, name, description and up to three thumbnails).
        /// </summary>
        /// <returns>
        /// The scraped entries, or null when the page has no mylist blocks or the
        /// request times out.
        /// </returns>
        public List <NicoNicoUserMylistEntry> GetUserMylist()
        {
            var url = UserPage + "/mylist";

            Owner.Status = "ユーザーマイリスト取得中";

            List <NicoNicoUserMylistEntry> ret = new List <NicoNicoUserMylistEntry>();

            try {
                var a   = NicoNicoWrapperMain.Session.GetAsync(url).Result;
                var doc = new HtmlDocument();
                doc.LoadHtml2(a);

                var content = doc.DocumentNode.SelectSingleNode("//div[@class='content']");

                // A page without the content div is treated like a page without mylists
                // instead of dereferencing null.
                var outers = content == null
                             ? null
                             : content.SelectNodes("child::div[@class='articleBody']/div[@class='outer']");

                // Nothing to scrape.
                if (outers == null)
                {
                    Owner.Status = "";
                    return(null);
                }

                // Walk each mylist block.
                foreach (var node in outers)
                {
                    NicoNicoUserMylistEntry entry = new NicoNicoUserMylistEntry();

                    // The h4 tag holds the link to the mylist.
                    var h4     = node.SelectSingleNode("child::div/h4");
                    var anchor = h4.SelectSingleNode("child::a");

                    entry.Url = "http://www.nicovideo.jp/" + anchor.Attributes["href"].Value;

                    // Mylist name.
                    entry.Name = anchor.InnerText.Trim();

                    // Description (optional).
                    var desc = node.SelectSingleNode("child::div/p[@data-nico-mylist-desc-full='true']");
                    entry.Description = desc == null ? "" : desc.InnerText.Trim();

                    entry.Description = HyperLinkParser.Parse(entry.Description);

                    // Thumbnails: a later thumbnail is only used when every earlier one exists.
                    var thumb1 = node.SelectSingleNode("child::div/ul/li[1]/img");
                    var thumb2 = node.SelectSingleNode("child::div/ul/li[2]/img");
                    var thumb3 = node.SelectSingleNode("child::div/ul/li[3]/img");

                    if (thumb1 != null)
                    {
                        entry.ThumbNail1Available = true;
                        entry.ThumbNail1Url       = thumb1.Attributes["src"].Value;
                        entry.ThumbNail1ToolTip   = HttpUtility.HtmlDecode(thumb1.Attributes["alt"].Value);

                        if (thumb2 != null)
                        {
                            entry.ThumbNail2Available = true;
                            entry.ThumbNail2Url       = thumb2.Attributes["src"].Value;
                            entry.ThumbNail2ToolTip   = HttpUtility.HtmlDecode(thumb2.Attributes["alt"].Value);

                            if (thumb3 != null)
                            {
                                entry.ThumbNail3Available = true;
                                entry.ThumbNail3Url       = thumb3.Attributes["src"].Value;
                                entry.ThumbNail3ToolTip   = HttpUtility.HtmlDecode(thumb3.Attributes["alt"].Value);
                            }
                        }
                    }

                    ret.Add(entry);
                }

                Owner.Status = "";
                return(ret);
            } catch (RequestTimeout) {
                Owner.Status = "ユーザーマイリストの取得に失敗しました";
                return(null);
            }
        }