コード例 #1
0
        /// <summary>
        /// Stores a URL record, removes it again, and verifies that looking it up by
        /// site id and friendly URL afterwards yields nothing.
        /// </summary>
        public void DeleteTest()
        {
            // arrange: build the record field by field, then persist it
            var doomed = new ValidUrl();
            doomed.Id           = Guid.NewGuid();
            doomed.SiteId       = 0;
            doomed.Action       = "/Template/Compose";
            doomed.Active       = true;
            doomed.FriendlyUrl  = "/flights/v5";
            doomed.Index        = true;
            doomed.StatusCode   = 200;
            doomed.View         = "/home/flights.cshtml";
            doomed.LastModified = DateTime.Now;
            _mongodbRepo.Save(doomed);

            // act
            _mongodbRepo.Delete(doomed);

            // assert: query with the same site id / friendly URL pair that was saved
            Assert.IsNull(_mongodbRepo.GetByFriendlyUrl(doomed.SiteId, doomed.FriendlyUrl));
        }
コード例 #2
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/>, reads the JSON object that follows
        /// "item: " in the HTML, and creates one download task per entry found under
        /// <c>album_images.images</c>.
        /// </summary>
        /// <param name="url">Address of the page to download and scan.</param>
        /// <param name="option">Extractor option; when null, <c>RecommendOption(url)</c> supplies one.</param>
        /// <returns>
        /// The download tasks together with an <c>ExtractedInfo</c> whose type is <c>Search</c>.
        /// </returns>
        public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            if (option == null)
            {
                option = RecommendOption(url);
            }

            var html = NetTools.DownloadString(url);

            // The album metadata is embedded in the page as "item: {...}".
            var itemJson = Regex.Match(html, "item: ({.*})").Groups[1].Value;
            var images   = JObject.Parse(itemJson)["album_images"]["images"];

            // Same for every task, so compute it once instead of in a second pass.
            var extractorName = GetType().Name.Replace("Extractor", "");

            var result = new List<NetTask>();
            foreach (var img in images)
            {
                var hash = img["hash"].ToString();
                var ext  = img["ext"].ToString();

                var task = NetTask.MakeDefault($"https://i.imgur.com/{hash}{ext}");
                task.SaveFile = true;
                task.Filename = $"{hash}{ext}";
                task.Format   = new ExtractorFileNameFormat
                {
                    Id = hash, Extension = ext, FilenameWithoutExtension = hash, Url = url
                };
                task.Format.Extractor = extractorName;
                result.Add(task);
            }

            return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.Search });
        }
コード例 #3
0
        /// <summary>
        /// Stamps the posted URL record with modification data, then saves it as a new
        /// record (when its id is empty) or updates the existing one.
        /// </summary>
        /// <param name="url_">The URL record to persist.</param>
        /// <returns>
        /// A JSON result carrying the success message, or an empty string when an
        /// exception was caught (the response is then marked with status code 500).
        /// </returns>
        public ActionResult UpdateUrl(ValidUrl url_)
        {
            string message = string.Empty;

            try
            {
                url_.LastModified   = DateTime.Now;
                url_.LastModifiedBy = this.CMSUser.UserName;

                // Round-trip through the "N1" format to keep one decimal place.
                string onePlace = url_.SitemapPriority.ToString("N1");
                url_.SitemapPriority = float.Parse(onePlace);

                url_.Action = ECMSSettings.Current.DefaultURLRewriteAction;
                url_.SiteId = (short)this.GetSiteIdFromContext();

                if (url_.Id != Guid.Empty)
                {
                    DependencyManager.URLRepository.Update(url_);
                }
                else
                {
                    // No id yet: assign one and store the record as new.
                    url_.Id = Guid.NewGuid();
                    DependencyManager.URLRepository.Save(url_);
                }

                message = "Url Updated Successfully.";
            }
            catch (Exception ex)
            {
                // Discard anything already written and report the failure via the status line.
                Response.Clear();
                Response.ClearHeaders();
                Response.ClearContent();
                Response.StatusCode        = 500;
                Response.StatusDescription = "Failed : " + ex.Message;
            }

            return Json(message);
        }
コード例 #4
0
        /// <summary>
        /// Saves a URL record, changes its friendly URL, status code and active flag,
        /// pushes the change through Update, and checks that the record read back under
        /// the new friendly URL carries the updated values.
        /// </summary>
        public void UpdateTest()
        {
            // arrange
            ValidUrl temp1 = new ValidUrl
            {
                Id           = Guid.NewGuid(),
                SiteId       = 0,
                Action       = "/Template/Compose",
                Active       = true,
                FriendlyUrl  = "/flights/",
                Index        = true,
                StatusCode   = 200,
                View         = "/home/flights.cshtml",
                LastModified = DateTime.Now
            };

            _mongodbRepo.Save(temp1);

            ValidUrl expected = _mongodbRepo.GetByFriendlyUrl(temp1.SiteId, temp1.FriendlyUrl);

            expected.FriendlyUrl = "/flights/v1";
            expected.StatusCode  = 200;
            expected.Active      = false;

            // act
            _mongodbRepo.Update(expected);
            ValidUrl actual = _mongodbRepo.GetByFriendlyUrl(expected.SiteId, expected.FriendlyUrl);

            // assert: fail with an assertion (not a NullReferenceException) when nothing comes back,
            // and pass the expected value first so failure messages read correctly.
            Assert.IsNotNull(actual);
            Assert.AreEqual(expected.FriendlyUrl, actual.FriendlyUrl);
            Assert.AreEqual(expected.StatusCode, actual.StatusCode);
            Assert.AreEqual(expected.Active, actual.Active);
        }
コード例 #5
0
        /// <summary>
        /// Chooses an extractor option from the groups captured by <c>ValidUrl</c>:
        /// "comic" URLs whose fourth group is "detail" or "list" get the episode-image
        /// or comic-index type respectively; everything else gets the plain image type.
        /// </summary>
        /// <param name="url">The URL to classify.</param>
        /// <returns>A <c>NaverExtractorOption</c> with its <c>Type</c> set.</returns>
        public override IExtractorOption RecommendOption(string url)
        {
            var groups = ValidUrl.Match(url).Groups;

            if (groups[1].Value == "comic")
            {
                switch (groups[4].Value)
                {
                    case "detail":
                        return new NaverExtractorOption { Type = NaverExtractorOption.ExtractorType.EpisodeImages };

                    case "list":
                        return new NaverExtractorOption { Type = NaverExtractorOption.ExtractorType.ComicIndex };
                }
            }

            // "blog" URLs and anything unrecognised share the same default.
            return new NaverExtractorOption { Type = NaverExtractorOption.ExtractorType.Images };
        }
コード例 #6
0
ファイル: MongoDBRepository.cs プロジェクト: ramkumar013/ECMS
        /// <summary>
        /// Fetches the content item stored for a URL and view type. When no item matches the
        /// URL id, falls back to the item matching the URL's site id, view name and view type.
        /// The first element of the found item's body is then parsed as CSV and the first
        /// record replaces <c>Body</c> as a <c>JObject</c>.
        /// </summary>
        /// <param name="url_">URL whose id (and, for the fallback, site id and view) is queried.</param>
        /// <param name="viewType_">View type, stored in the database as its integer value.</param>
        /// <returns>The content item, or null when neither query finds one.</returns>
        public override ContentItem GetById(ValidUrl url_, ContentViewType viewType_)
        {
            var collection = _db.GetCollection<ContentItem>(COLLNAME);
            int viewType   = Convert.ToInt32(viewType_);

            // URL-specific content. The view type is filtered on "ContentView.ViewType", the same
            // field path used by the fallback query below; the bare "ViewType" path used before
            // did not agree with it.
            ContentItem item = collection.Find(Query.And(Query.EQ("Url.Id", url_.Id), Query.EQ("ContentView.ViewType", viewType))).FirstOrDefault<ContentItem>();

            if (item == null)
            {
                DependencyManager.Logger.Log(new LogEventInfo(LogLevel.Debug, ECMSSettings.DEFAULT_LOGGER, "Specific content not found now going to search for default content."));
                item = collection.Find(Query.And(Query.EQ("ContentView.SiteId", url_.SiteId), Query.EQ("ContentView.ViewName", url_.View), Query.EQ("ContentView.ViewType", viewType))).FirstOrDefault<ContentItem>();
            }

            //TODO : Optimize this
            if (item != null)
            {
                using (StringReader streamReader = new StringReader(item.Body[0].ToString()))
                using (var csv = new CsvHelper.CsvReader(streamReader))
                {
                    // Only the first CSV record is used.
                    csv.Read();
                    item.Body = JObject.FromObject(csv.GetRecord(typeof(object)));
                }
            }

            return item;
        }
コード例 #7
0
 /// <summary>
 /// Renders <paramref name="hrefTemplate_"/> with the friendly URL of the record found for
 /// <paramref name="url_"/> when that record exists and has status code 200. Otherwise
 /// returns the "name" group that <c>hrefRegex</c> captures from the template.
 /// </summary>
 /// <param name="url_">Friendly URL to look up (site id 1 is used for the lookup).</param>
 /// <param name="hrefTemplate_">Format string receiving the friendly URL as argument 0.</param>
 /// <returns>The rendered markup, or the shared empty value when an exception is caught.</returns>
 public static MvcHtmlString GetHref(string url_, string hrefTemplate_)
 {
     try
     {
         ValidUrl target = DependencyManager.URLRepository.GetByFriendlyUrl(1, url_);
         bool     isLive = target != null && target.StatusCode == 200;

         if (isLive)
         {
             return new MvcHtmlString(string.Format(hrefTemplate_, target.FriendlyUrl));
         }

         // No usable URL record: fall back to the "name" group of the template.
         Match templateMatch = hrefRegex.Match(hrefTemplate_);
         bool  hasName       = templateMatch != null
                               && templateMatch.Groups != null
                               && templateMatch.Groups.Count > 0
                               && templateMatch.Groups["name"] != null;

         return hasName
             ? new MvcHtmlString(templateMatch.Groups["name"].Value)
             : emptyMVCHtmlString;
     }
     catch (Exception failure)
     {
         // Log the failure and degrade to the empty value.
         DependencyManager.Logger.Log(new LogEventInfo(LogLevel.Error, ECMSSettings.DEFAULT_LOGGER, failure.ToString()));
         return emptyMVCHtmlString;
     }
 }
コード例 #8
0
        /// <summary>
        /// Builds a content item for a URL from the page contents loaded for it, and starts a
        /// background task for every body property whose text contains "@" and has no
        /// <c>ITemplate</c> cached under its hash code yet.
        /// </summary>
        /// <param name="url_">URL the content belongs to.</param>
        /// <param name="viewType_">View type passed on to the content and head loaders.</param>
        /// <returns>The content item with <c>Url</c>, <c>Body</c> and <c>Head</c> set.</returns>
        public override ContentItem GetById(ValidUrl url_, ContentViewType viewType_)
        {
            ContentItem item = new ContentItem { Url = url_ };

            JObject body = LoadPageContents(url_, viewType_, true);
            item.Body = body;
            item.Head = GetHeadContentByViewName(url_, body, viewType_);

            foreach (JToken child in body.Children())
            {
                JProperty property = child as JProperty;
                if (property == null)
                {
                    continue;
                }

                string text = property.Value.ToString();
                if (!text.Contains("@"))
                {
                    continue;
                }

                // The cache key is the hash code of the property text.
                string cacheKey = text.GetHashCode().ToString();
                if (DependencyManager.CachingService.Get<ITemplate>(cacheKey) != null)
                {
                    continue;
                }

                // The task itself is cached under "Task.<key>".
                var pending = Task.Factory.StartNew(() => CreateTemplateAndSetInCache(cacheKey, property.Value.ToString()));
                DependencyManager.CachingService.Set<Task>("Task." + cacheKey, pending);
            }

            return item;
        }
コード例 #9
0
        /// <summary>
        /// Attaches the URL record identified by <paramref name="id"/> to the posted content
        /// item, saves the item for the given view type, and redirects to Urls/Index.
        /// </summary>
        /// <param name="id">Id of the URL record the content belongs to.</param>
        /// <param name="item_">The posted content item.</param>
        /// <param name="vm">View type the content is saved for.</param>
        /// <returns>A redirect to the "Index" action of the "Urls" controller.</returns>
        public ActionResult UrlDataEdit(Guid id, ContentItem item_, ContentViewType vm)
        {
            item_.Url = DependencyManager.URLRepository.GetById(this.GetSiteIdFromContext(), id, false);

            DependencyManager.ContentRepository.Save(item_, vm);

            return RedirectToAction("Index", "Urls");
        }
コード例 #10
0
        /// <summary>
        /// Builds the image download tasks for a "reader" URL: downloads the article info page
        /// and the image list JSON for the captured id, then creates one task per listed image.
        /// </summary>
        /// <param name="url">URL to extract from; its "type" and "id" groups are read via <c>ValidUrl</c>.</param>
        /// <param name="option">Extractor option with optional callbacks; when null, <c>RecommendOption(url)</c> supplies one.</param>
        /// <returns>The download tasks and an <c>ExtractedInfo</c> whose type is <c>WorksComic</c>.</returns>
        /// <exception cref="ExtractorException">The URL's "type" group is not "reader".</exception>
        public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            var match = ValidUrl.Match(url).Groups;

            if (option == null)
            {
                option = RecommendOption(url);
            }

            if (match["type"].Value != "reader")
            {
                throw new ExtractorException("'search' page not supports yet!");
            }

            var id = match["id"].Value;
            var article_info_url = $"https://hiyobi.me/info/{id}";
            option.PageReadCallback?.Invoke(article_info_url);
            var info_html = NetTools.DownloadString(article_info_url);
            var data      = parse_info(info_html);

            var img_file_json_url = $"https://xn--9w3b15m8vo.asia/data/json/{id}_list.json";
            option.PageReadCallback?.Invoke(img_file_json_url);
            var cookie             = "__cfduid=d53c18b351d4a54007ac583a96f4436381568466715";
            var img_file_json_task = NetTask.MakeDefault(img_file_json_url, cookie);
            var img_file_json      = NetTools.DownloadString(img_file_json_task);
            var img_urls           = JArray.Parse(img_file_json).Select(x => $"https://xn--9w3b15m8vo.asia/data/{id}/{x["name"].ToString()}").ToList();

            option.SimpleInfoCallback?.Invoke($"{data.Title}");

            var result = new List<NetTask>();
            var count  = 1;
            foreach (var img in img_urls)
            {
                var task = NetTask.MakeDefault(img);
                task.SaveFile = true;
                task.Filename = img.Split('/').Last();
                task.Cookie   = cookie;
                task.Format   = new ExtractorFileNameFormat
                {
                    Id     = id,
                    Title  = data.Title,
                    Artist = data.artist != null ? data.artist[0] : "N/A",
                    // NOTE(review): Group is filled from the artist list, same as Artist — confirm this is intended.
                    Group  = data.artist != null ? data.artist[0] : "N/A",
                    // Files are numbered 001, 002, ... in list order.
                    FilenameWithoutExtension = count++.ToString("000"),
                    Extension = Path.GetExtension(task.Filename).Replace(".", "")
                };
                result.Add(task);
            }

            // An empty image list has no first task to offer as a thumbnail.
            if (result.Count > 0)
            {
                option.ThumbnailCallback?.Invoke(result[0]);
            }

            result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
            return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.WorksComic });
        }
コード例 #11
0
        /// <summary>
        /// Looks up the head JSON registered in <c>ContentHeadList</c> under
        /// "&lt;site id&gt;-&lt;view type as int&gt;-&lt;view without leading/trailing slashes&gt;",
        /// calls <c>MergeInto</c> on it with the body JSON, and loads the result into a new
        /// <c>ContentItemHead</c>.
        /// </summary>
        /// <param name="url_">Supplies the site id and view name for the lookup key.</param>
        /// <param name="jsonBody">Body JSON passed to <c>MergeInto</c>.</param>
        /// <param name="viewType_">View type used in the lookup key.</param>
        /// <returns>The populated head object.</returns>
        public ContentItemHead GetHeadContentByViewName(ValidUrl url_, JObject jsonBody, ContentViewType viewType_)
        {
            string sitePart     = url_.SiteId.ToString();
            string viewTypePart = Convert.ToInt32(viewType_).ToString();
            string viewPart     = url_.View.Trim('/');
            string key          = string.Join("-", sitePart, viewTypePart, viewPart);

            JObject headJson = ContentHeadList[key];
            headJson.MergeInto(jsonBody);

            ContentItemHead head = new ContentItemHead();
            head.LoadFromJObject(headJson);
            return head;
        }
コード例 #12
0
        /// <summary>
        /// Builds the path of the content file for a URL:
        /// &lt;base directory&gt;\app_data\&lt;site id&gt;\&lt;view type as int&gt;\(bodycontent|headcontent)\&lt;url id&gt;&lt;extension&gt;.
        /// </summary>
        /// <param name="url_">Supplies the site id and the URL id.</param>
        /// <param name="viewType_">View type; its integer value names a folder.</param>
        /// <param name="forBodyContent_">True for the body content folder, false for the head content folder.</param>
        /// <returns>The assembled path; the file's existence is not checked here.</returns>
        private string ConstructPath(ValidUrl url_, ContentViewType viewType_, bool forBodyContent_)
        {
            string siteFolder     = AppDomain.CurrentDomain.BaseDirectory + "\\app_data\\" + url_.SiteId;
            string viewTypeFolder = siteFolder + "\\" + Convert.ToInt32(viewType_).ToString();

            string contentFolder;
            if (forBodyContent_)
            {
                contentFolder = viewTypeFolder + "\\bodycontent\\";
            }
            else
            {
                contentFolder = viewTypeFolder + "\\headcontent\\";
            }

            // A fallback to the view's "-default-content" file used to be applied here
            // when the file did not exist; it is disabled.
            return contentFolder + url_.Id + ECMS_FILE_EXTENSION;
        }
コード例 #13
0
        //private static void LoadPageContents(DirectoryInfo dirInfo)
        //{
        //    using (StreamReader streamReader = new StreamReader(dirInfo.FullName + "\\content.etxt"))
        //    {
        //        using (var csv = new CsvReader(streamReader))
        //        {
        //            ContentBodyList = new Dictionary<int, Dictionary<Guid, JObject>>();
        //            var temp = new Dictionary<Guid, JObject>();
        //            while (csv.Read())
        //            {
        //                temp[Guid.Parse(csv.GetField("UrlId"))] = JObject.FromObject(csv.GetRecord<object>());
        //            }
        //            ContentBodyList[Convert.ToInt32(dirInfo.Name)] = temp;
        //        }
        //    }
        //}

        /// <summary>
        /// Reads the page content for a URL from disk. Uses the URL-specific file when it
        /// exists; otherwise uses the path built for the URL's view.
        /// </summary>
        /// <param name="url_">URL whose content is wanted.</param>
        /// <param name="viewType_">View type used to build the URL-specific path.</param>
        /// <param name="forBodyContent_">True for body content, false for head content.</param>
        /// <returns>Whatever <c>ReadPageContentFromDisk</c> returns for the chosen path.</returns>
        private JObject LoadPageContents(ValidUrl url_, ContentViewType viewType_, bool forBodyContent_)
        {
            string urlSpecificPath = ConstructPath(url_, viewType_, forBodyContent_);
            if (File.Exists(urlSpecificPath))
            {
                return ReadPageContentFromDisk(urlSpecificPath);
            }

            // No file for this URL: resolve the view by name and use its path instead.
            ECMSView view = DependencyManager.ViewRepository.GetByViewName(url_.View);
            return ReadPageContentFromDisk(ConstructPath(view, forBodyContent_));
        }
コード例 #14
0
        /// <summary>
        /// With the in-process caching service installed, looks up a flights URL for site 1
        /// in the file repository and checks its view, flags and status code.
        /// </summary>
        public void GetById_InProcCachingService_Test()
        {
            // arrange
            DependencyManager.CachingService = new InProcCachingService();
            var repository = new ValidUrlFileRepository();

            // act
            ValidUrl found = repository.GetByFriendlyUrl(1, "/flights/cheap-flights-to-new-york-city");

            // assert
            Assert.AreEqual("/flights/destination-city", found.View);
            Assert.AreEqual(true, found.Active);
            Assert.AreEqual(true, found.Index);
            Assert.AreEqual(200, found.StatusCode);
        }
コード例 #15
0
ファイル: MongoDBRepository.cs プロジェクト: ramkumar013/ECMS
        /// <summary>
        /// Fetches the content item matching the URL's id, view name and the given view type,
        /// and replaces its <c>Body</c> with the body's first element.
        /// </summary>
        /// <param name="url_">Supplies the URL id and view name to match.</param>
        /// <param name="viewType_">View type, matched by its integer value.</param>
        /// <returns>The content item, or null when nothing matches.</returns>
        public override ContentItem GetContentForEditing(ValidUrl url_, ContentViewType viewType_)
        {
            var collection = _db.GetCollection<ContentItem>(COLLNAME);
            var criteria   = Query.And(
                Query.EQ("Url.Id", url_.Id),
                Query.EQ("ContentView.ViewName", url_.View),
                Query.EQ("ContentView.ViewType", Convert.ToInt32(viewType_)));

            ContentItem found = collection.Find(criteria).FirstOrDefault<ContentItem>();
            if (found == null)
            {
                return null;
            }

            found.Body = found.Body[0];
            return found;
        }
コード例 #16
0
        /// <summary>
        /// With the in-process caching service installed, looks up the root URL "/" for site 1
        /// in the file repository and checks its view, flags and status code.
        /// </summary>
        public void GetById_InProcCachingService_SlashURL_Test()
        {
            // arrange
            DependencyManager.CachingService = new InProcCachingService();
            var repository = new ValidUrlFileRepository();

            // act
            ValidUrl root = repository.GetByFriendlyUrl(1, "/");

            // assert
            Assert.AreEqual("/index", root.View);
            Assert.AreEqual(true, root.Active);
            Assert.AreEqual(true, root.Index);
            Assert.AreEqual(200, root.StatusCode);
        }
コード例 #17
0
        /// <summary>
        /// Saves a freshly built URL record through the repository. There is no assertion;
        /// the only thing exercised is the call to Save.
        /// </summary>
        public void SaveTest()
        {
            // arrange
            var record = new ValidUrl();
            record.Id           = Guid.NewGuid();
            record.SiteId       = 0;
            record.Action       = "/Template/Compose";
            record.Active       = true;
            record.FriendlyUrl  = "/flights/";
            record.Index        = true;
            record.StatusCode   = 200;
            record.View         = "/home/flights.cshtml";
            record.LastModified = DateTime.Now;

            // act
            _mongodbRepo.Save(record);
        }
コード例 #18
0
        /// <summary>
        /// Chooses an extractor option from the URL's "type" group: "board" URLs get the
        /// episode-image type, everything else gets the works type.
        /// </summary>
        /// <param name="url">The URL to classify via <c>ValidUrl</c>.</param>
        /// <returns>A <c>NaverExtractorOption</c> with its <c>Type</c> set.</returns>
        public override IExtractorOption RecommendOption(string url)
        {
            var match = ValidUrl.Match(url).Groups;

            // Each return is a complete statement; the previous layout had the terminating
            // semicolons outside the braces, which left a dangling "else".
            if (match["type"].Value == "board")
            {
                return new NaverExtractorOption { Type = ExtractorType.EpisodeImages };
            }
            else
            {
                return new NaverExtractorOption { Type = ExtractorType.Works };
            }
        }
コード例 #19
0
        /// <summary>
        /// Extracts download tasks from a webtoon URL. For the episode-image type the
        /// downloaded page itself is parsed; for the comic-index type every episode page from
        /// 1 up to the episode number found in the index page is downloaded and parsed.
        /// </summary>
        /// <param name="url">URL to download; its "id" group is read via <c>ValidUrl</c> for the index case.</param>
        /// <param name="option">Extractor option; when null, <c>RecommendOption(url)</c> supplies one.</param>
        /// <returns>
        /// Tasks with a null info for a single episode, tasks with a <c>WorksComic</c> info for
        /// an index, or <c>(null, null)</c> for any other option type.
        /// </returns>
        public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            option = option ?? RecommendOption(url);

            var page = NetTools.DownloadString(url);

            //
            //  Extract Webtoon
            //

            if (option.Type == ExtractorType.EpisodeImages)
            {
                return (extract_episode_page(page), null);
            }

            if (option.Type != ExtractorType.ComicIndex)
            {
                return (null, null);
            }

            var groups = ValidUrl.Match(url).Groups;

            // The first detail link in the index page supplies the upper episode number.
            var lastEpisode = Regex.Match(page, @"/webtoon/detail\.nhn\?titleId=\d+&no=(\d+)").Groups[1].Value.ToInt();

            var episodeUrls = new List<string>();
            int episode     = 1;
            while (episode <= lastEpisode)
            {
                episodeUrls.Add($"https://comic.naver.com/webtoon/detail.nhn?titleId={groups["id"]}&no={episode}");
                episode++;
            }

            var tasks = new List<NetTask>();
            foreach (var episodePage in NetTools.DownloadStrings(episodeUrls))
            {
                tasks.AddRange(extract_episode_page(episodePage));
            }

            return (tasks, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.WorksComic });
        }
コード例 #20
0
        /// <summary>
        /// Saves a URL record and checks that looking it up by site id and friendly URL
        /// returns a record with the same id.
        /// </summary>
        public void GetByFriendlyUrlTest()
        {
            // arrange
            ValidUrl expected = new ValidUrl
            {
                Id           = Guid.NewGuid(),
                SiteId       = 0,
                Action       = "/Template/Compose",
                Active       = true,
                FriendlyUrl  = "/flights/",
                Index        = true,
                StatusCode   = 200,
                View         = "/home/flights.cshtml",
                LastModified = DateTime.Now
            };

            _mongodbRepo.Save(expected);

            // act
            ValidUrl actual = _mongodbRepo.GetByFriendlyUrl(expected.SiteId, expected.FriendlyUrl);

            // assert: a missing record fails as an assertion rather than a NullReferenceException,
            // and the expected value is passed first so the failure message reads correctly.
            Assert.IsNotNull(actual);
            Assert.AreEqual(expected.Id, actual.Id);
        }
コード例 #21
0
        /// <summary>
        /// Loads the editable content for a URL from its head and body files. The head file is
        /// read as CSV and the last record becomes <c>Head</c>; the body file, when present, is
        /// read as raw text into <c>Body</c>.
        /// </summary>
        /// <param name="url_">URL whose content files are read.</param>
        /// <param name="viewType_">View type used to build both file paths.</param>
        /// <returns>A new content item; <c>Body</c> is left unset when the body file does not exist.</returns>
        public override ContentItem GetContentForEditing(ValidUrl url_, ContentViewType viewType_)
        {
            string bodyPath = ConstructPath(url_, viewType_, true);
            string headPath = ConstructPath(url_, viewType_, false);

            ContentItem editable = new ContentItem();

            // The head file is opened unconditionally.
            using (StreamReader headReader = new StreamReader(headPath))
            using (var headCsv = new CsvReader(headReader))
            {
                while (headCsv.Read())
                {
                    editable.Head = headCsv.GetRecord<ContentItemHead>();
                }
            }

            if (!File.Exists(bodyPath))
            {
                return editable;
            }

            editable.Body = (dynamic)File.ReadAllText(bodyPath);
            return editable;
        }
コード例 #22
0
        /// <summary>
        /// Chooses an extractor option from the URL's "type" group: "search" URLs get the
        /// comic-index type; "reader" URLs and everything else get the image type.
        /// </summary>
        /// <param name="url">The URL to classify via <c>ValidUrl</c>.</param>
        /// <returns>A <c>HiyobiExtractorOption</c> with its <c>Type</c> set.</returns>
        public override IExtractorOption RecommendOption(string url)
        {
            var match = ValidUrl.Match(url).Groups;

            // Each return is a complete statement; the previous layout had the terminating
            // semicolons outside the braces, which left a dangling "else".
            if (match["type"].Value == "reader")
            {
                return new HiyobiExtractorOption { Type = ExtractorType.Images };
            }
            else if (match["type"].Value == "search")
            {
                return new HiyobiExtractorOption { Type = ExtractorType.ComicIndex };
            }

            return new HiyobiExtractorOption { Type = ExtractorType.Images };
        }
コード例 #23
0
        /// <summary>
        /// Shows the data-edit view for a URL. The stored content for the URL and view type is
        /// passed as the model when it can be loaded; otherwise the view is shown without one.
        /// </summary>
        /// <param name="id">Id of the URL record to edit content for.</param>
        /// <param name="vm">View type whose content is edited.</param>
        /// <returns>The "DefaultDataEdit" controller view, with or without a model.</returns>
        public ActionResult UrlDataEdit(Guid id, ContentViewType vm)
        {
            ValidUrl url = DependencyManager.URLRepository.GetById(this.GetSiteIdFromContext(), id, false);

            ViewBag.ViewName = url.FriendlyUrl;
            ViewBag.ViewType = vm;

            ContentItem editable = null;
            try
            {
                editable = DependencyManager.ContentRepository.GetContentForEditing(url, vm);
            }
            catch (FileNotFoundException)
            {
                // Ignored on purpose: the item stays null and the view is rendered without a model.
            }

            if (editable == null)
            {
                return View(GetControllerView("DefaultDataEdit"));
            }

            return View(GetControllerView("DefaultDataEdit"), editable);
        }
コード例 #24
0
        /// <summary>
        /// Runs the cached template stored under the hash code of <paramref name="expression"/>.
        /// If a task is cached under "Task.&lt;hash&gt;" and has not completed, waits for it first.
        /// </summary>
        /// <param name="expression">Template text; its hash code is the cache key.</param>
        /// <returns>
        /// The template output, or an empty string when any exception is caught (the exception
        /// is logged, prefixed with the current URL's friendly URL and id when available).
        /// </returns>
        public static string Eval(string expression)
        {
            try
            {
                string cacheKey = expression.GetHashCode().ToString();

                var  pending      = DependencyManager.CachingService.Get<Task>("Task." + cacheKey);
                bool stillRunning = pending != null && !pending.IsCompleted;
                if (stillRunning)
                {
                    pending.Wait();
                }

                TemplateService runner = new TemplateService();
                return runner.Run(DependencyManager.CachingService.Get<ITemplate>(cacheKey), null);
            }
            catch (Exception ex)
            {
                ValidUrl current = Utility.GetValidUrlFromContext(new HttpContextWrapper(HttpContext.Current));

                string prefix = string.Empty;
                if (current != null)
                {
                    prefix = current.FriendlyUrl + "::" + current.Id.ToString() + "::";
                }

                DependencyManager.Logger.Log(new LogEventInfo(LogLevel.Error, ECMSSettings.DEFAULT_LOGGER, prefix + ex.ToString()));
                return string.Empty;
            }
        }
コード例 #25
0
        /// <summary>
        /// Chooses an extractor option from the groups captured by <c>ValidUrl</c>: a "gall" URL
        /// whose third group is "lists" gets the article-information type with
        /// <c>ExtractInformation</c> enabled; every other URL gets the image type.
        /// </summary>
        /// <param name="url">The URL to classify.</param>
        /// <returns>A <c>DCInsideExtractorOption</c> with its <c>Type</c> set.</returns>
        public override IExtractorOption RecommendOption(string url)
        {
            var groups = ValidUrl.Match(url).Groups;

            bool isGalleryList = groups[1].Value == "gall" && groups[3].Value == "lists";
            if (isGalleryList)
            {
                return new DCInsideExtractorOption
                {
                    Type = ExtractorType.ArticleInformation, ExtractInformation = true
                };
            }

            // "view" pages and anything unrecognised share the same default.
            return new DCInsideExtractorOption { Type = ExtractorType.Images };
        }
コード例 #26
0
        /// <summary>
        /// Extracts image download tasks from a board page. For the episode-image type the
        /// images of the downloaded page are used; for the works type every linked episode page
        /// is downloaded and its images are collected under the work's title.
        /// </summary>
        /// <param name="url">URL to download; its "host" group is read via <c>ValidUrl</c> to build episode links.</param>
        /// <param name="option">Extractor option with optional callbacks; when null, <c>RecommendOption(url)</c> supplies one.</param>
        /// <returns>
        /// Tasks with a null info for a single episode, tasks with a <c>WorksComic</c> info for
        /// a work, or <c>(null, null)</c> for any other option type.
        /// </returns>
        public override (List<NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            if (option == null)
            {
                option = RecommendOption(url);
            }

            var html  = NetTools.DownloadString(url);
            var match = ValidUrl.Match(url).Groups;

            var document = new HtmlDocument();

            document.LoadHtml(html);
            var node = document.DocumentNode;

            if (option.Type == ExtractorType.EpisodeImages)
            {
                var images = get_board_images(html);
                var title  = node.SelectSingleNode("/html[1]/head[1]/title[1]").InnerText;

                var result = new List<NetTask>();
                int count  = 1;
                foreach (var img in images)
                {
                    var task = NetTask.MakeDefault(img);
                    task.SaveFile = true;
                    // Files are numbered 001, 002, ... and keep the source extension.
                    task.Filename = count.ToString("000") + Path.GetExtension(img.Split('/').Last());
                    task.Format   = new ExtractorFileNameFormat
                    {
                        Episode = title,
                        FilenameWithoutExtension = count.ToString("000"),
                        Extension = Path.GetExtension(task.Filename).Replace(".", "")
                    };
                    result.Add(task);
                    count++;
                }

                result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
                return (result, null);
            }
            else if (option.Type == ExtractorType.Works)
            {
                var title      = node.SelectSingleNode("/html[1]/body[1]/div[1]/div[3]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]").InnerText;
                var sub_urls   = new List<string>();
                var sub_titles = new List<string>();

                option.SimpleInfoCallback?.Invoke($"{title}");

                // The thumbnail address is taken from the URL inside the node's style attribute.
                option.ThumbnailCallback?.Invoke(NetTask.MakeDefault(
                                                     Regex.Match(node.SelectSingleNode("/html[1]/body[1]/div[1]/div[3]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]").GetAttributeValue("style", ""), @"(https?://.*?)\)").Groups[1].Value));

                foreach (var item in node.SelectNodes("/html[1]/body[1]/div[1]/div[3]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[2]/div[2]/div[1]/div[1]/div"))
                {
                    sub_urls.Add(match["host"] + item.SelectSingleNode("./a[1]").GetAttributeValue("href", ""));
                    sub_titles.Add(item.SelectSingleNode("./a[1]/div[1]").MyText());
                }

                option.ProgressMax?.Invoke(sub_urls.Count);

                var htmls = NetTools.DownloadStrings(sub_urls, "PHPSESSID=" + Externals.ManamoaPHPSESSID, () =>
                {
                    option.PostStatus?.Invoke(1);
                });

                var result = new List<NetTask>();
                for (int i = 0; i < sub_urls.Count; i++)
                {
                    try
                    {
                        var images = get_board_images(htmls[i]);
                        int count  = 1;
                        foreach (var img in images)
                        {
                            var task = NetTask.MakeDefault(img);
                            task.SaveFile = true;
                            task.Filename = count.ToString("000") + Path.GetExtension(img.Split('/').Last());
                            task.Format   = new ExtractorFileNameFormat
                            {
                                Title   = title,
                                Episode = sub_titles[i],
                                FilenameWithoutExtension = count.ToString("000"),
                                Extension = Path.GetExtension(task.Filename).Replace(".", "")
                            };
                            result.Add(task);
                            count++;
                        }
                    }
                    catch (Exception)
                    {
                        // Best effort: an episode page that cannot be processed is skipped
                        // (tasks already added for it stay in the result) so the remaining
                        // episodes are still collected.
                    }
                }

                result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
                return (result, new ExtractedInfo { Type = ExtractedInfo.ExtractedType.WorksComic });
            }

            return (null, null);
        }
コード例 #27
0
        public override (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            var match = ValidUrl.Match(url).Groups;

            if (option == null)
            {
                option = RecommendOption(url);
            }

            if (option.Type == ExtractorType.Images)
            {
                var sinfo    = new ExtractedInfo.WorksComic();
                var imgs_url = $"https://ltn.hitomi.la/galleries/{match["id"].Value}.js";
                option.PageReadCallback?.Invoke($"https://ltn.hitomi.la/galleryblock/{match["id"]}.html");
                option.PageReadCallback?.Invoke(url);
                option.PageReadCallback?.Invoke(imgs_url);
                var urls = new List <string> {
                    $"https://ltn.hitomi.la/galleryblock/{match["id"]}.html",
                    imgs_url
                };

                var strings = NetTools.DownloadStrings(urls);

                if (string.IsNullOrEmpty(strings[0]) || string.IsNullOrEmpty(strings[1]))
                {
                    return(null, null);
                }

                var data1 = ParseGalleryBlock(strings[0]);
                var imgs  = strings[1];

                var string2 = NetTools.DownloadString($"https://hitomi.la{data1.Magic}");
                if (string.IsNullOrEmpty(string2))
                {
                    return(null, null);
                }
                var data2 = ParseGallery(string2);

                option.SimpleInfoCallback?.Invoke($"[{match["id"].Value}] {data1.Title}");

                // download.js
                var number_of_frontends = 3;
                var subdomain           = Convert.ToChar(97 + (Convert.ToInt32(match["id"].Value.Last()) % number_of_frontends));
                if (match["id"].Value.Last() == '0')
                {
                    subdomain = 'a';
                }

                var vv  = JToken.Parse(imgs.Substring(imgs.IndexOf('=') + 1))["files"];
                var arr = (JArray)vv;
                //var arr = JArray.Parse(imgs.Substring(imgs.IndexOf('[')));
                var img_urls = new List <string>();
                foreach (var obj in arr)
                {
                    var hash = obj.Value <string>("hash");
                    if (obj.Value <int>("haswebp") == 0 || hash == null)
                    {
                        img_urls.Add($"https://{subdomain}a.hitomi.la/galleries/{match["id"].Value}/{obj.Value<string>("name")}");
                    }
                    else if (hash == "")
                    {
                        img_urls.Add($"https://{subdomain}a.hitomi.la/webp/{obj.Value<string>("name")}.webp");
                    }
                    else if (hash.Length < 3)
                    {
                        img_urls.Add($"https://{subdomain}a.hitomi.la/webp/{hash}.webp");
                    }
                    else
                    {
                        var postfix = hash.Substring(hash.Length - 3);
                        img_urls.Add($"https://{subdomain}a.hitomi.la/webp/{postfix[2]}/{postfix[0]}{postfix[1]}/{hash}.webp");
                    }
                }

                var result   = new List <NetTask>();
                var ordering = 1;
                foreach (var img in img_urls)
                {
                    var filename = Path.GetFileNameWithoutExtension(img.Split('/').Last());
                    if (!(option as HitomiExtractorOption).RealFilename)
                    {
                        filename = ordering++.ToString("000");
                    }

                    var task = NetTask.MakeDefault(img);
                    task.SaveFile = true;
                    task.Filename = img.Split('/').Last();
                    task.Format   = new ExtractorFileNameFormat
                    {
                        Title      = data1.Title,
                        Id         = match["id"].Value,
                        Language   = data1.Language,
                        UploadDate = data1.Posted,
                        FilenameWithoutExtension = filename,
                        Extension = Path.GetExtension(img.Split('/').Last()).Replace(".", "")
                    };

                    if (data1.artist != null)
                    {
                        task.Format.Artist = data1.artist[0];
                    }
                    else
                    {
                        task.Format.Artist = "NA";
                    }

                    if (data1.parody != null)
                    {
                        task.Format.Series = data1.parody[0];
                    }
                    else
                    {
                        task.Format.Series = "NA";
                    }

                    if (data2.group != null)
                    {
                        task.Format.Group = data2.group[0];
                    }
                    else
                    {
                        task.Format.Group = "NA";
                    }

                    if (data2.character != null)
                    {
                        task.Format.Character = data2.character[0];
                    }
                    else
                    {
                        task.Format.Character = "NA";
                    }

                    if (task.Format.Artist == "NA" && task.Format.Group != "NA")
                    {
                        task.Format.Artist = task.Format.Group;
                    }

                    result.Add(task);
                }

                option.ThumbnailCallback?.Invoke(result[0]);

                sinfo.Thumbnail   = result[0];
                sinfo.URL         = url;
                sinfo.Title       = data1.Title;
                sinfo.Author      = data1.artist?.ToArray();
                sinfo.AuthorGroup = data2.group?.ToArray();
                sinfo.ShortInfo   = $"[{match["id"].Value}] {data1.Title}";
                sinfo.Tags        = data1.Tags?.ToArray();
                sinfo.Characters  = data2.character?.ToArray();
                sinfo.Language    = data1.Language;
                sinfo.Parodies    = data1.parody?.ToArray();

                result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
                return(result, new ExtractedInfo {
                    Info = sinfo, Type = ExtractedInfo.ExtractedType.WorksComic
                });
            }

            return(null, null);
        }
コード例 #28
0
        /// <summary>
        /// Builds download tasks for every product image reachable from a
        /// "shopbrand" or "bestseller" listing URL.
        /// </summary>
        /// <param name="url">Listing URL; matched against <c>ValidUrl</c> to read the "menu" group.</param>
        /// <param name="option">
        /// Extractor option carrying the info/progress/thumbnail callbacks.
        /// When null, <c>RecommendOption(url)</c> supplies one.
        /// </param>
        /// <returns>
        /// The collected image tasks (empty when the menu is neither "shopbrand" nor
        /// "bestseller", or when no image was found) together with an
        /// <c>ExtractedInfo</c> of type <c>Search</c>.
        /// </returns>
        public override (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            const string host = "http://www.hn-hn.co.kr";

            var match = ValidUrl.Match(url).Groups;

            if (option == null)
            {
                option = RecommendOption(url);
            }

            var mtask = NetTask.MakeDefault(url);

            // Pages are requested with code page 51949 (EUC-KR).
            mtask.Encoding = Encoding.GetEncoding(51949);
            var html   = NetTools.DownloadString(mtask);
            var node   = html.ToHtmlNode();
            var result = new List <NetTask>();

            if (match["menu"].Value == "shopbrand" || match["menu"].Value == "bestseller")
            {
                // Site chrome images that appear inside the product area and must not be downloaded.
                var filtering_filename = new string[]
                {
                    "HN_Copyright2.jpg",
                    "next_product.gif",
                    "prev_product.gif",
                    "btn_h8_spin_dw.gif",
                    "btn_h8_spin_up.gif",
                    "Review.jpg",
                    "shoppingguide2.jpg",
                    "sizetip-2.jpg"
                };

                var gallery = node.SelectSingleNode("/html[1]/head[1]/title[1]").InnerText.Trim();
                option.SimpleInfoCallback?.Invoke(gallery);

                // The pager's "last" link carries the final page number after the last '='.
                // Without a pager there is a single page.
                var last_page_node = node.SelectSingleNode("/html[1]/body[1]/div[3]/div[3]/div[1]/div[2]/div[3]/div[1]/div[5]/ol[1]/li[@class='last']/a");
                var last_page      = 1;
                if (last_page_node != null)
                {
                    last_page = last_page_node.GetAttributeValue("href", "").Split('=').Last().ToInt();
                }
                var page_urls = Enumerable.Range(1, last_page).Select(page => $"{url}&page={page}").ToList();

                var htmls    = NetTools.DownloadStrings(page_urls);
                var sub_urls = new List <string>();

                foreach (var shtml in htmls)
                {
                    var snode   = shtml.ToHtmlNode();
                    var anchors = snode.SelectNodes("/html[1]/body[1]/div[3]/div[3]/div[1]/div[2]/div[3]/div[1]/div[5]/table[1]/tbody[1]//a");

                    // SelectNodes yields null (not an empty collection) when nothing matches.
                    if (anchors == null)
                    {
                        continue;
                    }

                    sub_urls.AddRange(anchors.Select(x => host + x.GetAttributeValue("href", "")));
                }

                option.ProgressMax?.Invoke(sub_urls.Count);

                var sub_htmls = new List <string>();
                foreach (var surl in sub_urls)
                {
                    var task = NetTask.MakeDefault(surl);
                    task.Encoding = Encoding.GetEncoding(51949);
                    sub_htmls.Add(NetTools.DownloadString(task));
                    option.PostStatus?.Invoke(1);
                }

                foreach (var shtml in sub_htmls)
                {
                    var snode      = shtml.ToHtmlNode();
                    var title_node = snode.SelectSingleNode("/html[1]/body[1]/div[3]/div[3]/div[1]/div[2]/div[1]/div[2]/div[1]/form[1]/div[1]/div[1]/h3[1]");
                    var title      = title_node?.InnerText.Trim() ?? "";
                    var img_nodes  = snode.SelectNodes("/html[1]/body[1]/div[3]/div[3]/div[1]/div[2]/div[1]/div[2]//img");

                    // A product page without any image contributes nothing.
                    if (img_nodes == null)
                    {
                        continue;
                    }

                    var imgs = img_nodes.Select(img =>
                    {
                        var src = img.GetAttributeValue("src", "");

                        // Absolute URLs are used verbatim; site-relative ones are
                        // prefixed with the host and stripped of their query string.
                        return(src.StartsWith("http") ? src : host + src.Split('?')[0]);
                    }).ToList();

                    foreach (var img in imgs)
                    {
                        var filename = img.Split('/').Last();
                        if (filtering_filename.Contains(filename))
                        {
                            continue;
                        }

                        var task = NetTask.MakeDefault(img);
                        task.SaveFile = true;
                        task.Filename = filename;
                        task.Format   = new ExtractorFileNameFormat
                        {
                            Gallery = gallery,
                            Title   = title,
                            FilenameWithoutExtension = Path.GetFileNameWithoutExtension(filename),
                            Extension = Path.GetExtension(filename).Replace(".", "")
                        };
                        result.Add(task);
                    }
                }

                // Only report a thumbnail when at least one image was actually collected.
                if (result.Count > 0)
                {
                    option.ThumbnailCallback?.Invoke(result[0]);
                }
            }

            result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
            return(result, new ExtractedInfo {
                Type = ExtractedInfo.ExtractedType.Search
            });
        }
コード例 #29
0
        /// <summary>
        /// Extracts content from a DCInside desktop URL: image tasks for an article
        /// view, or (currently discarded) article, comment, or gallery information.
        /// </summary>
        /// <param name="url">
        /// Target URL. When the option has <c>OnlyRecommend</c> set,
        /// "&amp;exception_mode=recommend" is appended before matching and downloading.
        /// </param>
        /// <param name="option">
        /// Extractor option; when null, a <c>DCInsideExtractorOption</c> with
        /// <c>Type = ExtractorType.Images</c> is used.
        /// </param>
        /// <returns>
        /// <list type="bullet">
        /// <item>(empty list, null) when the page could not be downloaded;</item>
        /// <item>(image tasks, null) for an article view in image mode;</item>
        /// <item>(null, null) for the information and comment modes, whose results are not yet returned;</item>
        /// <item>(tasks collected so far, Community info) when nothing returned earlier, including after a logged error.</item>
        /// </list>
        /// </returns>
        /// <exception cref="ExtractorException">The URL is not a desktop ("gall") page.</exception>
        public override (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            if (option == null)
            {
                option = new DCInsideExtractorOption {
                    Type = ExtractorType.Images
                };
            }

            // Only a DCInside-specific option carries the recommend-only switch.
            if (option is DCInsideExtractorOption dc_option && dc_option.OnlyRecommend)
            {
                url += "&exception_mode=recommend";
            }

            var match  = ValidUrl.Match(url).Groups;
            var result = new List <NetTask>();
            var html   = NetTools.DownloadString(url);

            if (html == null)
            {
                return(result, null);
            }

            if (match[1].Value == "gall")
            {
                try
                {
                    //
                    //  Parse article
                    //

                    if (match[3].Value == "view")
                    {
                        // NOTE(review): a non-empty group 2 presumably marks a minor gallery
                        // (the list branch below picks ParseMinorGallery for it) - confirm.
                        var article = ParseBoardView(html, match[2].Value != "");

                        if (option.Type == ExtractorType.Images && option.ExtractInformation == false)
                        {
                            if (article.ImagesLink == null || article.ImagesLink.Count == 0)
                            {
                                throw new Exception("Nothing to download!");
                            }

                            option.SimpleInfoCallback?.Invoke($"{article.Title}");

                            for (int i = 0; i < article.ImagesLink.Count; i++)
                            {
                                var task = NetTask.MakeDefault(article.ImagesLink[i]);
                                task.Filename = article.FilesName[i];
                                task.SaveFile = true;
                                task.Referer  = url;
                                task.Format   = new ExtractorFileNameFormat
                                {
                                    Id      = article.Id,
                                    Gallery = article.GalleryName,
                                    Title   = article.Title,
                                    FilenameWithoutExtension = (i + 1).ToString("000"),
                                    Extension = Path.GetExtension(article.FilesName[i]).Replace(".", ""),
                                };
                                result.Add(task);
                            }

                            result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
                            return(result, null /*article*/);
                        }
                        else if (option.Type == ExtractorType.ArticleInformation || option.ExtractInformation == true)
                        {
                            return(null, null /*article*/);
                        }
                        else if (option.Type == ExtractorType.Comments)
                        {
                            var cc       = new List <DCComment>();
                            var comments = GetComments(article, "1");
                            cc.Add(comments);

                            //
                            //  To avoid server blocks
                            //

                            Thread.Sleep(2000);

                            int tcount = comments.total_cnt;
                            int count  = 100;

                            // Advance the page index every round so each request fetches a new page.
                            // NOTE(review): count grows both by 100 here and by comment_cnt in the
                            // body; kept as in the original - confirm which increment is intended.
                            for (int page = 2; count < tcount; count += 100, page++)
                            {
                                comments = GetComments(article, page.ToString());
                                if (comments.comment_cnt == 0)
                                {
                                    break;
                                }
                                count += comments.comment_cnt;
                                cc.Add(comments);
                                Thread.Sleep(2000);
                            }

                            return(null, null /*GetComments(article, "0")*/);
                        }
                        else
                        {
                            throw new Exception("You cannot do that with this URL. " + url);
                        }
                    }

                    //
                    //  Parse Articles List
                    //

                    else if (match[3].Value == "lists")
                    {
                        DCGallery gallery;

                        if (match[2].Value == "")
                        {
                            gallery = ParseGallery(html);
                        }
                        else
                        {
                            gallery = ParseMinorGallery(html);
                        }

                        if (option.Type == ExtractorType.GalleryInformation || option.ExtractInformation == true)
                        {
                            return(null, null /*gallery*/);
                        }
                        else
                        {
                            throw new Exception("You cannot do that with this URL." + url);
                        }
                    }
                }
                catch (Exception e)
                {
                    // Failures (including the deliberate throws above) are logged, not propagated;
                    // execution falls through to the common return below.
                    Log.Logs.Instance.PushError("[DCInsideExtractor] Extract error - " + option.Type.ToString() + " - " + e.Message + "\r\n" + e.StackTrace);
                }
            }
            else
            {
                // Not support mobile page.
                throw new ExtractorException("[DCInside Extractor] Not support mobile page yet.");
            }

            result.ForEach(task => task.Format.Extractor = GetType().Name.Replace("Extractor", ""));
            return(result, new ExtractedInfo {
                Type = ExtractedInfo.ExtractedType.Community
            });
        }
コード例 #30
0
ファイル: JmanaExtractor.cs プロジェクト: rollrat/com-crawler
        /// <summary>
        /// Expands a series page into one download task per page image of every
        /// episode listed on it.
        /// </summary>
        /// <param name="url">Series page URL; its "host" group prefixes the thumbnail address.</param>
        /// <param name="option">
        /// Extractor option providing the info/thumbnail/progress callbacks;
        /// <c>RecommendOption(url)</c> is used when null.
        /// </param>
        /// <returns>
        /// All image tasks, numbered "001", "002", ... within each episode, and an
        /// <c>ExtractedInfo</c> of type <c>WorksComic</c>.
        /// </returns>
        public override (List <NetTask>, ExtractedInfo) Extract(string url, IExtractorOption option = null)
        {
            option = option ?? RecommendOption(url);

            var page_html = NetTools.DownloadString(url);
            var groups    = ValidUrl.Match(url).Groups;
            var root      = page_html.ToHtmlNode();

            var title = root.SelectSingleNode("/html[1]/body[1]/div[1]/div[3]/div[1]/h1[1]").InnerText.Trim();

            // Genre and artist are looked up but not consumed any further in this method.
            var genre  = root.SelectSingleNode("/html[1]/body[1]/div[1]/div[3]/div[2]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/ul[1]/li[1]/div[1]/div[2]/div[2]/h3[1]/a[1]").InnerText.Trim();
            var artist = root.SelectSingleNode("/html[1]/body[1]/div[1]/div[3]/div[2]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/ul[1]/li[1]/div[1]/div[2]/div[3]/h3[1]/a[1]").InnerText.Trim();

            // Each episode row contributes its link target and its caption, kept in parallel lists.
            var episode_urls  = new List <string>();
            var episode_names = new List <string>();

            foreach (var row in root.SelectNodes("/html[1]/body[1]/div[1]/div[3]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]/div"))
            {
                var anchor = row.SelectSingleNode("./div[2]/h2[1]/a[1]");
                episode_urls.Add(anchor.GetAttributeValue("href", ""));
                episode_names.Add(anchor.InnerText.Trim());
            }

            option.SimpleInfoCallback?.Invoke(title);
            option.ThumbnailCallback?.Invoke(
                NetTask.MakeDefault(
                    groups["host"].Value
                    + root.SelectSingleNode("/html[1]/body[1]/div[1]/div[3]/div[2]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/ul[1]/li[1]/div[1]/div[1]/a[1]/img[1]").GetAttributeValue("src", "")));

            option.ProgressMax?.Invoke(episode_urls.Count);

            // One progress tick is posted per downloaded episode page.
            var episode_htmls = NetTools.DownloadStrings(episode_urls, "", () =>
            {
                option.PostStatus?.Invoke(1);
            });

            var tasks = new List <NetTask>();

            for (int episode = 0; episode < episode_urls.Count; episode++)
            {
                var episode_root = episode_htmls[episode].ToHtmlNode();
                int page_no      = 0;

                foreach (var img in episode_root.SelectNodes("/html[1]/body[1]/div[1]/div[3]/div[2]/div[1]/div[2]/ul[1]//li/div[1]/img[1]"))
                {
                    page_no++;

                    // Prefer "data-src" and fall back to "src" when it is blank.
                    var source = img.GetAttributeValue("data-src", "");
                    source = string.IsNullOrWhiteSpace(source) ? img.GetAttributeValue("src", "") : source;

                    var stem   = page_no.ToString("000");
                    var queued = NetTask.MakeDefault(HttpUtility.HtmlDecode(source));
                    queued.SaveFile = true;
                    queued.Filename = stem + ".jpg";
                    queued.Format   = new ExtractorFileNameFormat
                    {
                        Title   = title,
                        Episode = episode_names[episode],
                        FilenameWithoutExtension = stem,
                        Extension = Path.GetExtension(queued.Filename).Replace(".", ""),
                    };
                    tasks.Add(queued);
                }
            }

            // Stamp every task with this extractor's short name (class name minus "Extractor").
            foreach (var queued in tasks)
            {
                queued.Format.Extractor = GetType().Name.Replace("Extractor", "");
            }

            return(tasks, new ExtractedInfo {
                Type = ExtractedInfo.ExtractedType.WorksComic
            });
        }