Exemplo n.º 1
0
        /// <summary>
        /// Fetches <paramref name="url"/> and every page chained after it through the next-page
        /// pattern, appending each page's extracted body to <paramref name="previousPageContent"/>.
        /// </summary>
        /// <param name="previousPageContent">Body gathered so far; may be null or empty.</param>
        /// <param name="charset">Charset passed to the downloader.</param>
        /// <param name="url">URL of the first continuation page to fetch.</param>
        /// <param name="cookieString">Cookie string passed to the downloader.</param>
        /// <param name="regexContentExclude">Pattern whose matches are removed from the accumulated content; skipped when empty.</param>
        /// <param name="contentHtmlClearCollection">Collection string of tag names whose whole elements (tags and inner markup) are removed; skipped when empty.</param>
        /// <param name="contentHtmlClearTagCollection">Collection string of tag names whose opening and closing tags are stripped while the inner markup is kept; skipped when empty.</param>
        /// <param name="regexContent">Primary body pattern.</param>
        /// <param name="regexContent2">Fallback body pattern, tried when the primary one yields nothing.</param>
        /// <param name="regexContent3">Second fallback body pattern.</param>
        /// <param name="regexNextPage">Pattern used to locate the next page's URL.</param>
        /// <returns>The accumulated content after the last page has been processed.</returns>
        /// <exception cref="Exception">A page could not be downloaded; the message is the downloader's error message.</exception>
        public static async Task<string> GetPageContentAsync(string previousPageContent, Charset charset, string url, string cookieString, string regexContentExclude, string contentHtmlClearCollection, string contentHtmlClearTagCollection, string regexContent, string regexContent2, string regexContent3, string regexNextPage)
        {
            var content = previousPageContent;
            var pageUrl = url;

            // URLs already fetched. A "next page" link that leads back to any of them ends the walk;
            // checking only the current page would let a cycle such as A -> B -> A run forever.
            var visitedUrls = new System.Collections.Generic.HashSet<string>(StringComparer.OrdinalIgnoreCase);

            while (true)
            {
                if (!string.IsNullOrEmpty(pageUrl))
                {
                    visitedUrls.Add(pageUrl);
                }

                var result = await WebClientUtils.GetRemoteHtmlAsync(pageUrl, charset, cookieString);
                if (!result.IsSuccess)
                {
                    throw new Exception(result.ErrorMessage);
                }

                var contentHtml = result.Content;

                // Try the body patterns in order until one of them produces something.
                var pageBody = GetValue("content", regexContent, contentHtml);
                if (string.IsNullOrEmpty(pageBody) && !string.IsNullOrEmpty(regexContent2))
                {
                    pageBody = GetValue("content", regexContent2, contentHtml);
                }
                if (string.IsNullOrEmpty(pageBody) && !string.IsNullOrEmpty(regexContent3))
                {
                    pageBody = GetValue("content", regexContent3, contentHtml);
                }

                if (!string.IsNullOrEmpty(pageBody))
                {
                    // Pages are separated by the placeholder; the very first chunk gets no separator.
                    content = string.IsNullOrEmpty(content)
                        ? content + pageBody
                        : content + PagePlaceHolder + pageBody;
                }

                // The clean-up passes run over the whole accumulated content after every page.
                if (!string.IsNullOrEmpty(regexContentExclude))
                {
                    content = Replace(regexContentExclude, content, string.Empty);
                }
                if (!string.IsNullOrEmpty(contentHtmlClearCollection))
                {
                    foreach (var htmlClear in StringCollectionToList(contentHtmlClearCollection))
                    {
                        var clearRegex = $@"<{htmlClear}[^>]*>.*?<\/{htmlClear}>";
                        content = Replace(clearRegex, content, string.Empty);
                    }
                }
                if (!string.IsNullOrEmpty(contentHtmlClearTagCollection))
                {
                    foreach (var htmlClearTag in StringCollectionToList(contentHtmlClearTagCollection))
                    {
                        var openTagRegex = $@"<{htmlClearTag}[^>]*>";
                        content = Replace(openTagRegex, content, string.Empty);

                        var closeTagRegex = $@"<\/{htmlClearTag}>";
                        content = Replace(closeTagRegex, content, string.Empty);
                    }
                }

                var nextPageUrl = GetUrl(regexNextPage, contentHtml, pageUrl);

                var isLastPage = string.IsNullOrEmpty(nextPageUrl)
                                 || StringUtils.EqualsIgnoreCase(pageUrl, nextPageUrl)
                                 || visitedUrls.Contains(nextPageUrl);
                if (isLastPage)
                {
                    return content;
                }

                pageUrl = nextPageUrl;
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Downloads the list page at <paramref name="gatherUrl"/> and builds one <see cref="Item"/>
        /// per distinct content URL found on it, pre-filled with whatever the rule says is taken
        /// from the list page (title, image URL, "by list" attributes).
        /// </summary>
        /// <param name="gatherUrl">Absolute URL of the list page; also supplies the scheme/host/port used to resolve root-relative links.</param>
        /// <param name="rule">Gather rule providing charset, cookies and the start/end markers for each pattern.</param>
        /// <returns>The items in the order their URLs first appear on the page.</returns>
        /// <exception cref="Exception">The list page could not be downloaded; the message is the downloader's error message.</exception>
        public static async Task<List<Item>> GetItemsAsync(string gatherUrl, Rule rule)
        {
            var result = await WebClientUtils.GetRemoteHtmlAsync(gatherUrl, rule.Charset, rule.CookieString);
            if (!result.IsSuccess)
            {
                throw new Exception(result.ErrorMessage);
            }

            var pageHtml = result.Content;

            // Restrict matching to the configured list area when one is defined and found;
            // otherwise fall back to the whole page.
            var areaHtml = string.Empty;
            var regexListArea = GetRegexArea(rule.ListAreaStart, rule.ListAreaEnd);
            if (!string.IsNullOrEmpty(regexListArea))
            {
                areaHtml = GetValue("area", regexListArea, pageHtml);
            }
            var listHtml = !string.IsNullOrEmpty(areaHtml) ? areaHtml : pageHtml;

            var regexContentUrl = GetRegexUrl(rule.ContentUrlStart, rule.ContentUrlEnd);

            var regexImageUrl = string.Empty;
            if (rule.ImageSource == ImageSource.List)
            {
                regexImageUrl = GetRegexUrl(rule.ImageUrlStart, rule.ImageUrlEnd);
            }

            var regexTitle = string.Empty;
            if (rule.ContentTitleByList)
            {
                regexTitle = GetRegexTitle(rule.ContentTitleStart, rule.ContentTitleEnd);
            }

            var contentAttributes = ListUtils.GetStringList(rule.ContentAttributes);

            var contentUrls = GetValues("url", regexContentUrl, listHtml);
            var imageUrls = GetValues("url", regexImageUrl, listHtml);
            var titles = GetValues("title", regexTitle, listHtml);

            // Values for every attribute that is configured to be read from the list page.
            var attributesDict = new Dictionary<string, List<string>>();
            foreach (var attributeName in contentAttributes)
            {
                if (!GetByListValue(rule, attributeName))
                {
                    continue;
                }

                var normalStart = GetStartValue(rule, attributeName);
                var normalEnd = GetEndValue(rule, attributeName);
                var regex = GetRegexAttributeName(attributeName, normalStart, normalEnd);
                attributesDict[attributeName] = GetValues(attributeName, regex, listHtml);
            }

            var myUri = new Uri(gatherUrl);
            var host = myUri.Scheme + "://" + myUri.Host;
            if (!myUri.IsDefaultPort)
            {
                host += ":" + myUri.Port;
            }

            // Keep absolute URLs as they are, resolve root-relative ones against the host, and drop
            // everything else. Duplicates are skipped while first-seen order is preserved.
            var contentUrlList = new List<string>();
            var seenUrls = new HashSet<string>();
            foreach (var contentUrl in contentUrls)
            {
                if (string.IsNullOrEmpty(contentUrl))
                {
                    continue;
                }

                var url = string.Empty;
                if (PageUtils.IsProtocolUrl(contentUrl))
                {
                    url = contentUrl;
                }
                else if (contentUrl.StartsWith('/'))
                {
                    url = PageUtils.Combine(host, contentUrl);
                }

                if (!string.IsNullOrEmpty(url) && seenUrls.Add(url))
                {
                    contentUrlList.Add(url);
                }
            }

            // NOTE(review): titles, images and attribute values are paired with URLs by their position
            // in the filtered, de-duplicated URL list, not by the position of the raw URL match.
            // Confirm this is the intended pairing for rules whose URL pattern matches more than once per entry.
            var items = new List<Item>();
            for (var i = 0; i < contentUrlList.Count; i++)
            {
                var imageUrl = imageUrls.Count > i ? imageUrls[i] : string.Empty;
                if (!string.IsNullOrEmpty(imageUrl) && imageUrl.StartsWith('/'))
                {
                    imageUrl = PageUtils.Combine(host, imageUrl);
                }

                var content = new Content
                {
                    ImageUrl = imageUrl,
                    Title = titles.Count > i ? titles[i] : string.Empty
                };

                foreach (var attributeName in contentAttributes)
                {
                    // Only "by list" attributes were collected above.
                    if (!attributesDict.TryGetValue(attributeName, out var values))
                    {
                        continue;
                    }

                    var normalDefault = GetDefaultValue(rule, attributeName);
                    var value = values.Count > i ? values[i] : normalDefault;
                    content.Set(attributeName, value);
                }

                items.Add(new Item
                {
                    Url = contentUrlList[i],
                    Content = content
                });
            }

            return items;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Downloads the content page of <paramref name="item"/> and extracts its title, channel,
        /// body (including follow-up pages) and every configured attribute into a name/value collection.
        /// </summary>
        /// <param name="rule">Gather rule providing charset, cookies, patterns and clean-up settings.</param>
        /// <param name="item">List item whose <c>Url</c> is fetched and whose <c>Content</c> supplies values gathered from the list page.</param>
        /// <returns>
        /// One entry per configured attribute, followed by the entries "标题" (title), "栏目" (channel)
        /// and "正文" (body).
        /// </returns>
        /// <exception cref="Exception">A page could not be downloaded; the message is the downloader's error message.</exception>
        public static async Task<NameValueCollection> GetContentNameValueCollectionAsync(Rule rule, Item item)
        {
            var attributes = new NameValueCollection();

            var result = await WebClientUtils.GetRemoteHtmlAsync(item.Url, rule.Charset, rule.CookieString);
            if (!result.IsSuccess)
            {
                throw new Exception(result.ErrorMessage);
            }

            var contentHtml = result.Content;
            var regexContentExclude = GatherUtils.GetRegexString(rule.ContentExclude);
            var regexChannel = GatherUtils.GetRegexChannel(rule.ContentChannelStart, rule.ContentChannelEnd);
            var regexContent = GatherUtils.GetRegexContent(rule.ContentContentStart, rule.ContentContentEnd);

            // The fallback body patterns only exist when both of their markers are configured.
            var regexContent2 = string.Empty;
            if (!string.IsNullOrEmpty(rule.ContentContentStart2) && !string.IsNullOrEmpty(rule.ContentContentEnd2))
            {
                regexContent2 = GatherUtils.GetRegexContent(rule.ContentContentStart2, rule.ContentContentEnd2);
            }

            var regexContent3 = string.Empty;
            if (!string.IsNullOrEmpty(rule.ContentContentStart3) && !string.IsNullOrEmpty(rule.ContentContentEnd3))
            {
                regexContent3 = GatherUtils.GetRegexContent(rule.ContentContentStart3, rule.ContentContentEnd3);
            }

            var regexNextPage = GatherUtils.GetRegexUrl(rule.ContentNextPageStart, rule.ContentNextPageEnd);
            var regexTitle = GatherUtils.GetRegexTitle(rule.ContentTitleStart, rule.ContentTitleEnd);
            var contentAttributes = ListUtils.GetStringList(rule.ContentAttributes);

            var title = rule.ContentTitleByList ? item.Content.Title : GetValue("title", regexTitle, contentHtml);

            // Try the body patterns in order until one of them produces something.
            var body = GetValue("content", regexContent, contentHtml);
            if (string.IsNullOrEmpty(body) && !string.IsNullOrEmpty(regexContent2))
            {
                body = GetValue("content", regexContent2, contentHtml);
            }
            if (string.IsNullOrEmpty(body) && !string.IsNullOrEmpty(regexContent3))
            {
                body = GetValue("content", regexContent3, contentHtml);
            }

            if (!string.IsNullOrEmpty(regexContentExclude))
            {
                body = Replace(regexContentExclude, body, string.Empty);
            }
            if (!string.IsNullOrEmpty(rule.ContentHtmlClearCollection))
            {
                // Remove the listed elements entirely (tags and inner markup).
                foreach (var htmlClear in StringCollectionToList(rule.ContentHtmlClearCollection))
                {
                    var clearRegex = $@"<{htmlClear}[^>]*>.*?<\/{htmlClear}>";
                    body = Replace(clearRegex, body, string.Empty);
                }
            }
            if (!string.IsNullOrEmpty(rule.ContentHtmlClearTagCollection))
            {
                // Strip only the opening/closing tags of the listed elements, keeping their content.
                foreach (var htmlClearTag in StringCollectionToList(rule.ContentHtmlClearTagCollection))
                {
                    var openTagRegex = $@"<{htmlClearTag}[^>]*>";
                    body = Replace(openTagRegex, body, string.Empty);

                    var closeTagRegex = $@"<\/{htmlClearTag}>";
                    body = Replace(closeTagRegex, body, string.Empty);
                }
            }

            var contentNextPageUrl = GetUrl(regexNextPage, contentHtml, item.Url);

            // A "next page" link that resolves to the page just processed is not a continuation:
            // following it would download the same page again and append its body a second time.
            var hasNextPage = !string.IsNullOrEmpty(contentNextPageUrl)
                              && !string.Equals(item.Url, contentNextPageUrl, StringComparison.OrdinalIgnoreCase);
            if (hasNextPage)
            {
                body = await GetPageContentAsync(body, rule.Charset, contentNextPageUrl, rule.CookieString, regexContentExclude, rule.ContentHtmlClearCollection, rule.ContentHtmlClearTagCollection, regexContent, regexContent2, regexContent3, regexNextPage);
            }

            var channel = GetValue("channel", regexChannel, contentHtml);

            foreach (var attributeName in contentAttributes)
            {
                var normalByList = GetByListValue(rule, attributeName);
                var normalStart = GetStartValue(rule, attributeName);
                var normalEnd = GetEndValue(rule, attributeName);
                var normalDefault = GetDefaultValue(rule, attributeName);
                var regex = GetRegexAttributeName(attributeName, normalStart, normalEnd);

                // "By list" attributes were already captured on the list page; the rest come from this page.
                var value = normalByList
                    ? item.Content.Get<string>(attributeName)
                    : GetValue(attributeName, regex, contentHtml);
                if (string.IsNullOrEmpty(value))
                {
                    value = normalDefault;
                }

                attributes.Set(attributeName, value);
            }

            attributes.Add("标题", title);
            attributes.Add("栏目", channel);
            attributes.Add("正文", body);

            return attributes;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Gathers a single list item: downloads its page, extracts title, body and attributes
        /// according to <paramref name="rule"/>, optionally stores referenced images/files locally,
        /// and inserts the resulting content.
        /// </summary>
        /// <param name="siteInfo">Site the content is inserted into.</param>
        /// <param name="channelInfo">Target channel; also the parent under which a channel gathered from the page is looked up or created.</param>
        /// <param name="regexTitleInclude">When non-empty, the tag-stripped title must match this pattern or the item is rejected.</param>
        /// <param name="regexContentExclude">When non-empty, matches of this pattern are removed from the body.</param>
        /// <param name="regexTitle">Title pattern, used unless the rule takes the title from the list page.</param>
        /// <param name="regexContent">Primary body pattern.</param>
        /// <param name="regexContent2">Fallback body pattern.</param>
        /// <param name="regexContent3">Second fallback body pattern.</param>
        /// <param name="regexNextPage">Pattern used to locate the next page's URL.</param>
        /// <param name="regexChannel">Pattern used to extract a channel name from the page.</param>
        /// <param name="contentAttributes">Names of the attributes to gather.</param>
        /// <param name="rule">Gather rule.</param>
        /// <param name="item">List item to gather.</param>
        /// <param name="channelIdAndContentIdList">Receives the (channel id, content id) pair of the inserted content.</param>
        /// <param name="adminId">Administrator recorded as creator and last editor.</param>
        /// <returns>
        /// <c>Success</c> with the title on insertion; otherwise <c>false</c> with an error message.
        /// Exceptions are not propagated: they are reported through <c>ErrorMessage</c>.
        /// </returns>
        private async Task<(bool Success, string Title, string ErrorMessage)> GatherOneAsync(Site siteInfo, Channel channelInfo, string regexTitleInclude, string regexContentExclude, string regexTitle, string regexContent, string regexContent2, string regexContent3, string regexNextPage, string regexChannel, IEnumerable<string> contentAttributes, Rule rule, Item item, ICollection<KeyValuePair<int, int>> channelIdAndContentIdList, int adminId)
        {
            try
            {
                var result = await WebClientUtils.GetRemoteHtmlAsync(item.Url, rule.Charset, rule.CookieString);
                if (!result.IsSuccess)
                {
                    return (false, string.Empty, result.ErrorMessage);
                }

                var contentHtml = result.Content;

                var title = rule.ContentTitleByList ? item.Content.Title : GatherUtils.GetValue("title", regexTitle, contentHtml);

                // Try the body patterns in order until one of them produces something.
                var content = GatherUtils.GetValue("content", regexContent, contentHtml);
                if (string.IsNullOrEmpty(content) && !string.IsNullOrEmpty(regexContent2))
                {
                    content = GatherUtils.GetValue("content", regexContent2, contentHtml);
                }
                if (string.IsNullOrEmpty(content) && !string.IsNullOrEmpty(regexContent3))
                {
                    content = GatherUtils.GetValue("content", regexContent3, contentHtml);
                }

                // Give up when the title is empty, or when the body is empty and the rule does not allow that.
                if (string.IsNullOrEmpty(title))
                {
                    return (false, title, $"无法获取标题:{item.Url}");
                }
                if (rule.IsEmptyContentAllowed == false && string.IsNullOrEmpty(content))
                {
                    return (false, title, $"无法获取内容正文:{item.Url}");
                }

                title = StringUtils.StripTags(title);

                if (!string.IsNullOrEmpty(regexTitleInclude) && GatherUtils.IsMatch(regexTitleInclude, title) == false)
                {
                    return (false, title, $"标题不符合要求:{item.Url}");
                }

                if (!string.IsNullOrEmpty(regexContentExclude))
                {
                    content = GatherUtils.Replace(regexContentExclude, content, string.Empty);
                }
                if (!string.IsNullOrEmpty(rule.ContentHtmlClearCollection))
                {
                    // Remove the listed elements entirely (tags and inner markup).
                    foreach (var htmlClear in GatherUtils.StringCollectionToList(rule.ContentHtmlClearCollection))
                    {
                        var clearRegex = $@"<{htmlClear}[^>]*>.*?<\/{htmlClear}>";
                        content = GatherUtils.Replace(clearRegex, content, string.Empty);
                    }
                }
                if (!string.IsNullOrEmpty(rule.ContentHtmlClearTagCollection))
                {
                    // Strip only the opening/closing tags of the listed elements, keeping their content.
                    foreach (var htmlClearTag in GatherUtils.StringCollectionToList(rule.ContentHtmlClearTagCollection))
                    {
                        var openTagRegex = $@"<{htmlClearTag}[^>]*>";
                        content = GatherUtils.Replace(openTagRegex, content, string.Empty);

                        var closeTagRegex = $@"<\/{htmlClearTag}>";
                        content = GatherUtils.Replace(closeTagRegex, content, string.Empty);
                    }
                }

                var contentNextPageUrl = GatherUtils.GetUrl(regexNextPage, contentHtml, item.Url);

                // A "next page" link that resolves to the page just processed is not a continuation:
                // following it would download the same page again and append its body a second time.
                if (!string.IsNullOrEmpty(contentNextPageUrl) && !StringUtils.EqualsIgnoreCase(item.Url, contentNextPageUrl))
                {
                    try
                    {
                        content = await GatherUtils.GetPageContentAsync(content, rule.Charset, contentNextPageUrl, rule.CookieString, regexContentExclude, rule.ContentHtmlClearCollection, rule.ContentHtmlClearTagCollection, regexContent, regexContent2, regexContent3, regexNextPage);
                    }
                    catch (Exception ex)
                    {
                        // Unlike the outer handler, a pagination failure still reports the title.
                        return (false, title, ex.Message);
                    }
                }

                // When the page names a channel, file the content under the matching descendant of
                // channelInfo, creating a direct child with that name if none matches.
                var channel = GatherUtils.GetValue("channel", regexChannel, contentHtml);
                var channelId = channelInfo.Id;
                if (!string.IsNullOrEmpty(channel))
                {
                    var channelIdByNodeName = 0;

                    var childChannelIdList = await _channelRepository.GetChannelIdsAsync(siteInfo.Id, channelInfo.Id, ScopeType.All);
                    foreach (var childChannelId in childChannelIdList)
                    {
                        // No early exit: when several channels share the name, the last one wins.
                        if (channel == await _channelRepository.GetChannelNameAsync(siteInfo.Id, childChannelId))
                        {
                            channelIdByNodeName = childChannelId;
                        }
                    }

                    if (channelIdByNodeName == 0)
                    {
                        var newChannelInfo = new Channel
                        {
                            SiteId = siteInfo.Id,
                            ParentId = channelInfo.Id,
                            ChannelName = channel,
                            ContentModelPluginId = channelInfo.ContentModelPluginId
                        };

                        channelId = await _channelRepository.InsertAsync(newChannelInfo);
                    }
                    else
                    {
                        channelId = channelIdByNodeName;
                    }
                }

                if (!rule.IsSameTitleAllowed)
                {
                    var theChannel = await _channelRepository.GetAsync(channelId);
                    var contentIds = await _contentRepository.GetContentIdsBySameTitleAsync(siteInfo, theChannel, title);
                    if (contentIds.Count > 0)
                    {
                        return (false, title, $"已包含相同标题:{title}");
                    }
                }

                var contentInfo = new Content
                {
                    AddDate = DateTime.Now
                };

                foreach (var attributeName in contentAttributes)
                {
                    // Title and body are handled separately above.
                    if (StringUtils.EqualsIgnoreCase(attributeName, nameof(Content.Title)) || StringUtils.EqualsIgnoreCase(attributeName, nameof(Content.Body)))
                    {
                        continue;
                    }

                    var normalByList = GatherUtils.GetByListValue(rule, attributeName);
                    var normalStart = GatherUtils.GetStartValue(rule, attributeName);
                    var normalEnd = GatherUtils.GetEndValue(rule, attributeName);
                    var normalDefault = GatherUtils.GetDefaultValue(rule, attributeName);

                    var regex = GatherUtils.GetRegexAttributeName(attributeName, normalStart, normalEnd);
                    var value = normalByList ? item.Content.Get<string>(attributeName) : GatherUtils.GetValue(attributeName, regex, contentHtml);

                    // Fall back to the configured default when nothing was gathered.
                    if (string.IsNullOrEmpty(value))
                    {
                        value = normalDefault;
                    }

                    if (StringUtils.EqualsIgnoreCase(nameof(Content.AddDate), attributeName))
                    {
                        // Replacing ":" with ":" did nothing. Presumably the intent was to turn a
                        // full-width colon (U+FF1A) into an ASCII one so the date can be parsed.
                        value = GatherUtils.ReplaceFirst(value, "\uFF1A", ":");
                        contentInfo.AddDate = TranslateUtils.ToDateTime(value, DateTime.Now);
                    }
                    else if (StringUtils.EqualsIgnoreCase(nameof(Content.Color), attributeName))
                    {
                        contentInfo.Color = TranslateUtils.ToBool(value, defaultValue: false);
                    }
                    else if (StringUtils.EqualsIgnoreCase(nameof(Content.Hot), attributeName))
                    {
                        contentInfo.Hot = TranslateUtils.ToBool(value, defaultValue: false);
                    }
                    else if (StringUtils.EqualsIgnoreCase(nameof(Content.Recommend), attributeName))
                    {
                        contentInfo.Recommend = TranslateUtils.ToBool(value, defaultValue: false);
                    }
                    else if (StringUtils.EqualsIgnoreCase(nameof(Content.Top), attributeName))
                    {
                        contentInfo.Top = TranslateUtils.ToBool(value, defaultValue: false);
                    }
                    else if (StringUtils.EqualsIgnoreCase(nameof(Content.ImageUrl), attributeName))
                    {
                        if (!string.IsNullOrEmpty(value))
                        {
                            var attachmentUrl = GatherUtils.GetUrlByBaseUrl(value, item.Url);
                            await DownloadToUploadDirectoryAsync(siteInfo, UploadType.Image, attachmentUrl, PageUtils.GetExtensionFromUrl(attachmentUrl), fileUrl => contentInfo.ImageUrl = fileUrl);
                        }
                    }
                    else if (StringUtils.EqualsIgnoreCase(nameof(Content.VideoUrl), attributeName))
                    {
                        if (!string.IsNullOrEmpty(value))
                        {
                            var attachmentUrl = GatherUtils.GetUrlByBaseUrl(value, item.Url);
                            await DownloadToUploadDirectoryAsync(siteInfo, UploadType.Video, attachmentUrl, PageUtils.GetExtensionFromUrl(attachmentUrl), fileUrl => contentInfo.VideoUrl = fileUrl);
                        }
                    }
                    else if (StringUtils.EqualsIgnoreCase(nameof(Content.FileUrl), attributeName))
                    {
                        if (!string.IsNullOrEmpty(value))
                        {
                            var attachmentUrl = GatherUtils.GetUrlByBaseUrl(value, item.Url);
                            await DownloadToUploadDirectoryAsync(siteInfo, UploadType.File, attachmentUrl, PageUtils.GetExtensionFromUrl(attachmentUrl), fileUrl => contentInfo.FileUrl = fileUrl);
                        }
                    }
                    else if (StringUtils.EqualsIgnoreCase(nameof(Content.Hits), attributeName))
                    {
                        contentInfo.Hits = TranslateUtils.ToInt(value);
                    }
                    else if (StringUtils.EqualsIgnoreCase("FileName", attributeName) && !string.IsNullOrEmpty(rule.FileNameAttributeName))
                    {
                        // The gathered value is ignored here; the file name comes from the item URL.
                        var fileName = PathUtils.GetFileNameWithoutExtension(item.Url);
                        contentInfo.Set(rule.FileNameAttributeName, fileName);
                    }
                    else
                    {
                        contentInfo.Set(attributeName, value);
                    }
                }

                // Store the images referenced by the body locally and point the body at the copies.
                var firstImageUrl = string.Empty;
                if (rule.IsSaveImage)
                {
                    var originalImageSrcList = GatherUtils.GetOriginalImageSrcList(content);
                    var imageSrcList = GatherUtils.GetImageSrcList(item.Url, content);

                    // The two lists are paired by index, so they are only used when their sizes agree.
                    if (originalImageSrcList.Count == imageSrcList.Count)
                    {
                        for (var i = 0; i < originalImageSrcList.Count; i++)
                        {
                            var originalImageSrc = originalImageSrcList[i];
                            var imageSrc = imageSrcList[i];

                            await DownloadToUploadDirectoryAsync(siteInfo, UploadType.Image, imageSrc, PathUtils.GetExtension(originalImageSrc), fileUrl =>
                            {
                                content = content.Replace(originalImageSrc, fileUrl);
                                if (firstImageUrl == string.Empty)
                                {
                                    firstImageUrl = fileUrl;
                                }
                            });
                        }
                    }
                }

                if (rule.ImageSource == ImageSource.Content)
                {
                    // Prefer the first locally stored image; otherwise use the first image found in the body.
                    if (string.IsNullOrEmpty(firstImageUrl))
                    {
                        var imageSrcList = GatherUtils.GetImageSrcList(item.Url, content);
                        if (imageSrcList.Count > 0)
                        {
                            firstImageUrl = imageSrcList[0];
                        }
                    }

                    if (!string.IsNullOrEmpty(firstImageUrl))
                    {
                        contentInfo.ImageUrl = firstImageUrl;
                    }
                }
                else if (rule.ImageSource == ImageSource.List)
                {
                    contentInfo.ImageUrl = item.Content.ImageUrl;
                }

                // Store the files linked from the body locally and point the links at the copies.
                if (rule.IsSaveFiles)
                {
                    var originalLinkHrefList = GatherUtils.GetOriginalLinkHrefList(content);
                    var linkHrefList = GatherUtils.GetLinkHrefList(item.Url, content);

                    if (originalLinkHrefList.Count == linkHrefList.Count)
                    {
                        for (var i = 0; i < originalLinkHrefList.Count; i++)
                        {
                            var originalLinkHref = originalLinkHrefList[i];
                            var linkHref = linkHrefList[i];

                            await DownloadToUploadDirectoryAsync(siteInfo, UploadType.File, linkHref, PathUtils.GetExtension(originalLinkHref), fileUrl =>
                            {
                                content = content.Replace(originalLinkHref, fileUrl);
                            });
                        }
                    }
                }

                contentInfo.SiteId = siteInfo.Id;
                contentInfo.ChannelId = channelId;
                contentInfo.AdminId = adminId;
                contentInfo.LastEditAdminId = adminId;
                contentInfo.Checked = rule.IsChecked;
                contentInfo.CheckedLevel = 0;
                contentInfo.Title = title;
                contentInfo.Body = content;

                // NOTE(review): channelInfo is passed here even when channelId refers to a gathered
                // child channel; confirm InsertAsync takes the channel from contentInfo.ChannelId.
                var theContentId = await _contentRepository.InsertAsync(siteInfo, channelInfo, contentInfo);

                channelIdAndContentIdList.Add(new KeyValuePair<int, int>(contentInfo.ChannelId, theContentId));

                return (true, title, string.Empty);
            }
            catch (Exception ex)
            {
                return (false, string.Empty, ex.Message);
            }
        }

        /// <summary>
        /// Downloads <paramref name="sourceUrl"/> into the site's upload directory for
        /// <paramref name="uploadType"/> under a newly generated file name and, once the download
        /// succeeded, hands the stored file's virtual URL to <paramref name="onSaved"/>.
        /// </summary>
        /// <remarks>
        /// Failures while preparing the target path propagate to the caller. Failures while
        /// downloading, resolving the virtual URL or running <paramref name="onSaved"/> are
        /// deliberately ignored so that one unreachable attachment does not abort the gather.
        /// </remarks>
        private async Task DownloadToUploadDirectoryAsync(Site siteInfo, UploadType uploadType, string sourceUrl, string fileExtension, Action<string> onSaved)
        {
            var fileName = $"{StringUtils.GetShortGuid(false)}{fileExtension}";
            var directoryPath = await _pathManager.GetUploadDirectoryPathAsync(siteInfo, uploadType);
            var filePath = PathUtils.Combine(directoryPath, fileName);
            DirectoryUtils.CreateDirectoryIfNotExists(filePath);

            try
            {
                await WebClientUtils.DownloadAsync(sourceUrl, filePath);

                var fileUrl = await _pathManager.GetVirtualUrlByPhysicalPathAsync(siteInfo, filePath);
                onSaved(fileUrl);
            }
            catch
            {
                // Best effort: the content is still gathered, just without this local copy.
            }
        }