Esempio n. 1
0
        /// <summary>
        /// Builds the crawl proxy for a media item: assigns the asset's content id,
        /// attempts to load its binary blob, and collects the index fields
        /// (title, url, pagetype, mimetype, contenttype, language, folder, paths, path).
        /// </summary>
        /// <param name="mediaItem">Media item to index; it must also implement <c>ISearchableAsset</c>.</param>
        /// <returns>
        /// The populated proxy, or <c>null</c> when <paramref name="mediaItem"/> is not an
        /// <c>ISearchableAsset</c> or no URL can be resolved for it.
        /// </returns>
        public ContentCrawlProxy BuildSearchableAsset(MediaData mediaItem)
        {
            var searchableAsset = mediaItem as ISearchableAsset;

            if (searchableAsset == null)
            {
                return null;
            }

            var url = UrlResolver.Current.GetUrl(mediaItem.ContentLink);

            if (url == null)
            {
                return null;
            }

            searchableAsset._ContentID = string.Format("{0}", mediaItem.ContentGuid);

            try
            {
                searchableAsset.AssetBlob = EpiHelper.ReadEpiBlob(mediaItem.BinaryData);
            }
            catch (Exception ex)
            {
                // Best effort: a blob that cannot be read is logged and the asset is
                // still returned with its metadata fields.
                if (_logger != null)
                {
                    _logger.Error(string.Format("Asset Crawler: Error Retrieving Asset {0} {1}", ex.Message, ex.StackTrace));
                }
            }

            // Swap the "en" language marker for the item's own language, but only when
            // "en" is the complete parameter value. A plain substring replace would also
            // hit "?epslanguage=en-GB" and produce values such as "en-GB-GB".
            var language = mediaItem.Language;

            if (language != null)
            {
                var languageParameter = string.Format("?epslanguage={0}", language.Name);

                // A match evaluator is used so the language name is inserted literally
                // and never interpreted as a regex substitution pattern.
                url = System.Text.RegularExpressions.Regex.Replace(
                    url,
                    @"\?epslanguage=en(?=[&#]|\z)",
                    match => languageParameter);
            }

            // The parent reference feeds three fields; resolve it once.
            var parentReference = mediaItem.ParentLink.ToPageReference();

            var pageCrawlParameters = new ContentCrawlProxy();

            pageCrawlParameters.Content.Add(new CrawlerContent
            {
                Name  = "title",
                Value = mediaItem.Name,
            });

            pageCrawlParameters.Content.Add(new CrawlerContent
            {
                Name  = "url",
                Value = url,
            });

            pageCrawlParameters.Content.Add(new CrawlerContent
            {
                Name  = "pagetype",
                Value = "Media",
            });

            pageCrawlParameters.Content.Add(new CrawlerContent
            {
                Name  = "mimetype",
                Value = mediaItem.MimeType,
            });

            pageCrawlParameters.Content.Add(new CrawlerContent
            {
                Name  = "contenttype",
                Value = MimeType.GetDisplayName(mediaItem.MimeType),
            });

            pageCrawlParameters.Content.Add(new CrawlerContent
            {
                Name  = "language",
                Value = Languages,
            });

            pageCrawlParameters.Content.Add(new CrawlerContent
            {
                Name  = "folder",
                Value = EpiHelper.GetParentFolderName(parentReference),
            });

            pageCrawlParameters.Content.Add(new CrawlerContent
            {
                Name  = "paths",
                Value = EpiHelper.GetPageTreePaths(parentReference),
            });

            pageCrawlParameters.Content.Add(new CrawlerContent
            {
                Name  = "path",
                Value = EpiHelper.GetFolderPath(parentReference),
            });

            pageCrawlParameters.ContentItem = searchableAsset;

            return pageCrawlParameters;
        }
Esempio n. 2
0
        /// <summary>
        /// Builds the crawl proxy for a page and initializes its default index properties
        /// (title, timestamp, url, contentid, categories, hostname, folder, path, paths,
        /// pagetype, mimetype, contenttype, language). When a page scrapper is configured
        /// in the crawl settings, the scraped page content is added as well, followed by
        /// any content produced by <c>ProcessContentReferences</c>.
        /// </summary>
        /// <param name="page">Page to index; it must also implement <c>ISearchableContent</c>.</param>
        /// <returns>
        /// The populated proxy, or <c>null</c> when <paramref name="page"/> is not an
        /// <c>ISearchableContent</c>.
        /// </returns>
        public ContentCrawlProxy BuildSearchablePage(PageData page)
        {
            var searchablePage = page as ISearchableContent;

            if (searchablePage == null)
            {
                return null;
            }

            var languageName = page.Language.Name;

            // The content id combines the page guid with the language name.
            searchablePage._ContentID = string.Format("{0}-{1}", page.ContentGuid, languageName);

            // Swap the "en" language marker for the page's own language, but only when
            // "en" is the complete parameter value. A plain substring replace would also
            // hit "epslanguage=en-GB" and produce values such as "en-GB-GB".
            var languageParameter = string.Format("epslanguage={0}", languageName);

            // A match evaluator is used so the language name is inserted literally
            // and never interpreted as a regex substitution pattern.
            var pageUrl = System.Text.RegularExpressions.Regex.Replace(
                page.LinkURL,
                @"epslanguage=en(?=[&#]|\z)",
                match => languageParameter);

            // The parent reference feeds three fields; resolve it once.
            var parentReference = page.ParentLink.ToPageReference();

            var pageProps = new ContentCrawlProxy();

            pageProps.Content.Add(new CrawlerContent
            {
                Name  = "title",
                Value = page.Name,
            });

            pageProps.Content.Add(new CrawlerContent
            {
                Name  = "timestamp",
                Value = page.Changed,
            });

            pageProps.Content.Add(new CrawlerContent
            {
                Name  = "url",
                Value = pageUrl,
            });

            pageProps.Content.Add(new CrawlerContent
            {
                Name  = "contentid",
                Value = page.ContentLink.ID.ToString(),
            });

            pageProps.Content.Add(new CrawlerContent
            {
                Name  = "categories",
                Value = EpiHelper.GetCategoryPaths(page.Category),
            });

            pageProps.Content.Add(new CrawlerContent
            {
                Name  = "hostname",
                Value = EpiHelper.GetSitePath(page.ContentLink),
            });

            pageProps.Content.Add(new CrawlerContent
            {
                Name  = "folder",
                Value = EpiHelper.GetParentName(parentReference),
            });

            pageProps.Content.Add(new CrawlerContent
            {
                Name  = "path",
                Value = EpiHelper.GetPageTreePath(parentReference),
            });

            pageProps.Content.Add(new CrawlerContent
            {
                Name  = "paths",
                Value = EpiHelper.GetPageTreePaths(parentReference),
            });

            pageProps.Content.Add(new CrawlerContent
            {
                Name  = "pagetype",
                Value = page.PageTypeName,
            });

            pageProps.Content.Add(new CrawlerContent
            {
                Name  = "mimetype",
                Value = "text/html",
            });

            pageProps.Content.Add(new CrawlerContent
            {
                Name  = "contenttype",
                Value = "HTML",
            });

            pageProps.Content.Add(new CrawlerContent
            {
                Name  = "language",
                Value = new List<string> { languageName },
            });

            // If a scrapper is configured, scrape the page at its external URL and index the result.
            if (_crawlSettings.PageScrapper != null)
            {
                var scrapContent = _crawlSettings.PageScrapper.ScrapPage(EpiHelper.GetExternalURL(page));

                pageProps.Content.Add(new CrawlerContent
                {
                    Name  = MissionSearch.Global.ContentField,
                    Value = scrapContent,
                });
            }

            // Parse searchable block data and add it to the content; a null result is
            // treated the same as an empty one instead of throwing.
            var parsedBlockText = ProcessContentReferences(page);

            if (parsedBlockText != null && parsedBlockText.Any())
            {
                pageProps.Content.AddRange(parsedBlockText);
            }

            pageProps.ContentItem = searchablePage;

            return pageProps;
        }