Exemplo n.º 1
0
        /// <summary>
        /// Folds another result set into this instance: adds its total and error
        /// counts to this instance's counts and appends its crawled folder ids.
        /// </summary>
        /// <param name="results1">The result set whose values are merged into this instance.</param>
        /// <returns>This instance, so calls can be chained.</returns>
        public IndexResults Combine(IndexResults results1)
        {
            TotalCnt += results1.TotalCnt;
            ErrorCnt += results1.ErrorCnt;

            // Nothing to append when the other result carries no folder ids.
            if (results1.CrawledFoldersIds == null || !results1.CrawledFoldersIds.Any())
            {
                return this;
            }

            CrawledFoldersIds.AddRange(results1.CrawledFoldersIds);

            return this;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds a JSON search document for each content item and posts it to the
        /// search client, committing after every 1000 posted documents and once
        /// more (followed by a close) after the last item.
        /// </summary>
        /// <param name="contentItems">Items to index. When null, nothing is posted and empty results are returned.</param>
        /// <returns>
        /// The indexing counters. TotalCnt is incremented once a document has been
        /// created (before it is posted); ErrorCnt is incremented for every item
        /// whose processing threw.
        /// </returns>
        public IndexResults RunUpdate(List <ContentCrawlProxy> contentItems)
        {
            // Number of posted documents between intermediate commits.
            const int commitBatchSize = 1000;

            var results = new IndexResults
            {
                TotalCnt = 0,
                ErrorCnt = 0
            };

            if (contentItems == null)
            {
                return results;
            }

            var postedSinceCommit = 0;

            foreach (var contentItem in contentItems)
            {
                try
                {
                    var doc = CreateSearchJsonDoc(contentItem.ContentItem._ContentID, contentItem);

                    results.TotalCnt++;

                    SearchClient.Post(doc);

                    if (++postedSinceCommit < commitBatchSize)
                    {
                        continue;
                    }

                    // Reset before committing so that the next batch is counted from
                    // zero even if this commit throws. Without the reset the periodic
                    // commit would only ever fire once, at exactly 1000 documents.
                    postedSinceCommit = 0;
                    SearchClient.Commit();
                }
                catch (Exception ex)
                {
                    // A failure on one item is logged and counted; the remaining items are still processed.
                    LogError(string.Format("Indexing failed for \"{0}\". {1} {2}", contentItem.ContentItem.Name, ex.Message, ex.StackTrace));
                    results.ErrorCnt++;
                }
            }

            // Final commit covers the documents posted since the last batch commit.
            if (contentItems.Any())
            {
                SearchClient.Commit();
                SearchClient.Close();
            }

            return results;
        }
        /// <summary>
        /// Indexes the given content items: items flagged NotSearchable are removed
        /// from the index when present, every other item is converted to a search
        /// document and posted. A commit is issued after every 1000 posted
        /// documents and once more (followed by a close) after the last item.
        /// </summary>
        /// <param name="contentItems">Items to index. When null, nothing is done and empty results are returned. The sequence is enumerated exactly once.</param>
        /// <param name="statusCallback">Optional. Invoked after each posted document; returning false stops the run.</param>
        /// <param name="indexerCallback">Optional. Receives each created document together with its content item and returns the document that is actually posted.</param>
        /// <returns>
        /// The indexing counters. When the status callback stops the run, Stopped
        /// is set and the method returns immediately, without the final commit/close.
        /// </returns>
        public IndexResults RunUpdate(IEnumerable <ContentCrawlProxy> contentItems, Global <T> .StatusCallBack statusCallback, Global <T> .IndexCallBack indexerCallback)
        {
            var results = new IndexResults
            {
                TotalCnt = 0,
                ErrorCnt = 0
            };

            if (contentItems == null)
            {
                return results;
            }

            var cnt = 0;

            // Tracks whether the sequence produced any item. Calling Any() after the
            // loop would enumerate the source a second time, which for a deferred
            // query can repeat the underlying work.
            var sawAnyItem = false;

            foreach (var contentItem in contentItems)
            {
                sawAnyItem = true;

                try
                {
                    if (contentItem.ContentItem.NotSearchable)
                    {
                        // Only issue the delete when a document with this id exists for this source.
                        // NOTE(review): the existence check filters on sourceid but the delete
                        // query does not - confirm that id alone identifies the document.
                        if (SearchClient.Search("id:" + contentItem.ContentItem._ContentID + " AND sourceid:" + _sourceId.ToString()).Results.Any())
                        {
                            SearchClient.Delete("id:" + contentItem.ContentItem._ContentID);
                            results.DeleteCnt++;
                        }
                        continue;
                    }

                    var doc = CreateSearchDoc(contentItem);

                    if (doc == null)
                    {
                        // Caught by the handler below, so the item is logged and counted as an error.
                        throw new Exception("error creating solr document");
                    }

                    results.TotalCnt++;

                    if (indexerCallback != null)
                    {
                        doc = indexerCallback(doc, contentItem.ContentItem);
                    }

                    SearchClient.Post(doc);

                    if (statusCallback != null && !statusCallback())
                    {
                        results.Stopped = true;
                        return results;
                    }

                    if (++cnt != 1000)
                    {
                        continue;
                    }

                    SearchClient.Commit();
                    cnt = 0;
                }
                catch (Exception ex)
                {
                    // A failure on one item is logged and counted; the remaining items are still processed.
                    LogError(string.Format("Indexing failed for \"{0}\". {1} {2}", contentItem.ContentItem.Name, ex.Message, ex.StackTrace));
                    results.ErrorCnt++;
                }
            }

            if (sawAnyItem)
            {
                SearchClient.Commit();
                SearchClient.Close();
            }

            return results;
        }
        /// <summary>
        /// Creates a search document for each asset and posts it to the search
        /// client, then commits and closes the client once all assets have been
        /// processed. This overload issues no intermediate commits and does not
        /// delete anything from the index.
        /// </summary>
        /// <param name="parameters">Assets to index. When null, nothing is done and empty results are returned.</param>
        /// <param name="statusCallback">Optional. Invoked before each document is posted; returning false stops the run.</param>
        /// <param name="indexerCallback">Optional. Receives each created document together with its content item and returns the document that is actually posted.</param>
        /// <returns>
        /// The indexing counters. TotalCnt counts documents that were posted without
        /// an exception; ErrorCnt counts assets whose processing threw. When the
        /// status callback stops the run, Stopped is set and the method returns
        /// immediately, without the final commit/close.
        /// </returns>
        public IndexResults RunUpdate(IEnumerable <ContentCrawlProxy> parameters, Global <T> .StatusCallBack statusCallback, Global <T> .IndexCallBack indexerCallback)
        {
            var results = new IndexResults
            {
                TotalCnt = 0,
                ErrorCnt = 0
            };

            // A null sequence previously failed with a NullReferenceException at the
            // foreach; treat it as "nothing to index" instead.
            if (parameters == null)
            {
                return results;
            }

            foreach (var asset in parameters)
            {
                try
                {
                    // The results object is handed to the document builder along with the asset.
                    var doc = CreateSearchDoc(asset, results);

                    if (indexerCallback != null)
                    {
                        doc = indexerCallback(doc, asset.ContentItem);
                    }

                    // Checked before posting, so the current document is not sent when the run is stopped.
                    if (statusCallback != null && !statusCallback())
                    {
                        results.Stopped = true;
                        return results;
                    }

                    SearchClient.Post(doc);
                    results.TotalCnt++;
                }
                catch (Exception ex)
                {
                    // A failure on one asset is logged and counted; the remaining assets are still processed.
                    LogError(string.Format("Indexing failed for ID: {0} NAME:{1}. {2} {3}", asset.ContentItem._ContentID, asset.ContentItem.Name, ex.Message, ex.StackTrace));
                    results.ErrorCnt++;
                }
            }

            SearchClient.Commit();
            SearchClient.Close();

            return results;
        }
        /// <summary>
        /// Builds the search document for one crawled asset: sets id and source id,
        /// copies crawl properties whose names match document properties, applies
        /// base-type properties, optionally appends text extracted from the asset
        /// blob, and finally composes the highlight summary.
        /// </summary>
        /// <param name="parameters">The crawled item; its ContentItem is cast to ISearchableAsset.</param>
        /// <param name="results">Optional counters; WarningCnt is incremented when blob extraction fails.</param>
        /// <returns>The populated document instance.</returns>
        private T CreateSearchDoc(ContentCrawlProxy parameters, IndexResults results)
        {
            var searchDoc = (T)Activator.CreateInstance(typeof(T), new object[] { });

            searchDoc.id = parameters.ContentItem._ContentID;
            searchDoc.sourceid = _sourceId;

            var targetProps = searchDoc.GetType().GetProperties();

            // Copy each crawl property onto the document property with the same name, if there is one.
            if (parameters.Content != null && parameters.Content.Any())
            {
                foreach (var crawled in parameters.Content)
                {
                    var match = targetProps.FirstOrDefault(p => p.Name == crawled.Name);

                    if (match == null)
                    {
                        continue;
                    }

                    SetPropertyValue(searchDoc, match, crawled.Value);
                }
            }

            // Collect the base-type chain (nearest base first) ...
            // NOTE(review): the chain is taken from the proxy's type while the values
            // are read from parameters.ContentItem - confirm this is intended.
            var ancestry = new List <Type>();

            for (var current = parameters.GetType().BaseType; current != null; current = current.BaseType)
            {
                ancestry.Add(current);
            }

            // ... and apply it from the root type down to the nearest base.
            for (var i = ancestry.Count - 1; i >= 0; i--)
            {
                GetBaseProperties(parameters.ContentItem, searchDoc, targetProps, ancestry[i]);
            }

            var asset = (ISearchableAsset)parameters.ContentItem;

            if (!asset.DisableExtract)
            {
                try
                {
                    var blob = asset.AssetBlob;

                    // Blobs larger than Threshold are not sent for extraction.
                    if (blob != null && blob.Length <= Threshold)
                    {
                        var responseXml = SearchClient.FileExtract(blob);
                        var xhtml       = new XmlParser(responseXml).ParseHTML("/response/str");
                        var htmlParser  = new HtmlParser(WebUtility.HtmlDecode(xhtml));

                        if (searchDoc.content == null)
                        {
                            searchDoc.content = new List <string>();
                        }

                        var bodyText = htmlParser.ParseStripInnerHtml("//body");
                        searchDoc.content.Add(WebUtility.HtmlEncode(bodyText));
                    }
                }
                catch (Exception ex)
                {
                    // Extraction is best-effort: log a warning, count it, and keep the document.
                    LogWarning(string.Format("Extraction failed for ID: {0} NAME:{1}. {2}", parameters.ContentItem._ContentID, parameters.ContentItem.Name, ex.Message));

                    if (results != null)
                    {
                        results.WarningCnt++;
                    }
                }
            }

            var docContent = searchDoc.content == null
                ? ""
                : HtmlParser.StripHTML(string.Join(" ", searchDoc.content));

            // Highlight text is summary, content and title, joined by single spaces and trimmed.
            searchDoc.highlightsummary = (HtmlParser.StripHTML(searchDoc.summary) + " " + docContent + " " + searchDoc.title).Trim();

            return searchDoc;
        }