/// <summary>
/// Adds the totals from <paramref name="results1"/> into this instance:
/// <c>TotalCnt</c> and <c>ErrorCnt</c> are summed and any crawled folder ids
/// are appended to <c>CrawledFoldersIds</c>.
/// </summary>
/// <param name="results1">Results to merge in. A null value leaves this instance unchanged.</param>
/// <returns>This instance, so calls can be chained.</returns>
public IndexResults Combine(IndexResults results1)
{
    // Nothing to merge; previously a null argument caused a NullReferenceException.
    if (results1 == null)
    {
        return this;
    }

    TotalCnt += results1.TotalCnt;
    ErrorCnt += results1.ErrorCnt;

    // NOTE(review): DeleteCnt and WarningCnt are incremented by the indexing code
    // but are not merged here - confirm whether that is intentional.
    if (results1.CrawledFoldersIds != null && results1.CrawledFoldersIds.Any())
    {
        CrawledFoldersIds.AddRange(results1.CrawledFoldersIds);
    }

    return this;
}
/// <summary>
/// Builds a JSON search document for every item, posts it through <c>SearchClient</c>,
/// and commits after every 1000 successful posts and once more at the end.
/// </summary>
/// <param name="contentItems">Items to index. Null yields an empty result.</param>
/// <returns>
/// Counters for the run: <c>TotalCnt</c> is incremented for each document created,
/// <c>ErrorCnt</c> for each item whose processing threw.
/// </returns>
public IndexResults RunUpdate(List<ContentCrawlProxy> contentItems)
{
    const int commitBatchSize = 1000;

    var results = new IndexResults { TotalCnt = 0, ErrorCnt = 0 };
    if (contentItems == null)
    {
        return results;
    }

    // Number of documents posted since the last commit.
    var pendingCnt = 0;

    foreach (var contentItem in contentItems)
    {
        try
        {
            var doc = CreateSearchJsonDoc(contentItem.ContentItem._ContentID, contentItem);
            results.TotalCnt++;
            SearchClient.Post(doc);

            if (++pendingCnt < commitBatchSize)
            {
                continue;
            }

            SearchClient.Commit();
            // Reset so the next batch is committed as well; without the reset
            // only the first batch of 1000 ever triggered a periodic commit.
            pendingCnt = 0;
        }
        catch (Exception ex)
        {
            // A failure on one item is logged and counted; the run continues.
            LogError(string.Format("Indexing failed for \"{0}\". {1} {2}", contentItem.ContentItem.Name, ex.Message, ex.StackTrace));
            results.ErrorCnt++;
        }
    }

    // Final commit for the trailing partial batch; skipped when nothing was passed in.
    if (contentItems.Count > 0)
    {
        SearchClient.Commit();
        SearchClient.Close();
    }

    return results;
}
/// <summary>
/// Indexes the given items: items flagged <c>NotSearchable</c> are removed from the
/// index if present, all others are converted to a search document, optionally passed
/// through <paramref name="indexerCallback"/>, and posted. A commit is issued after
/// every 1000 posts and once more at the end.
/// </summary>
/// <param name="contentItems">Items to index. Null yields an empty result. Enumerated exactly once.</param>
/// <param name="statusCallback">
/// Optional. Called after each post; when it returns false the run stops immediately,
/// <c>Stopped</c> is set on the result and no further commit or close is issued.
/// </param>
/// <param name="indexerCallback">Optional. Receives the document and content item and returns the document to post.</param>
/// <returns>Counters for the run (<c>TotalCnt</c>, <c>ErrorCnt</c>, <c>DeleteCnt</c>) and the <c>Stopped</c> flag.</returns>
public IndexResults RunUpdate(IEnumerable<ContentCrawlProxy> contentItems, Global<T>.StatusCallBack statusCallback, Global<T>.IndexCallBack indexerCallback)
{
    const int commitBatchSize = 1000;

    var results = new IndexResults { TotalCnt = 0, ErrorCnt = 0 };
    if (contentItems == null)
    {
        return results;
    }

    // Number of documents posted since the last commit.
    var pendingCnt = 0;

    // Tracks whether the sequence produced anything, so the sequence does not have to be
    // enumerated a second time (the previous contentItems.Any() re-ran lazy sources).
    var sawAnyItem = false;

    foreach (var contentItem in contentItems)
    {
        sawAnyItem = true;

        try
        {
            if (contentItem.ContentItem.NotSearchable)
            {
                // NOTE(review): the existence check is scoped by sourceid but the delete
                // query is not - confirm ids cannot collide across sources.
                if (SearchClient.Search("id:" + contentItem.ContentItem._ContentID + " AND sourceid:" + _sourceId.ToString()).Results.Any())
                {
                    SearchClient.Delete("id:" + contentItem.ContentItem._ContentID);
                    results.DeleteCnt++;
                }

                continue;
            }

            var doc = CreateSearchDoc(contentItem);
            if (doc == null)
            {
                // Caught by the handler below: logged and counted as an error for this item.
                throw new InvalidOperationException("error creating solr document");
            }

            results.TotalCnt++;

            if (indexerCallback != null)
            {
                doc = indexerCallback(doc, contentItem.ContentItem);
            }

            SearchClient.Post(doc);

            if (statusCallback != null && !statusCallback())
            {
                results.Stopped = true;
                return results;
            }

            if (++pendingCnt < commitBatchSize)
            {
                continue;
            }

            SearchClient.Commit();
            pendingCnt = 0;
        }
        catch (Exception ex)
        {
            // A failure on one item is logged and counted; the run continues.
            LogError(string.Format("Indexing failed for \"{0}\". {1} {2}", contentItem.ContentItem.Name, ex.Message, ex.StackTrace));
            results.ErrorCnt++;
        }
    }

    // Final commit covers the trailing partial batch and any deletes.
    if (sawAnyItem)
    {
        SearchClient.Commit();
        SearchClient.Close();
    }

    return results;
}
/// <summary>
/// Builds a search document for every asset, optionally passes it through
/// <paramref name="indexerCallback"/>, and posts it. All posted documents are
/// committed once, after the loop; there is no periodic commit.
/// </summary>
/// <param name="parameters">Assets to index. Null yields an empty result without touching the search client.</param>
/// <param name="statusCallback">
/// Optional. Called before each post; when it returns false the run stops immediately,
/// <c>Stopped</c> is set on the result and no commit or close is issued.
/// </param>
/// <param name="indexerCallback">Optional. Receives the document and content item and returns the document to post.</param>
/// <returns>
/// Counters for the run: <c>TotalCnt</c> counts posted documents, <c>ErrorCnt</c> counts
/// assets whose processing threw. The result is also passed to the document builder,
/// which may increment <c>WarningCnt</c>.
/// </returns>
public IndexResults RunUpdate(IEnumerable<ContentCrawlProxy> parameters, Global<T>.StatusCallBack statusCallback, Global<T>.IndexCallBack indexerCallback)
{
    var results = new IndexResults();
    results.TotalCnt = 0;
    results.ErrorCnt = 0;

    // Previously a null sequence caused a NullReferenceException at the foreach.
    if (parameters == null)
    {
        return results;
    }

    foreach (var asset in parameters)
    {
        try
        {
            var doc = CreateSearchDoc(asset, results);

            if (indexerCallback != null)
            {
                doc = indexerCallback(doc, asset.ContentItem);
            }

            // The stop check happens before the post, so the current document is not sent.
            if (statusCallback != null && !statusCallback())
            {
                results.Stopped = true;
                return results;
            }

            SearchClient.Post(doc);
            results.TotalCnt++;
        }
        catch (Exception ex)
        {
            // A failure on one asset is logged and counted; the run continues.
            LogError(string.Format("Indexing failed for ID: {0} NAME:{1}. {2} {3}", asset.ContentItem._ContentID, asset.ContentItem.Name, ex.Message, ex.StackTrace));
            results.ErrorCnt++;
        }
    }

    SearchClient.Commit();
    SearchClient.Close();

    return results;
}
/// <summary>
/// Creates a new document of type <typeparamref name="T"/> for the given crawl proxy:
/// sets id and source id, copies crawl content values onto same-named document
/// properties, applies base-type properties, optionally appends text extracted from
/// the asset blob, and finally builds <c>highlightsummary</c>.
/// </summary>
/// <param name="parameters">Crawl proxy holding the content item and its crawled content values.</param>
/// <param name="results">Optional. When not null, <c>WarningCnt</c> is incremented if blob extraction fails.</param>
/// <returns>The populated document; never null.</returns>
private T CreateSearchDoc(ContentCrawlProxy parameters, IndexResults results)
{
    var doc = (T)Activator.CreateInstance(typeof(T), new object[] { });
    doc.id = parameters.ContentItem._ContentID;
    //doc.title = parameters.ContentItem.Name;
    doc.sourceid = _sourceId;

    var docProps = doc.GetType().GetProperties();

    // Load crawl properties: each crawled value is written to the document
    // property with the same name, if one exists.
    if (parameters.Content != null)
    {
        foreach (var crawlPropContent in parameters.Content)
        {
            var docProp = docProps.FirstOrDefault(p => p.Name == crawlPropContent.Name);
            if (docProp != null)
            {
                SetPropertyValue(doc, docProp, crawlPropContent.Value);
            }
        }
    }

    // Collect the base-type chain of the proxy's runtime type (the runtime type itself
    // is not included) and process it from the root type down to the immediate base.
    var pageBaseTypes = new List<Type>();
    for (var baseType = parameters.GetType().BaseType; baseType != null; baseType = baseType.BaseType)
    {
        pageBaseTypes.Add(baseType);
    }

    pageBaseTypes.Reverse();

    foreach (var bType in pageBaseTypes)
    {
        GetBaseProperties(parameters.ContentItem, doc, docProps, bType);
    }

    // Throws InvalidCastException when the content item is not an ISearchableAsset,
    // exactly as the inline casts did.
    var searchableAsset = (ISearchableAsset)parameters.ContentItem;

    if (!searchableAsset.DisableExtract)
    {
        try
        {
            var blob = searchableAsset.AssetBlob;

            // Blobs longer than Threshold are skipped.
            if (blob != null && blob.Length <= Threshold)
            {
                //doc = SearchClient.Extract(doc, blob);
                var responseXml = SearchClient.FileExtract(blob);
                var xmlParser = new XmlParser(responseXml);
                var xhtml = xmlParser.ParseHTML("/response/str");
                var htmlParser = new HtmlParser(WebUtility.HtmlDecode(xhtml));

                //doc.mimetype = xmlParser.ParseString("/response/lst/arr[@name='Content-Type']/str");
                //var pubdate = xmlParser.ParseDate("/response/lst/arr[@name='Creation-Date']/str");
                //if (pubdate != null)
                //    doc.timestamp = pubdate.Value;

                if (doc.content == null)
                {
                    doc.content = new List<string>();
                }

                doc.content.Add(WebUtility.HtmlEncode(htmlParser.ParseStripInnerHtml("//body")));
            }
        }
        catch (Exception ex)
        {
            // Extraction is best effort: a failure is logged as a warning and the
            // document is still returned without the extracted text.
            LogWarning(string.Format("Extraction failed for ID: {0} NAME:{1}. {2}", parameters.ContentItem._ContentID, parameters.ContentItem.Name, ex.Message));
            if (results != null)
            {
                results.WarningCnt++;
            }
        }
    }

    var docContent = doc.content != null ? HtmlParser.StripHTML(string.Join(" ", doc.content)) : "";
    doc.highlightsummary = (HtmlParser.StripHTML(doc.summary) + " " + docContent + " " + doc.title).Trim();

    return doc;
}