public T Update(ContentCrawlProxy parameters)
{
    // Builds the search document for the given crawl proxy, posts it to the
    // search client and commits. Returns the document that was posted.
    //
    // The two-argument CreateSearchDoc may itself call the search client
    // (file extraction), so the whole sequence is wrapped: the client is closed
    // even when building, posting or committing throws. Exceptions still
    // propagate to the caller unchanged.
    try
    {
        var doc = CreateSearchDoc(parameters, null);
        SearchClient.Post(doc);
        SearchClient.Commit();
        return doc;
    }
    finally
    {
        SearchClient.Close();
    }
}
/// <summary>
/// Indexes a single asset: wraps it in a <c>ContentCrawlProxy</c>, builds the search
/// document, posts it to the search client and commits.
/// </summary>
/// <param name="asset">The asset to index; it becomes the proxy's <c>ContentItem</c>.</param>
/// <returns>The document that was posted.</returns>
public T Update(ISearchableAsset asset)
{
    var parameters = new ContentCrawlProxy() { ContentItem = asset };

    // The two-argument CreateSearchDoc may call the search client for file
    // extraction, so it sits inside the try: the client is closed even when
    // building, posting or committing throws. Exceptions still propagate.
    try
    {
        var doc = CreateSearchDoc(parameters, null);
        SearchClient.Post(doc);
        SearchClient.Commit();
        return doc;
    }
    finally
    {
        SearchClient.Close();
    }
}
/// <summary>
/// Synchronizes one crawled item with the index: deletes its document when the content
/// item is flagged <c>NotSearchable</c>, otherwise posts the freshly built document, then
/// commits.
/// </summary>
/// <param name="contentItem">
/// Crawl proxy to index. When null, nothing is sent to the search client. A proxy whose
/// <c>ContentItem</c> is null is posted (there is no flag to request a delete).
/// </param>
/// <returns>
/// The document built for the item, or <c>default(T)</c> when <paramref name="contentItem"/> is null.
/// </returns>
public T Update(ContentCrawlProxy contentItem)
{
    var doc = CreateSearchDoc(contentItem);
    if (doc == null)
    {
        // CreateSearchDoc yields default(T) for a null proxy; there is nothing to index.
        return default(T);
    }

    // CreateSearchDoc accepts a proxy without a ContentItem, so do the same here
    // instead of dereferencing it unconditionally.
    var item = contentItem.ContentItem;
    var removeFromIndex = item != null && item.NotSearchable;

    try
    {
        if (removeFromIndex)
        {
            SearchClient.DeleteById(doc.id);
        }
        else
        {
            SearchClient.Post(doc);
        }

        SearchClient.Commit();
    }
    finally
    {
        // Close the client even when the delete/post/commit above throws.
        SearchClient.Close();
    }

    return doc;
}
/// <summary>
/// Builds a search document of type T from a crawl proxy.
/// </summary>
/// <param name="contentItem">
/// Proxy supplying the content item and/or the crawled name/value entries. May be null.
/// </param>
/// <returns>
/// The populated document, or <c>default(T)</c> when <paramref name="contentItem"/> is null.
/// </returns>
private T CreateSearchDoc(ContentCrawlProxy contentItem)
{
    if (contentItem == null)
    {
        return default(T);
    }

    var searchDoc = (T)Activator.CreateInstance(typeof(T), new object[] { });
    searchDoc.sourceid = _sourceId;
    var properties = searchDoc.GetType().GetProperties();

    // Step 1: take the id from the content item and hand each of its base types
    // (as reported by ReflectionUtil.GetBaseTypes) to GetBaseProperties.
    var item = contentItem.ContentItem;
    if (item != null)
    {
        searchDoc.id = item._ContentID;
        foreach (var baseType in ReflectionUtil.GetBaseTypes(item))
        {
            GetBaseProperties(item, searchDoc, properties, baseType);
        }
    }

    // Step 2: apply crawled entries whose name matches a document property.
    // These run after step 1, so they are the last values written.
    var crawled = contentItem.Content;
    if (crawled != null && crawled.Any())
    {
        foreach (var entry in crawled)
        {
            var target = properties.FirstOrDefault(candidate => candidate.Name == entry.Name);
            if (target == null)
            {
                continue;
            }

            SetPropertyValue(searchDoc, target, entry.Value);
        }
    }

    // Step 3: highlight summary = stripped summary + stripped content + title, trimmed.
    var bodyText = searchDoc.content == null
        ? ""
        : HtmlParser.StripHTML(string.Join(" ", searchDoc.content));
    var summaryText = HtmlParser.StripHTML(searchDoc.summary);
    searchDoc.highlightsummary = (summaryText + " " + bodyText + " " + searchDoc.title).Trim();

    return searchDoc;
}
/// <summary>
/// Builds a search document of type T for an asset, optionally adding text extracted
/// from the asset's blob through the search client.
/// </summary>
/// <param name="parameters">
/// Proxy whose <c>ContentItem</c> supplies the id and is cast to <c>ISearchableAsset</c>,
/// and whose <c>Content</c> entries are copied onto matching document properties.
/// </param>
/// <param name="results">
/// Optional counters; <c>WarningCnt</c> is incremented when extraction fails. May be null.
/// </param>
/// <returns>The populated document.</returns>
private T CreateSearchDoc(ContentCrawlProxy parameters, IndexResults results)
{
    var searchDoc = (T)Activator.CreateInstance(typeof(T), new object[] { });
    searchDoc.id = parameters.ContentItem._ContentID;
    searchDoc.sourceid = _sourceId;
    var properties = searchDoc.GetType().GetProperties();

    // Crawled entries: set every document property whose name matches an entry.
    var crawled = parameters.Content;
    if (crawled != null && crawled.Any())
    {
        foreach (var entry in crawled)
        {
            var target = properties.FirstOrDefault(candidate => candidate.Name == entry.Name);
            if (target == null)
            {
                continue;
            }

            SetPropertyValue(searchDoc, target, entry.Value);
        }
    }

    // Collect the base-type chain, root type first, and hand each type to GetBaseProperties.
    // NOTE(review): the chain starts from the proxy (parameters.GetType()), while the values
    // are read from parameters.ContentItem; the single-argument overload derives the types
    // from the content item instead. Possibly a refactoring leftover - confirm before changing.
    var hierarchy = new List<Type>();
    for (var current = parameters.GetType().BaseType; current != null; current = current.BaseType)
    {
        hierarchy.Insert(0, current);
    }

    foreach (var ancestor in hierarchy)
    {
        GetBaseProperties(parameters.ContentItem, searchDoc, properties, ancestor);
    }

    // Text extraction is best-effort: a failure is logged and counted, never rethrown.
    // The cast itself sits outside the try, so a non-asset content item still throws.
    var asset = (ISearchableAsset)parameters.ContentItem;
    if (!asset.DisableExtract)
    {
        try
        {
            var blob = asset.AssetBlob;
            var withinLimit = blob != null && blob.Length <= Threshold;
            if (withinLimit)
            {
                // Mime-type and creation-date parsing of the response used to live here
                // as commented-out code; only the body text is taken from the response.
                var responseXml = SearchClient.FileExtract(blob);
                var xhtml = new XmlParser(responseXml).ParseHTML("/response/str");
                var bodyParser = new HtmlParser(WebUtility.HtmlDecode(xhtml));

                if (searchDoc.content == null)
                {
                    searchDoc.content = new List<string>();
                }

                var extracted = bodyParser.ParseStripInnerHtml("//body");
                searchDoc.content.Add(WebUtility.HtmlEncode(extracted));
            }
        }
        catch (Exception ex)
        {
            LogWarning(string.Format("Extraction failed for ID: {0} NAME:{1}. {2}", parameters.ContentItem._ContentID, parameters.ContentItem.Name, ex.Message));
            if (results != null)
            {
                results.WarningCnt++;
            }
        }
    }

    // Highlight summary = stripped summary + stripped content + title, trimmed.
    var bodyText = searchDoc.content == null
        ? ""
        : HtmlParser.StripHTML(string.Join(" ", searchDoc.content));
    var summaryText = HtmlParser.StripHTML(searchDoc.summary);
    searchDoc.highlightsummary = (summaryText + " " + bodyText + " " + searchDoc.title).Trim();

    return searchDoc;
}
/// <summary>
/// Serializes the crawled name/value entries of a proxy, preceded by the given id and this
/// instance's source id, into a JSON object.
/// </summary>
/// <param name="id">Value emitted for the <c>id</c> field.</param>
/// <param name="crawlProperties">
/// Proxy whose <c>Content</c> entries become JSON fields. The list is read but not modified;
/// a null list is treated as empty.
/// </param>
/// <returns>
/// The JSON text, or null when <paramref name="crawlProperties"/> is null.
/// </returns>
public string CreateSearchJsonDoc(string id, ContentCrawlProxy crawlProperties)
{
    if (crawlProperties == null)
    {
        return null;
    }

    // Work on a private copy. Inserting the id/sourceid entries into the caller's list
    // would make a second call with the same proxy emit them as multi-valued fields.
    var entries = new List<CrawlerContent>
    {
        new CrawlerContent() { Value = id, Name = "id", },
        new CrawlerContent() { Value = _sourceId, Name = "sourceid", },
    };
    if (crawlProperties.Content != null)
    {
        entries.AddRange(crawlProperties.Content);
    }

    // Group by field name. Names that occur more than once come first (stable ordering
    // by descending count), which fixes the order in which fields are added below.
    var groups = entries
        .GroupBy(entry => entry.Name)
        .OrderByDescending(group => group.Count());

    var fields = new Dictionary<string, object>();
    foreach (var group in groups)
    {
        if (group.Count() > 1)
        {
            // Repeated name: emit the list of its non-null values (possibly empty).
            var values = group
                .Where(entry => entry.Value != null)
                .Select(entry => entry.Value)
                .ToList();
            fields.Add(group.Key, values);
            continue;
        }

        // Single occurrence: skip null values and empty lists.
        var value = group.First().Value;
        if (value == null)
        {
            continue;
        }

        var asList = value as IList;
        if (asList != null && asList.Count == 0)
        {
            continue;
        }

        fields.Add(group.Key, value);
    }

    // NOTE(review): the format appends a literal 'Z' while DateTimeZoneHandling.Local is
    // selected, so values look like UTC but are presumably local time - confirm this is
    // what the index expects before changing it.
    var settings = new JsonSerializerSettings
    {
        DateFormatString = "yyyy-MM-ddTHH:mm:ssZ",
        DateTimeZoneHandling = DateTimeZoneHandling.Local
    };

    return JsonConvert.SerializeObject(fields, settings);
}