/// <summary>
/// Builds the row data for a single file so that one item can be re-indexed from an event.
/// </summary>
/// <param name="filePath">Path of the file to map; it is wrapped in a <see cref="FileInfo"/>.</param>
/// <param name="docIndex">Value handed to the mapper as the node id of the item.</param>
/// <returns>The <c>RowData</c> dictionary of the mapped data set.</returns>
public static Dictionary<string, string> GetDataSet(string filePath, int docIndex)
{
    var emptySet = new SimpleDataSet
    {
        NodeDefinition = new IndexedNode(),
        RowData = new Dictionary<string, string>()
    };

    // The index type is always "documentation" for this entry point.
    var mapped = MapFileToSimpleDataIndexItem(new FileInfo(filePath), emptySet, docIndex, "documentation");
    return mapped.RowData;
}
/// <summary>
/// Populates a data set from a file on disk. The whole file content becomes the body, and the first line
/// starting with '#' (or, when there is none, a name derived from the file path) becomes the node name.
/// </summary>
/// <param name="file">File to read; every line of it is loaded.</param>
/// <param name="simpleDataSet">Data set to fill; its <c>NodeDefinition</c> and <c>RowData</c> must already be instantiated.</param>
/// <param name="index">Value stored as the node id.</param>
/// <param name="indexType">Value stored as the node type.</param>
/// <returns>The same <paramref name="simpleDataSet"/> instance, populated.</returns>
public static SimpleDataSet MapFileToSimpleDataIndexItem(FileInfo file, SimpleDataSet simpleDataSet, int index, string indexType)
{
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var lines = File.ReadAllLines(file.FullName);

    // File.ReadAllLines drops the line terminators, so the lines are joined with a space. Joining with an
    // empty string would fuse the last word of each line with the first word of the next one in the index.
    var body = lines.Length > 0
        ? umbraco.library.StripHtml(RemoveSpecialCharacters(string.Join(" ", lines)))
        : string.Empty;

    // Ordinal comparison: '#' is a marker character, not linguistic text.
    var firstHeadline = lines.FirstOrDefault(x => x.StartsWith("#", StringComparison.Ordinal));

    // Fallback: take the part of the path after the last backslash, drop ".md" and turn dashes into spaces.
    var headLine = firstHeadline
        ?? file.FullName
            .Substring(file.FullName.LastIndexOf("\\", StringComparison.Ordinal) + 1)
            .Replace(".md", string.Empty)
            .Replace("-", " ");

    // Format with the invariant culture so the stored timestamp does not vary with the thread's culture.
    var created = file.CreationTime.ToString("yyyy-MM-dd HH:mm:ss", invariant);

    simpleDataSet.NodeDefinition.NodeId = index;
    simpleDataSet.NodeDefinition.Type = indexType;

    simpleDataSet.RowData.Add("body", body);
    simpleDataSet.RowData.Add("nodeName", RemoveSpecialCharacters(headLine));
    simpleDataSet.RowData.Add("updateDate", created);
    simpleDataSet.RowData.Add("nodeTypeAlias", "documentation");

    //TODO: This will always be exactly the same since all files are written at the same time IIRC
    simpleDataSet.RowData.Add("dateCreated", created);

    simpleDataSet.RowData.Add("Path", file.FullName);
    simpleDataSet.RowData.Add("searchAblePath", file.FullName.Replace("\\", " ").Replace(":", ""));
    simpleDataSet.RowData.Add("url", BuildUrl(file.FullName));

    return simpleDataSet;
}
/// <summary>
/// Populates a data set from a file on disk. The first line of the file becomes the node name and the
/// remaining lines become the body.
/// </summary>
/// <param name="file">File to read; every line of it is loaded.</param>
/// <param name="simpleDataSet">Data set to fill; its <c>NodeDefinition</c> and <c>RowData</c> must already be instantiated.</param>
/// <param name="index">Value stored as the node id.</param>
/// <param name="indexType">Value stored as the node type.</param>
/// <returns>The same <paramref name="simpleDataSet"/> instance, populated.</returns>
public static SimpleDataSet MapFileToSimpleDataIndexItem(FileInfo file, SimpleDataSet simpleDataSet, int index, string indexType)
{
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var lines = File.ReadAllLines(file.FullName);

    var headLine = string.Empty;
    var body = string.Empty;

    if (lines.Length > 0)
    {
        headLine = RemoveSpecialCharacters(lines[0]);

        // File.ReadAllLines drops the line terminators, so everything after the headline is joined with a
        // space. Joining with an empty string would fuse the last word of each line with the first word of
        // the next one in the index.
        var remainder = string.Join(" ", lines, 1, lines.Length - 1);
        body = umbraco.library.StripHtml(RemoveSpecialCharacters(remainder));
    }

    // Format with the invariant culture so the stored timestamp does not vary with the thread's culture.
    var created = file.CreationTime.ToString("yyyy-MM-dd HH:mm:ss", invariant);

    simpleDataSet.NodeDefinition.NodeId = index;
    simpleDataSet.NodeDefinition.Type = indexType;

    simpleDataSet.RowData.Add("body", body);
    simpleDataSet.RowData.Add("nodeName", headLine);
    simpleDataSet.RowData.Add("updateDate", created);
    simpleDataSet.RowData.Add("nodeTypeAlias", "documentation");

    //TODO: This will always be exactly the same since all files are written at the same time IIRC
    simpleDataSet.RowData.Add("dateCreated", created);

    simpleDataSet.RowData.Add("Path", file.FullName);
    simpleDataSet.RowData.Add("searchAblePath", file.FullName.Replace("\\", " ").Replace(":", ""));
    simpleDataSet.RowData.Add("url", BuildUrl(file.FullName));

    return simpleDataSet;
}
/// <summary>
/// Maps every file matched by the file indexer configuration to a data set.
/// </summary>
/// <param name="indexType">Index type passed on to the mapper for each file.</param>
/// <returns>The data sets of all files that could be mapped; files that throw are logged and left out.</returns>
public IEnumerable<SimpleDataSet> GetAllData(string indexType)
{
    var settings = FileIndexerConfig.Settings;
    var root = new DirectoryInfo(HttpContext.Current.Server.MapPath(settings.DirectoryToIndex));

    FileInfo[] candidates;
    if (settings.Recursive)
    {
        candidates = root.GetFiles(settings.SupportedFileTypes, SearchOption.AllDirectories);
    }
    else
    {
        candidates = root.GetFiles(settings.SupportedFileTypes);
    }

    var results = new List<SimpleDataSet>();

    for (var position = 0; position < candidates.Length; position++)
    {
        var file = candidates[position];

        // Ids are 1-based and keep advancing even when a file fails to map.
        var docId = position + 1;

        try
        {
            var emptySet = new SimpleDataSet
            {
                NodeDefinition = new IndexedNode(),
                RowData = new Dictionary<string, string>()
            };

            results.Add(ExamineHelper.MapFileToSimpleDataIndexItem(file, emptySet, docId, indexType));
        }
        catch (Exception ex)
        {
            Log.Add(LogTypes.Error, docId, "error processing file " + file.FullName + " " + ex);
        }
    }

    return results;
}
/// <summary>
/// Populates a data set from a file on disk. The first line of the file becomes the title and the
/// remaining lines become the body.
/// </summary>
/// <param name="file">File to read; every line of it is loaded.</param>
/// <param name="simpleDataSet">Data set to fill; its <c>NodeDefinition</c> and <c>RowData</c> must already be instantiated.</param>
/// <param name="index">Value stored as the node id.</param>
/// <param name="indexType">Value stored as the node type.</param>
/// <returns>The same <paramref name="simpleDataSet"/> instance, populated.</returns>
public static SimpleDataSet MapFileToSimpleDataIndexItem(FileInfo file, SimpleDataSet simpleDataSet, int index, string indexType)
{
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var lines = File.ReadAllLines(file.FullName);

    var headLine = string.Empty;
    var body = string.Empty;

    if (lines.Length > 0)
    {
        headLine = RemoveSpecialCharacters(lines[0]);

        // File.ReadAllLines drops the line terminators, so everything after the headline is joined with a
        // space. Joining with an empty string would fuse the last word of each line with the first word of
        // the next one in the index.
        body = RemoveSpecialCharacters(string.Join(" ", lines, 1, lines.Length - 1));
    }

    simpleDataSet.NodeDefinition.NodeId = index;
    simpleDataSet.NodeDefinition.Type = indexType;

    simpleDataSet.RowData.Add("Body", body);
    simpleDataSet.RowData.Add("Title", headLine);

    // Format with the invariant culture so the stored timestamp does not vary with the thread's culture.
    simpleDataSet.RowData.Add("dateCreated", file.CreationTime.ToString("yyyy-MM-dd-HH:mm:ss", invariant));
    simpleDataSet.RowData.Add("dateCreatedSearchAble", file.CreationTime.SerializeForLucene());

    simpleDataSet.RowData.Add("Path", file.FullName);
    simpleDataSet.RowData.Add("searchAblePath", file.FullName.Replace("\\", " ").Replace(":", ""));
    simpleDataSet.RowData.Add("nodeTypeAlias", "document");
    simpleDataSet.RowData.Add("url", BuildUrl(file.FullName));

    return simpleDataSet;
}
/// <summary>
/// Loads one forum topic by id and maps it to a data set of type "forum".
/// </summary>
/// <param name="id">Id passed to <c>TopicService.QueryById</c>.</param>
/// <returns>The populated data set; its node id is the id reported by the loaded topic.</returns>
public SimpleDataSet CreateNewDocument(int id)
{
    var topic = new TopicService(ApplicationContext.Current.DatabaseContext).QueryById(id);

    var emptySet = new SimpleDataSet
    {
        NodeDefinition = new IndexedNode(),
        RowData = new Dictionary<string, string>()
    };

    return MapTopicToSimpleDataIndexItem(topic, emptySet, topic.Id, "forum");
}
/// <summary>
/// Populates a data set with the indexable fields of a project.
/// </summary>
/// <param name="project">Published content item of the project.</param>
/// <param name="simpleDataSet">Data set to fill; its <c>NodeDefinition</c> and <c>RowData</c> must already be instantiated.</param>
/// <param name="indexType">Value stored as the node type.</param>
/// <param name="karma">Vote total of the project; weighted by 100 in the popularity score.</param>
/// <param name="files">Files of the project, used to find a fallback screenshot; may be null.</param>
/// <param name="downloads">Download total of the project.</param>
/// <param name="compatVersions">Versions the project has been flagged as compatible with; may be null.</param>
/// <returns>The same <paramref name="simpleDataSet"/> instance, populated.</returns>
public SimpleDataSet MapProjectToSimpleDataIndexItem(IPublishedContent project, SimpleDataSet simpleDataSet, string indexType, int karma, IEnumerable<WikiFile> files, int downloads, IEnumerable<string> compatVersions)
{
    // Everything written to the index is formatted with the invariant culture so the stored values
    // do not depend on the culture of the thread doing the indexing.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    simpleDataSet.NodeDefinition.NodeId = project.Id;
    simpleDataSet.NodeDefinition.Type = indexType;

    var description = project.GetPropertyValue<string>("description");
    if (!string.IsNullOrEmpty(description))
    {
        simpleDataSet.RowData.Add("body", umbraco.library.StripHtml(description));
    }

    simpleDataSet.RowData.Add("nodeName", project.Name);
    simpleDataSet.RowData.Add("updateDate", project.UpdateDate.ToString("yyyy-MM-dd HH:mm:ss", invariant));
    simpleDataSet.RowData.Add("createDate", project.CreateDate.ToString("yyyy-MM-dd HH:mm:ss", invariant));
    simpleDataSet.RowData.Add("nodeTypeAlias", "project");
    simpleDataSet.RowData.Add("url", project.Url);
    simpleDataSet.RowData.Add("uniqueId", project.GetPropertyValue<string>("packageGuid"));
    simpleDataSet.RowData.Add("worksOnUaaS", project.GetPropertyValue<string>("worksOnUaaS"));

    // Prefer the explicitly chosen screenshot; otherwise fall back to the first screenshot file.
    var imageFile = string.Empty;
    if (project.HasValue("defaultScreenshotPath"))
    {
        imageFile = project.GetPropertyValue<string>("defaultScreenshotPath");
    }

    if (string.IsNullOrWhiteSpace(imageFile) && files != null)
    {
        var image = files.FirstOrDefault(x => x.FileType == "screenshot");
        if (image != null)
        {
            imageFile = image.Path;
        }
    }

    //Clean up version data before its included in the index
    // A project without the property (or without a value for it) simply gets an empty version list
    // instead of failing with a NullReferenceException.
    var versionProperty = project.GetProperty("compatibleVersions");
    var rawVersions = versionProperty != null && versionProperty.Value != null
        ? versionProperty.Value.ToString()
        : string.Empty;

    int parsed;
    var versions = rawVersions.ToLowerInvariant()
        .Replace("nan", "")
        .Replace("saved", "")
        .Replace("v", "")
        .Trim(',')
        .Split(',')
        .Where(x => int.TryParse(x, System.Globalization.NumberStyles.Integer, invariant, out parsed))
        // Format each decimal explicitly: with a comma-decimal culture the default ToString() would
        // produce "7,2" and corrupt the comma separated list written below.
        .Select(x => (decimal.Parse(x.PadRight(3, '0'), invariant) / 100).ToString(invariant));

    //popularity for sorting number = downloads + karma * 100;
    var pop = downloads + (karma * 100);

    simpleDataSet.RowData.Add("popularity", pop.ToString(invariant));
    simpleDataSet.RowData.Add("karma", karma.ToString(invariant));
    simpleDataSet.RowData.Add("downloads", downloads.ToString(invariant));
    simpleDataSet.RowData.Add("image", imageFile);

    //now we need to add the versions and compat versions
    // first, this is the versions that the project has files tagged against
    simpleDataSet.RowData.Add("versions", string.Join(",", versions));

    //then we index the versions that the project has actually been flagged as compatible against
    simpleDataSet.RowData.Add("compatVersions", string.Join(",", compatVersions ?? Enumerable.Empty<string>()));

    return simpleDataSet;
}
/// <summary>
/// Lazily maps every topic returned by the topic service to a data set of type "forum".
/// </summary>
/// <param name="indexType">Not used by this implementation; the type is always "forum".</param>
/// <returns>One populated data set per topic, produced as the sequence is enumerated.</returns>
public IEnumerable<SimpleDataSet> GetAllData(string indexType)
{
    var topicService = new TopicService(ApplicationContext.Current.DatabaseContext);
    var allTopics = topicService.QueryAll(maxCount: int.MaxValue);

    foreach (var forumTopic in allTopics)
    {
        // Each topic gets its own fresh, empty data set to be filled by the mapper.
        var emptySet = new SimpleDataSet
        {
            NodeDefinition = new IndexedNode(),
            RowData = new Dictionary<string, string>()
        };

        yield return MapTopicToSimpleDataIndexItem(forumTopic, emptySet, forumTopic.Id, "forum");
    }
}
/// <summary>
/// Populates a data set with the indexable fields of a forum topic, including the text of its
/// non-spam comments.
/// </summary>
/// <param name="topic">Topic to map; its <c>Comments</c> collection is enumerated.</param>
/// <param name="simpleDataSet">Data set to fill; its <c>NodeDefinition</c> and <c>RowData</c> must already be instantiated.</param>
/// <param name="id">Value stored as the node id.</param>
/// <param name="indexType">Value stored as the node type.</param>
/// <returns>The same <paramref name="simpleDataSet"/> instance, populated.</returns>
public static SimpleDataSet MapTopicToSimpleDataIndexItem(ReadOnlyTopic topic, SimpleDataSet simpleDataSet, int id, string indexType)
{
    // Timestamps are formatted with the invariant culture so they do not vary with the thread's culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    const string timestampFormat = "yyyy-MM-dd HH:mm:ss";

    //First generate the accumulated comment text:
    // The bodies are joined in a single pass (instead of repeated string concatenation) and separated by
    // a space, so the last word of one comment is not fused with the first word of the next one.
    var commentBodies = topic.Comments
        .Where(c => c.IsSpam == false)
        .Select(c => c.Body)
        .Where(b => !string.IsNullOrEmpty(b));
    var commentText = string.Join(" ", commentBodies);

    // Likewise keep the topic body and the comment text apart when there is comment text at all.
    var combined = topic.Body + (commentText.Length > 0 ? " " + commentText : string.Empty);
    var body = library.StripHtml(combined);

    simpleDataSet.NodeDefinition.NodeId = id;
    simpleDataSet.NodeDefinition.Type = indexType;

    simpleDataSet.RowData.Add("body", body);
    if (!string.IsNullOrEmpty(commentText))
    {
        simpleDataSet.RowData.Add("comments", commentText);
    }

    simpleDataSet.RowData.Add("nodeName", topic.Title);
    simpleDataSet.RowData.Add("updateDate", topic.Updated.ToString(timestampFormat, invariant));
    simpleDataSet.RowData.Add("nodeTypeAlias", "forum");
    simpleDataSet.RowData.Add("urlName", topic.UrlName);
    simpleDataSet.RowData.Add("createDate", topic.Created.ToString(timestampFormat, invariant));

    simpleDataSet.RowData.Add("latestCommentId", topic.LatestComment.ToString());
    simpleDataSet.RowData.Add("latestReplyAuthorId", topic.LatestReplyAuthor.ToString());
    if (!string.IsNullOrEmpty(topic.LastReplyAuthorName))
    {
        simpleDataSet.RowData.Add("latestReplyAuthorName", topic.LastReplyAuthorName);
    }

    simpleDataSet.RowData.Add("authorId", topic.MemberId.ToString());
    if (!string.IsNullOrEmpty(topic.AuthorName))
    {
        simpleDataSet.RowData.Add("authorName", topic.AuthorName);
    }

    simpleDataSet.RowData.Add("parentId", topic.ParentId.ToString());
    simpleDataSet.RowData.Add("replies", topic.Replies.ToString());
    simpleDataSet.RowData.Add("locked", topic.Locked.ToString());
    simpleDataSet.RowData.Add("solved", topic.Answer.ToString());
    simpleDataSet.RowData.Add("version", topic.Version.ToString());

    return simpleDataSet;
}
/// <summary>
/// Re-indexes a single project in the "projectIndexer" Examine index.
/// </summary>
/// <param name="content">Published content item of the project to re-index.</param>
/// <param name="downloads">Download total handed to the project mapper.</param>
private void UpdateProjectExamineIndex(IPublishedContent content, int downloads)
{
    var projectId = content.Id;

    var emptySet = new SimpleDataSet
    {
        NodeDefinition = new IndexedNode(),
        RowData = new Dictionary<string, string>()
    };

    // Gather the per-project figures the mapper needs.
    var votes = Utils.GetProjectTotalVotes(projectId);
    var wikiFiles = WikiFile.CurrentFiles(projectId);
    var compatible = Utils.GetProjectCompatibleVersions(projectId);

    // The project mapper lives on the data service of the configured indexer.
    var indexer = (SimpleDataIndexer)ExamineManager.Instance.IndexProviderCollection["projectIndexer"];
    var dataService = (ProjectNodeIndexDataService)indexer.DataService;

    var mapped = dataService.MapProjectToSimpleDataIndexItem(
        content, emptySet, "project", votes, wikiFiles, downloads, compatible);

    var xml = mapped.RowData.ToExamineXml(mapped.NodeDefinition.NodeId, mapped.NodeDefinition.Type);
    indexer.ReIndexNode(xml, "project");
}
/// <summary>
/// Lazily maps every file matched by the documentation index configuration to a data set.
/// </summary>
/// <param name="indexType">Index type passed on to the mapper for each file.</param>
/// <returns>One populated data set per file, with node ids counting up from 1 in enumeration order.</returns>
public IEnumerable<SimpleDataSet> GetAllData(string indexType)
{
    //Before getting all data, we need to make sure that the docs are available from GitHub
    ZipDownloader.EnsureGitHubDocs();

    var settings = DocumentationIndexConfig.Settings;
    var root = new DirectoryInfo(HttpContext.Current.Server.MapPath(settings.DirectoryToIndex));

    // GetFiles(pattern) only searches the top directory, so one call with an explicit option covers both cases.
    var scope = settings.Recursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;
    var files = root.GetFiles(settings.SupportedFileTypes, scope);

    for (var position = 0; position < files.Length; position++)
    {
        var emptySet = new SimpleDataSet
        {
            NodeDefinition = new IndexedNode(),
            RowData = new Dictionary<string, string>()
        };

        // position + 1 is the unique id for each doc
        yield return ExamineHelper.MapFileToSimpleDataIndexItem(files[position], emptySet, position + 1, indexType);
    }
}
/// <summary>
/// Builds one "Bookshelf" data set for every allowed file found under each book directory.
/// </summary>
/// <param name="indexType">Not used by this implementation; the node type is always "Bookshelf".</param>
/// <returns>All data sets, with node ids counting up from 1 in the order the files are visited.</returns>
public IEnumerable<SimpleDataSet> GetAllData(string indexType)
{
    var results = new List<SimpleDataSet>();
    var nextId = 1;

    LogHelper.Info<BookshelfExamineDataService>("Building index...");

    foreach (var bookPath in _getBooksAsDirectories())
    {
        foreach (var file in bookPath.GetFilesRecursively(Constants.ALLOWED_FILE_EXTENSIONS))
        {
            var entry = new SimpleDataSet();
            entry.NodeDefinition = new IndexedNode() { Type = "Bookshelf", NodeId = nextId };

            var rows = new Dictionary<string, string>();
            rows.Add("book", Path.GetFileName(bookPath));
            rows.Add("path", bookPath.ToWebPath());
            rows.Add("title", Path.GetFileNameWithoutExtension(file));
            rows.Add("text", File.ReadAllText(file));

            //total hack job here b/c of some sort of double encoding somewhere
            // NOTE(review): "%20F" looks like it may have been meant to be "%20" - confirm before changing.
            var encodedFile = file.ToWebPath().Replace("%2F", "%252F").Replace("%20F", "%2520F");
            rows.Add("url", "/umbraco/#/UmbracoBookshelf/UmbracoBookshelfTree/file/" + encodedFile);

            entry.RowData = rows;
            results.Add(entry);
            nextId++;
        }
    }

    return results;
}
/// <summary>
/// Populates a data set with the indexable fields of a project: descriptive fields, screenshot,
/// popularity figures, publication flags and the cleaned-up version lists.
/// </summary>
/// <param name="project">Published content item of the project.</param>
/// <param name="simpleDataSet">Data set to fill; its <c>NodeDefinition</c> and <c>RowData</c> must already be instantiated.</param>
/// <param name="indexType">Value stored as the node type.</param>
/// <param name="projectVotes">Vote total of the project; weighted by 100 in the popularity score.</param>
/// <param name="files">Files of the project; used for the current file, the fallback screenshot and the package count.</param>
/// <param name="downloads">Download total of the project.</param>
/// <param name="compatVersions">Versions the project has been flagged as compatible with.</param>
/// <returns>The same <paramref name="simpleDataSet"/> instance, populated.</returns>
public SimpleDataSet MapProjectToSimpleDataIndexItem(IPublishedContent project, SimpleDataSet simpleDataSet, string indexType, int projectVotes, WikiFile[] files, int downloads, IEnumerable<string> compatVersions)
{
    var live = project.GetPropertyValue<bool>("projectLive");
    var approved = project.GetPropertyValue<bool>("approved");

    // The strict minimum version comes from the file the project points at, when there is one.
    var strictMinimum = string.Empty;
    var activeFileId = project.GetPropertyValue<int>("file");
    if (activeFileId > 0)
    {
        var activeFile = files.FirstOrDefault(f => f.Id == activeFileId);
        if (activeFile != null)
        {
            strictMinimum = activeFile.MinimumVersionStrict;
        }
    }

    simpleDataSet.NodeDefinition.NodeId = project.Id;
    simpleDataSet.NodeDefinition.Type = indexType;

    var rows = simpleDataSet.RowData;
    rows.Add("body", project.GetPropertyValue<string>("description"));
    rows.Add("nodeName", project.Name);
    rows.Add("categoryFolder", project.Parent.Name.ToLowerInvariant().Trim());
    rows.Add("updateDate", project.UpdateDate.ToString("yyyy-MM-dd HH:mm:ss"));
    rows.Add("createDate", project.CreateDate.ToString("yyyy-MM-dd HH:mm:ss"));
    rows.Add("nodeTypeAlias", "project");
    rows.Add("url", project.Url);
    rows.Add("uniqueId", project.GetPropertyValue<string>("packageGuid"));
    rows.Add("worksOnUaaS", project.GetPropertyValue<string>("worksOnUaaS"));
    rows.Add("minimumVersionStrict", strictMinimum);

    // Prefer the explicitly chosen screenshot; otherwise fall back to the first screenshot file.
    var screenshot = string.Empty;
    if (project.HasValue("defaultScreenshotPath"))
    {
        screenshot = project.GetPropertyValue<string>("defaultScreenshotPath");
    }

    if (string.IsNullOrWhiteSpace(screenshot))
    {
        var firstScreenshot = files.FirstOrDefault(f => f.FileType == "screenshot");
        if (firstScreenshot != null)
        {
            screenshot = firstScreenshot.Path;
        }
    }

    //Clean up version data before its included in the index, the reason we have to do this
    // is due to the way the version data is stored, you can see it in uVersion.config - it's super strange
    // because of the 3 digit nature but when it doesn't end with a '0' it's actually just the major/minor version
    // so we have to do all of this parsing.
    var storedVersions = project.GetPropertyValue<string>("compatibleVersions") ?? string.Empty;
    var cleanedVersions = storedVersions.ToLower()
        .Replace("nan", "")
        .Replace("saved", "")
        .Replace("v", "")
        .Trim(',')
        .Split(',')
        //it's stored as an int like 721 (for version 7.2.1)
        .Where(raw => raw.Length <= 3 && raw.Length > 0)
        //pad it out to 3 digits
        .Select(raw => raw.PadRight(3, '0'))
        .Select(padded =>
        {
            int ignored;
            if (!int.TryParse(padded, out ignored))
            {
                return null;
            }

            //if it ends with '0', that means it's a X.X.X version
            // if it does not end with '0', that means that the last 2 digits are the
            // Minor part of the version
            var minorPart = padded.EndsWith("0") ? padded[1].ToString() : padded.Substring(1);
            return string.Format("{0}.{1}.{2}", padded[0], minorPart, 0);
        })
        .Where(formatted => formatted != null);

    var cleanedCompatVersions = compatVersions.Select(raw => raw
        .Replace("nan", "")
        .Replace("saved", "")
        .Replace("nan", "")
        .Replace("v", "")
        .Replace(".x", "")
        .Trim(','));

    //popularity for sorting number = downloads + karma * 100;
    //TODO: Change score so that we take into account:
    // - recently updated
    // - works on latest umbraco versions
    // - works on uaas
    // - has a forum
    // - has source code link
    // - open for collab / has collaborators
    // - download count in a recent timeframe - since old downloads should count for less
    var popularity = downloads + (projectVotes * 100);

    rows.Add("popularity", popularity.ToString());
    rows.Add("karma", projectVotes.ToString());
    rows.Add("downloads", downloads.ToString());
    rows.Add("image", screenshot);

    var packageCount = files.Count(f => f.FileType == "package");
    rows.Add("packageFiles", packageCount.ToString());

    rows.Add("projectLive", live ? "1" : "0");
    rows.Add("approved", approved ? "1" : "0");

    //now we need to add the versions and compat versions
    // first, this is the versions that the project has files tagged against
    rows.Add("versions", string.Join(",", cleanedVersions));

    //then we index the versions that the project has actually been flagged as compatible against
    rows.Add("compatVersions", string.Join(",", cleanedCompatVersions));

    return simpleDataSet;
}
/// <summary>
/// Lazily maps every project matched by the live-projects XPath query to a data set.
/// </summary>
/// <param name="indexType">Index type passed on to the project mapper.</param>
/// <returns>One populated data set per project, produced as the sequence is enumerated.</returns>
public IEnumerable<SimpleDataSet> GetAllData(string indexType)
{
    var umbracoContext = EnsureUmbracoContext();
    var liveProjects = umbracoContext.ContentCache
        .GetByXPath("//Community/Projects//Project [projectLive='1']")
        .ToArray();
    var projectIds = liveProjects.Select(p => p.Id).ToArray();

    // Load the per-project figures for all projects up front; they are looked up by project id below.
    var karmaById = Utils.GetProjectTotalVotes();
    var filesById = WikiFile.CurrentFiles(projectIds);
    var downloadsById = Utils.GetProjectTotalDownload();
    var compatById = Utils.GetProjectCompatibleVersions();

    foreach (var project in liveProjects)
    {
        LogHelper.Debug(this.GetType(), "Indexing " + project.Name);

        var emptySet = new SimpleDataSet
        {
            NodeDefinition = new IndexedNode(),
            RowData = new Dictionary<string, string>()
        };

        // Projects missing from a lookup fall back to zero / empty values.
        var id = project.Id;
        var downloads = downloadsById.ContainsKey(id) ? downloadsById[id] : 0;
        var karma = karmaById.ContainsKey(id) ? karmaById[id] : 0;
        var wikiFiles = filesById.ContainsKey(id) ? filesById[id].ToArray() : new WikiFile[0];
        var compatible = compatById.ContainsKey(id) ? compatById[id] : Enumerable.Empty<string>();

        yield return MapProjectToSimpleDataIndexItem(project, emptySet, indexType, karma, wikiFiles, downloads, compatible);
    }
}