/// <summary>
/// Builds one HistoryInfos entry per rule, recording who created the rule and who last
/// modified it, based on the oldest and newest versions that actually contain content.
/// </summary>
/// <param name="data">data set whose rules are examined</param>
/// <returns>history entries for every rule that has at least one non-empty version</returns>
private List<HistoryInfos> WriteRulesJsonFileHistory(SpRulesDataSet data)
{
    _log.LogInformation("Writing History Json File");
    List<HistoryInfos> infos = new List<HistoryInfos>();
    foreach (var rule in data.Rules)
    {
        var rulePaths = new RulePaths(_config, rule);

        // Only versions that actually carry content are meaningful for history.
        var contentVersions = rule.Versions
            .Where(v => !string.IsNullOrWhiteSpace(v.IntroText) || !string.IsNullOrWhiteSpace(v.Content))
            .ToList();

        // FIX: previously FirstOrDefault() could return null when every version was
        // filtered out, and the null was dereferenced (NullReferenceException).
        // Skip such rules instead, mirroring the empty-versions guard used for categories.
        if (contentVersions.Count == 0)
        {
            continue;
        }

        var creationData = contentVersions.OrderBy(v => v.ModifiedUtc).First();
        var lastModifiedData = contentVersions.OrderByDescending(v => v.ModifiedUtc).First();

        var info = new HistoryInfos
        {
            // normalize windows path separators for the json output
            file = rulePaths.RuleFileRelative.Replace("\\", "/"),
            created = creationData.ModifiedUtc,
            createdBy = creationData.ModifiedByDisplayName,
            createdByEmail = creationData.ModifiedByEmail,
            lastUpdated = lastModifiedData.ModifiedUtc,
            lastUpdatedBy = lastModifiedData.ModifiedByDisplayName,
            lastUpdatedByEmail = lastModifiedData.ModifiedByEmail
        };
        infos.Add(info);
    }
    return infos;
}
/// <summary>
/// Rewrites image urls for every rule and category in the data set. For categories the
/// urls are first gathered from the explicit ImageUrls set plus any images embedded in
/// the intro and body HTML, then rewritten in a single pass.
/// </summary>
/// <param name="data">data set whose image references are rewritten</param>
private void ProcessImageUrls(SpRulesDataSet data)
{
    foreach (var rule in data.Rules)
    {
        UpdateImageUrlsInRule(rule);
    }

    foreach (var category in data.Categories)
    {
        var collected = new HashSet<string>();

        if (category.ImageUrls.Count > 0)
        {
            collected.UnionWith(category.ImageUrls);
        }

        var fromIntro = HtmlHelper.GetImageUrls(category.IntroText);
        if (fromIntro.Count > 0)
        {
            collected.UnionWith(fromIntro);
        }

        var fromContent = HtmlHelper.GetImageUrls(category.Content);
        if (fromContent.Count > 0)
        {
            collected.UnionWith(fromContent);
        }

        UpdateImageUrlsInCategory(category, collected);
    }
}
/// <summary>
/// Builds one HistoryInfos entry per category with at least one version, recording the
/// creator (oldest version) and last modifier (newest version).
/// </summary>
/// <param name="data">data set whose categories are examined</param>
/// <returns>history entries for every category that has version history</returns>
private List<HistoryInfos> WriteCategoriesJsonFileHistory(SpRulesDataSet data)
{
    _log.LogInformation("Writing History Json File");
    List<HistoryInfos> infos = new List<HistoryInfos>();
    foreach (var cat in data.Categories)
    {
        if (cat.Versions.Count == 0)
        {
            continue;
        }
        var catPath = new CategoryPaths(_config, cat);
        var creationData = cat.Versions
                           .OrderBy(v => v.ModifiedUtc).FirstOrDefault();
        var lastModifiedData = cat.Versions
                               .OrderByDescending(v => v.ModifiedUtc).FirstOrDefault();
        var info = new HistoryInfos
        {
            // normalize windows path separators for the json output
            file = catPath.CategoryFileRelative.Replace("\\", "/"),
            created = creationData.ModifiedUtc,
            createdBy = creationData.ModifiedByDisplayName,
            createdByEmail = creationData.ModifiedByEmail,
            lastUpdated = lastModifiedData.ModifiedUtc,
            // FIX: lastUpdatedBy previously used ModifiedByName and lastUpdatedByEmail was
            // assigned the display name instead of the email. Mapped consistently with
            // WriteRulesJsonFileHistory: display name -> *By, email -> *ByEmail.
            lastUpdatedBy = lastModifiedData.ModifiedByDisplayName,
            lastUpdatedByEmail = lastModifiedData.ModifiedByEmail
        };
        infos.Add(info);
    }
    return infos;
}
/// <summary>
/// Imports the full rules data set from SharePoint: categories, pages, then screen-scraped
/// ordering information and navigation-term redirect urls.
/// </summary>
/// <returns>the populated data set</returns>
public SpRulesDataSet Import()
{
    try
    {
        var dataSet = new SpRulesDataSet();
        using (var clientContext = CreateClientContext(_appSettings))
        {
            _termColl = initTermCollection(clientContext);
            LoadCategories(dataSet, clientContext);
            LoadPages(dataSet, clientContext);
            CullEmptyCategories(dataSet);
            // synchronous entry point, so the async scrapers are blocked on here
            ScrapeHomePage(dataSet).GetAwaiter().GetResult();
            ScrapeCategoryPages(dataSet).GetAwaiter().GetResult();
            // second pass picks up archived rules as well
            ScrapeCategoryPages(dataSet, true).GetAwaiter().GetResult();
            LoadUrlTerms(dataSet, clientContext);
        }
        return dataSet;
    }
    catch (Exception ex)
    {
        _log.LogError(ex, "error importing from sharepoint");
        throw;
    }
}
/// <summary>
/// Writes the top-level categories index.md (yaml front matter only) and, when history
/// processing is on, commits everything outstanding in the target repository.
/// </summary>
/// <param name="data">data set used to build the index model</param>
private void WriteIndexPage(SpRulesDataSet data)
{
    var relativePath = Path.Combine(_config.CategoriesFolder, "index.md");
    var absolutePath = Path.Combine(_config.TargetRepository, relativePath);

    var markdown = $@"---
{YamlSerializer.Serialize(new IndexMdModel(data))}
---
";

    _log.LogInformation("writing index");
    using (var writer = new StreamWriter(absolutePath, false))
    {
        writer.Write(markdown);
        writer.Flush();
    }

    if (_config.ProcessHistory)
    {
        // commit everything ("*") rather than just the index file
        GitCommit(
            _config.TargetRepository,
            $"Extracted from SharePoint",
            new LibGit2Sharp.Signature("SSW.Rules.SharePointExtractor", "*****@*****.**", DateTime.UtcNow),
            new LibGit2Sharp.Signature("SSW.Rules.SharePointExtractor", "*****@*****.**", DateTime.UtcNow),
            "*");
    }
}
/// <summary>
/// Walks every navigation term and records redirect urls on the rule pages that match the
/// term by title, by slugified name, or by the term's target .aspx file name.
/// </summary>
/// <param name="dataSet">data set whose rules receive redirects</param>
/// <param name="ctx">client context (unused here; terms come from _termColl)</param>
private void LoadUrlTerms(SpRulesDataSet dataSet, ClientContext ctx)
{
    var targetUrl = ""; //This is the URL to the .aspx page
    var friendlyUrl = "";
    foreach (var term in _termColl)
    {
        term.LocalCustomProperties.TryGetValue("_Sys_Nav_FriendlyUrlSegment", out friendlyUrl);
        term.LocalCustomProperties.TryGetValue("_Sys_Nav_TargetUrl", out targetUrl);

        if (!String.IsNullOrEmpty(targetUrl))
        {
            // reduce the target url to the bare page file name
            targetUrl = targetUrl.Replace("~sitecollection/Pages/", "");
        }

        var matchingRules = dataSet.Rules.Where(r =>
            r.Title.Equals(term.Name) ||
            r.Name.Equals(term.Name.ToLower().Replace(' ', '-')) ||
            r.FileName.Equals(targetUrl));

        foreach (RulePage rulePage in matchingRules)
        {
            if (!String.IsNullOrEmpty(friendlyUrl))
            {
                // a friendly url is available for this term -> record it as a redirect
                rulePage.Redirects.Add(friendlyUrl);
            }
            if (!String.IsNullOrEmpty(term.Name) && (!rulePage.Name.ToSharePointUri().Equals(term.Name.ToSharePointUri())))
            {
                // no guarantee of a friendly url; derive a redirect from the term name itself
                rulePage.Redirects.Add(term.Name.ToSharePointUri());
            }
        }
    }
}
/// <summary>
/// Populates a category from a rule-summary list item: content, intro, guid, image urls
/// and embedded web parts. Relative FIXUPREDIRECT links in the content are rewritten.
/// </summary>
/// <param name="dataSet">data set whose categories are matched by title</param>
/// <param name="page">the summary page list item</param>
/// <param name="ctx">client context used for web-part extraction</param>
/// <returns>the matched category, or null when no category matches the page title</returns>
private Category LoadRuleSummaryPage(SpRulesDataSet dataSet, ListItem page, ClientContext ctx)
{
    var cat = dataSet.CategoryByTitle(page["Title"]?.ToString());
    if (cat == null)
    {
        _log.LogWarning("failed to match rule summary item {SummaryPageTitle} to a category from the term store", page["Title"]);
        return null;
    }

    cat.Content = page["PublishingPageContent"]?.ToString();
    cat.IntroText = page["RuleSummaryIntro"]?.ToString();
    cat.PageGuid = page["GUID"]?.ToString();
    cat.ImageUrls.UnionWith(GetImageUrls(cat.Content));

    ExtractWebParts(cat, page, ctx);

    if (cat.Content != null)
    {
        // rewrite sharepoint redirect links to their real targets
        MatchEvaluator evaluator = new MatchEvaluator(ReplaceRelativeURl);
        cat.Content = Regex.Replace(cat.Content, @"(""(\/_layouts\/15\/FIXUPREDIRECT.ASPX).*""\s)", evaluator);
    }
    return cat;
}
/// <summary>
/// Loads every item in the "Pages" list and dispatches by content type: rule pages and
/// rule-summary pages are parsed (with their content history); other types are logged.
/// </summary>
/// <param name="dataSet">data set being populated</param>
/// <param name="ctx">client context used for all CSOM round trips</param>
private void LoadPages(SpRulesDataSet dataSet, ClientContext ctx)
{
    _log.LogInformation("Loading Pages...");
    var web = ctx.Web;
    List oList = ctx.Web.Lists.GetByTitle("Pages");
    ctx.Load(oList);
    ctx.ExecuteQuery();
    _log.LogInformation("got list id" + oList.Id);

    CamlQuery camlQuery = new CamlQuery();
    camlQuery.ViewXml = "<View><Query><Where></Where></Query><RowLimit>5000</RowLimit></View>";
    ListItemCollection items = oList.GetItems(camlQuery);
    ctx.Load(items);
    ctx.ExecuteQuery();

    int count = 0;
    foreach (var item in items)
    {
        count++;

        // one round trip per page: item, its content type and its file
        var contentType = item.ContentType;
        ctx.Load(item);
        ctx.Load(contentType);
        var file = ctx.Web.GetFileByServerRelativeUrl($"/Pages/{item["FileLeafRef"]}");
        ctx.Load(file);
        ctx.ExecuteQuery();

        if (item.ContentType.Name == "RulePage")
        {
            _log.LogInformation($"Rule Page {item["Title"]} - {count} of {items.Count}");
            var rulePage = LoadRulePage(dataSet, ctx, item);
            if (rulePage != null)
            {
                LoadContentHistory(rulePage, ctx, item);
            }
        }
        else if (item.ContentType.Name == "RuleSummaryPage")
        {
            _log.LogInformation("Summary Page " + item["Title"]);
            var category = LoadRuleSummaryPage(dataSet, item, ctx);
            if (category != null)
            {
                LoadContentHistory(category, ctx, item);
            }
        }
        else
        {
            _log.LogInformation("unhandled page content type {contentType}", item.ContentType.Name);
        }

        // DEBUG uncomment this for testing with a smaller amount of data
        //if (count > 100) break;
    }
}
/// <summary>
/// we screen-scrape every category summary item to get the ordering of rule items within a category
/// </summary>
/// <param name="dataSet">data set we're building</param>
/// <param name="archived">when true, request the page with archived rules shown</param>
public async Task ScrapeCategoryPages(SpRulesDataSet dataSet, bool archived = false)
{
    foreach (var cat in dataSet.Categories)
    {
        if (cat.Uri == null)
        {
            _log.LogWarning("Category {Title} has no Uri", cat.Title);
            continue;
        }

        // for sanity-checking, we only search the subset of rules that are already associated with category
        var categoryRules = dataSet.Rules
            .Where(r => r.Categories.Select(c => c.TermStoreId).ToList().Contains(cat.TermStoreId))
            .ToList();
        if (!categoryRules.Any())
        {
            _log.LogWarning("category {Title} at {Uri} has no rules", cat.Title, cat.Uri);
            continue;
        }

        _log.LogInformation("Screen scrape of category {CatTitle}", cat.Title);
        HtmlWeb web = new HtmlWeb();
        var uri = cat.Uri.ToString();
        if (archived)
        {
            uri += "?showarchived=True";
        }
        var doc = await web.LoadFromWebAsync(_appSettings.SharePointUrl + uri);
        var ruleNodes = doc.DocumentNode.SelectNodes(
            ".//*[@id='ctl00_PlaceHolderMain_RuleSummaryUC_SSWRuleSummaryUCDiv']/div/ol/li");
        if (ruleNodes == null)
        {
            _log.LogWarning("no rule links found for {Title} on item {Url}", cat.Title, cat.Uri);
            continue;
        }

        foreach (var ruleNode in ruleNodes)
        {
            var linkNode = ruleNode.SelectSingleNode("h2/a");
            // FIX: a list item without an h2/a anchor previously caused a
            // NullReferenceException; skip such nodes instead.
            if (linkNode == null)
            {
                _log.LogWarning("rule list item without link under category {CategoryTitle}", cat.Title);
                continue;
            }
            var title = linkNode.InnerText.Trim();
            var ruleData = categoryRules.FirstOrDefault(r =>
                r.Title.Equals(title, StringComparison.InvariantCultureIgnoreCase));
            if (ruleData == null)
            {
                _log.LogWarning("Failed to find rule {RuleTitle} under category {CategoryTitle}", title, cat.Title);
                _log.LogWarning("Available rules titles: {RuleTitles}", categoryRules.Select(r => r.Title).ToList());
            }
            else
            {
                cat.Rules.Add(ruleData);
            }
        } // end foreach rule link in summary item
    } // end foreach category
}
/// <summary>
/// Json-API variant: populates a category's content and page guid from a rule-summary
/// page token, matching the category by title.
/// </summary>
/// <param name="dataSet">data set whose categories are matched</param>
/// <param name="page">json token for the summary page</param>
private void LoadRuleSummaryPage(SpRulesDataSet dataSet, JToken page)
{
    var category = dataSet.CategoryByTitle(page["Title"].NpValue<string>());
    if (category == null)
    {
        _log.LogWarning("failed to match rule summary page {SummaryPageTitle} to a category from the term store", page["Title"].NpValue<string>());
        return;
    }
    category.Content = page["PublishingPageContent"].NpValue<string>();
    category.PageGuid = page["GUID"].NpValue<string>();
}
/// <summary>
/// Async json-API import: loads categories and pages, drops empty categories, then
/// scrapes the home page and every category page for ordering information.
/// </summary>
/// <returns>the populated data set</returns>
public async Task<SpRulesDataSet> Import()
{
    var data = new SpRulesDataSet();

    await LoadCategoriesJson(data);
    await LoadPagesJson(data);
    CullEmptyCategories(data);
    await ScrapeHomePage(data);
    await ScrapeCategoryPages(data);

    //_log.LogWarning("{@DataSet}", dataSet);
    return data;
}
/// <summary>
/// Logs a tree view of the data set: parent categories, their categories, and the rules
/// within each category.
/// </summary>
/// <param name="log">logger to write to</param>
/// <param name="data">data set to describe</param>
private static void LogRuleSetDetails(ILogger<Program> log, SpRulesDataSet data)
{
    // FIX: use structured logging parameters (consistent with the calls below)
    // instead of string interpolation.
    log.LogInformation("loaded {RuleCount} rules across {CategoryCount} categories", data.Rules.Count, data.Categories.Count);
    foreach (var parent in data.ParentCategories)
    {
        log.LogInformation("= {ParentTitle}", parent.Title);
        foreach (var cat in parent.Categories)
        {
            log.LogInformation("=== {CatTitle}", cat.Title);
            foreach (var rule in cat.Rules)
            {
                log.LogInformation("======= {RuleTitle}", rule.Title);
            }
        }
    }
}
/// <summary>
/// Writes a markdown file (yaml front matter + converted HTML body) for every category
/// that has rules, optionally replaying its history and committing each file to git.
/// </summary>
/// <param name="data">data set whose categories are written</param>
private void WriteCategoryPages(SpRulesDataSet data)
{
    foreach (var cat in data.Categories)
    {
        // categories without rules were culled earlier but guard anyway
        if (!cat.Rules.Any())
        {
            continue;
        }

        var catPaths = new CategoryPaths(_config, cat);
        if (_config.ProcessHistory)
        {
            ProcessCategoryHistory(cat);
        }

        var html = $@"
{cat.IntroText}
{cat.Content}
";
        string markdown = $@"---
{YamlSerializer.Serialize(new CategoryMdModel(cat))}
---

{ MarkdownConverter.Convert(html)}
";

        _log.LogInformation($"writing {catPaths.CategoryFileFull}");
        using (var writer = new StreamWriter(catPaths.CategoryFileFull, false))
        {
            writer.Write(markdown);
            writer.Flush();
        }

        if (_config.ProcessHistory)
        {
            GitCommit(
                _config.TargetRepository,
                $"Extracted from Sharepoint to Git",
                new LibGit2Sharp.Signature("SSW.Rules.SharePointExtractor", "*****@*****.**", DateTime.UtcNow),
                new LibGit2Sharp.Signature("SSW.Rules.SharePointExtractor", "*****@*****.**", DateTime.UtcNow),
                catPaths.CategoryFileRelative);
        }
    }
}
/// <summary>
/// SharePoint sucks so hard that I abandoned attempting to talk to the term store and just scraped details from the rules site html instead
/// </summary>
/// <param name="dataSet">the dataset we're building</param>
public async Task ScrapeHomePage(SpRulesDataSet dataSet)
{
    _log.LogInformation("Scraping category details from home item...");
    HtmlWeb web = new HtmlWeb();
    var doc = await web.LoadFromWebAsync(_appSettings.SharePointUrl);
    var container = doc.DocumentNode.SelectSingleNode(
        ".//*[@id='ctl00_PlaceHolderMain_RuleLandingUC_SSWLandingUCDiv']/div[@class='ruleSortDiv']");
    var parentCategoryNodes = container.SelectNodes("div");

    foreach (var parentCatNode in parentCategoryNodes)
    {
        var title = parentCatNode.SelectSingleNode("h2").InnerText.Trim();
        if (string.IsNullOrWhiteSpace(title))
        {
            continue;
        }
        var parentCat = new ParentCategory() { Title = title };

        // candidate categories that already claim this parent (case-insensitive)
        var candidates = dataSet.Categories
            .Where(c => c.ParentCategoryTitle.Equals(parentCat.Title, StringComparison.InvariantCultureIgnoreCase))
            .ToList();

        foreach (var catNode in parentCatNode.SelectNodes("ol/li/a[@class='RuleSortList']"))
        {
            var catTitle = catNode.InnerText.Trim();
            var catUrl = catNode.Attributes["href"].Value;

            var catData = candidates.FirstOrDefault(c =>
                c.Title.Equals(catTitle, StringComparison.InvariantCultureIgnoreCase));
            if (catData == null)
            {
                _log.LogWarning("failed to find {CatTitle} in list {CategoryList}", catTitle, candidates.Select(c => c.Title).ToList());
                continue;
            }

            // remember the category's page url so the per-category scraper can visit it
            catData.Uri = new Uri(catUrl, UriKind.Relative);
            parentCat.Categories.Add(catData);
            //_log.LogInformation("=== {Category}", catData.Title);
        } // end foreach category

        dataSet.ParentCategories.Add(parentCat);
    } // end foreach parent
}
/// <summary>
/// process related rules for a rule page: queries the Pages list REST API for other
/// pages sharing any of this rule's keywords, and records their file names as related.
/// </summary>
/// <param name="item">the rule page list item (source of RulesKeyWords)</param>
/// <param name="dataSet">data set being built (unused here, kept for signature parity)</param>
/// <param name="rulePage">rule page that receives the Related entries</param>
private void RulePageRelated(ListItem item, SpRulesDataSet dataSet, RulePage rulePage)
{
    //Check if the rule has any keywords for related rules
    if (string.IsNullOrEmpty(item["RulesKeyWords"]?.ToString()))
    {
        return;
    }

    var ruleKeyWords = item["RulesKeyWords"]?.ToString().Split(';');
    XNamespace xmlns = "http://schemas.microsoft.com/ado/2007/08/dataservices";

    // FIX: WebClient is IDisposable and was never disposed; also removed the unused
    // `url` and `HtmlWeb` locals.
    using (WebClient webClient = new WebClient())
    {
        foreach (var keyWord in ruleKeyWords)
        {
            try
            {
                var relatedUri = _appSettings.SharePointUrl + "/_api/web/lists/getByTitle('Pages')/Items?$filter=GUID ne '" + rulePage.Guid + "' and substringof('" + keyWord + "',RulesKeyWords)&$select=GUID,Title,RulesKeyWords,FileRef";
                var relatedXmlPageString = webClient.DownloadString(relatedUri);
                var xmlDoc = XDocument.Parse(relatedXmlPageString);
                var relatedRulesNodes = xmlDoc.Descendants(xmlns + "RulesKeyWords");
                foreach (var node in relatedRulesNodes)
                {
                    // substringof() can over-match; require an exact keyword hit
                    var keywordsArray = node.Value.Trim().Split(';');
                    if (keywordsArray.Contains(keyWord))
                    {
                        var relatedRuleTitle = node.Parent.Descendants(xmlns + "Title").FirstOrDefault();
                        var relatedRuleUri = relatedRuleTitle.Value.ToFileName();
                        rulePage.Related.Add(relatedRuleUri);
                    }
                }
            }
            catch (WebException ex)
            {
                // FIX: pass the exception to the logger's exception overload instead of
                // as a (never rendered) message argument.
                _log.LogWarning(ex, "ignored http error when fetching related rules: ");
            }
        }
    }

    rulePage.Related = rulePage.Related.Distinct().ToList();
}
/// <summary>
/// Loads every entry of the TaxonomyHiddenList into the data set's category list.
/// Duplicate names are allowed here; empty categories are culled later.
/// </summary>
/// <param name="dataSet">data set receiving the categories</param>
/// <param name="ctx">client context used to read the list</param>
public void LoadCategories(SpRulesDataSet dataSet, ClientContext ctx)
{
    var items = GetAllItems(ctx, "TaxonomyHiddenList");
    foreach (var item in items)
    {
        _log.LogInformation("Category: {Category}", item["Title"]);
        if (item["Title"] == null)
        {
            continue;
        }
        // we will allow duplicate names through but will clear out empty categories later
        var category = new Category()
        {
            TermStoreId = Convert.ToInt32(item["ID"]),
            TermStoreGuid = item["IdForTerm"].ToString(),
            Title = item["Title"].ToString().Trim(),
            ParentCategoryTitle = ExtractParentCategory(item["Path"].ToString()),
        };
        dataSet.Categories.Add(category);
    }
}
/// <summary>
/// Json-API variant of category loading: parses the TaxonomyHiddenList json payload and
/// adds one Category per entry (duplicates allowed; empties are culled later).
/// </summary>
/// <param name="dataSet">data set receiving the categories</param>
private async Task LoadCategoriesJson(SpRulesDataSet dataSet)
{
    var taxonomyJson = await GetJsonListByTitle("TaxonomyHiddenList");
    JObject doc = JObject.Parse(taxonomyJson);

    foreach (var cat in doc.GetValue("value").Children())
    {
        var category = new Category()
        {
            TermStoreId = cat["Id"].Value<int>(),
            TermStoreGuid = cat["IdForTerm"].Value<string>(),
            Title = cat["Title"].Value<string>().Trim(),
            ParentCategoryTitle = ExtractParentCategory(cat["Path"].Value<string>()),
        };
        dataSet.Categories.Add(category);
    }
}
/// <summary>
/// Downloads every image referenced by rules and categories (explicit ImageUrls plus any
/// embedded in intro/content HTML) into their respective folders, then optionally commits
/// the lot to git.
/// </summary>
/// <param name="data">data set whose images are fetched</param>
private void FetchImages(SpRulesDataSet data)
{
    foreach (var rule in data.Rules)
    {
        // hoist: RulePaths was previously constructed three times per rule
        var ruleFolder = new RulePaths(_config, rule).RuleFolderFull;

        // FIX: the old code filtered rules by ImageUrls.Count > 0, so a rule with an
        // empty ImageUrls set but images embedded in its intro/content HTML was skipped
        // entirely. Check each source independently, as the category loop below does.
        if (rule.ImageUrls.Count > 0)
        {
            FetchImages(rule.ImageUrls, ruleFolder);
        }
        var ruleIntroImgs = HtmlHelper.GetImageUrls(rule.IntroText);
        if (ruleIntroImgs.Count > 0)
        {
            FetchImages(ruleIntroImgs, ruleFolder);
        }
        var ruleContentImgs = HtmlHelper.GetImageUrls(rule.Content);
        if (ruleContentImgs.Count > 0)
        {
            FetchImages(ruleContentImgs, ruleFolder);
        }
    }

    foreach (var category in data.Categories)
    {
        var categoryFolder = new CategoryPaths(_config, category).CategoryFolderFull;
        if (category.ImageUrls.Count > 0)
        {
            FetchImages(category.ImageUrls, categoryFolder);
        }
        var imgsIntro = HtmlHelper.GetImageUrls(category.IntroText);
        if (imgsIntro.Count > 0)
        {
            FetchImages(imgsIntro, categoryFolder);
        }
        var imgsContent = HtmlHelper.GetImageUrls(category.Content);
        if (imgsContent.Count > 0)
        {
            FetchImages(imgsContent, categoryFolder);
        }
    }

    if (_config.ProcessHistory)
    {
        GitCommit(
            _config.TargetRepository,
            $"Extracted Images from Sharepoint to Git",
            new LibGit2Sharp.Signature("SSW.Rules.SharePointExtractor", "*****@*****.**", DateTime.UtcNow),
            new LibGit2Sharp.Signature("SSW.Rules.SharePointExtractor", "*****@*****.**", DateTime.UtcNow),
            "*");
    }
}
/// <summary>
/// Json-API variant of page loading: parses the Pages list payload and dispatches each
/// page by content-type id to the rule or rule-summary loader.
/// </summary>
/// <param name="dataSet">data set being populated</param>
private async Task LoadPagesJson(SpRulesDataSet dataSet)
{
    var pagesJson = await GetJsonListByTitle("Pages");
    JObject doc = JObject.Parse(pagesJson);

    foreach (var page in doc.GetValue("value").Children())
    {
        var contentTypeId = page["ContentTypeId"].NpValue<string>();
        if (contentTypeId == RulePageContentTypeId)
        {
            LoadRulePage(dataSet, page);
        }
        else if (contentTypeId == RuleSummaryPageContentTypeId)
        {
            LoadRuleSummaryPage(dataSet, page);
        }
        else
        {
            _log.LogWarning("unhandled page content type {contentType}", page["PublishingPageLayout"]["Description"].NpValue<string>());
        }
    }
}
/// <summary>
/// process authors for a rule page: resolves each EmployeeLookup reference against the
/// data set's employee list (creating missing employees) and links them to the page.
/// </summary>
/// <param name="item">the rule page list item</param>
/// <param name="dataSet">data set holding the shared employee list</param>
/// <param name="rulePage">rule page that receives the employee links</param>
private void RulePageAuthors(ListItem item, SpRulesDataSet dataSet, RulePage rulePage)
{
    var employeeRefs = item["EmployeeLookup"] as FieldLookupValue[];
    if (employeeRefs == null || employeeRefs.Length < 1)
    {
        return;
    }

    foreach (var employeeRef in employeeRefs)
    {
        var employee = dataSet.Employees.FirstOrDefault(e => e.Id == employeeRef.LookupId);
        if (employee == null)
        {
            // first time we've seen this employee -> register them in the data set
            employee = new Employee()
            {
                Id = employeeRef.LookupId,
                Title = employeeRef.LookupValue
            };
            dataSet.Employees.Add(employee);
        }
        rulePage.Employees.Add(employee);
    }
}
/// <summary>
/// Writes an index.md (yaml front matter only) into a folder per parent category,
/// creating folders as needed, and optionally commits everything to git.
/// </summary>
/// <param name="data">data set whose parent categories are written</param>
private void WriteParentCategories(SpRulesDataSet data)
{
    foreach (var pc in data.ParentCategories)
    {
        var folderName = pc.Title.ToFileName();
        var indexRelative = Path.Combine(folderName, "index.md");
        var folderFull = Path.Combine(_config.CategoriesFolderFull, folderName);
        var indexFull = Path.Combine(_config.CategoriesFolderFull, indexRelative);

        if (!Directory.Exists(folderFull))
        {
            Directory.CreateDirectory(folderFull);
        }

        var markdown = $@"---
{YamlSerializer.Serialize(new ParentCategoryMdModel(pc))}
---
";

        _log.LogInformation($"writing {indexFull}");
        using (var writer = new StreamWriter(indexFull, false))
        {
            writer.Write(markdown);
            writer.Flush();
        }

        if (_config.ProcessHistory)
        {
            // commit everything ("*") rather than just this index file
            GitCommit(
                _config.TargetRepository,
                $"Extracted from SharePoint",
                new LibGit2Sharp.Signature("SSW.Rules.SharePointExtractor", "*****@*****.**", DateTime.UtcNow),
                new LibGit2Sharp.Signature("SSW.Rules.SharePointExtractor", "*****@*****.**", DateTime.UtcNow),
                "*");
        }
    }
}
/// <summary>
/// Json-API variant of rule-page loading: builds a RulePage from the json token and wires
/// up its category references via the term store guids.
/// </summary>
/// <param name="dataSet">data set receiving the rule (and supplying categories)</param>
/// <param name="page">json token for the rule page</param>
private void LoadRulePage(SpRulesDataSet dataSet, JToken page)
{
    // pages without a title are not real rules
    if (page["Title"].IsNull())
    {
        return;
    }

    var ruleData = new RulePage()
    {
        Title = page["Title"].Value<string>().Trim(),
        Id = page["Id"].Value<int>(),
        Content = page["PublishingPageContent"].NpValue<string>(),
        IntroText = page["RuleContentTop"].NpValue<string>(),
        CreatedUtc = page["Created"].Value<DateTime>(),
        ModifiedUtc = page["Modified"].Value<DateTime>(),
        Guid = page["GUID"].Value<string>()
    };

    // follow metadata -> term store reference to set rule->category relationship
    // we will use this relationship with screen scraping later to set ordered category->rules list
    foreach (var catRef in page["RuleCategoriesMetadata"].Children())
    {
        var termStoreRef = catRef["TermGuid"].NpValue<string>();
        var catData = dataSet.Categories.FirstOrDefault(c =>
            c.TermStoreGuid.Equals(termStoreRef, StringComparison.InvariantCultureIgnoreCase));
        if (catData == null)
        {
            // FIX: corrected "giud" typo in the log message template
            _log.LogWarning("Failed to resolve category term store guid {TermStoreRef} for rule {RuleTitle}", termStoreRef, ruleData.Title);
        }
        else
        {
            ruleData.Categories.Add(catData);
        }
    } // end foreach category reference

    dataSet.Rules.Add(ruleData);
}
/// <summary>
/// Writes every rule to disk: first rewrites related-rule entries that match another
/// rule's redirect to that rule's canonical uri, then decodes encoded HTML tags in the
/// intro/content and delegates to WriteRule.
/// </summary>
/// <param name="data">data set whose rules are written</param>
private void WriteRules(SpRulesDataSet data)
{
    _log.LogInformation($"Writing {data.Rules.Count} rules");

    // process rules
    foreach (var rule in data.Rules)
    {
        //Check for Related Rules
        if (rule.Related.Count > 0)
        {
            // snapshot so we can safely reassign rule.Related while iterating
            foreach (var relatedRule in rule.Related.ToList())
            {
                //Check to make sure it's the correct URI
                var matches = data.Rules.Where(r => r.Redirects.Contains(relatedRule)).ToList();
                if (matches.Count > 0)
                {
                    // FIX: swap the matching entry by equality instead of calling
                    // String.Replace on every entry, which could corrupt unrelated
                    // entries that merely contain relatedRule as a substring.
                    rule.Related = rule.Related
                        .Select(s => s == relatedRule ? matches[0].GetRuleUri() : s)
                        .ToList();
                }
            }
        }

        if (!String.IsNullOrEmpty(rule.IntroText))
        {
            rule.IntroText = Helpers.EncodedHtmlTags.Decode(rule.IntroText);
        }
        if (!String.IsNullOrEmpty(rule.Content))
        {
            rule.Content = Helpers.EncodedHtmlTags.Decode(rule.Content);
        }

        WriteRule(rule);
    }
}
/// <summary>
/// Removes every category that no rule references.
/// </summary>
/// <param name="dataSet">data set whose category list is filtered in place</param>
private void CullEmptyCategories(SpRulesDataSet dataSet)
{
    // FIX: the previous Distinct() enumerable was re-enumerated for every category,
    // making this O(rules x categories); a HashSet gives O(1) membership tests.
    var categoriesWithRules = new HashSet<Category>(dataSet.Rules.SelectMany(r => r.Categories));
    dataSet.Categories = dataSet.Categories.Where(c => categoriesWithRules.Contains(c)).ToList();
}
/// <summary>
/// CSOM variant of rule-page loading: builds a RulePage from a list item, resolves
/// authors and related rules, collects image urls, wires category references via the
/// term store guids, and rewrites FIXUPREDIRECT links in the content.
/// </summary>
/// <param name="dataSet">data set receiving the rule (and supplying categories)</param>
/// <param name="ctx">client context (passed through for signature parity)</param>
/// <param name="item">the rule page list item</param>
/// <returns>the new rule page, or null when the item has no title</returns>
private RulePage LoadRulePage(SpRulesDataSet dataSet, ClientContext ctx, ListItem item)
{
    // pages without a title are not real rules
    if (string.IsNullOrEmpty(item["Title"]?.ToString()))
    {
        return null;
    }

    var rulePage = new RulePage()
    {
        ArchivedReason = item["ObsoleteReason"]?.ToString(),
        Title = item["Title"].ToString().Trim(),
        Id = Convert.ToInt32(item["ID"]),
        Content = item["PublishingPageContent"]?.ToString(),
        IntroText = item["RuleContentTop"]?.ToString(),
        RulesKeyWords = item["RulesKeyWords"]?.ToString(),
        CreatedUtc = (DateTime)item["Created"],
        ModifiedUtc = (DateTime)item["Modified"],
        Guid = item["GUID"].ToString(),
        FileName = item["FileLeafRef"].ToString()
    };

    RulePageAuthors(item, dataSet, rulePage);
    RulePageRelated(item, dataSet, rulePage);

    rulePage.ImageUrls.UnionWith(GetImageUrls(rulePage.IntroText));
    rulePage.ImageUrls.UnionWith(GetImageUrls(rulePage.Content));

    // I could not work out how to drill into the RuleCategoriesMetaData object other than serializing to json and parsing via JObject
    var metadataJson = JsonConvert.SerializeObject(item["RuleCategoriesMetadata"]);

    // follow metadata -> term store reference to set rule->category relationship
    foreach (var elt in JArray.Parse(metadataJson))
    {
        var termGuid = elt["TermGuid"].NpValue<string>();
        var catData = dataSet.Categories.FirstOrDefault(c =>
            c.TermStoreGuid.Equals(termGuid, StringComparison.InvariantCultureIgnoreCase));
        if (catData != null)
        {
            rulePage.Categories.Add(catData);
        }
    }

    if (rulePage.Content != null)
    {
        // rewrite sharepoint redirect links to their real targets
        MatchEvaluator evaluator = new MatchEvaluator(ReplaceRelativeURl);
        rulePage.Content = Regex.Replace(rulePage.Content, @"(""(\/_layouts\/15\/FIXUPREDIRECT.ASPX)[^""]*"")", evaluator);
    }

    dataSet.Rules.Add(rulePage);
    return rulePage;
}
/// <summary>
/// Top-level markdown export: ensures the output folders exist, optionally fetches
/// images, rewrites image urls, writes categories and rules per the config flags, and
/// optionally emits and commits a combined history.json.
/// </summary>
/// <param name="data">data set to export</param>
/// <exception cref="ApplicationException">wraps any failure during the export</exception>
public void WriteMarkdown(SpRulesDataSet data)
{
    try
    {
        if (!Directory.Exists(Path.Combine(_config.RulesFolderFull)))
        {
            Directory.CreateDirectory(_config.RulesFolderFull);
        }
        if (!Directory.Exists(Path.Combine(_config.CategoriesFolderFull)))
        {
            Directory.CreateDirectory(_config.CategoriesFolderFull);
        }
        if (!Directory.Exists(Path.Combine(_config.AssetsFolderFull)))
        {
            Directory.CreateDirectory(_config.AssetsFolderFull);
        }

        if (_config.FetchImages)
        {
            FetchImages(data);
        }
        ProcessImageUrls(data);

        if (_config.WriteCategories)
        {
            WriteIndexPage(data);
            WriteParentCategories(data);
            WriteCategoryPages(data);
        }
        if (_config.WriteRules)
        {
            WriteRules(data);
        }

        if (_config.WriteJsonFileHistory)
        {
            // FIX: dropped the redundant initial list assignment and reuse fileName
            // instead of duplicating the "history.json" literal in Path.Combine.
            List<HistoryInfos> list = WriteRulesJsonFileHistory(data)
                .Concat(WriteCategoriesJsonFileHistory(data))
                .ToList();
            string json = JsonConvert.SerializeObject(list);

            //write string to file
            string fileName = "history.json";
            System.IO.File.WriteAllText(Path.Combine(_config.TargetRepository, fileName), json);

            //commit file
            GitCommit(
                _config.TargetRepository,
                $"Extracted from Sharepoint to Git",
                new LibGit2Sharp.Signature("SSW.Rules.SharePointExtractor", "*****@*****.**", DateTime.UtcNow),
                new LibGit2Sharp.Signature("SSW.Rules.SharePointExtractor", "*****@*****.**", DateTime.UtcNow),
                fileName);
        }
    }
    catch (Exception ex)
    {
        _log.LogError(ex, "Exception thrown while writing output");
        throw new ApplicationException(
            "Error writing markdown", ex);
    }
}
/// <summary>
/// Builds the index model: one file-system-safe name per parent category.
/// </summary>
/// <param name="data">data set whose parent categories populate the index</param>
public IndexMdModel(SpRulesDataSet data)
{
    var names = new List<string>();
    foreach (var parent in data.ParentCategories)
    {
        names.Add(parent.Title.ToFileName());
    }
    Index = names;
}