/// <summary>
/// Builds an <see cref="ItemBody"/> from the <c>p</c> elements found beneath <paramref name="table"/>.
/// </summary>
/// <remarks>
/// A paragraph containing a span whose text satisfies
/// <c>StringUtilities.MatchesCharacterInRange(text, 'A', 'D')</c> is stored as an answer choice keyed by
/// that span's trimmed text. Every other paragraph is appended to the item body elements.
/// For answer choices that are not resources, the stored text is narrowed to the outer HTML of the
/// last span found in that text.
/// </remarks>
/// <param name="table">Node whose descendant paragraphs are parsed.</param>
/// <returns>The populated item body; it has no elements or choices when no paragraph is found.</returns>
public static ItemBody Parse(HtmlNode table)
{
    var itemBody = new ItemBody();

    // SelectNodes yields null (not an empty collection) when the XPath matches nothing.
    var paragraphs = table.SelectNodes(".//p");
    if (paragraphs == null)
    {
        return itemBody;
    }

    foreach (var p in paragraphs)
    {
        var answer = p.SelectNodes(".//span")?
            .FirstOrDefault(x => StringUtilities.MatchesCharacterInRange(x.InnerText, 'A', 'D'))?
            .InnerText.Trim();

        if (string.IsNullOrEmpty(answer))
        {
            Logger.Trace("Parsing item body");
            itemBody.Elements.Add(HtmlNodeUtilities.BodyElementFromNode(ExtractionSettings.Input, p));
            continue;
        }

        Logger.Trace($"Parsing answer for {answer}");

        // NOTE(review): Add presumably rejects a repeated answer label — confirm against the AnswerChoices type.
        itemBody.AnswerChoices.Add(answer, HtmlNodeUtilities.BodyElementFromNode(ExtractionSettings.Input, p));

        // Look the stored element up once instead of on every access.
        var choice = itemBody.AnswerChoices[answer];
        if (choice.IsResource())
        {
            continue;
        }

        var document = new HtmlDocument();
        document.LoadHtml(choice.Text);

        // Keep the text unchanged when it holds no span, rather than failing on a null collection.
        var spans = document.DocumentNode.SelectNodes("//span");
        if (spans != null)
        {
            choice.Text = spans.Last().OuterHtml.Trim();
        }
    }

    return itemBody;
}
/// <summary>
/// Builds a <see cref="Passage"/> from an ordered list of HTML nodes.
/// </summary>
/// <remarks>
/// While any metadata value is still null or empty, each node is treated as a metadata candidate:
/// the text of its first direct <c>span</c> child is inspected for "Passage" together with "Code" or
/// "Title", and the part after the last ':' is stored under "PassageCode" or "PassageTitle".
/// A candidate without span text is reported through <c>Logger.LogError</c> and skipped.
/// Once no metadata value is empty, every node is added to the passage body.
/// </remarks>
/// <param name="nodes">Nodes to process, in document order.</param>
/// <returns>The passage assembled from <paramref name="nodes"/>.</returns>
public static Passage Parse(IList<HtmlNode> nodes)
{
    var passage = new Passage();

    foreach (var current in nodes)
    {
        var firstSpan = current.ChildNodes.FirstOrDefault(
            child => child.Name.Equals("span", StringComparison.OrdinalIgnoreCase));
        var spanText = firstSpan?.InnerText;

        var metadataIncomplete = passage.Metadata.Values.Any(string.IsNullOrEmpty);
        if (!metadataIncomplete)
        {
            // All metadata is filled in: the node belongs to the passage body.
            Logger.Trace("Passage body parsed");
            passage.Body.Elements.Add(HtmlNodeUtilities.BodyElementFromNode(ExtractionSettings.Input, current));
            continue;
        }

        if (string.IsNullOrEmpty(spanText))
        {
            var report = new ErrorReportItem
            {
                Location = $"Passage Parsing: {current.OuterHtml}",
                Severity = LogLevel.Error
            };
            Logger.LogError(report, "Unable to retrieve nodeInfo while parsing passage");
            continue;
        }

        // Both metadata kinds require the word "Passage"; anything else is ignored.
        if (!spanText.Contains("Passage"))
        {
            continue;
        }

        if (spanText.Contains("Code"))
        {
            var code = spanText.RestrictToSingleWhiteSpace().Split(':').LastOrDefault();
            passage.Metadata.AddMetadata("PassageCode", code);
            Logger.Trace("Passage code parsed");
        }
        else if (spanText.Contains("Title"))
        {
            var title = spanText.RestrictToSingleWhiteSpace().Split(':').LastOrDefault();
            passage.Metadata.AddMetadata("PassageTitle", title);
            Logger.Trace("Passage title parsed");
        }
    }

    return passage;
}
/// <summary>
/// Fills a .docx template by replacing each placeholder in the document with its replacement value.
/// </summary>
/// <remarks>
/// <para>
/// When the two paths differ (ignoring case) the template is copied to
/// <paramref name="SaveAsDocxFullNameAndPath"/>, overwriting any existing file, and the copy is edited.
/// When they are the same, the template itself is modified.
/// </para>
/// <para>
/// The document is processed in two passes, each with its own open/save cycle. The first pass handles
/// entries flagged <c>IsHtml</c>: every paragraph whose text contains the placeholder has its inner XML
/// replaced by the converted HTML. The second pass runs <c>OPTools.TextReplacer.SearchAndReplace</c>
/// for every entry.
/// </para>
/// <para>
/// Failures are never propagated to the caller: any exception is reported through
/// <see cref="System.Diagnostics.Trace"/> and the method returns, possibly leaving the output partially
/// processed.
/// </para>
/// </remarks>
/// <param name="placeholderReplacerList">Placeholder/replacement pairs to apply.</param>
/// <param name="templateDocxFullNameAndPath">Full path of the template document.</param>
/// <param name="SaveAsDocxFullNameAndPath">Full path of the document to produce.</param>
public void ReplacePlaceholders(List<PlaceholderReplacer> placeholderReplacerList, string templateDocxFullNameAndPath, string SaveAsDocxFullNameAndPath)
{
    try
    {
        string targetPath;

        // Ordinal comparison: paths are identifiers, not linguistic text.
        if (string.Equals(templateDocxFullNameAndPath, SaveAsDocxFullNameAndPath, StringComparison.OrdinalIgnoreCase))
        {
            // In this case the template itself will change.
            targetPath = templateDocxFullNameAndPath;
        }
        else
        {
            File.Copy(templateDocxFullNameAndPath, SaveAsDocxFullNameAndPath, true);
            if (!File.Exists(SaveAsDocxFullNameAndPath))
            {
                throw new IOException("Exception creating a copy of template in WordTemplateTextReplace cause unknown");
            }

            targetPath = SaveAsDocxFullNameAndPath;
        }

        // Pass 1: HTML replacements. Disposal at the end of the using block closes the package.
        using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(targetPath, true))
        {
            ReplaceHtmlPlaceholders(wordDoc, placeholderReplacerList);
            wordDoc.Save();
        }

        // Pass 2: text search-and-replace for every entry, on the document saved by pass 1.
        using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(targetPath, true))
        {
            foreach (PlaceholderReplacer placeholderReplacer in placeholderReplacerList)
            {
                OPTools.TextReplacer.SearchAndReplace(wordDoc, placeholderReplacer.Placeholder, placeholderReplacer.Replacer, false);
            }

            wordDoc.Save();
        }

        // Re-open read-only so that a package that can no longer be opened is reported by the catch below.
        // No check for left-over placeholders is performed.
        using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(targetPath, false))
        {
        }
    }
    catch (Exception ex)
    {
        // Callers have never received exceptions from this method, so keep that contract,
        // but leave a trace instead of discarding the failure silently.
        System.Diagnostics.Trace.TraceError(
            "ReplacePlaceholders failed for '{0}': {1}",
            SaveAsDocxFullNameAndPath,
            ex);
    }
}

/// <summary>
/// Replaces the inner XML of every paragraph containing the placeholder of an <c>IsHtml</c> entry
/// with the Open XML produced from that entry's HTML.
/// </summary>
private static void ReplaceHtmlPlaceholders(WordprocessingDocument wordDoc, List<PlaceholderReplacer> placeholderReplacerList)
{
    foreach (PlaceholderReplacer placeholderReplacer in placeholderReplacerList)
    {
        if (!placeholderReplacer.IsHtml)
        {
            continue;
        }

        // Snapshot the paragraphs so that rewriting a paragraph's XML cannot disturb
        // the enumeration of the document tree.
        var paragraphs = new List<Paragraph>(wordDoc.MainDocumentPart.RootElement.Descendants<Paragraph>());

        foreach (var paragraph in paragraphs)
        {
            if (!paragraph.InnerText.Contains(placeholderReplacer.Placeholder))
            {
                continue;
            }

            string html = BuildReplacementParagraphHtml(placeholderReplacer);
            HtmlToOpenXml htmlToOpenXml = new HtmlToOpenXml();
            paragraph.InnerXml = htmlToOpenXml.ConvertHtmlToOpenXml(html);
        }
    }
}

/// <summary>
/// Builds the <c>&lt;p&gt;</c>-wrapped HTML that replaces a placeholder paragraph.
/// </summary>
private static string BuildReplacementParagraphHtml(PlaceholderReplacer placeholderReplacer)
{
    string content;

    // A null or blank prefix means "no prefix"; a null prefix used to abort the whole run.
    if (!string.IsNullOrWhiteSpace(placeholderReplacer.InlinePrefixHtmlText))
    {
        // NOTE(review): the prefixed form uses the raw Replacer rather than its inner HTML.
        // Kept exactly as before — confirm this is intended.
        content = placeholderReplacer.InlinePrefixHtmlText + placeholderReplacer.Replacer;
    }
    else
    {
        content = new HtmlNodeUtilities().GetInnerHtml(placeholderReplacer.Replacer);
    }

    return "<p>" + content + "</p>";
}