Пример #1
0
        /// <summary>
        /// Builds an <see cref="ItemBody"/> from the <c>p</c> elements found beneath <paramref name="table"/>.
        /// </summary>
        /// <remarks>
        /// Each paragraph is classified by its first descendant <c>span</c> whose text is accepted by
        /// <c>StringUtilities.MatchesCharacterInRange(text, 'A', 'D')</c>:
        /// when such a span exists, its trimmed text becomes the key of a new entry in
        /// <c>AnswerChoices</c>; otherwise the paragraph is appended to <c>Elements</c>.
        /// For an answer choice that is not a resource, the stored text is narrowed to the outer HTML
        /// of the last <c>span</c> in that text.
        /// </remarks>
        /// <param name="table">Node whose descendant paragraphs are parsed.</param>
        /// <returns>The populated item body; empty when <paramref name="table"/> contains no paragraphs.</returns>
        public static ItemBody Parse(HtmlNode table)
        {
            var itemBody = new ItemBody();

            // SelectNodes yields null rather than an empty collection when nothing matches,
            // so iterating its result directly would throw on a table without paragraphs.
            var paragraphs = table.SelectNodes(".//p");
            if (paragraphs == null)
            {
                return itemBody;
            }

            foreach (var p in paragraphs)
            {
                var answer = p.SelectNodes(".//span")?
                             .FirstOrDefault(
                    x => StringUtilities.MatchesCharacterInRange(x.InnerText, 'A', 'D'))?
                             .InnerText.Trim();

                if (string.IsNullOrEmpty(answer))
                {
                    Logger.Trace("Parsing item body");
                    itemBody.Elements.Add(HtmlNodeUtilities.BodyElementFromNode(ExtractionSettings.Input, p));
                    continue;
                }

                Logger.Trace($"Parsing answer for {answer}");
                // NOTE(review): if AnswerChoices is a plain dictionary, a repeated answer label will make
                // Add throw — confirm whether duplicate labels can occur in the input.
                itemBody.AnswerChoices.Add(answer,
                                           HtmlNodeUtilities.BodyElementFromNode(ExtractionSettings.Input, p));

                // Look the stored element up once instead of on every access.
                var choice = itemBody.AnswerChoices[answer];
                if (choice.IsResource())
                {
                    continue;
                }

                var document = new HtmlDocument();
                document.LoadHtml(choice.Text);

                // Same null-on-no-match behaviour as above: leave the text untouched when it has no span.
                var span = document.DocumentNode.SelectNodes("//span")?.LastOrDefault();
                if (span != null)
                {
                    choice.Text = span.OuterHtml.Trim();
                }
            }

            return itemBody;
        }
Пример #2
0
        /// <summary>
        /// Builds a <see cref="Passage"/> from an ordered list of HTML nodes.
        /// </summary>
        /// <remarks>
        /// While at least one value in the passage metadata is null or empty, each node is treated as a
        /// metadata line: the text of its first child <c>span</c> is inspected and, when it mentions
        /// "Passage" together with "Code" or "Title", the part after the last ':' is stored as
        /// "PassageCode" or "PassageTitle". A node without span text is reported as an error and skipped.
        /// Once no metadata value is empty, every node is appended to the passage body.
        /// </remarks>
        /// <param name="nodes">Nodes to parse, in document order.</param>
        /// <returns>The passage populated from <paramref name="nodes"/>.</returns>
        public static Passage Parse(IList <HtmlNode> nodes)
        {
            var result = new Passage();

            foreach (var current in nodes)
            {
                var firstSpan = current.ChildNodes.FirstOrDefault(
                    child => child.Name.Equals("span", StringComparison.OrdinalIgnoreCase));
                var label = firstSpan?.InnerText;

                // Metadata is complete: everything from here on belongs to the passage body.
                var metadataPending = result.Metadata.Values.Any(string.IsNullOrEmpty);
                if (!metadataPending)
                {
                    Logger.Trace("Passage body parsed");
                    result.Body.Elements.Add(HtmlNodeUtilities.BodyElementFromNode(ExtractionSettings.Input, current));
                    continue;
                }

                if (string.IsNullOrEmpty(label))
                {
                    var report = new ErrorReportItem
                    {
                        Location = $"Passage Parsing: {current.OuterHtml}",
                        Severity = LogLevel.Error
                    };
                    Logger.LogError(report, "Unable to retrieve nodeInfo while parsing passage");
                    continue;
                }

                // Both recognised labels mention "Passage"; anything else is ignored.
                if (!label.Contains("Passage"))
                {
                    continue;
                }

                string metadataKey = null;
                string traceMessage = null;
                if (label.Contains("Code"))
                {
                    metadataKey  = "PassageCode";
                    traceMessage = "Passage code parsed";
                }
                else if (label.Contains("Title"))
                {
                    metadataKey  = "PassageTitle";
                    traceMessage = "Passage title parsed";
                }

                if (metadataKey != null)
                {
                    // The value is whatever follows the last ':' after whitespace normalisation.
                    result.Metadata.AddMetadata(metadataKey,
                                                label.RestrictToSingleWhiteSpace().Split(':').LastOrDefault());
                    Logger.Trace(traceMessage);
                }
            }

            return result;
        }
Пример #3
0
        /// <summary>
        /// Fills a Word template by replacing each placeholder in <paramref name="placeholderReplacerList"/>.
        /// </summary>
        /// <remarks>
        /// When the two paths differ (ignoring case) the template is copied over the save-as path and the
        /// copy is edited; when they are the same, the template itself is modified.
        /// The document is processed in two separately saved passes:
        /// <list type="number">
        /// <item>For every replacer flagged <c>IsHtml</c>, each paragraph whose text contains the placeholder
        /// has its inner XML replaced by the HTML-to-OpenXML conversion of the replacement.</item>
        /// <item>Every replacer (HTML or not) is handed to <c>OPTools.TextReplacer.SearchAndReplace</c>.</item>
        /// </list>
        /// This method does not propagate exceptions: any failure is written to
        /// <see cref="System.Diagnostics.Trace"/> and the method returns.
        /// A final check for leftover "#PH#" placeholders was disabled in the original code and is not performed.
        /// </remarks>
        /// <param name="placeholderReplacerList">Placeholder/replacement pairs to apply.</param>
        /// <param name="templateDocxFullNameAndPath">Full path of the .docx template.</param>
        /// <param name="SaveAsDocxFullNameAndPath">Full path of the document to produce.</param>
        public void ReplacePlaceholders(List <PlaceholderReplacer> placeholderReplacerList, string templateDocxFullNameAndPath, string SaveAsDocxFullNameAndPath)
        {
            try
            {
                // Default: edit the template in place.
                string worddocFullNameAndPath = templateDocxFullNameAndPath;

                // Paths are identifiers, so compare them ordinally rather than through culture-sensitive ToLower().
                if (!string.Equals(templateDocxFullNameAndPath, SaveAsDocxFullNameAndPath, StringComparison.OrdinalIgnoreCase))
                {
                    File.Copy(templateDocxFullNameAndPath, SaveAsDocxFullNameAndPath, true);
                    if (!File.Exists(SaveAsDocxFullNameAndPath))
                    {
                        throw new IOException("Exception creating a copy of template in WordTemplateTextReplace cause unknown");
                    }

                    worddocFullNameAndPath = SaveAsDocxFullNameAndPath;
                }

                // Pass 1: HTML replacements, applied paragraph by paragraph.
                using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(worddocFullNameAndPath, true))
                {
                    foreach (PlaceholderReplacer placeholderReplacer in placeholderReplacerList)
                    {
                        if (!placeholderReplacer.IsHtml)
                        {
                            continue;
                        }

                        // Snapshot the paragraphs first: assigning InnerXml below rewrites the element tree,
                        // which must not happen while that same tree is being lazily enumerated.
                        var paragraphs = new List<Paragraph>(wordDoc.MainDocumentPart.RootElement.Descendants<Paragraph>());

                        foreach (Paragraph paragraph in paragraphs)
                        {
                            if (!paragraph.InnerText.Contains(placeholderReplacer.Placeholder))
                            {
                                continue;
                            }

                            // With an inline prefix the raw replacer is appended to it; without one only the
                            // replacer's inner HTML is used. IsNullOrWhiteSpace also covers a null prefix.
                            string replacer;
                            if (string.IsNullOrWhiteSpace(placeholderReplacer.InlinePrefixHtmlText))
                            {
                                replacer = new HtmlNodeUtilities().GetInnerHtml(placeholderReplacer.Replacer);
                            }
                            else
                            {
                                replacer = placeholderReplacer.InlinePrefixHtmlText + placeholderReplacer.Replacer;
                            }

                            // Wrap in a paragraph tag and swap the paragraph content for the converted markup.
                            paragraph.InnerXml = new HtmlToOpenXml().ConvertHtmlToOpenXml("<p>" + replacer + "</p>");
                        }
                    }

                    // Disposal at the end of the using block closes the package; no explicit Close() is needed.
                    wordDoc.Save();
                }

                // Pass 2: search-and-replace for every placeholder. The document is reopened so this
                // pass works on the saved result of pass 1, as in the original flow.
                using (WordprocessingDocument wordDoc = WordprocessingDocument.Open(worddocFullNameAndPath, true))
                {
                    foreach (PlaceholderReplacer placeholderReplacer in placeholderReplacerList)
                    {
                        OPTools.TextReplacer.SearchAndReplace(wordDoc, placeholderReplacer.Placeholder, placeholderReplacer.Replacer, false);
                    }

                    wordDoc.Save();
                }
            }
            catch (Exception ex)
            {
                // Callers have always seen a non-throwing method, so the exception is still not rethrown,
                // but the failure is now recorded instead of vanishing silently.
                System.Diagnostics.Trace.TraceError(
                    "ReplacePlaceholders failed for template '{0}' (save as '{1}'): {2}",
                    templateDocxFullNameAndPath,
                    SaveAsDocxFullNameAndPath,
                    ex);
            }
        }