Example #1
0
        /// <summary>
        /// Builds the top-level document items from the anchors found in the body HTML.
        /// </summary>
        /// <remarks>
        /// Only <c>a</c> elements with class <c>doc-anchor</c> that are direct children of the
        /// document's first child node are considered, and of those only the ones that carry a
        /// non-blank <c>eId</c> attribute. The node following each anchor is treated as the
        /// anchor's content and is parsed by <c>GetAndSetChildsNodes</c>.
        /// </remarks>
        /// <param name="htmlDoc">Parsed HTML document to read the anchors from.</param>
        /// <returns>
        /// One item (tree level 0) per qualifying anchor, in document order; an empty list when
        /// the document has no first child or contains no matching anchors.
        /// </returns>
        public static List <SmeDocItem> GetDevidedLegalPartFromBodyHtml(HtmlDocument htmlDoc)
        {
            List <SmeDocItem> resultItems = new List <SmeDocItem>();

            // SelectNodes yields null (not an empty collection) when nothing matches, and
            // FirstChild is null for an empty document; both cases mean "no items".
            var mainAnchors = htmlDoc.DocumentNode.FirstChild?.SelectNodes("./a[@class='doc-anchor']");

            if (mainAnchors == null)
            {
                return(resultItems);
            }

            foreach (HtmlNode currAnchor in mainAnchors)
            {
                string currBaseEId = currAnchor.Attributes[@"eId"]?.Value;
                if (string.IsNullOrWhiteSpace(currBaseEId))
                {
                    // Anchors without an element id cannot be mapped to a document item.
                    continue;
                }

                SmeDocItem currentBaseEl = new SmeDocItem();
                currentBaseEl.Text     += currAnchor.OuterHtml;
                currentBaseEl.TreeLevel = 0;
                SetElementIdAndType(currBaseEId, currentBaseEl);

                // The anchor's next sibling holds the content that belongs to this item.
                GetAndSetChildsNodes(currentBaseEl, currAnchor.NextSibling);

                resultItems.Add(currentBaseEl);
            }

            return(resultItems);
        }
        /// <summary>
        /// Replaces all recital items of <paramref name="smeDoc"/> with grouping items that
        /// each hold up to 30 consecutive recitals as children.
        /// </summary>
        /// <remarks>
        /// The groups are inserted at the position of the first recital in the original list.
        /// Each group has type <c>Recital</c> and a heading of the form
        /// "&lt;translated 'Recitals'&gt; &lt;first&gt;-&lt;last&gt;", where the numbers are
        /// 1-based positions within the recital sequence. Does nothing when the document
        /// contains no recital items.
        /// </remarks>
        /// <param name="smeDoc">Document whose <c>Items</c> list is rewritten.</param>
        private static void FixRecitals(SmeDoc smeDoc)
        {
            const int partSize = 30;

            int firstRecitalIndex = smeDoc.Items.FindIndex(x => x.Type == SmeDocItemType.Recital);

            if (firstRecitalIndex < 0)
            {
                return;
            }

            var recitalItems = smeDoc.Items.Where(x => x.Type == SmeDocItemType.Recital).ToList();

            // Every item before the first recital is a non-recital, so firstRecitalIndex is
            // still a valid insertion point after the recitals have been filtered out.
            smeDoc.Items = smeDoc.Items.Where(x => x.Type != SmeDocItemType.Recital).ToList();

            int partsCount   = (int)Math.Ceiling((double)recitalItems.Count / partSize);
            var groupedItems = new List <SmeDocItem>(partsCount);

            for (int i = 0; i < partsCount; i++)
            {
                int firstInPart = i * partSize;
                var smeDocItem  = new SmeDocItem()
                {
                    Childs = new List <SmeDocItem>(recitalItems.Skip(firstInPart).Take(partSize)),
                    Type   = SmeDocItemType.Recital
                };
                smeDocItem.Heading = $"{Translator.GetString("Recitals")} {firstInPart + 1}-{firstInPart + smeDocItem.Childs.Count}";
                groupedItems.Add(smeDocItem);
            }

            smeDoc.Items.InsertRange(firstRecitalIndex, groupedItems);
        }
        /// <summary>
        /// Appends the HTML of <paramref name="item"/> and, recursively, of all its children
        /// to <paramref name="sb"/>.
        /// </summary>
        /// <param name="sb">Buffer that receives the generated markup.</param>
        /// <param name="item">Item to render; its text is written first, then its children in order.</param>
        /// <param name="document">Document passed to <c>AddButtons</c> for the recital and article buttons.</param>
        /// <param name="lang">Language passed to <c>AddButtons</c> and <c>OldDirective</c>.</param>
        /// <param name="addArtRec">
        /// When true, button markup for the item's recitals, articles and (if any exist and
        /// <c>OldDirective</c> returns a value) old articles is appended after the item's text.
        /// </param>
        private static void GetHtml(StringBuilder sb, SmeDocItem item, SmeDoc document, SmeLanguage lang, bool addArtRec = true)
        {
            sb.AppendLine(item.Text);

            if (addArtRec)
            {
                string recitalButtons = AddButtons(item.Recitals, $"{item.Id}_rec", Translator.GetString("Recitals"), document, lang);
                sb.AppendLine(recitalButtons);

                string articleButtons = AddButtons(item.Articles, $"{item.Id}_art", Translator.GetString("Articles"), document, lang);
                sb.AppendLine(articleButtons);

                // Old-article buttons are only rendered when there is something to link to
                // and the old directive can be obtained for this language.
                if (item.OldArticles?.Count > 0)
                {
                    var oldDirective = OldDirective(lang);

                    if (oldDirective != null)
                    {
                        string oldArticleButtons = AddButtons(item.OldArticles, $"{item.Id}_old_art", Translator.GetString("31995L0046"), oldDirective, lang, true);
                        sb.AppendLine(oldArticleButtons);
                    }
                }
            }

            foreach (var nested in item.Childs)
            {
                GetHtml(sb, nested, document, lang, addArtRec);
            }
        }
        /// <summary>
        /// Renders <paramref name="docItem"/> together with all of its descendants into a
        /// single string by delegating to <c>GetHtml</c>.
        /// </summary>
        /// <param name="document">Document forwarded to <c>GetHtml</c>.</param>
        /// <param name="lang">Language forwarded to <c>GetHtml</c>.</param>
        /// <param name="docItem">Root item to render.</param>
        /// <param name="addArtRec">Forwarded to <c>GetHtml</c>; controls whether button markup is emitted.</param>
        /// <returns>The accumulated markup.</returns>
        public static string GetDisplayText(SmeDoc document, SmeLanguage lang, SmeDocItem docItem, bool addArtRec = true)
        {
            StringBuilder htmlBuffer = new StringBuilder();

            GetHtml(htmlBuffer, docItem, document, lang, addArtRec);

            string displayText = htmlBuffer.ToString();
            return(displayText);
        }
        /// <summary>
        /// Flattens the tree rooted at <paramref name="orgItem"/> into
        /// <paramref name="resItems"/>, parent before children.
        /// </summary>
        /// <remarks>
        /// A clone of each visited item is appended to the result. For items whose type is
        /// not listed in <c>endLevelItemTypes</c> the clone's child list is replaced by an empty
        /// list and the original children are visited in turn; items of an end-level type
        /// are not descended into and their clone keeps whatever <c>Clone</c> produced.
        /// </remarks>
        /// <param name="orgItem">Item to copy and (possibly) descend into.</param>
        /// <param name="resItems">Flat list that receives the clones.</param>
        private static void TravelDocument(SmeDocItem orgItem, List <SmeDocItem> resItems)
        {
            var flatCopy = orgItem.Clone();
            resItems.Add(flatCopy);

            bool isEndLevel = endLevelItemTypes.Contains(orgItem.Type);
            if (isEndLevel)
            {
                return;
            }

            // The children are emitted as separate entries, so the copy itself carries none.
            flatCopy.Childs = new List <SmeDocItem>();

            foreach (var descendant in orgItem.Childs)
            {
                TravelDocument(descendant, resItems);
            }
        }
Example #6
0
        /// <summary>
        /// Splits the preface HTML into recital items and plain text items.
        /// </summary>
        /// <remarks>
        /// Every node matched by the XPath <c>/div/div/div</c> whose inner text starts with a
        /// parenthesised number followed by whitespace (for example "(12) ...") becomes a
        /// <c>Recital</c> item with id <c>rec_&lt;number&gt;</c>. Consecutive non-matching nodes
        /// are concatenated into a single <c>Text</c> item. When at least one node is found the
        /// result always starts with a <c>Text</c> item, which has no text if the very first
        /// node is already a recital.
        /// </remarks>
        /// <param name="htmlDoc">Parsed preface document.</param>
        /// <returns>
        /// The items in document order; an empty list when the document contains no matching
        /// nodes.
        /// </returns>
        public static List <SmeDocItem> GetRecitalsFromPrefaceHtml(HtmlDocument htmlDoc)
        {
            List <SmeDocItem> resultItems = new List <SmeDocItem>();

            var allContentDivs = htmlDoc.DocumentNode.SelectNodes(@"/div/div/div");

            // SelectNodes yields null (not an empty collection) when the XPath matches nothing.
            if (allContentDivs == null)
            {
                return(resultItems);
            }

            // Accumulates the run of non-recital markup currently being collected; null while
            // no such run is open.
            SmeDocItem notRecItem = new SmeDocItem();

            notRecItem.Type = SmeDocItemType.Text;

            foreach (HtmlNode divNode in allContentDivs)
            {
                Match matchRec = Regex.Match(divNode.InnerText, @"^\s*\((\d+)\)\s+");

                if (matchRec.Success)
                {
                    // A recital closes the pending text run, if any.
                    if (notRecItem != null)
                    {
                        resultItems.Add(notRecItem);
                        notRecItem = null;
                    }

                    SmeDocItem recItem = new SmeDocItem();
                    recItem.Type = SmeDocItemType.Recital;
                    recItem.Id   = $"rec_{matchRec.Groups[1].Value}";
                    recItem.Text = divNode.OuterHtml;

                    resultItems.Add(recItem);
                }
                else
                {
                    if (notRecItem == null)
                    {
                        notRecItem      = new SmeDocItem();
                        notRecItem.Type = SmeDocItemType.Text;
                    }

                    notRecItem.Text += divNode.OuterHtml;
                }
            }

            // Flush a text run that reaches the end of the document.
            if (notRecItem != null)
            {
                resultItems.Add(notRecItem);
            }

            return(resultItems);
        }
Example #7
0
        /// <summary>
        /// Derives the id and the item type of <paramref name="currentBaseEl"/> from an
        /// element id string.
        /// </summary>
        /// <remarks>
        /// The last <c>&lt;prefix&gt;_&lt;value&gt;</c> segment of the element id (neither part
        /// containing an underscore) determines the type via its prefix ("tit", "rec",
        /// "sect", "chap", "part", "art", "par", "pt", "sent", "let", "num"; anything else
        /// maps to <c>Text</c>). Element ids that start with "art_" are stored in full as the
        /// item id, all others are reduced to that last segment. When the element id
        /// contains no such segment the item is left unchanged.
        /// </remarks>
        /// <param name="currBaseEId">Element id to analyse, e.g. "art_5__par_2".</param>
        /// <param name="currentBaseEl">Item whose <c>Id</c> and <c>Type</c> are set.</param>
        private static void SetElementIdAndType(string currBaseEId, SmeDocItem currentBaseEl)
        {
            Match matchType = Regex.Match(currBaseEId, @"([^_]+)_([^_]+)$");

            if (!matchType.Success)
            {
                return;
            }

            // Identifiers are compared ordinally; a culture-sensitive StartsWith could give
            // culture-dependent results for the same element id.
            currentBaseEl.Id = currBaseEId.StartsWith("art_", StringComparison.Ordinal)
                               ? currBaseEId
                               : matchType.Value;

            switch (matchType.Groups[1].Value)
            {
            case "tit":
                currentBaseEl.Type = SmeDocItemType.Title;
                break;

            case "rec":
                currentBaseEl.Type = SmeDocItemType.Recital;
                break;

            case "sect":
                currentBaseEl.Type = SmeDocItemType.Section;
                break;

            case "chap":
                currentBaseEl.Type = SmeDocItemType.Chapter;
                break;

            case "part":
                currentBaseEl.Type = SmeDocItemType.Part;
                break;

            case "art":
                currentBaseEl.Type = SmeDocItemType.Article;
                break;

            case "par":
                currentBaseEl.Type = SmeDocItemType.Paragraph;
                break;

            case "pt":
                currentBaseEl.Type = SmeDocItemType.Point;
                break;

            case "sent":
                currentBaseEl.Type = SmeDocItemType.Sentence;
                break;

            case "let":
                currentBaseEl.Type = SmeDocItemType.Letter;
                break;

            case "num":
                currentBaseEl.Type = SmeDocItemType.Number;
                break;

            default:
                currentBaseEl.Type = SmeDocItemType.Text;
                break;
            }
        }
Example #8
0
        /// <summary>
        /// Fills <paramref name="currentBaseEl"/> (text, heading, sub-heading and child items)
        /// from the content node <paramref name="currBaseNode"/>.
        /// </summary>
        /// <remarks>
        /// The node's own opening and closing tags are appended to the item's text, followed
        /// by the outer HTML of every child that is not an <c>a</c> element. An <c>a</c> child
        /// with a non-blank <c>eId</c> attribute starts a child item: the anchor's next sibling
        /// is consumed as that child's content (and therefore skipped by the loop). Children
        /// whose id starts with "art_" are parsed as articles, which are further split into
        /// numbered paragraphs and lettered/numbered points; all other children are parsed
        /// recursively by this method. Anchors without an <c>eId</c> are dropped.
        /// </remarks>
        /// <param name="currentBaseEl">Item to fill.</param>
        /// <param name="currBaseNode">Content node of the item; nothing happens when it is null.</param>
        private static void GetAndSetChildsNodes(SmeDocItem currentBaseEl, HtmlNode currBaseNode)
        {
            if (currBaseNode == null)
            {
                return;
            }

            currentBaseEl.Text += ExtractWrapperTags(currBaseNode);

            // Set after an anchor has consumed its following sibling as content, so that the
            // sibling is not processed a second time as an ordinary child.
            bool isChildNodeNext = false;

            foreach (HtmlNode childNode in currBaseNode.ChildNodes)
            {
                if (isChildNodeNext)
                {
                    isChildNodeNext = false;
                    continue;
                }

                if (childNode.Attributes[@"class"] != null)
                {
                    if (childNode.Attributes[@"class"].Value.Contains("d-num"))
                    {
                        currentBaseEl.Heading = childNode.InnerText.Trim();
                    }
                    else if (childNode.Attributes[@"class"].Value.Contains("d-heading"))
                    {
                        currentBaseEl.SubHeading = childNode.InnerText.Trim();
                    }
                }

                if (!string.Equals(childNode.Name, "a", StringComparison.OrdinalIgnoreCase))
                {
                    currentBaseEl.Text += childNode.OuterHtml;
                    continue;
                }

                string childEId = childNode.Attributes[@"eId"]?.Value;
                if (string.IsNullOrWhiteSpace(childEId))
                {
                    continue;
                }

                SmeDocItem childBaseEl = new SmeDocItem();
                childBaseEl.TreeLevel = currentBaseEl.TreeLevel + 1;
                childBaseEl.Text     += childNode.OuterHtml;
                SetElementIdAndType(childEId, childBaseEl);
                var childBaseNode = childNode.NextSibling;
                isChildNodeNext = true;

                // Id stays unset when the eId has no "<prefix>_<value>" segment; such an
                // element is never an article.
                bool isArticle = childBaseEl.Id != null && Regex.IsMatch(childBaseEl.Id, @"^art_", RegexOptions.IgnoreCase);

                if (!isArticle)
                {
                    GetAndSetChildsNodes(childBaseEl, childBaseNode);
                }
                else if (childBaseNode != null)
                {
                    // An article anchor that is the last sibling has no content node to parse.
                    FillArticleFromNode(childBaseEl, childBaseNode);
                }

                currentBaseEl.Childs.Add(childBaseEl);
            }
        }

        // Returns the opening tag and the closing tag of the node's outer HTML, concatenated, without the content in between.
        private static string ExtractWrapperTags(HtmlNode node)
        {
            string outerHtml = node.OuterHtml;

            return(Regex.Match(outerHtml, @"^\s*\<[^\<\>]+\>").Value + Regex.Match(outerHtml, @"\<\/[^\<\>]+\>\s*$").Value);
        }

        // Fills an article item (text, heading, sub-heading, paragraph/point children) from the article's content node.
        private static void FillArticleFromNode(SmeDocItem articleEl, HtmlNode articleNode)
        {
            articleEl.Text += ExtractWrapperTags(articleNode);

            foreach (var artChild in articleNode.ChildNodes)
            {
                if (artChild.Attributes[@"class"] != null)
                {
                    if (artChild.Attributes[@"class"].Value.Contains("d-num"))
                    {
                        articleEl.Heading = artChild.InnerText.Trim();
                    }
                    else
                    {
                        // The sub-heading is the text of a nested element whose class contains "d-c-sti-art".
                        Match matchSubHeading = Regex.Match(artChild.InnerHtml, @"\<[^\<\>]+class\s?=\s?['""][^'""]*?d-c-sti-art[^'""]*?['""][^\<\>]*\>([^\<\>]+)\<");

                        if (matchSubHeading.Success)
                        {
                            articleEl.SubHeading = matchSubHeading.Groups[1].Value;
                        }
                    }
                }

                if (!string.Equals(artChild.Name, "div", StringComparison.OrdinalIgnoreCase))
                {
                    articleEl.Text += artChild.OuterHtml;
                    continue;
                }

                // For a div only its wrapper tags go into the article text directly; its
                // children are classified one by one.
                articleEl.Text += ExtractWrapperTags(artChild);

                foreach (var divChild in artChild.ChildNodes)
                {
                    AddArticleDivChild(articleEl, divChild);
                }
            }
        }

        // Classifies one child of an article div: a numbered <p> becomes a paragraph item, a lettered/numbered <table> becomes a letter/point item, anything else is appended to the article text.
        private static void AddArticleDivChild(SmeDocItem articleEl, HtmlNode divChild)
        {
            if (string.Equals(divChild.Name, "p", StringComparison.OrdinalIgnoreCase))
            {
                Match matchNumber = Regex.Match(divChild.InnerText.Trim(), @"^(\d+)\.");

                if (matchNumber.Success)
                {
                    SmeDocItem parEl = new SmeDocItem();
                    parEl.TreeLevel = articleEl.TreeLevel + 1;
                    parEl.Text      = divChild.OuterHtml;
                    var parEId = $"{articleEl.Id}__par_{matchNumber.Groups[1].Value}";
                    SetElementIdAndType(parEId, parEl);

                    articleEl.Childs.Add(parEl);
                }
                else
                {
                    articleEl.Text += divChild.OuterHtml;
                }
            }
            else if (string.Equals(divChild.Name, "table", StringComparison.OrdinalIgnoreCase))
            {
                SmeDocItem currEl = new SmeDocItem();
                currEl.TreeLevel = articleEl.TreeLevel + 1;
                currEl.Text      = divChild.OuterHtml;
                var currEId = string.Empty;

                // A letter/point hangs below the most recently added child of the article
                // (if there is one), otherwise directly below the article.
                bool hasPrecedingChild = articleEl.Childs.Count > 0;

                Match matchBegin = Regex.Match(divChild.InnerText.Trim(), @"^(\p{L})\)");

                if (matchBegin.Success)
                {
                    currEId = $"{(hasPrecedingChild ? articleEl.Childs.Last().Id : articleEl.Id)}__let_{matchBegin.Groups[1].Value}";
                }
                else
                {
                    matchBegin = Regex.Match(divChild.InnerText.Trim(), @"^(\d+)\)");

                    if (matchBegin.Success)
                    {
                        currEId = $"{(hasPrecedingChild ? articleEl.Childs.Last().Id : articleEl.Id)}__pt_{matchBegin.Groups[1].Value}";
                    }
                    else
                    {
                        // Neither "x)" nor "1)": an ordinary table that stays part of the article text.
                        articleEl.Text += divChild.OuterHtml;
                    }
                }

                if (!string.IsNullOrWhiteSpace(currEId))
                {
                    SetElementIdAndType(currEId, currEl);

                    if (hasPrecedingChild)
                    {
                        currEl.TreeLevel = articleEl.Childs.Last().TreeLevel + 1;
                        articleEl.Childs.Last().Childs.Add(currEl);
                    }
                    else
                    {
                        articleEl.Childs.Add(currEl);
                    }
                }
            }
            else
            {
                articleEl.Text += divChild.OuterHtml;
            }
        }