Пример #1
0
        /// <summary>
        /// Tries to gather a title, a list of bullets, the body text and a side photo with its footer from
        /// <c>page</c>, storing what it finds in <c>examinedDoc</c>. The template is considered usable only when
        /// all four items are present: in that case <c>useTemplateTypeInstead</c> is set to
        /// <c>TemplateTypeCode.OLD_DOC_TEMPLATE_TYPE_008</c>; otherwise it is set to
        /// <c>TemplateTypeCode.OLD_DOC_TEMPLATE_TYPE_UNASSIGNED</c> and <c>examinedDoc</c> is reset.
        /// </summary>
        private void GetTemplateTitleBulletPhotoFooterContent() {
            bool isTitleSet = false;
            bool isContentSet = false;
            bool isBulletSet = false;
            bool isLeftPhotoFooterSet = false;

            // Titles: every <div class="Titulo"> becomes one Title entry.
            var titles = page.DocumentNode.SelectNodes("//div[@class='Titulo']");
            if (titles != null) {
                foreach (var t in titles) {
                    Title tit = new Title();
                    tit.Name = MetadataName.OldDocTitleName;
                    tit.Content = t.InnerHtml;
                    examinedDoc.Title.Add(tit);
                }
                isTitleSet = true;
            }

            // Bullets: the bullet text is the node that follows each <img> inside <p class="Balas">.
            HtmlNode documentTextNode = null;
            var selectBullets = page.DocumentNode.SelectNodes("//p[@class='Balas']/img");
            if (selectBullets != null) {
                foreach (var bullet in selectBullets) {
                    var node = bullet.NextSibling;
                    // An <img> that is the last child of its paragraph has no next sibling; skip it
                    // instead of dereferencing null.
                    if (node != null && node.InnerHtml != "") {
                        Bullet bl = new Bullet();
                        bl.Name = MetadataName.OldDocBulletName;
                        bl.Content = node.InnerHtml.Replace("\r\n", "").Trim();
                        examinedDoc.Bullet.Add(bl);
                    }
                }
                isBulletSet = true;
                if (selectBullets.Count != 0) {
                    // The body text is searched for after the paragraph holding the last bullet.
                    documentTextNode = selectBullets[selectBullets.Count - 1];
                }
            }

            // Body text: the node right after the last bullet paragraph, followed by every later <p> sibling.
            if (documentTextNode != null) {
                documentTextNode = documentTextNode.ParentNode.NextSibling;
                var txt = new System.Text.StringBuilder();
                while (documentTextNode != null) {
                    txt.Append(documentTextNode.OuterHtml);
                    // Advance to the next <p> sibling, skipping any other kind of node in between.
                    do {
                        documentTextNode = documentTextNode.NextSibling;
                    } while (documentTextNode != null
                             && !string.Equals(documentTextNode.Name, "P", StringComparison.OrdinalIgnoreCase));
                }
                if (txt.Length > 0) {
                    examinedDoc.DocumentText.Name = MetadataName.OldDocTextName;
                    examinedDoc.DocumentText.Content = txt.ToString();
                    isContentSet = true;
                }
            }

            // Photos: images under <td> whose src points into BancoMedios/Imagenes, paired by position
            // with the <td class="PieFoto"> footers.
            var images = page.DocumentNode.SelectNodes("//td/img");
            var footers = page.DocumentNode.SelectNodes("//td[@class='PieFoto']");
            if (images != null && footers != null) {
                var imageNameList = new List<string>();
                foreach (var img in images) {
                    var imgSrcRef = img.Attributes["src"];
                    if (imgSrcRef != null) {
                        var s = imgSrcRef.Value;
                        if (s.Contains("BancoMedios/Imagenes")) {
                            imageNameList.Add(s);
                        }
                    }
                }

                // Images and footers come from independent queries, so their counts may differ.
                // Only pair the positions present in both lists to avoid indexing past the footers.
                int pairCount = Math.Min(imageNameList.Count, footers.Count);
                for (int i = 0; i < pairCount; i++) {
                    PhotoRelated pr = new PhotoRelated();
                    pr.ImageName = MetadataName.OldDocImageName;
                    pr.ImageSrc = imageNameList[i];
                    pr.FooterName = MetadataName.OldDocImageFooterName;
                    pr.Footer = footers[i].InnerHtml;
                    examinedDoc.PhotoRelated.Add(pr);
                }
                isLeftPhotoFooterSet = pairCount > 0;
            }

            if (isTitleSet && isContentSet && isBulletSet && isLeftPhotoFooterSet) {
                useTemplateTypeInstead = TemplateTypeCode.OLD_DOC_TEMPLATE_TYPE_008;
            }
            else {
                // At least one required part is missing: discard everything gathered so far.
                useTemplateTypeInstead = TemplateTypeCode.OLD_DOC_TEMPLATE_TYPE_UNASSIGNED;
                examinedDoc.Reset();
            }
        }
Пример #2
0
        /// <summary>
        /// Collects the related photos (an image together with its footer) from <c>page</c> into
        /// <c>examinedDoc.PhotoRelated</c> and records the detected layout in <c>useTemplateTypeInstead</c>.
        /// </summary>
        /// <remarks>
        /// When the page contains <c>td.Cuerpo_Foto</c> cells, the template type is set to
        /// <c>TemplateTypeCode.OLD_DOC_TEMPLATE_TYPE_004</c> and photos are read from the <c>Table1</c> tables.
        /// Otherwise every BancoMedios image on the page is paired with the non-empty <c>td.PieFoto</c> footers,
        /// and <c>TemplateTypeCode.OLD_DOC_TEMPLATE_TYPE_001</c> is set only when both lists have the same length.
        /// </remarks>
        private void RetrieveRelatedPhotos() {
            var photoContentList = page.DocumentNode.SelectNodes("//td[@class='Cuerpo_Foto']");

            if (photoContentList != null) {
                useTemplateTypeInstead = TemplateTypeCode.OLD_DOC_TEMPLATE_TYPE_004;
                var photoList = page.DocumentNode.SelectNodes("//table[@id='Table1']/tr[1]/td/img");
                var photoCreditList = page.DocumentNode.SelectNodes("//table[@id='Table1' and @bgcolor='#f4f7fc']/tr[2]/td");

                if (photoList != null && photoCreditList != null) {
                    // The three node lists come from independent XPath queries and may differ in length,
                    // so only walk the positions that exist in all of them.
                    int numItems = photoContentList.Count;
                    if (photoList.Count < numItems) {
                        numItems = photoList.Count;
                    }
                    if (photoCreditList.Count < numItems) {
                        numItems = photoCreditList.Count;
                    }

                    for (int i = 0; i < numItems; i++) {
                        PhotoRelated pr = new PhotoRelated();
                        pr.ImageName = MetadataName.OldDocImageName;

                        // Keep the source only for BancoMedios images; a missing src attribute is treated
                        // the same as a foreign image (empty source) so the entry is always fully populated.
                        var imgSrcRef = photoList[i].Attributes["src"];
                        if (imgSrcRef != null && imgSrcRef.Value.Contains("BancoMedios/Imagenes")) {
                            pr.ImageSrc = imgSrcRef.Value;
                        }
                        else {
                            pr.ImageSrc = "";
                        }

                        // Footer is the photo credit followed by the photo description.
                        pr.FooterName = MetadataName.OldDocImageFooterName;
                        pr.Footer = photoCreditList[i].InnerHtml + "<br>" + photoContentList[i].InnerHtml;
                        examinedDoc.PhotoRelated.Add(pr);
                    }
                }
            }
            else {
                List<string> imageNameList = null;
                List<string> imgFooterNameList = null;

                // Sources of every image on the page that lives under BancoMedios/Imagenes.
                var images = page.DocumentNode.SelectNodes("//img");
                if (images != null) {
                    imageNameList = new List<string>();
                    foreach (var img in images) {
                        var imgSrcRef = img.Attributes["src"];
                        if (imgSrcRef != null) {
                            var s = imgSrcRef.Value;
                            if (s.Contains("BancoMedios/Imagenes")) {
                                imageNameList.Add(s);
                            }
                        }
                    }
                }

                // Plain text of every footer cell that has some markup content.
                var imgFooter = page.DocumentNode.SelectNodes("//td[@class='PieFoto']");
                if (imgFooter != null) {
                    imgFooterNameList = new List<string>();
                    foreach (var imf in imgFooter) {
                        if (imf.InnerHtml != "") {
                            imgFooterNameList.Add(imf.InnerText);
                        }
                    }
                }

                // Images and footers are paired by position, so both lists must exist and
                // contain the same number of elements.
                if (imageNameList != null && imgFooterNameList != null
                    && imgFooterNameList.Count == imageNameList.Count) {
                    useTemplateTypeInstead = TemplateTypeCode.OLD_DOC_TEMPLATE_TYPE_001;
                    for (int i = 0; i < imageNameList.Count; i++) {
                        PhotoRelated pr = new PhotoRelated();
                        pr.ImageName = MetadataName.OldDocImageName;
                        pr.ImageSrc = imageNameList[i];
                        pr.FooterName = MetadataName.OldDocImageFooterName;
                        pr.Footer = imgFooterNameList[i];
                        examinedDoc.PhotoRelated.Add(pr);
                    }
                }
            }
        }