/// <summary>
/// Tries to gather a title, a body text, a bullet list and a side image with its footer
/// from the loaded page. The template is considered usable only when all four items were
/// found; in that case the template type is set to <c>OLD_DOC_TEMPLATE_TYPE_008</c>.
/// Otherwise the template type is set to <c>OLD_DOC_TEMPLATE_TYPE_UNASSIGNED</c> and
/// everything collected into <c>examinedDoc</c> is discarded through <c>Reset()</c>.
/// </summary>
/// <remarks>
/// The null checks on every <c>SelectNodes</c> result assume the HTML library returns
/// <c>null</c> when nothing matches (HtmlAgilityPack behaviour -- confirm for the version in use).
/// </remarks>
private void GetTemplateTitleBulletPhotoFooterContent()
{
    bool isTitleSet = false;
    bool isContentSet = false;
    bool isBulletSet = false;
    bool isLeftPhotoFooterSet = false;

    // --- Titles: every <div class="Titulo"> becomes one Title entry. ---
    var titles = page.DocumentNode.SelectNodes("//div[@class='Titulo']");
    if (titles != null)
    {
        foreach (var t in titles)
        {
            Title tit = new Title();
            tit.Name = MetadataName.OldDocTitleName;
            tit.Content = t.InnerHtml;
            examinedDoc.Title.Add(tit);
        }

        isTitleSet = true;
    }

    // --- Bullets: the bullet text is the node that follows the <img> inside <p class="Balas">. ---
    HtmlNode lastBulletImage = null;
    var selectBullets = page.DocumentNode.SelectNodes("//p[@class='Balas']/img");
    if (selectBullets != null)
    {
        foreach (var bullet in selectBullets)
        {
            var node = bullet.NextSibling;

            // The image may be the last child of its paragraph, in which case there is
            // no sibling to read the bullet text from; skip it instead of crashing.
            if (node != null && !string.IsNullOrEmpty(node.InnerHtml))
            {
                Bullet bl = new Bullet();
                bl.Name = MetadataName.OldDocBulletName;
                bl.Content = node.InnerHtml.Replace("\r\n", "").Trim();
                examinedDoc.Bullet.Add(bl);
            }
        }

        // The flag only records that bullet markup exists, even if every bullet was empty.
        isBulletSet = true;

        if (selectBullets.Count != 0)
        {
            lastBulletImage = selectBullets[selectBullets.Count - 1];
        }
    }

    // --- Body text: everything that follows the last bullet paragraph. ---
    if (lastBulletImage != null)
    {
        // Start at the first sibling after the paragraph that holds the last bullet image.
        // That first sibling is appended whatever its tag is; after it only <p> siblings
        // are appended and anything else is skipped.
        HtmlNode current = lastBulletImage.ParentNode.NextSibling;
        var text = new System.Text.StringBuilder();

        while (current != null)
        {
            text.Append(current.OuterHtml);

            current = current.NextSibling;
            while (current != null
                   && !string.Equals(current.Name, "P", StringComparison.OrdinalIgnoreCase))
            {
                current = current.NextSibling;
            }
        }

        if (text.Length != 0)
        {
            examinedDoc.DocumentText.Name = MetadataName.OldDocTextName;
            examinedDoc.DocumentText.Content = text.ToString();
            isContentSet = true;
        }
    }

    // --- Side images and their footers. ---
    var images = page.DocumentNode.SelectNodes("//td/img");
    var footers = page.DocumentNode.SelectNodes("//td[@class='PieFoto']");
    if (images != null && footers != null)
    {
        // Only images whose src contains "BancoMedios/Imagenes" are kept.
        List<string> imageNameList = new List<string>();
        foreach (var img in images)
        {
            var imgSrcRef = img.Attributes["src"];
            if (imgSrcRef != null)
            {
                var s = imgSrcRef.Value;
                if (s.Contains("BancoMedios/Imagenes"))
                {
                    imageNameList.Add(s);
                }
            }
        }

        // Images and footers are paired by position. There may be fewer footers than
        // images, so stop at the shorter list rather than indexing past its end.
        int pairCount = Math.Min(imageNameList.Count, footers.Count);
        for (int i = 0; i < pairCount; i++)
        {
            PhotoRelated pr = new PhotoRelated();
            pr.ImageName = MetadataName.OldDocImageName;
            pr.ImageSrc = imageNameList[i];
            pr.FooterName = MetadataName.OldDocImageFooterName;
            pr.Footer = footers[i].InnerHtml;
            examinedDoc.PhotoRelated.Add(pr);
        }

        if (pairCount != 0)
        {
            isLeftPhotoFooterSet = true;
        }
    }

    // --- Verdict: all four parts are required. ---
    if (isTitleSet && isContentSet && isBulletSet && isLeftPhotoFooterSet)
    {
        useTemplateTypeInstead = TemplateTypeCode.OLD_DOC_TEMPLATE_TYPE_008;
    }
    else
    {
        useTemplateTypeInstead = TemplateTypeCode.OLD_DOC_TEMPLATE_TYPE_UNASSIGNED;
        examinedDoc.Reset();
    }
}
/// <summary>
/// Collects the related photos (each one an image together with its footer) that are set
/// aside from the main content, and adds them to <c>examinedDoc.PhotoRelated</c>.
/// </summary>
/// <remarks>
/// Two page layouts are handled:
/// <list type="bullet">
/// <item><description>
/// When <c>td.Cuerpo_Foto</c> cells exist, the template type is set to
/// <c>OLD_DOC_TEMPLATE_TYPE_004</c> and photos, credits and captions are read from
/// <c>Table1</c>, paired by position.
/// </description></item>
/// <item><description>
/// Otherwise every image whose src contains "BancoMedios/Imagenes" is paired with the
/// non-empty <c>td.PieFoto</c> footers, but only when both lists have the same length;
/// the template type is then set to <c>OLD_DOC_TEMPLATE_TYPE_001</c>.
/// </description></item>
/// </list>
/// The null checks on every <c>SelectNodes</c> result assume the HTML library returns
/// <c>null</c> when nothing matches (HtmlAgilityPack behaviour -- confirm for the version in use).
/// </remarks>
private void RetrieveRelatedPhotos()
{
    var photoContentList = page.DocumentNode.SelectNodes("//td[@class='Cuerpo_Foto']");
    if (photoContentList != null)
    {
        // The template type is assigned as soon as caption cells are present, even if
        // no photo ends up being added below.
        useTemplateTypeInstead = TemplateTypeCode.OLD_DOC_TEMPLATE_TYPE_004;

        var photoList = page.DocumentNode.SelectNodes("//table[@id='Table1']/tr[1]/td/img");
        var photoCreditList = page.DocumentNode.SelectNodes("//table[@id='Table1' and @bgcolor='#f4f7fc']/tr[2]/td");
        if (photoList != null && photoCreditList != null)
        {
            // The three collections come from different queries and may differ in
            // length; iterate only over positions that exist in all of them.
            int numItems = Math.Min(
                photoContentList.Count,
                Math.Min(photoList.Count, photoCreditList.Count));

            for (int i = 0; i < numItems; i++)
            {
                PhotoRelated pr = new PhotoRelated();

                // When the <img> has no src attribute the image fields keep whatever
                // values PhotoRelated gives them by default.
                var imgSrcRef = photoList[i].Attributes["src"];
                if (imgSrcRef != null)
                {
                    var s = imgSrcRef.Value;
                    pr.ImageName = MetadataName.OldDocImageName;

                    // Sources that do not contain the expected path are recorded as empty.
                    pr.ImageSrc = s.Contains("BancoMedios/Imagenes") ? s : "";
                }

                pr.FooterName = MetadataName.OldDocImageFooterName;
                pr.Footer = photoCreditList[i].InnerHtml + "<br>" + photoContentList[i].InnerHtml;
                examinedDoc.PhotoRelated.Add(pr);
            }
        }
    }
    else
    {
        List<string> imageNameList = null;
        List<string> imgFooterNameList = null;

        // Every image on the page whose src contains the expected path.
        var images = page.DocumentNode.SelectNodes("//img");
        if (images != null)
        {
            imageNameList = new List<string>();
            foreach (var img in images)
            {
                var imgSrcRef = img.Attributes["src"];
                if (imgSrcRef != null)
                {
                    var s = imgSrcRef.Value;
                    if (s.Contains("BancoMedios/Imagenes"))
                    {
                        imageNameList.Add(s);
                    }
                }
            }
        }

        // Every non-empty footer cell; emptiness is tested on the markup, but the
        // stored value is the plain text.
        var imgFooter = page.DocumentNode.SelectNodes("//td[@class='PieFoto']");
        if (imgFooter != null)
        {
            imgFooterNameList = new List<string>();
            foreach (var imf in imgFooter)
            {
                var footerTxt = imf.InnerHtml;
                if (footerTxt != "")
                {
                    imgFooterNameList.Add(imf.InnerText);
                }
            }
        }

        // Images and footers are paired by position, so both lists must exist and
        // contain the same number of elements.
        if (imageNameList != null
            && imgFooterNameList != null
            && imgFooterNameList.Count == imageNameList.Count)
        {
            useTemplateTypeInstead = TemplateTypeCode.OLD_DOC_TEMPLATE_TYPE_001;

            for (int i = 0; i < imageNameList.Count; i++)
            {
                PhotoRelated pr = new PhotoRelated();
                pr.ImageName = MetadataName.OldDocImageName;
                pr.ImageSrc = imageNameList[i];
                pr.FooterName = MetadataName.OldDocImageFooterName;
                pr.Footer = imgFooterNameList[i];
                examinedDoc.PhotoRelated.Add(pr);
            }
        }
    }
}