/// <summary>
/// Builds one <see cref="WebPageData"/> entry per anchor found in the HTML fragment
/// stored in <c>content</c>. Each entry's text is "anchor text - heading text", where the
/// heading is the fragment's <c>b</c> element, and its <c>Tag</c> carries the resolved link.
/// </summary>
/// <param name="doc">Not read by this implementation; the fragment in <c>content</c> is parsed instead.</param>
/// <returns>
/// The extracted entries (possibly an empty array), or <c>null</c> when the fragment has no
/// <c>b</c> element or the anchor lookup returns <c>null</c>.
/// </returns>
public WebPageData[] ExtractData(HtmlDocument doc)
{
    var container = HtmlNode.CreateNode(content);

    var mainNode = Helper.AnyChild(container, "b");
    if (mainNode == null)
    {
        return null;
    }

    var aLinkNodes = Helper.AllChild(container, "a");
    if (aLinkNodes == null)
    {
        return null;
    }

    // Strip colons from the heading. The char overload cannot express "remove", and
    // replacing with '\0' embedded NUL characters in the title, so the string overload is used.
    string heading = mainNode.InnerText.Replace(":", "");

    var data = new List<WebPageData>();
    foreach (var alinkNode in aLinkNodes)
    {
        string link = alinkNode.HasAttributes ? alinkNode.GetAttributeValue("href", "") : "";

        // NOTE(review): an href that already contains webFolder is replaced by an empty
        // string instead of being kept as-is. This looks unintended, but the behaviour is
        // preserved here - confirm against the code that consumes Tag.
        link = link.Contains(webFolder) ? "" : webFolder + link;

        WebPageData singleData = WebPageData.GetTextOnly($"{alinkNode.InnerText} - {heading}", "");
        singleData.IsFinal = true;
        //singleData.underlayingLinkReader = new IdlebrainAlbumPageReader(link);//TODO: MEMORY LEAK -> don't create new instance for every item
        singleData.Tag = link; // the link travels with the entry in its tag
        data.Add(singleData);
    }

    return data.ToArray();
}
/// <summary>
/// Takes the second node returned by <c>Helper.AllChild</c> for <c>div</c> elements with
/// <c>align="left"</c>, splits its inner HTML on <c>&lt;br&gt;</c>, and returns one entry for
/// every fragment that contains a <c>b</c> element. The fragment's HTML (wrapped in a
/// <c>div</c>) is stored in each entry's <c>Tag</c>.
/// </summary>
/// <param name="doc">
/// The page to parse. When <c>null</c>, the result of the last successful call is returned
/// (which is <c>null</c> if there has been none).
/// </param>
/// <returns>
/// The extracted entries, or <c>null</c> when the expected container is missing or parsing fails.
/// On success the result is also cached and <c>IsSimulation</c> is set to <c>true</c>.
/// </returns>
public WebPageData[] ExtractData(HtmlDocument doc)
{
    if (doc == null)
    {
        // No document supplied: serve the cache; it is null when nothing was cached yet.
        return cached;
    }

    var att = new Dictionary<string, string> { { "align", "left" } };

    try
    {
        // The data lives in the second matching div; check for it explicitly instead of
        // relying on an exception from the indexer when the page has a different layout.
        var containers = Helper.AllChild(doc.DocumentNode, "div", att, true);
        if (containers == null || containers.Count < 2)
        {
            return null;
        }

        var container = containers[1];
        if (container == null)
        {
            return null;
        }

        // Only the exact lowercase "<br>" token separates entries.
        string[] stElements = container.InnerHtml.Split(new string[] { "<br>" }, StringSplitOptions.RemoveEmptyEntries);

        var data = new List<WebPageData>();
        foreach (var st in stElements)
        {
            // Wrap the fragment so it parses as a single root node.
            var sss = $"<div>{Helper.TrimToEntry(st)}</div>";
            HtmlNode pNode = HtmlNode.CreateNode(sss);

            var mainNode = Helper.AnyChild(pNode, "b");
            if (mainNode == null)
            {
                continue; // fragments without a heading are skipped
            }

            var aLinkNodes = Helper.AllChild(pNode, "a");

            // Strip colons from the heading; Replace(':', '\0') embedded NUL characters instead.
            var singleData = WebPageData.GetTextOnly(
                mainNode.InnerText.Replace(":", ""),
                aLinkNodes != null ? $"Contain : {aLinkNodes.Count}" : "");
            //singleData.underlayingLinkReader = new IdlebrainSimulatedPage(sss);//TODO: MEMORY LEAK -> don't create new instance for every item
            singleData.Tag = sss; // the fragment HTML travels with the entry in its tag
            singleData.IsFinal = true;
            data.Add(singleData);
        }

        this.IsSimulation = true;
        return cached = data.ToArray();
    }
    catch (Exception ex)
    {
        // Parsing stays best-effort (null result), but the failure is no longer invisible.
        System.Diagnostics.Debug.WriteLine($"ExtractData failed: {ex}");
    }

    return null;
}
/// <summary>
/// Collects every <c>img</c> inside the page's white, full-width <c>table</c> (matched on
/// <c>width="100%"</c> and <c>style="background-color: white;"</c>). For each image it returns
/// an entry labelled with its zero-based index and records a thumbnail/original URL pair in
/// <c>AlbumImages</c>; the original URL is the thumbnail URL with every "th_" removed from
/// the <c>src</c> value.
/// </summary>
/// <param name="doc">The album page to parse; <c>null</c> yields a <c>null</c> result.</param>
/// <returns>
/// One entry per image (possibly an empty array), or <c>null</c> when <paramref name="doc"/>
/// is <c>null</c>, the table is not found, or the image lookup returns <c>null</c>.
/// <c>AlbumImages</c> is cleared on every call, including those that return <c>null</c>.
/// </returns>
public WebPageData[] ExtractData(HtmlDocument doc)
{
    AlbumImages.Clear();

    if (doc == null)
    {
        return null; // error protection: nothing to parse
    }

    var att = new Dictionary<string, string>
    {
        { "width", "100%" },
        { "style", "background-color: white;" }
    };

    var container = Helper.AnyChild(doc.DocumentNode, "table", att, true);
    if (container == null)
    {
        return null;
    }

    //TODO : add support for http://www.idlebrain.com/movie/photogallery/madhusharma1.html
    var imgNodes = Helper.AllChild(container, "img");
    if (imgNodes == null)
    {
        return null;
    }

    // Directory URL with exactly one trailing slash; loop-invariant, so computed once.
    string baseUrl = webDir.EndsWith("/", System.StringComparison.Ordinal) ? webDir : webDir + "/";

    int index = 0;
    var data = new List<WebPageData>();
    foreach (var imgNode in imgNodes)
    {
        string src = imgNode.GetAttributeValue("src", "");
        string thSrc = baseUrl + src;

        var singleData = WebPageData.GetTextOnly($"{index}", "");
        index++;
        singleData.IsFinal = true;
        singleData.UID = UidGenerator();
        singleData.ImageUrl = thSrc; // thumbnail link

        var imgDefi = new ImageDefinition()
        {
            thumbnil = thSrc,
            // Full-size image: same path with the "th_" thumbnail marker removed.
            original = baseUrl + src.Replace("th_", "")
        };
        AlbumImages.Add(imgDefi);
        data.Add(singleData);
    }

    return data.ToArray();
}