Example #1
0
            /// <summary>
            /// Builds one <see cref="WebPageData"/> entry per anchor found in the HTML
            /// fragment held in <c>content</c>.
            /// </summary>
            /// <remarks>
            /// The fragment is expected to contain a <c>&lt;b&gt;</c> heading and a set of
            /// <c>&lt;a&gt;</c> links. Every entry is titled
            /// "<c>anchor text - heading text</c>" (colons stripped from the heading), is
            /// marked final, and carries the resolved link in its <c>Tag</c>.
            /// </remarks>
            /// <param name="doc">Not used by this implementation; the markup is read from <c>content</c>.</param>
            /// <returns>
            /// The extracted entries, or <c>null</c> when the fragment has no <c>&lt;b&gt;</c>
            /// node or <c>Helper.AllChild</c> returns <c>null</c> for the anchors.
            /// </returns>
            public WebPageData[] ExtractData(HtmlDocument doc)
            {
                var container = HtmlNode.CreateNode(content);
                var mainNode  = Helper.AnyChild(container, "b");
                if (mainNode == null)
                {
                    return null;
                }

                var aLinkNodes = Helper.AllChild(container, "a");
                if (aLinkNodes == null)
                {
                    return null;
                }

                // The heading is the same for every entry, so compute it once.
                // Colons are removed outright; replacing them with '\0' would embed
                // NUL characters in the title.
                string heading = mainNode.InnerText.Replace(":", "");

                List<WebPageData> data = new List<WebPageData>();
                foreach (var alinkNode in aLinkNodes)
                {
                    string link = alinkNode.HasAttributes
                        ? alinkNode.GetAttributeValue("href", "")
                        : "";

                    // Prefix the web folder only when the link does not already contain it.
                    // (The conditional operator binds looser than '+', so the unparenthesised
                    // form `cond ? "" : webFolder + link` would discard such links entirely.)
                    if (!link.Contains(webFolder))
                    {
                        link = webFolder + link;
                    }

                    WebPageData singleData = WebPageData.GetTextOnly($"{alinkNode.InnerText} - {heading}", "");
                    singleData.IsFinal = true;
                    //singleData.underlayingLinkReader = new IdlebrainAlbumPageReader(link);//TODO: MEMORY LEAK -> don't create new instance for every item
                    singleData.Tag = link;  // the link travels with the entry via its tag
                    data.Add(singleData);
                }

                return data.ToArray();
            }
Example #2
0
            /// <summary>
            /// Splits the second <c>align="left"</c> <c>div</c> of <paramref name="doc"/> on
            /// <c>&lt;br&gt;</c> and turns every piece that contains a <c>&lt;b&gt;</c> node into
            /// a <see cref="WebPageData"/> entry.
            /// </summary>
            /// <remarks>
            /// Each entry is titled with the <c>&lt;b&gt;</c> text (colons stripped), described
            /// with the number of anchors in the piece, marked final, and tagged with the
            /// piece's wrapped HTML. A successful run sets <c>IsSimulation</c> to <c>true</c>
            /// and stores the result in <c>cached</c>.
            /// </remarks>
            /// <param name="doc">
            /// The page to parse. When <c>null</c>, the previously cached result is returned
            /// (which is itself <c>null</c> if nothing has been cached yet).
            /// </param>
            /// <returns>
            /// The extracted entries, or <c>null</c> when the expected container is missing
            /// or parsing fails.
            /// </returns>
            public WebPageData[] ExtractData(HtmlDocument doc)
            {
                if (doc == null)
                {
                    // No document supplied: serve the cache; null when nothing is cached.
                    return cached;
                }

                Dictionary<string, string> att = new Dictionary<string, string>
                {
                    { "align", "left" }
                };

                try
                {
                    // The content lives in the second matching div; check explicitly rather
                    // than relying on an exception when there are fewer than two matches.
                    var divs = Helper.AllChild(doc.DocumentNode, "div", att, true);
                    if (divs == null || divs.Count < 2)
                    {
                        return null;
                    }

                    var container = divs[1];
                    if (container == null)
                    {
                        return null;
                    }

                    //container = Helper.AnyChild(container, "p");

                    string[] stElements = container.InnerHtml.Split(
                        new string[] { "<br>" }, StringSplitOptions.RemoveEmptyEntries);

                    List<WebPageData> data = new List<WebPageData>();

                    foreach (var st in stElements)
                    {
                        // Wrap each piece in a div so it parses as a single root node.
                        var      sss      = $"<div>{Helper.TrimToEntry(st)}</div>";
                        HtmlNode pNode    = HtmlNode.CreateNode(sss);
                        var      mainNode = Helper.AnyChild(pNode, "b");
                        if (mainNode == null)
                        {
                            continue;   // pieces without a <b> heading are skipped
                        }

                        var aLinkNodes = Helper.AllChild(pNode, "a");

                        // Colons are removed outright; replacing them with '\0' would embed
                        // NUL characters in the title.
                        var singleData = WebPageData.GetTextOnly(
                            mainNode.InnerText.Replace(":", ""),
                            aLinkNodes != null ? $"Contain : {aLinkNodes.Count}" : "");

                        //singleData.underlayingLinkReader = new IdlebrainSimulatedPage(sss);//TODO: MEMORY LEAK -> don't create new instance for every item
                        singleData.Tag     = sss;   // the wrapped HTML travels with the entry via its tag
                        singleData.IsFinal = true;

                        data.Add(singleData);
                    }

                    this.IsSimulation = true;
                    return cached = data.ToArray();
                }
                catch (Exception ex)
                {
                    // Parsing is best-effort: failures still yield null for the caller,
                    // but are surfaced in debug output instead of vanishing silently.
                    System.Diagnostics.Debug.WriteLine(ex);
                }
                return null;
            }
Example #3
0
            /// <summary>
            /// Collects the images of an album page: fills <c>AlbumImages</c> with a
            /// thumbnail/original URL pair per image and returns one
            /// <see cref="WebPageData"/> entry per image.
            /// </summary>
            /// <remarks>
            /// Images are looked up inside the table matched by <c>width="100%"</c> and
            /// <c>style="background-color: white;"</c>. Each entry is titled with its
            /// zero-based position, marked final, given a UID from <c>UidGenerator()</c>,
            /// and points at the thumbnail URL. <c>AlbumImages</c> is always cleared first,
            /// even when the method goes on to return <c>null</c>.
            /// </remarks>
            /// <param name="doc">The album page to parse; <c>null</c> yields <c>null</c>.</param>
            /// <returns>
            /// The extracted entries, or <c>null</c> when <paramref name="doc"/> is
            /// <c>null</c>, the table is not found, or <c>Helper.AllChild</c> returns
            /// <c>null</c> for the images.
            /// </returns>
            public WebPageData[] ExtractData(HtmlDocument doc)
            {
                AlbumImages.Clear();

                if (doc == null)
                {
                    return null;    // nothing to parse; avoid dereferencing a null document
                }

                Dictionary<string, string> att = new Dictionary<string, string>
                {
                    { "width", "100%" },
                    { "style", "background-color: white;" }
                };

                var container = Helper.AnyChild(doc.DocumentNode, "table", att, true);
                if (container == null)
                {
                    return null;
                }

                //TODO : add support for http://www.idlebrain.com/movie/photogallery/madhusharma1.html

                var imgNodes = Helper.AllChild(container, "img");
                if (imgNodes == null)
                {
                    return null;
                }

                // The base URL is the same for every image, so build it once.
                string baseUrl = webDir.EndsWith("/") ? webDir : webDir + "/";

                int index = 0;
                List<WebPageData> data = new List<WebPageData>();

                foreach (var imgNode in imgNodes)
                {
                    string src   = imgNode.GetAttributeValue("src", "");
                    string thSrc = baseUrl + src;

                    var singleData = WebPageData.GetTextOnly($"{index}", "");
                    index++;
                    singleData.IsFinal  = true;
                    singleData.UID      = UidGenerator();
                    singleData.ImageUrl = thSrc;    // thumbnail link

                    AlbumImages.Add(new ImageDefinition()
                    {
                        thumbnil = thSrc,
                        // The original image URL is the thumbnail src with every "th_" removed.
                        original = baseUrl + src.Replace("th_", "")
                    });

                    data.Add(singleData);
                }

                return data.ToArray();
            }