Example #1
0
        /// <summary>
        /// Scrapes the description belonging to <paramref name="fileidentifier"/> and, when the
        /// scrape succeeds, adds the resulting entry to the database.
        /// </summary>
        /// <param name="fileidentifier">Identifier handed to the <c>DescriptionScraper</c>.</param>
        /// <remarks>
        /// The outcome is reported on standard output. The scraper is constructed with
        /// <c>false</c> as its second argument (presumably marking this as the first,
        /// non-retry attempt; confirm against the constructor).
        /// </remarks>
        public void CreateDatabaseEntry(string fileidentifier)
        {
            Entry scraped = new DescriptionScraper(fileidentifier, false).Scrape();

            // The scraper signals failure through the sentinel identifier "ERROR".
            if (scraped.Identifier == "ERROR")
            {
                System.Console.WriteLine("An Error Occurred.");
                return;
            }

            System.Console.WriteLine("Entry Added For " + scraped.Identifier);
            this.db.AddEntryObject(scraped);
        }
        /// <summary>
        /// Loads the page at <c>url</c> and extracts the title, abstract and descriptor tags
        /// from its <c>&lt;p&gt;</c> elements.
        /// </summary>
        /// <remarks>
        /// Every paragraph is lower-cased and read as a <c>key: value</c> pair, split at the
        /// first colon only so that values containing colons are kept whole. Paragraphs without
        /// a colon, or with an empty key, are skipped. Values have spaces and <c>*</c> removed;
        /// descriptors are additionally split on commas and each tag is trimmed.
        /// If the page has no paragraphs, or parsing throws, the scrape is repeated once with a
        /// new <c>DescriptionScraper</c> unless <c>retry</c> is already set.
        /// </remarks>
        /// <returns>
        /// An <see cref="Entry"/> carrying <c>identifier</c>, title, abstract and tags when all
        /// three fields were found; otherwise an entry whose <c>Identifier</c> is <c>"ERROR"</c>.
        /// </returns>
        public Entry Scrape()
        {
            HtmlWeb      hw  = new HtmlWeb();
            HtmlDocument doc = hw.Load(url);

            if (doc == null)
            {
                return new Entry { Identifier = "ERROR" };
            }

            // Start as null so the completeness check below can tell "field never seen"
            // apart from "field present but empty".
            string   title = null;
            string   abstr = null;
            string[] tags  = null;

            bool parsed = false;

            try
            {
                // SelectNodes yields null, not an empty collection, when nothing matches.
                var paragraphs = doc.DocumentNode.SelectNodes("//p");

                if (paragraphs != null)
                {
                    foreach (HtmlNode node in paragraphs)
                    {
                        // Split at the first ':' only; the remainder belongs to the value.
                        string[] parts = node.InnerText.ToLowerInvariant().Split(new[] { ':' }, 2);

                        if (parts.Length < 2 || parts[0].Length == 0)
                        {
                            continue;
                        }

                        // The last character of the key is dropped before matching.
                        // NOTE(review): presumably a separator character in the page markup; confirm.
                        string key   = parts[0].Substring(0, parts[0].Length - 1);
                        string value = parts[1].Trim().Replace(" ", "").Replace("*", "");

                        if (key.Contains("title"))
                        {
                            title = value;
                        }

                        if (key.Contains("abstract"))
                        {
                            abstr = value;
                        }

                        if (key.Contains("descriptors"))
                        {
                            tags = value.Split(',');

                            for (int i = 0; i < tags.Length; i++)
                            {
                                tags[i] = tags[i].Trim();
                            }
                        }
                    }

                    parsed = true;
                }
            }
            catch (Exception)
            {
                // Any parsing failure is handled like a missing page body: retry once below.
                parsed = false;
            }

            if (!parsed)
            {
                if (!retry)
                {
                    DescriptionScraper secondAttempt = new DescriptionScraper(identifier, true);
                    return secondAttempt.Scrape();
                }

                return new Entry { Identifier = "ERROR" };
            }

            if (title != null && abstr != null && tags != null)
            {
                return new Entry { Identifier = identifier, Title = title, Abstr = abstr, Tags = tags };
            }

            return new Entry { Identifier = "ERROR" };
        }
    }