Esempio n. 1
0
        /// <summary>
        /// Creates a link between two pages, reusing the stored row when one already exists.
        /// </summary>
        /// <remarks>
        /// When a link with the same source and target page IDs is found, this instance copies
        /// its values (including <c>LinkID</c>); otherwise this instance is added to the context
        /// and saved.
        /// </remarks>
        /// <param name="from">Page the link starts from.</param>
        /// <param name="to">Page the link points to.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="from"/> or <paramref name="to"/> is null.
        /// </exception>
        public Link(Page from, Page to)
        {
            if (from == null)
            {
                throw new ArgumentNullException("from");
            }

            if (to == null)
            {
                throw new ArgumentNullException("to");
            }

            // Read the IDs up front so the query predicate only captures plain values.
            var sourceId = from.PageID;
            var targetId = to.PageID;

            using (PageDBContext pc = new PageDBContext())
            {
                // FirstOrDefault yields null when nothing matches, so no exception has to be
                // swallowed and unrelated InvalidOperationExceptions are no longer hidden.
                Link existing = pc.Link.FirstOrDefault(l => l.FromPage == sourceId && l.ToPage == targetId);

                if (existing == null)
                {
                    this.FromPage = sourceId;
                    this.ToPage = targetId;
                    pc.Link.Add(this);
                    pc.SaveChanges();
                }
                else
                {
                    this.FromPage = existing.FromPage;
                    this.ToPage = existing.ToPage;
                    this.LinkID = existing.LinkID;
                }
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Creates the occurrence of a word at a given location on a page, reusing the stored
        /// row when the same (word, page, location) combination already exists.
        /// </summary>
        /// <param name="page">Value passed to the <c>Page</c> constructor to resolve the page.</param>
        /// <param name="word">Value passed to the <c>Word</c> constructor to resolve the word.</param>
        /// <param name="location">Position of the word on the page.</param>
        public PageWord(String page, string word, int location)
        {
            using (PageDBContext pc = new PageDBContext())
            {
                // Only the identifiers of the resolved page and word are needed below.
                var pageId = new Page(page).PageID;
                var wordId = new Word(word).WordID;

                // The location filter must test the stored row (pw.Location). The previous
                // predicate compared this instance's own Location, which has not been assigned
                // yet at this point, so it did not filter on the stored location at all.
                PageWord existing = pc.PageWord.FirstOrDefault(
                    pw => pw.WordID == wordId && pw.PageID == pageId && pw.Location == location);

                if (existing == null)
                {
                    this.Location = location;
                    this.PageID = pageId;
                    this.WordID = wordId;
                    pc.PageWord.Add(this);
                    pc.SaveChanges();
                }
                else
                {
                    this.WordID = existing.WordID;
                    this.PageWordID = existing.PageWordID;
                    this.PageID = existing.PageID;
                    this.Location = existing.Location;
                }
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Creates a word, reusing the stored row when the same word value already exists.
        /// </summary>
        /// <remarks>
        /// When no stored word has the given value, this instance receives the value and its
        /// stem (via <c>Word.Stem</c>) and is added to the context and saved. Otherwise it
        /// copies the ID, value and stem of the stored row.
        /// </remarks>
        /// <param name="word">Word value to look up or store.</param>
        public Word(String word)
        {
            using (PageDBContext pc = new PageDBContext())
            {
                // FirstOrDefault yields null when nothing matches; no exception is needed to
                // detect the "not stored yet" case.
                Word existing = pc.Words.FirstOrDefault(w => w.WordValue == word);

                if (existing == null)
                {
                    WordValue = word;
                    WordStem = Word.Stem(word);
                    pc.Words.Add(this);
                    pc.SaveChanges();
                }
                else
                {
                    this.WordID = existing.WordID;
                    this.WordValue = existing.WordValue;
                    this.WordStem = existing.WordStem;
                }
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Creates a page for the given URL, reusing the stored row when that URL already exists.
        /// </summary>
        /// <remarks>
        /// When no stored page has the URL, this instance is added to the context and saved;
        /// otherwise it copies the ID and URL of the stored row.
        /// </remarks>
        /// <param name="URL">URL to look up or store.</param>
        public Page(string URL)
        {
            using (PageDBContext pc = new PageDBContext())
            {
                // FirstOrDefault yields null when nothing matches, replacing the previous
                // First + swallowed InvalidOperationException.
                Page existing = pc.Pages.FirstOrDefault(p => p.Url == URL);

                if (existing == null)
                {
                    this.Url = URL;
                    pc.Pages.Add(this);
                    pc.SaveChanges();
                }
                else
                {
                    this.PageID = existing.PageID;
                    this.Url = existing.Url;
                }
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Ranks pages by word location: every occurrence contributes 1 / Location to the score
        /// of its page, so occurrences with a smaller Location value weigh more.
        /// </summary>
        /// <param name="page_words">Word occurrences to rank.</param>
        /// <returns>The ranked results, which are also stored in <c>Results</c>.</returns>
        public override List<SearchResults> SelectAndRankPages(List<PageWord> page_words)
        {
            // Group by page so the Sum really aggregates all occurrences of a page; grouping by
            // the PageWord itself only ever summed one occurrence at a time.
            // NOTE(review): grouping relies on how Page implements equality - confirm.
            // No database access happens here, so no PageDBContext is created.
            this.Results = page_words
                .GroupBy(pw => pw.ConcretePage)
                .Select(pageGroup => new SearchResults(pageGroup.Key, pageGroup.Sum(pw => 1f / pw.Location)))
                .Distinct()
                .ToList();

            return this.Results;
        }
Esempio n. 6
0
        /// <summary>
        /// Ranks pages by word frequency: the score of a page is the number of entries in
        /// <paramref name="page_words"/> that belong to it (more is better).
        /// </summary>
        /// <param name="page_words">Word occurrences to rank.</param>
        /// <returns>The ranked results, which are also stored in <c>Results</c>.</returns>
        public override List<SearchResults> SelectAndRankPages(List<PageWord> page_words)
        {
            // The ranking works purely on the in-memory list, so no PageDBContext is created.
            this.Results = page_words
                .GroupBy(pw => pw.ConcretePage)
                .Select(pageGroup => new SearchResults(pageGroup.Key, (float)pageGroup.Count()))
                .Distinct()
                .ToList();

            return this.Results;
        }
Esempio n. 7
0
        /// <summary>
        /// Returns every distinct page referenced by <paramref name="page_words"/> with the same
        /// score of 1, i.e. without any real ranking.
        /// </summary>
        /// <param name="page_words">Word occurrences whose pages are selected.</param>
        /// <returns>The results, which are also stored in <c>Results</c>.</returns>
        public override List<SearchResults> SelectAndRankPages(List<PageWord> page_words)
        {
            // Build a fresh list instead of appending to the current one, so a second call does
            // not return the entries of the previous call as well.
            // The selection works on the in-memory list only, so no PageDBContext is created.
            this.Results = page_words
                .Select(pw => pw.ConcretePage)
                .Distinct()
                .Select(p => new SearchResults(p, 1))
                .ToList();

            return this.Results;
        }
Esempio n. 8
0
        /// <summary>
        /// Collects the stored page-word rows whose word has the same stem as one of the words
        /// in <c>Query</c>.
        /// </summary>
        /// <remarks>
        /// The query text is lower-cased (invariant culture) and split on spaces; each token is
        /// stemmed with <c>Word.Stem</c> and looked up separately, and the rows found for every
        /// token are appended to the result in token order.
        /// </remarks>
        /// <returns>All matching page-word rows.</returns>
        public List<PageWord> SelectPageWords()
        {
            char[] separators = new char[] { ' ' };
            string[] tokens = this.Query.ToLowerInvariant().Split(separators, StringSplitOptions.RemoveEmptyEntries);
            var matches = new List<PageWord>();

            using (var db = new PageDBContext())
            {
                for (int index = 0; index < tokens.Length; index++)
                {
                    var stem = Word.Stem(tokens[index]);

                    // The join with Pages keeps only rows whose page exists.
                    var rows = db.PageWord
                        .Join(db.Words, pw => pw.WordID, w => w.WordID, (pw, w) => new { pw, w })
                        .Join(db.Pages, pair => pair.pw.PageID, p => p.PageID, (pair, p) => new { pair.pw, pair.w, p })
                        .Where(row => row.w.WordStem == stem)
                        .Select(row => row.pw);

                    matches.AddRange(rows);
                }
            }

            return matches;
        }
Esempio n. 9
0
        /// <summary>
        /// Returns the stored page-word rows that reference the given word.
        /// </summary>
        /// <param name="word">Word whose occurrences are requested.</param>
        /// <returns>All page-word rows whose <c>WordID</c> equals the ID of the word.</returns>
        public static List<PageWord> GetPageWordByWord(string word)
        {
            using (var db = new PageDBContext())
            {
                // The ID comes from constructing a Word for the given value.
                int targetId = new Word(word).WordID;

                return db.PageWord
                    .Where(entry => entry.WordID == targetId)
                    .ToList();
            }
        }
Esempio n. 10
0
        /// <summary>
        /// Returns the stored page-word rows of every word that has the given stem.
        /// </summary>
        /// <param name="stem">Stem to match against <c>WordStem</c>.</param>
        /// <returns>All page-word rows whose word has that stem.</returns>
        public static List<PageWord> GetPageWordByStem(string stem)
        {
            using (PageDBContext pc = new PageDBContext())
            {
                // One joined query replaces the previous "query per matching word" loop, which
                // also started new queries on the context while the outer word query was still
                // being enumerated.
                var query = from pw in pc.PageWord
                            join w in pc.Words on pw.WordID equals w.WordID
                            where w.WordStem == stem
                            select pw;

                return query.ToList();
            }
        }
Esempio n. 11
0
 /// <summary>
 /// Crawls every stored page that has no title yet.
 /// </summary>
 /// <param name="depth">Depth value forwarded unchanged to <c>Crawl</c>.</param>
 public void CrawlNext(int depth)
 {
     // Snapshot the pages and release the context before crawling, so no query or context
     // stays open while Crawl runs.
     // NOTE(review): Crawl is defined elsewhere; presumably it does its own database work.
     System.Collections.Generic.List<Page> snapshot;
     using (PageDBContext pc = new PageDBContext())
     {
         snapshot = new System.Collections.Generic.List<Page>(pc.Pages);
     }

     foreach (Page p in snapshot)
     {
         if (String.IsNullOrEmpty(p.PageTitle))
         {
             this.Crawl(p.Url, depth);
         }
     }
 }
Esempio n. 12
0
        /// <summary>
        /// Stores the extracted data of the current page: the page itself, one link per entry
        /// in <c>LINKS</c>, and one page-word entry per token of the extracted text.
        /// </summary>
        /// <remarks>
        /// The text is the concatenation of headers, paragraphs, divs and link texts, split on
        /// whitespace/punctuation separators. Tokens are lower-cased and numbered from 1.
        /// NOTE(review): the four text parts are concatenated without a separator - confirm
        /// that each part ends with one, otherwise adjacent words are merged.
        /// </remarks>
        public void SaveEntity()
        {
            // No PageDBContext is created here: the previous one was never used nor disposed.
            Page sourcePage = new Page(URL, ExtractedTitle);

            foreach (var linkUrl in LINKS)
            {
                // Constructed for the constructor's effect only; the instance is not kept.
                new Link(sourcePage, new Page(linkUrl));
            }

            StringBuilder s_builder = new StringBuilder(ExtractedHeaders);
            s_builder.Append(ExtractedParagraphs);
            s_builder.Append(ExtractedDivs);
            s_builder.Append(ExtractedLinksText);

            char[] separators = new char[] { ' ', '\n', '.', ',', '\'', '(', ')', ':', '/', '\\', '[', ']', '\"' };
            string[] words = s_builder.ToString().Split(separators, StringSplitOptions.RemoveEmptyEntries);
            for (int i = 0; i < words.Length; ++i)
            {
                // Invariant lower-casing keeps stored tokens independent of the machine culture.
                new PageWord(URL, words[i].ToLowerInvariant(), i + 1);
            }
        }
Esempio n. 13
0
        /// <summary>
        /// Returns the IDs of all stored words whose stem equals the stem of a word in
        /// <c>Query</c>.
        /// </summary>
        /// <remarks>
        /// The query text is split on whitespace/punctuation separators; every token is stemmed
        /// with <c>Word.Stem</c> and looked up separately, so IDs appear in token order and may
        /// repeat when tokens share a stem.
        /// </remarks>
        /// <returns>The matching word IDs.</returns>
        private List<int> GetWordsIDs()
        {
            char[] separators = new char[] { ' ', '\n', '.', ',', '\'', '(', ')', ':', '/', '\\', '[', ']', '\"' };
            string[] words = Query.Split(separators, StringSplitOptions.RemoveEmptyEntries);
            List<int> res = new List<int>();

            using (PageDBContext pc = new PageDBContext())
            {
                foreach (string word in words)
                {
                    // Stem on the client first: the predicate must only contain a plain value,
                    // not a custom method call or an array index the provider has to translate.
                    var stem = Word.Stem(word);

                    res.AddRange(from w in pc.Words
                                 where w.WordStem == stem
                                 select w.WordID);
                }
            }

            return res;
        }