コード例 #1
0
ファイル: Crawler.cs プロジェクト: openyang/cnntextmining
        /// <summary>
        /// Repeatedly picks one row of <c>dbo.[TopicsTmp]</c> whose <c>Visited</c> column is NULL,
        /// runs a <c>TopicAction</c> on its <c>TopicURL</c>, and stamps the row with the current
        /// database time. Returns when the query yields no more URLs.
        /// </summary>
        /// <remarks>
        /// The row is marked as visited even when <c>TopicAction.Do</c> throws (the update runs in a
        /// <c>finally</c>); the exception then propagates out of this method because nothing here
        /// catches it.
        /// </remarks>
        public void Run2()
        {
            int count = 0;

            while (true)
            {
                object result;

                // The SELECT command is only needed for this one call, so dispose it right away
                // instead of keeping it alive for the whole processing step.
                using (var selectCommand
                           = new SqlCommand(
                                 "SELECT TOP 1 TopicURL FROM dbo.[TopicsTmp] WHERE Visited IS NULL", conn))
                {
                    result = selectCommand.ExecuteScalar();
                }

                // ExecuteScalar returns null when the result set is empty (no unvisited rows left)
                // and DBNull.Value when the selected column itself is NULL. Both mean there is
                // nothing to process; checking only DBNull.Value ended the loop with a
                // NullReferenceException instead of a clean return.
                if (result == null || result == DBNull.Value)
                {
                    return;
                }

                string url = result.ToString();

                Console.WriteLine("-PROCESSING: " + url);
                var action = new TopicAction(conn);

                try
                {
                    action.Do(url);
                    count++;
                }
                finally
                {
                    // Mark the row as visited regardless of the outcome so the same URL is not
                    // selected again by the next iteration.
                    using (var updateCommand
                               = new SqlCommand("UPDATE dbo.[TopicsTmp] SET Visited = GETDATE()"
                                                + " WHERE TopicURL = @url", conn))
                    {
                        updateCommand.Parameters.AddWithValue("@url", url);
                        updateCommand.ExecuteNonQuery();
                    }
                }

                Console.WriteLine("COUNT = " + count);
            }
        }
コード例 #2
0
ファイル: Crawler.cs プロジェクト: openyang/cnntextmining
        /// <summary>
        /// Main crawl loop: takes URIs from <c>queue</c> until it is empty, skipping those that
        /// <c>history</c> reports as already visited.
        /// </summary>
        /// <remarks>
        /// News pages are handed to <c>newsAction</c>; topic pages are handed to a new
        /// <c>TopicAction</c> and their links are passed to <c>AddLinksToPagesToVisit</c>.
        /// Every processed URI is marked as visited in a <c>finally</c>, so a failing page is
        /// still recorded. Any exception raised during an iteration is written to the console
        /// and the loop moves on to the next queued URI.
        /// </remarks>
        public void Run()
        {
            for (;;)
            {
                try
                {
                    if (queue.Count == 0)
                    {
                        Console.WriteLine("END");
                        break;
                    }

                    Uri current = queue.Dequeue();
                    Console.WriteLine("PROCESSING: " + current);

                    string url = current.OriginalString;

                    // Guard clause: nothing to do for pages we have already seen.
                    if (history.WasVisited(url))
                    {
                        Console.WriteLine("SKIP: Page have been already visited");
                        Console.WriteLine("--");
                        continue;
                    }

                    try
                    {
                        // Neither a news page nor a topic page.
                        // NOTE(review): such pages are marked visited here and again in the
                        // finally block below, so "--" is printed twice for them.
                        if (!(CNNPage.IsNewsPage(url) || CNNPage.isTopicPage(url)))
                        {
                            history.SetVisited(current);
                            Console.WriteLine("--");
                        }

                        // News page: build the page object and run the news action on it.
                        if (CNNPage.IsNewsPage(url))
                        {
                            newsAction.Do(new CNNPage(url));
                        }

                        // Topic page: run the topic action, then enqueue the links found on it.
                        if (CNNPage.isTopicPage(url))
                        {
                            new TopicAction(conn).Do(url);

                            Uri[] discovered = GetLinks(url);
                            AddLinksToPagesToVisit(discovered);
                        }
                    }
                    finally
                    {
                        history.SetVisited(current);
                        Console.WriteLine("--");
                    }
                }
                catch (Exception failure)
                {
                    // Best effort: report the problem and carry on with the next URI.
                    Console.WriteLine(failure);
                }
            }
        }
コード例 #3
0
ファイル: Crawler.cs プロジェクト: openyang/cnntextmining
        /// <summary>
        /// Takes URIs from <c>queue</c> until it is empty. For each unvisited topic page it runs a
        /// <c>TopicAction</c> and then runs <c>newsAction</c> on every news link returned by
        /// <c>GetLinks</c> for that page.
        /// </summary>
        /// <remarks>
        /// A failure on a single news link is logged and does not stop the remaining links.
        /// A failure anywhere else in the processing of a URI is logged and that URI is NOT marked
        /// as visited, because <c>history.SetVisited</c> is only reached when no exception escapes.
        /// No database transaction wraps the processing of a topic.
        /// </remarks>
        public void RunTopicRec()
        {
            while (true)
            {
                if (queue.Count == 0)
                {
                    Console.WriteLine("KURWA KONIEC");
                    break;
                }

                Uri curr = queue.Dequeue();
                Console.WriteLine("PROCESSING: " + curr);

                if (!history.WasVisited(curr.OriginalString))
                {
                    try
                    {
                        if (CNNPage.isTopicPage(curr.OriginalString))
                        {
                            var action = new TopicAction(conn);
                            action.Do(curr.OriginalString);

                            // Polish: "Processing news from the topic".
                            Console.WriteLine("Przetwarzam newsy z topicu");

                            Uri[] links = GetLinks(curr.OriginalString);
                            for (int i = 0; i < links.Length; i++)
                            {
                                try
                                {
                                    if (CNNPage.IsNewsPage(links[i].OriginalString))
                                    {
                                        Console.WriteLine("News: " + i + "/" + links.Length);
                                        newsAction.Do(new CNNPage(links[i].OriginalString));
                                        Console.WriteLine("OK");
                                    }
                                    else
                                    {
                                        Console.WriteLine("Not news :(");
                                    }
                                }
                                catch (Exception ex)
                                {
                                    // One bad link must not abort the whole topic, but the cause
                                    // has to be visible; previously the exception was discarded.
                                    Console.WriteLine("FAIL");
                                    Console.WriteLine(ex);
                                }
                            }

                            // Polish: "Topic processed".
                            Console.WriteLine("- Przetworzono topic -");
                        }

                        history.SetVisited(curr);
                    }
                    catch (Exception ex)
                    {
                        // Polish: "An error occurred". The literal was mis-decoded mojibake
                        // ("Wyst¹pi³ b³¹d"); Unicode escapes keep it independent of the source
                        // file's encoding.
                        Console.WriteLine("Wyst\u0105pi\u0142 b\u0142\u0105d: " + ex);
                    }
                }
            }
        }