Beispiel #1
0
            /// <summary>
            /// Performs one crawl step, driven by the command currently visible at the head of the queue
            /// read with <c>PeekFromQueue(true)</c>.
            /// </summary>
            /// <remarks>
            /// <para>
            /// "getStart": takes one URL via <c>getMsgFromQueue(false)</c>, records it in <c>visitedUrl</c>,
            /// inserts an entity into the "hw3table" table, and then examines every link returned by the
            /// page parser. A link is enqueued (and <c>count</c> incremented) exactly once when it contains
            /// none of the <c>disAllow</c> rules and has not been visited; otherwise it is added to
            /// <c>errorList</c> exactly once.
            /// </para>
            /// <para>
            /// "done": removes a message via <c>getMsgFromQueue(true)</c> and increments <c>errorcount</c>.
            /// Any other command value, including null, is ignored.
            /// </para>
            /// <para>
            /// NOTE(review): the boolean passed to the storage helpers presumably selects the command
            /// queue (true) versus the URL queue (false) - confirm against the storage class.
            /// </para>
            /// </remarks>
            public void StartCrawling()
            {
                string commandmessage = myStorage.PeekFromQueue(true);

                // Static string.Equals is null-safe, so a null peek result is treated as "no command"
                // instead of throwing a NullReferenceException.
                if (string.Equals(commandmessage, "getStart"))
                {
                    string url = myStorage.getMsgFromQueue(false);
                    visitedUrl.Add(url);
                    getHTML parser = new getHTML(url);

                    // NOTE(review): the entity is built from `myurl`, not from the `url` just dequeued -
                    // confirm this is intended.
                    Entity urlentity = new Entity(myurl, "PageUrl");
                    CloudTable table = tableClient.GetTableReference("hw3table");

                    TableOperation insertOperation = TableOperation.Insert(urlentity);
                    table.Execute(insertOperation);

                    // Snapshot the rules once per page rather than once per link.
                    var rules = disAllow.ToList();

                    foreach (string rightUrl in parser.GetAllLink())
                    {
                        // A link is disallowed as soon as ANY rule matches it. Deciding per rule (as the
                        // previous nested loop did) enqueued a link once for every rule it did not match,
                        // even when another rule forbade it, and enqueued nothing when there were no rules.
                        bool disallowed = false;
                        foreach (string rule in rules)
                        {
                            if (rightUrl.Contains(rule))
                            {
                                disallowed = true;
                                break;
                            }
                        }

                        if (!disallowed && !visitedUrl.Contains(rightUrl))
                        {
                            myStorage.AddQueue(rightUrl, false);
                            count++;
                        }
                        else
                        {
                            errorList.Add(rightUrl);
                        }
                    }
                }
                else if (string.Equals(commandmessage, "done"))
                {
                    myStorage.getMsgFromQueue(true);
                    errorcount++;
                }
            }
Beispiel #2
0
            /// <summary>
            /// Performs one crawl step, driven by the command currently visible at the head of the queue
            /// read with <c>PeekFromQueue(true)</c>.
            /// </summary>
            /// <remarks>
            /// <para>
            /// "getStart": takes one URL via <c>getMsgFromQueue(false)</c>, records it in <c>visitedUrl</c>,
            /// inserts an entity into the "hw3table" table, and then examines every link returned by the
            /// page parser. A link is enqueued (and <c>count</c> incremented) exactly once when it contains
            /// none of the <c>disAllow</c> rules and has not been visited; otherwise it is added to
            /// <c>errorList</c> exactly once.
            /// </para>
            /// <para>
            /// "done": removes a message via <c>getMsgFromQueue(true)</c> and increments <c>errorcount</c>.
            /// Any other command value, including null, is ignored.
            /// </para>
            /// <para>
            /// NOTE(review): the boolean passed to the storage helpers presumably selects the command
            /// queue (true) versus the URL queue (false) - confirm against the storage class.
            /// </para>
            /// </remarks>
            public void StartCrawling()
            {
                string commandmessage = myStorage.PeekFromQueue(true);

                // Static string.Equals is null-safe, so a null peek result is treated as "no command"
                // instead of throwing a NullReferenceException.
                if (string.Equals(commandmessage, "getStart"))
                {
                    string url = myStorage.getMsgFromQueue(false);
                    visitedUrl.Add(url);
                    getHTML parser = new getHTML(url);

                    // NOTE(review): the entity is built from `myurl`, not from the `url` just dequeued -
                    // confirm this is intended.
                    Entity urlentity = new Entity(myurl, "PageUrl");
                    CloudTable table = tableClient.GetTableReference("hw3table");

                    TableOperation insertOperation = TableOperation.Insert(urlentity);
                    table.Execute(insertOperation);

                    // Snapshot the rules once per page rather than once per link.
                    var rules = disAllow.ToList();

                    foreach (string rightUrl in parser.GetAllLink())
                    {
                        // A link is disallowed as soon as ANY rule matches it. Deciding per rule (as the
                        // previous nested loop did) enqueued a link once for every rule it did not match,
                        // even when another rule forbade it, and enqueued nothing when there were no rules.
                        bool disallowed = false;
                        foreach (string rule in rules)
                        {
                            if (rightUrl.Contains(rule))
                            {
                                disallowed = true;
                                break;
                            }
                        }

                        if (!disallowed && !visitedUrl.Contains(rightUrl))
                        {
                            myStorage.AddQueue(rightUrl, false);
                            count++;
                        }
                        else
                        {
                            errorList.Add(rightUrl);
                        }
                    }
                }
                else if (string.Equals(commandmessage, "done"))
                {
                    myStorage.getMsgFromQueue(true);
                    errorcount++;
                }
            }