/// <summary>
/// Stores <paramref name="newData"/> in <c>urlData</c>, keyed by its <c>url</c>,
/// while holding <c>storageSema</c>.
/// </summary>
/// <remarks>
/// If no entry exists for the URL, the record is added. If one exists, it is
/// overwritten only when
/// <c>DateTime.Compare(stored.dateModified, newData.dateModified) &gt; 0</c>,
/// i.e. when the stored entry's <c>dateModified</c> is later than the incoming one.
/// The semaphore is released in a <c>finally</c> block so an exception raised
/// while touching the dictionary cannot leave it permanently held.
/// </remarks>
/// <param name="newData">Record to store; its <c>url</c> is used as the dictionary key.</param>
public void addURLData(URLData newData)
 {
     string url = newData.url;
     storageSema.WaitOne();
     try
     {
         URLData oldData;
         if (!urlData.TryGetValue(url, out oldData))
         {
             // First time this URL is seen.
             urlData.Add(url, newData);
         }
         // NOTE(review): the original comment read "Duplicate Item Found that is older",
         // yet this condition replaces the stored entry when the STORED one is the
         // later of the two. Behavior is kept as-is; confirm the intended direction.
         else if (DateTime.Compare(oldData.dateModified, newData.dateModified) > 0)
         {
             urlData[url] = newData;
         }
     }
     finally
     {
         storageSema.Release();
     }
 }
        /// <summary>
        /// Stores <paramref name="newData"/> in <c>urlData</c>, keyed by its <c>url</c>,
        /// while holding <c>storageSema</c>.
        /// </summary>
        /// <remarks>
        /// If no entry exists for the URL, the record is added. If one exists, it is
        /// overwritten only when
        /// <c>DateTime.Compare(stored.dateModified, newData.dateModified) &gt; 0</c>,
        /// i.e. when the stored entry's <c>dateModified</c> is later than the incoming one.
        /// The semaphore is released in a <c>finally</c> block so an exception raised
        /// while touching the dictionary cannot leave it permanently held.
        /// </remarks>
        /// <param name="newData">Record to store; its <c>url</c> is used as the dictionary key.</param>
        public void addURLData(URLData newData)
        {
            string url = newData.url;

            storageSema.WaitOne();
            try
            {
                URLData oldData;
                if (!urlData.TryGetValue(url, out oldData))
                {
                    // First time this URL is seen.
                    urlData.Add(url, newData);
                }
                // NOTE(review): the original comment read "Duplicate Item Found that is older",
                // yet this condition replaces the stored entry when the STORED one is the
                // later of the two. Behavior is kept as-is; confirm the intended direction.
                else if (DateTime.Compare(oldData.dateModified, newData.dateModified) > 0)
                {
                    urlData[url] = newData;
                }
            }
            finally
            {
                storageSema.Release();
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Processes one crawled item: extracts frequencies and URLs from it, stores the
        /// result in the database, queues the extracted URLs on the URL scheduler, and
        /// then puts this indexer back on the index scheduler's queue.
        /// </summary>
        /// <param name="urlDataDateTuple">
        /// A boxed <c>Tuple&lt;string, string, DateTime&gt;</c>. <c>Item1</c> is the URL;
        /// <c>Item2</c> is handed to <c>getFrequenciesAndURLs</c> together with the URL
        /// (presumably the fetched page content; confirm against the caller);
        /// <c>Item3</c> is the timestamp passed to the <c>URLData</c> constructor.
        /// </param>
        /// <exception cref="InvalidCastException">
        /// The argument is not a <c>Tuple&lt;string, string, DateTime&gt;</c>.
        /// </exception>
        public void Index(object urlDataDateTuple)
        {
            //Get Data from Indexer
            Tuple<string, string, DateTime> urlDataDate = (Tuple<string, string, DateTime>)urlDataDateTuple;

            // Plain concatenation: indexerID is not spliced into a composite format
            // string, so an id containing '{' or '}' cannot cause a FormatException.
            Console.WriteLine("Indexer " + indexerID + " got the following URL: " + urlDataDate.Item1);

            //Process Data
            Tuple<Dictionary<string, int>, List<string>> frequenciesAndURLs = getFrequenciesAndURLs(urlDataDate.Item1, urlDataDate.Item2);

            //Send info to database
            URLData newData = new URLData(urlDataDate.Item1, urlDataDate.Item3, frequenciesAndURLs.Item1);

            Program.database.addURLData(newData);

            //Send new URLs to Scheduler. Every extracted URL is enqueued here; no
            //"already crawled" filtering happens in this method.
            URLScheduler scheduler = Program.urlScheduler;

            scheduler.queueSema.WaitOne();
            try
            {
                foreach (var newURL in frequenciesAndURLs.Item2)
                {
                    scheduler.urlQueue.Enqueue(newURL);
                }
            }
            finally
            {
                // Always release, even if Enqueue throws, so the queue semaphore is not left held.
                scheduler.queueSema.Release();
            }

            //Put self back into Index Scheduler
            IndexScheduler indexScheduler = Program.indexScheduler;

            indexScheduler.indexerSema.WaitOne();
            try
            {
                indexScheduler.indexerQueue.Enqueue(this);
            }
            finally
            {
                indexScheduler.indexerSema.Release();
            }

            //Debugging
            if (Program.debugMode)
            {
                Console.WriteLine(frequenciesAndURLs.Item2.Count + " URLs added to scheduler");
            }
        }
        /// <summary>
        /// Processes one crawled item: extracts frequencies and URLs from it, stores the
        /// result in the database, queues the extracted URLs on the URL scheduler, and
        /// then puts this indexer back on the index scheduler's queue.
        /// </summary>
        /// <param name="urlDataDateTuple">
        /// A boxed <c>Tuple&lt;string, string, DateTime&gt;</c>. <c>Item1</c> is the URL;
        /// <c>Item2</c> is handed to <c>getFrequenciesAndURLs</c> together with the URL
        /// (presumably the fetched page content; confirm against the caller);
        /// <c>Item3</c> is the timestamp passed to the <c>URLData</c> constructor.
        /// </param>
        /// <exception cref="InvalidCastException">
        /// The argument is not a <c>Tuple&lt;string, string, DateTime&gt;</c>.
        /// </exception>
        public void Index(object urlDataDateTuple)
        {
            //Get Data from Indexer
            Tuple<string, string, DateTime> urlDataDate = (Tuple<string, string, DateTime>)urlDataDateTuple;
            // Plain concatenation: indexerID is not spliced into a composite format
            // string, so an id containing '{' or '}' cannot cause a FormatException.
            Console.WriteLine("Indexer " + indexerID + " got the following URL: " + urlDataDate.Item1);

            //Process Data
            Tuple<Dictionary<string, int>, List<string>> frequenciesAndURLs = getFrequenciesAndURLs(urlDataDate.Item1, urlDataDate.Item2);

            //Send info to database
            URLData newData = new URLData(urlDataDate.Item1, urlDataDate.Item3, frequenciesAndURLs.Item1);
            Program.database.addURLData(newData);

            //Send new URLs to Scheduler. Every extracted URL is enqueued here; no
            //"already crawled" filtering happens in this method.
            URLScheduler scheduler = Program.urlScheduler;
            scheduler.queueSema.WaitOne();
            try
            {
                foreach (var newURL in frequenciesAndURLs.Item2)
                {
                    scheduler.urlQueue.Enqueue(newURL);
                }
            }
            finally
            {
                // Always release, even if Enqueue throws, so the queue semaphore is not left held.
                scheduler.queueSema.Release();
            }

            //Put self back into Index Scheduler
            IndexScheduler indexScheduler = Program.indexScheduler;
            indexScheduler.indexerSema.WaitOne();
            try
            {
                indexScheduler.indexerQueue.Enqueue(this);
            }
            finally
            {
                indexScheduler.indexerSema.Release();
            }

            //Debugging
            if (Program.debugMode)
            {
                Console.WriteLine(frequenciesAndURLs.Item2.Count + " URLs added to scheduler");
            }
        }