/**
 * Worker main loop: waits 10 seconds, then repeatedly takes a Url task,
 * fetches it, and hands the fetched content to the processors.
 *
 * The loop flag needToTerminate is never set inside this method, so the
 * method only ends if an exception escapes it (the timing-log write below
 * is the one part that is not covered by the try/catch).
 *
 * Per iteration:
 *  - a failed (invalid) fetch bumps timeoutCounter and skips the task;
 *  - any exception raised while fetching/processing is counted through
 *    RuntimeStatistics.addToErrors(1) and the task is skipped;
 *  - on success, if LogDebuggerControl timing is enabled, one line with the
 *    request and processing durations is appended to a per-thread file
 *    named "_DEBUG_INFO_TIMING@<managed thread id>.txt".
 */
public void run()
{
    // timeoutCounter is incremented on every invalid fetch; it is not read
    // anywhere in this method.
    int requestNum = 0, timeoutCounter = 0;
    bool needToTerminate = false;
    TimeSpan totalProcessTime;

    // Initial delay before the first task is taken.
    Thread.Sleep(10000);

    while (needToTerminate == false)
    {
        DateTime startTime = DateTime.Now;
        try
        {
            // NOTE(review): getSlot(2, 1) is an opaque SyncAccessor call;
            // presumably some form of pacing/throttling - confirm.
            SyncAccessor.getSlot(2, 1);
            Url task = SyncAccessor.getFromQueue<Url>(_tasks, _timer);

            ResourceContent content = _fetchers.fetchResource(task.getUrl());
            if (content.isValid() != true)
            {
                // Fetch failed: ignore this task and move on to the next one.
                timeoutCounter++;
                continue;
            }

            // Re-wrap the fetched content so that it carries the task's rank.
            ResourceContent modifiedContent = new ResourceContent(
                content.getResourceUrl(),
                content.getResourceType(),
                content.getResourceContent(),
                content.getReturnCode(),
                task.getRank());

            DateTime startProcess = DateTime.Now;
            _processors.processResource(modifiedContent);
            DateTime endProcess = DateTime.Now;
            totalProcessTime = endProcess - startProcess;

            System.Console.WriteLine(" URL Processed Successfully ... ");
        }
        catch (Exception)
        {
            // Best effort: record the failure and keep the worker alive.
            RuntimeStatistics.addToErrors(1);
            continue;
        }

        DateTime endTime = DateTime.Now;
        TimeSpan totalRequestTime = endTime - startTime;

        if (LogDebuggerControl.getInstance().enableTiming)
        {
            // Append the request timing to this thread's timing log. The
            // using statement guarantees the file handle is released even
            // when the write throws.
            using (StreamWriter sw = new StreamWriter("_DEBUG_INFO_TIMING@" + System.Threading.Thread.CurrentThread.ManagedThreadId + ".txt", true))
            {
                sw.WriteLine(" TIMING FOR REQ - " + requestNum++ + " takes about " + totalRequestTime.TotalSeconds + " s, Processed At " + totalProcessTime.TotalSeconds + " s");
            }
        }
    }
}
/**
 * Manual smoke test for ResourceContent: builds one HTML resource from
 * fixed sample values and prints its validity flag, its url, type and
 * content getters, and its string form to the console.
 */
public static void Test()
{
    int sampleReturnCode = 200;
    string sampleContent = "Please click the next buttom to start crawling !!";
    ResourceType sampleType = ResourceType.HtmlResource;
    string sampleUrl = "www.adamsearch.com";

    ResourceContent sample = new ResourceContent(
        sampleUrl,
        sampleType,
        sampleContent,
        sampleReturnCode,
        0);

    Console.WriteLine("The resource is : " + sample.isValid());
    Console.WriteLine("Get url: " + sample.getResourceUrl());
    Console.WriteLine("Get resourceType: " + sample.getResourceType());
    Console.WriteLine("Get resourceContent: " + sample.getResourceContent());
    Console.WriteLine("to string:\n" + sample);
}
/**
 * Console check of the ResourceContent class: constructs a single HTML
 * resource with hard-coded values, then writes out whether it is valid,
 * what each getter returns, and how the object renders as a string.
 */
public static void Test()
{
    string url = "www.adamsearch.com";
    string body = "Please click the next buttom to start crawling !!";
    ResourceType kind = ResourceType.HtmlResource;
    int returnCode = 200;

    ResourceContent resource = new ResourceContent(url, kind, body, returnCode, 0);

    // Validity first, then every getter, then the string form.
    Console.WriteLine("The resource is : " + resource.isValid());
    Console.WriteLine("Get url: " + resource.getResourceUrl());
    Console.WriteLine("Get resourceType: " + resource.getResourceType());
    Console.WriteLine("Get resourceContent: " + resource.getResourceContent());
    Console.WriteLine("to string:\n" + resource);
}
/**
 * Processes one fetched resource.
 *
 * An invalid resource only increments the failure counter and is dropped.
 * A valid one increments the success counter and is then offered to every
 * registered processor; each processor whose canProcess accepts the
 * resource gets to process it.
 */
public void processResource(ResourceContent resource)
{
    // Guard: invalid content is counted and ignored.
    if (!resource.isValid())
    {
        failure++;
        return;
    }

    success++;

    foreach (String processorId in resourceProcessors.Keys)
    {
        if (!resourceProcessors[processorId].canProcess(resource))
        {
            continue;
        }

        resourceProcessors[processorId].process(resource);
    }
}
/**
 * Handles a single resource: counts it as a failure and stops when it is
 * not valid; otherwise counts it as a success and passes it to each
 * attached processor that reports it can process the resource.
 */
public void processResource(ResourceContent resource)
{
    bool usable = resource.isValid();
    if (!usable)
    {
        // Invalid content gets no further processing.
        failure++;
        return;
    }

    success++;

    // Walk the registered processors by id and run the ones that accept it.
    foreach (String processorId in resourceProcessors.Keys)
    {
        if (!resourceProcessors[processorId].canProcess(resource))
        {
            continue;
        }

        resourceProcessors[processorId].process(resource);
    }
}
/**
 * Tells whether this processor can handle the given resource.
 * The answer is exactly the resource's own validity flag.
 */
public bool canProcess(ResourceContent resource)
{
    bool resourceIsValid = resource.isValid();
    return resourceIsValid;
}
/**
 * Reports if the processor accepts the given resource: a resource is
 * accepted when, and only when, its isValid() check passes.
 */
public bool canProcess(ResourceContent resource)
{
    bool accepted = resource.isValid();
    return accepted;
}