/// <summary>
/// Counts how often each term arrives on the input word list, sorts the
/// counted terms and keeps at most the first twenty entries of the sorted list.
///
/// Raises ProcessingStarted before any input is consumed and
/// ProcessingCompleted (carrying the reduced list in
/// <c>TwentyCommonEventArgs.Terms</c>) once the list has been built.
/// </summary>
/// <remarks>
/// The sort order is whatever <c>OrderedWord</c>'s comparison defines
/// (presumably most frequent first -- confirm against OrderedWord).
/// </remarks>
public void executeStep()
{
    // Upper bound on the number of terms handed to ProcessingCompleted.
    const int maxTerms = 20;

    // Copy the delegate first so a concurrent unsubscribe cannot null it
    // between the check and the call.
    var startedHandler = ProcessingStarted;
    if (startedHandler != null)
    {
        startedHandler(this, new EventArgs());
    }

    Dictionary<string, int> counts = new Dictionary<string, int>();

    // GetConsumingEnumerable blocks while the collection is empty and ends
    // once it is marked complete and drained. The previous
    // "while (!IsCompleted) TryTake(...)" loop produced the same counts but
    // spun the CPU whenever the producer had nothing ready.
    foreach (string term in _wordListIn.GetConsumingEnumerable())
    {
        // A missing key leaves 'seen' at 0, so one lookup covers both the
        // first occurrence and every later one.
        int seen;
        counts.TryGetValue(term, out seen);
        counts[term] = seen + 1;
    }

    // Wrap each (count, term) pair so ArrayList.Sort can order them through
    // OrderedWord's own comparison.
    ArrayList finalList = new ArrayList(counts.Count);
    foreach (var entry in counts)
    {
        finalList.Add(new OrderedWord(entry.Value, entry.Key));
    }
    finalList.Sort();

    // Trim everything past the first maxTerms entries.
    if (finalList.Count > maxTerms)
    {
        finalList.RemoveRange(maxTerms, finalList.Count - maxTerms);
    }

    TwentyCommonEventArgs e = new TwentyCommonEventArgs();
    e.Terms = finalList;

    var completedHandler = ProcessingCompleted;
    if (completedHandler != null)
    {
        completedHandler(this, e);
    }
}
// Sets up and launches the processing pipeline. This is the DoWork handler of
// backgroundWorker1.
//
// Fresh word-list collections are created, then one thread is built per
// pipeline step. Each step reads from one collection and writes to the next,
// so the order in which the threads are started does not much matter. A new
// step can be added by inserting it anywhere in the thread list and wiring its
// input/output collections accordingly. All threads are started together once
// everything is wired up.
//
// NOTE(review): txtStopListFile.Text and txtInputFile.Text are read here; if
// this handler runs off the UI thread that is a cross-thread control access --
// confirm.
private void backgroundWorker1_DoWork(object sender, DoWorkEventArgs e)
{
    this._wordList_step1 = new BlockingCollection<string>();
    this._wordList_step2 = new BlockingCollection<string>();
    this._wordList_step3 = new BlockingCollection<string>();
    this._wordList_step4 = new BlockingCollection<string>();
    this._wordList_step5 = new BlockingCollection<string>();
    this._stopList = new BlockingCollection<string>();

    List<Thread> threadList2 = new List<Thread>();

    // Load the stop words. Nothing blocks on start.
    IProcessingStep stepStopWord = new LoadStopWords(txtStopListFile.Text, ref _stopList);
    threadList2.Add(createStepThread(stepStopWord, "Load Stop Words"));

    // Load the input. Nothing blocks on start.
    IProcessingStep stepInputFile = new DataInput(txtInputFile.Text, ref _wordList_step1);
    threadList2.Add(createStepThread(stepInputFile, "Load Input"));

    // Remove punctuation.
    IProcessingStep stepRemovePunctuation = new RemovePunctuation(ref _wordList_step1, ref _wordList_step2);
    threadList2.Add(createStepThread(stepRemovePunctuation, "Remove Punctation"));

    // De-duplication step is currently disabled; it would sit between
    // step 2 and step 3:
    //IProcessingStep stepDeDup = new DeDup(ref _wordList_step2, ref _wordList_step3);
    //Thread deDupThread = new Thread(new ThreadStart(stepDeDup.executeStep));
    //threadList2.Add(deDupThread);

    // Remove stop words. By the time this step processes, the stop list must
    // be 100% loaded; per the original author the RemoveStopWords step has
    // logic to assure this (it raises SuspendProcessing, handled below).
    IProcessingStep stepRemoveStopWords = new RemoveStopWords(_stopList, ref _wordList_step2, ref _wordList_step4);
    Thread removeStopWords = createStepThread(stepRemoveStopWords, "Remove Stop Words");
    stepRemoveStopWords.SuspendProcessing += delegate(object delSender, SuspendRequestEventArgs evtArg)
    {
        // Wait for the requested time, then run the step again on a brand
        // new thread.
        System.Diagnostics.Debug.WriteLine("Hit Suspend Processing. Waiting...");
        Thread.Sleep(evtArg.SuspendTimeMillaSeconds);
        removeStopWords = new Thread(new ThreadStart(stepRemoveStopWords.executeStep));
        removeStopWords.Name = "Remove Stop Words re-created thread";
        removeStopWords.Start();
    };
    threadList2.Add(removeStopWords);

    // Stemming algorithm.
    IProcessingStep stepStemming = new ApplyStemming(ref _wordList_step4, ref _wordList_step5);
    threadList2.Add(createStepThread(stepStemming, "Applying Stemming"));

    // Twenty most frequently occurring terms. The data comes back through the
    // step's ProcessingCompleted event.
    IProcessingStep stepOutput = new DataOutput_TwentyCommon(ref _wordList_step5);
    threadList2.Add(createStepThread(stepOutput, "Preparing Final Output"));

    // Clean-up: show the results and reset the main form. This MUST be
    // subscribed before any thread is started; attaching it afterwards races
    // with the pipeline, and a fast run could complete before the handler
    // exists, so the results would never be shown.
    // NOTE(review): this handler appears to run on the thread that raises
    // ProcessingCompleted rather than the UI thread -- confirm that
    // frmResults/changeState are safe to use from there.
    stepOutput.ProcessingCompleted += delegate(object sender3, EventArgs evt3)
    {
        TwentyCommonEventArgs evtArgs = (TwentyCommonEventArgs)evt3;
        ArrayList _terms = evtArgs.Terms;
        frmResults _results = new frmResults(_terms);
        _results.ShowDialog();
        this.changeState(false);
    };

    // Everything is wired up; start all the threads.
    foreach (var th in threadList2)
    {
        th.Start();
    }
}

// Wraps a pipeline step in a named, not-yet-started thread that will run the
// step's executeStep. DEBUG builds also attach the instrumentation timer
// handlers to the step's started/completed events.
private Thread createStepThread(IProcessingStep step, string threadName)
{
#if DEBUG
    step.ProcessingStarted += startInstrumentationTimer;
    step.ProcessingCompleted += stopInstrumentionTimer;
#endif
    Thread stepThread = new Thread(new ThreadStart(step.executeStep));
    stepThread.Name = threadName;
    return stepThread;
}