/// <summary>
        /// Consumes every word from the input collection, counts how often each
        /// distinct word occurs, sorts the counted words and keeps at most the
        /// first twenty entries of the sorted list.
        ///
        /// Raises ProcessingStarted before any work is done and ProcessingCompleted
        /// once finished; the completed event carries a TwentyCommonEventArgs whose
        /// Terms list holds the resulting OrderedWord entries.
        /// </summary>
        public void executeStep()
        {
            // Upper bound on the number of entries handed to ProcessingCompleted.
            const int maxTerms = 20;

            // Copy the delegate before testing it so a handler being removed on
            // another thread between the null check and the call cannot cause a
            // NullReferenceException.
            var startedHandler = ProcessingStarted;
            if (startedHandler != null)
            {
                startedHandler(this, EventArgs.Empty);
            }

            Dictionary<string, int> counts = new Dictionary<string, int>();

            // GetConsumingEnumerable blocks while the collection is empty and ends
            // once adding has been completed and the collection is drained. This is
            // the same exit condition as polling IsCompleted, without spinning a
            // CPU core while waiting for the upstream step.
            foreach (string word in _wordListIn.GetConsumingEnumerable())
            {
                int seen;
                counts.TryGetValue(word, out seen); // leaves 'seen' at 0 for a new word
                counts[word] = seen + 1;
            }

            // Wrap each (word, count) pair so the list can be ordered by
            // OrderedWord's own comparison.
            // NOTE(review): presumably that comparison puts the most frequent
            // words first - confirm against OrderedWord.
            ArrayList rankedWords = new ArrayList(counts.Count);
            foreach (KeyValuePair<string, int> entry in counts)
            {
                rankedWords.Add(new OrderedWord(entry.Value, entry.Key));
            }
            rankedWords.Sort();

            // Trim everything past the first maxTerms entries.
            if (rankedWords.Count > maxTerms)
            {
                rankedWords.RemoveRange(maxTerms, rankedWords.Count - maxTerms);
            }

            TwentyCommonEventArgs completedArgs = new TwentyCommonEventArgs();
            completedArgs.Terms = rankedWords;

            var completedHandler = ProcessingCompleted;
            if (completedHandler != null)
            {
                completedHandler(this, completedArgs);
            }
        }
예제 #2
0
        // Sets up and starts the word-processing pipeline (the BackgroundWorker runs
        //  this handler on another thread).
        //  Fresh word-list collections are created for every run, then one thread is
        //  built per pipeline step. Each step reads from one collection and writes to
        //  the next, so the order in which the threads are started doesn't much
        //  matter. New steps can be added by inserting a step anywhere within the
        //  thread list. All event handlers are attached BEFORE any thread is started,
        //  so a step that finishes quickly cannot raise an event nobody listens to.
        //  This method returns as soon as the threads are started; the results are
        //  delivered through the output step's ProcessingCompleted event.
        private void backgroundWorker1_DoWork(object sender, DoWorkEventArgs e)
        {
            this._wordList_step1 = new BlockingCollection <string>();
            this._wordList_step2 = new BlockingCollection <string>();
            this._wordList_step3 = new BlockingCollection <string>();
            this._wordList_step4 = new BlockingCollection <string>();
            this._wordList_step5 = new BlockingCollection <string>();

            this._stopList = new BlockingCollection <string>();

            List <Thread> threadList2 = new List <Thread>();

            // Load the stop words. Nothing blocks on start
            IProcessingStep stepStopWord = new LoadStopWords(txtStopListFile.Text, ref _stopList);
            threadList2.Add(createStepThread(stepStopWord, "Load Stop Words"));

            // Load the input. Nothing blocks on start
            IProcessingStep stepInputFile = new DataInput(txtInputFile.Text, ref _wordList_step1);
            threadList2.Add(createStepThread(stepInputFile, "Load Input"));

            // Remove punctuation: step1 -> step2.
            IProcessingStep stepRemovePunctuation = new RemovePunctuation(ref _wordList_step1, ref _wordList_step2);
            threadList2.Add(createStepThread(stepRemovePunctuation, "Remove Punctation"));

            // De-duplication is currently disabled; _wordList_step3 is reserved for it.
            //IProcessingStep stepDeDup = new DeDup(ref _wordList_step2, ref _wordList_step3);
            //Thread deDupThread = new Thread(new ThreadStart(stepDeDup.executeStep));
            //threadList2.Add(deDupThread);

            // Remove stop words: step2 -> step4. At this point the stop list must be
            // 100% loaded; per the original author there is logic in the
            // 'RemoveStopWords' processing step to assure this.
            IProcessingStep stepRemoveStopWords = new RemoveStopWords(_stopList, ref _wordList_step2, ref _wordList_step4);
            Thread removeStopWords = createStepThread(stepRemoveStopWords, "Remove Stop Words");

            // When the step asks to be suspended: wait the requested time on the
            // thread that raised the event, then run the step again on a brand new
            // thread.
            stepRemoveStopWords.SuspendProcessing += delegate(object delSender, SuspendRequestEventArgs evtArg) {
                System.Diagnostics.Debug.WriteLine("Hit Suspend Processing. Waiting...");
                Thread.Sleep(evtArg.SuspendTimeMillaSeconds);
                removeStopWords      = new Thread(new ThreadStart(stepRemoveStopWords.executeStep));
                removeStopWords.Name = "Remove Stop Words re-created thread";
                removeStopWords.Start();
            };
            threadList2.Add(removeStopWords);

            // Stemming algorithm: step4 -> step5.
            IProcessingStep stepStemming = new ApplyStemming(ref _wordList_step4, ref _wordList_step5);
            threadList2.Add(createStepThread(stepStemming, "Applying Stemming"));

            // 20 most frequently occurring terms, read from step5. The data comes
            // back through the step's ProcessingCompleted event arguments.
            IProcessingStep stepOutput = new DataOutput_TwentyCommon(ref _wordList_step5);
            threadList2.Add(createStepThread(stepOutput, "Preparing Final Output"));

            // Clean-up handler: show the results and reset the main form.
            // It is attached before the threads are started; attaching it afterwards
            // races with the output step and can lose the completion event entirely.
            // NOTE(review): this runs on whichever thread raises ProcessingCompleted
            // (presumably the output step's thread, not the UI thread) - confirm that
            // frmResults/changeState are safe to use from there.
            stepOutput.ProcessingCompleted += delegate(object sender3, EventArgs evt3)
            {
                TwentyCommonEventArgs evtArgs  = (TwentyCommonEventArgs)evt3;
                ArrayList             _terms   = evtArgs.Terms;
                frmResults            _results = new frmResults(_terms);
                _results.ShowDialog();

                this.changeState(false);
            };

            // Everything is wired up - start all the threads.
            foreach (Thread th in threadList2)
            {
                th.Start();
            }
        }

        // Creates (but does not start) the named thread that will run the given
        //  pipeline step. In DEBUG builds the step's started/completed events are
        //  also hooked up to the instrumentation timer handlers.
        private Thread createStepThread(IProcessingStep step, string threadName)
        {
#if DEBUG
            step.ProcessingStarted   += startInstrumentationTimer;
            step.ProcessingCompleted += stopInstrumentionTimer;
#endif
            Thread stepThread = new Thread(new ThreadStart(step.executeStep));
            stepThread.Name = threadName;
            return stepThread;
        }