Exemplo n.º 1
0
        /// <summary>
        /// Two argument constructor for TwoPassDataIndexer.
        /// </summary>
        /// <remarks>
        /// Works in two passes over the data: the first pass
        /// (ComputeEventCounts) streams every event into a temporary file
        /// while collecting the predicates that reach the cutoff; the second
        /// pass (Index) re-reads the events from that file.  The temporary
        /// file is deleted before the constructor returns, including when
        /// either pass throws.
        /// </remarks>
        /// <param name="eventReader">
        /// An ITrainingEventReader which contains a list of all the events
        /// seen in the training data.
        /// </param>
        /// <param name="cutoff">
        /// The minimum number of times a predicate must have been
        /// observed in order to be included in the model.
        /// </param>
        public TwoPassDataIndexer(ITrainingEventReader eventReader, int cutoff)
        {
            Dictionary <string, int> predicateIndex = new Dictionary <string, int>();
            List <ComparableEvent>   eventsToCompare;

            // Path.GetTempFileName() creates the file on disk and already
            // returns its full path, so no FileInfo round-trip is needed.
            string tempFile = Path.GetTempFileName();

            try
            {
                // Pass 1: count predicates and spool the events to the temp file.
                int eventCount = ComputeEventCounts(eventReader, tempFile, predicateIndex, cutoff);

                // Pass 2: index the spooled events against the surviving predicates.
                using (FileEventReader fileEventReader = new FileEventReader(tempFile))
                {
                    eventsToCompare = Index(eventCount, fileEventReader, predicateIndex);
                }
            }
            finally
            {
                // Runs on success and on failure so the temp file is never
                // left behind when one of the passes throws.
                if (File.Exists(tempFile))
                {
                    File.Delete(tempFile);
                }
            }

            // Sorting and merging of the indexed events.
            SortAndMerge(eventsToCompare);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Reads every event from <paramref name="eventReader"/>, writes each
        /// one to <paramref name="eventStoreFile"/>, and counts the predicates
        /// in each event's context.  A predicate is added to
        /// <paramref name="predicatesInOut"/> with a unique integer index
        /// as soon as it has been seen at least <paramref name="cutoff"/> times.
        /// </summary>
        /// <param name="eventReader">
        /// the <code>ITrainingEventReader</code> supplying the events
        /// </param>
        /// <param name="eventStoreFile">
        /// a file name to which the events are written for later processing.
        /// </param>
        /// <param name="predicatesInOut">
        /// a <code>Dictionary</code> mapping predicate to index; predicates
        /// already present are left untouched and are not counted again.
        /// </param>
        /// <param name="cutoff">
        /// the number of observations a predicate needs before it is indexed
        /// </param>
        /// <returns>
        /// The number of events read from <paramref name="eventReader"/>.
        /// </returns>
        private int ComputeEventCounts(ITrainingEventReader eventReader, string eventStoreFile, Dictionary <string, int> predicatesInOut, int cutoff)
        {
            // Running counts for predicates that have not reached the cutoff yet.
            Dictionary <string, int> counter = new Dictionary <string, int>();
            int predicateIndex = 0;
            int eventCount     = 0;

            using (StreamWriter eventStoreWriter = new StreamWriter(eventStoreFile))
            {
                while (eventReader.HasNext())
                {
                    TrainingEvent currentTrainingEvent = eventReader.ReadNextEvent();
                    eventCount++;
                    eventStoreWriter.Write(FileEventReader.ToLine(currentTrainingEvent));

                    foreach (string predicate in currentTrainingEvent.GetContext())
                    {
                        if (predicatesInOut.ContainsKey(predicate))
                        {
                            // Already indexed; nothing left to count.
                            continue;
                        }

                        // TryGetValue leaves occurrences at 0 for an unseen predicate.
                        int occurrences;
                        counter.TryGetValue(predicate, out occurrences);
                        occurrences++;

                        if (occurrences >= cutoff)
                        {
                            predicatesInOut.Add(predicate, predicateIndex++);
                            // Drop the running count; Remove is a no-op when the
                            // predicate reached the cutoff on its first sighting.
                            counter.Remove(predicate);
                        }
                        else
                        {
                            counter[predicate] = occurrences;
                        }
                    }
                }
            }
            return eventCount;
        }