示例#1
0
        /// <summary>
        /// Drains <c>eventReader</c> into a list while counting how often each
        /// context predicate is seen. A predicate is given the next free integer
        /// index in <c>predicatesInOut</c> as soon as its running count reaches
        /// <c>cutoff</c>.
        /// </summary>
        /// <param name="eventReader">
        /// Source of training events; it is read until <c>HasNext()</c> returns false.
        /// </param>
        /// <param name="predicatesInOut">
        /// Dictionary that receives each accepted predicate together with its index.
        /// Predicates already present on entry are skipped and not counted; indices
        /// handed out by this method start at 0.
        /// </param>
        /// <param name="cutoff">
        /// Number of occurrences required before a predicate is accepted. Every
        /// occurrence counts, including repeats within a single event, so a value
        /// of 1 or less accepts each predicate the first time it is seen.
        /// </param>
        /// <returns>
        /// Every event read from <c>eventReader</c>, in reading order.
        /// </returns>
        private List<TrainingEvent> ComputeEventCounts(ITrainingEventReader eventReader, Dictionary<string, int> predicatesInOut, int cutoff)
        {
            // Running counts for predicates that have not reached the cutoff yet.
            var pendingCounts = new Dictionary<string, int>();
            var events = new List<TrainingEvent>();
            int nextPredicateIndex = 0;

            while (eventReader.HasNext())
            {
                TrainingEvent trainingEvent = eventReader.ReadNextEvent();
                events.Add(trainingEvent);

                foreach (string predicate in trainingEvent.Context)
                {
                    if (predicatesInOut.ContainsKey(predicate))
                    {
                        continue;
                    }

                    // A single lookup: a missing key leaves the count at 0.
                    int occurrences;
                    pendingCounts.TryGetValue(predicate, out occurrences);
                    occurrences++;

                    if (occurrences >= cutoff)
                    {
                        predicatesInOut.Add(predicate, nextPredicateIndex++);
                        // The predicate is accepted, so its pending count is no longer needed.
                        pendingCounts.Remove(predicate);
                    }
                    else
                    {
                        pendingCounts[predicate] = occurrences;
                    }
                }
            }

            return events;
        }
示例#2
0
		/// <summary>
		/// Two argument constructor for TwoPassDataIndexer. The events are first
		/// written to a temporary file while predicates are counted, then read
		/// back from that file to be indexed, and finally sorted and merged.
		/// </summary>
		/// <param name="eventReader">
		/// An ITrainingEventReader which contains a list of all the events
		/// seen in the training data.
		/// </param>
		/// <param name="cutoff">
		/// The minimum number of times a predicate must have been
		/// observed in order to be included in the model.
		/// </param>
		public TwoPassDataIndexer(ITrainingEventReader eventReader, int cutoff)
		{
			List<ComparableEvent> eventsToCompare;
			var predicateIndex = new Dictionary<string, int>();

			string tempFile = new FileInfo(Path.GetTempFileName()).FullName;

			try
			{
				// Pass 1: count predicates and spool every event to the temporary file.
				int eventCount = ComputeEventCounts(eventReader, tempFile, predicateIndex, cutoff);

				// Pass 2: re-read the spooled events and index them.
				using (var fileEventReader = new FileEventReader(tempFile))
				{
					eventsToCompare = Index(eventCount, fileEventReader, predicateIndex);
				}
			}
			finally
			{
				// Remove the temporary file even when one of the passes throws,
				// so a failed run does not leave it behind.
				if (File.Exists(tempFile))
				{
					File.Delete(tempFile);
				}
			}

			SortAndMerge(eventsToCompare);
		}
示例#3
0
        /// <summary>
        /// Two argument constructor for TwoPassDataIndexer. The events are first
        /// written to a temporary file while predicates are counted, then read
        /// back from that file to be indexed, and finally sorted and merged.
        /// </summary>
        /// <param name="eventReader">
        /// An ITrainingEventReader which contains a list of all the events
        /// seen in the training data.
        /// </param>
        /// <param name="cutoff">
        /// The minimum number of times a predicate must have been
        /// observed in order to be included in the model.
        /// </param>
        public TwoPassDataIndexer(ITrainingEventReader eventReader, int cutoff)
        {
            List<ComparableEvent> eventsToCompare;
            var predicateIndex = new Dictionary<string, int>();

            string tempFile = new FileInfo(Path.GetTempFileName()).FullName;

            try
            {
                // Pass 1: count predicates and spool every event to the temporary file.
                int eventCount = ComputeEventCounts(eventReader, tempFile, predicateIndex, cutoff);

                // Pass 2: re-read the spooled events and index them.
                using (var fileEventReader = new FileEventReader(tempFile))
                {
                    eventsToCompare = Index(eventCount, fileEventReader, predicateIndex);
                }
            }
            finally
            {
                // Remove the temporary file even when one of the passes throws,
                // so a failed run does not leave it behind.
                if (File.Exists(tempFile))
                {
                    File.Delete(tempFile);
                }
            }

            SortAndMerge(eventsToCompare);
        }
示例#4
0
        /// <summary>
        /// Converts the events supplied by <c>eventReader</c> into
        /// <c>ComparableEvent</c> instances. Outcome strings are mapped to ids in
        /// order of first appearance, and context predicates are replaced by their
        /// indices from <c>predicateIndex</c>. Once the reader is exhausted, the
        /// outcome and predicate labels are set from the two dictionaries.
        /// </summary>
        /// <param name="eventCount">
        /// Initial capacity used for the returned list.
        /// </param>
        /// <param name="eventReader">
        /// Source of training events; it is read until <c>HasNext()</c> returns false.
        /// </param>
        /// <param name="predicateIndex">
        /// Maps each retained predicate to its index. Predicates missing from this
        /// dictionary are left out of the indexed context.
        /// </param>
        /// <returns>
        /// The indexed events. Events with no predicate in <c>predicateIndex</c>
        /// are dropped.
        /// </returns>
        private List<ComparableEvent> Index(int eventCount, ITrainingEventReader eventReader, Dictionary<string, int> predicateIndex)
        {
            var outcomeMap = new Dictionary<string, int>();
            int outcomeCount = 0;
            var eventsToCompare = new List<ComparableEvent>(eventCount);
            // Scratch list reused for every event; cleared at the end of each iteration.
            var indexedContext = new List<int>();

            while (eventReader.HasNext())
            {
                TrainingEvent currentTrainingEvent = eventReader.ReadNextEvent();
                string outcome = currentTrainingEvent.Outcome;

                // The outcome receives an id even if the event itself is dropped below.
                int outcomeId;
                if (!outcomeMap.TryGetValue(outcome, out outcomeId))
                {
                    outcomeId = outcomeCount++;
                    outcomeMap.Add(outcome, outcomeId);
                }

                foreach (string predicate in currentTrainingEvent.Context)
                {
                    int indexOfPredicate;
                    if (predicateIndex.TryGetValue(predicate, out indexOfPredicate))
                    {
                        indexedContext.Add(indexOfPredicate);
                    }
                }

                // Drop events with no active features.
                if (indexedContext.Count > 0)
                {
                    eventsToCompare.Add(new ComparableEvent(outcomeId, indexedContext.ToArray()));
                }

                indexedContext.Clear();
            }

            SetOutcomeLabels(ToIndexedStringArray(outcomeMap));
            SetPredicateLabels(ToIndexedStringArray(predicateIndex));
            return eventsToCompare;
        }
示例#5
0
        /// <summary>
        /// Reads every event from <c>eventReader</c>, writes each one to
        /// <c>eventStoreFile</c>, and counts how often each context predicate is
        /// seen. A predicate is given the next free integer index in
        /// <c>predicatesInOut</c> as soon as its running count reaches <c>cutoff</c>.
        /// </summary>
        /// <param name="eventReader">
        /// Source of training events; it is read until <c>HasNext()</c> returns false.
        /// </param>
        /// <param name="eventStoreFile">
        /// Name of the file to which the events are written for later processing.
        /// Any previous content is replaced.
        /// </param>
        /// <param name="predicatesInOut">
        /// Dictionary that receives each accepted predicate together with its index.
        /// Predicates already present on entry are skipped and not counted.
        /// </param>
        /// <param name="cutoff">
        /// Number of occurrences required before a predicate is accepted. Every
        /// occurrence counts, including repeats within a single event.
        /// </param>
        /// <returns>
        /// The number of events read from <c>eventReader</c>.
        /// </returns>
        private int ComputeEventCounts(ITrainingEventReader eventReader, string eventStoreFile, Dictionary<string, int> predicatesInOut, int cutoff)
        {
            // Running counts for predicates that have not reached the cutoff yet.
            var pendingCounts = new Dictionary<string, int>();
            int nextPredicateIndex = 0;
            int eventCount = 0;

#if DNF
            using (var eventStoreWriter = new StreamWriter(eventStoreFile))
#else
            // FileMode.Create matches what StreamWriter(path) does in the other branch:
            // the file is created when missing and truncated when it already exists.
            using (var stream = new FileStream(eventStoreFile, FileMode.Create))
            using (var eventStoreWriter = new StreamWriter(stream))
#endif
            {
                while (eventReader.HasNext())
                {
                    TrainingEvent currentTrainingEvent = eventReader.ReadNextEvent();
                    eventCount++;
                    eventStoreWriter.Write(FileEventReader.ToLine(currentTrainingEvent));

                    foreach (string predicate in currentTrainingEvent.Context)
                    {
                        if (predicatesInOut.ContainsKey(predicate))
                        {
                            continue;
                        }

                        // A single lookup: a missing key leaves the count at 0.
                        int occurrences;
                        pendingCounts.TryGetValue(predicate, out occurrences);
                        occurrences++;

                        if (occurrences >= cutoff)
                        {
                            predicatesInOut.Add(predicate, nextPredicateIndex++);
                            // The predicate is accepted, so its pending count is no longer needed.
                            pendingCounts.Remove(predicate);
                        }
                        else
                        {
                            pendingCounts[predicate] = occurrences;
                        }
                    }
                }
            }

            return eventCount;
        }
示例#6
0
        /// <summary>
        /// Builds a OnePassDataIndexer from the supplied events, keeping only
        /// predicates that satisfy the given cutoff.
        /// </summary>
        /// <param name="eventReader">
        /// An ITrainingEventReader which supplies all the events seen in the
        /// training data.
        /// </param>
        /// <param name="cutoff">
        /// The minimum number of times a predicate must have been
        /// observed in order to be included in the model.
        /// </param>
        public OnePassDataIndexer(ITrainingEventReader eventReader, int cutoff)
        {
            var predicateIndex = new Dictionary<string, int>();

            // Step 1: read the events and collect the predicates that meet the cutoff.
            List<TrainingEvent> events = ComputeEventCounts(eventReader, predicateIndex, cutoff);

            // Step 2: index the events against the collected predicates.
            List<ComparableEvent> eventsToCompare = Index(events, predicateIndex);

            // Step 3: sort and merge the indexed events.
            SortAndMerge(eventsToCompare);
        }
示例#7
0
		/// <summary>
		/// Builds a OnePassDataIndexer from the supplied events, keeping only
		/// predicates that satisfy the given cutoff.
		/// </summary>
		/// <param name="eventReader">
		/// An ITrainingEventReader which supplies all the events seen in the
		/// training data.
		/// </param>
		/// <param name="cutoff">
		/// The minimum number of times a predicate must have been
		/// observed in order to be included in the model.
		/// </param>
		public OnePassDataIndexer(ITrainingEventReader eventReader, int cutoff)
		{
			var predicateIndex = new Dictionary<string, int>();

			// Step 1: read the events and collect the predicates that meet the cutoff.
			List<TrainingEvent> events = ComputeEventCounts(eventReader, predicateIndex, cutoff);

			// Step 2: index the events against the collected predicates.
			List<ComparableEvent> eventsToCompare = Index(events, predicateIndex);

			// Step 3: sort and merge the indexed events.
			SortAndMerge(eventsToCompare);
		}
示例#8
0
 /// <summary>
 /// Convenience constructor for OnePassDataIndexer: forwards to the two
 /// argument constructor with a cutoff of 0.
 /// </summary>
 /// <param name="eventReader">
 /// An ITrainingEventReader which supplies all the events seen in the
 /// training data.
 /// </param>
 public OnePassDataIndexer(ITrainingEventReader eventReader)
     : this(eventReader, 0)
 {
 }
示例#9
0
 /// <summary>
 /// Trains a model using the GIS algorithm. The events are wrapped in a
 /// OnePassDataIndexer, which is then passed to the indexer-based overload.
 /// </summary>
 /// <param name="eventReader">
 /// The ITrainingEventReader holding the data on which this model
 /// will be trained.
 /// </param>
 /// <param name="iterations">
 /// The number of GIS iterations to perform.
 /// </param>
 /// <param name="cutoff">
 /// The number of times a predicate must be seen in order
 /// to be relevant for training.
 /// </param>
 public virtual void TrainModel(ITrainingEventReader eventReader, int iterations, int cutoff)
 {
     var indexer = new OnePassDataIndexer(eventReader, cutoff);
     TrainModel(iterations, indexer);
 }
示例#10
0
 /// <summary>
 /// Trains a model using the GIS algorithm by forwarding to the three
 /// argument overload with the fixed values 100 and 0.
 /// </summary>
 /// <param name="eventReader">
 /// The ITrainingEventReader holding the data on which this model
 /// will be trained.
 /// </param>
 public virtual void TrainModel(ITrainingEventReader eventReader)
 {
     // Second and third arguments of the overload being called.
     const int secondArgument = 100;
     const int thirdArgument = 0;

     TrainModel(eventReader, secondArgument, thirdArgument);
 }
示例#11
0
		/// <summary>
		/// Convenience constructor for TwoPassDataIndexer: forwards to the two
		/// argument constructor with a cutoff of 0.
		/// </summary>
		/// <param name="eventReader">
		/// An ITrainingEventReader which supplies all the events seen in the
		/// training data.
		/// </param>
		public TwoPassDataIndexer(ITrainingEventReader eventReader)
			: this(eventReader, 0)
		{
		}
示例#12
0
        /// <summary>
        /// Converts the events supplied by <c>eventReader</c> into
        /// <c>ComparableEvent</c> instances. Outcome strings are mapped to ids in
        /// order of first appearance, and context predicates are replaced by their
        /// indices from <c>predicateIndex</c>. Once the reader is exhausted, the
        /// outcome and predicate labels are set from the two dictionaries.
        /// </summary>
        /// <param name="eventCount">
        /// Initial capacity used for the returned list.
        /// </param>
        /// <param name="eventReader">
        /// Source of training events; it is read until <c>HasNext()</c> returns false.
        /// </param>
        /// <param name="predicateIndex">
        /// Maps each retained predicate to its index. Predicates missing from this
        /// dictionary are left out of the indexed context.
        /// </param>
        /// <returns>
        /// The indexed events. Events with no predicate in <c>predicateIndex</c>
        /// are dropped.
        /// </returns>
        private List<ComparableEvent> Index(int eventCount, ITrainingEventReader eventReader, Dictionary<string, int> predicateIndex)
        {
            var outcomeMap = new Dictionary<string, int>();
            int outcomeCount = 0;
            var eventsToCompare = new List<ComparableEvent>(eventCount);
            // Scratch list reused for every event; cleared at the end of each iteration.
            var indexedContext = new List<int>();

            while (eventReader.HasNext())
            {
                TrainingEvent currentTrainingEvent = eventReader.ReadNextEvent();
                string outcome = currentTrainingEvent.Outcome;

                // The outcome receives an id even if the event itself is dropped below.
                int outcomeId;
                if (!outcomeMap.TryGetValue(outcome, out outcomeId))
                {
                    outcomeId = outcomeCount++;
                    outcomeMap.Add(outcome, outcomeId);
                }

                foreach (string predicate in currentTrainingEvent.Context)
                {
                    int indexOfPredicate;
                    if (predicateIndex.TryGetValue(predicate, out indexOfPredicate))
                    {
                        indexedContext.Add(indexOfPredicate);
                    }
                }

                // Drop events with no active features.
                if (indexedContext.Count > 0)
                {
                    eventsToCompare.Add(new ComparableEvent(outcomeId, indexedContext.ToArray()));
                }

                indexedContext.Clear();
            }

            SetOutcomeLabels(ToIndexedStringArray(outcomeMap));
            SetPredicateLabels(ToIndexedStringArray(predicateIndex));
            return eventsToCompare;
        }
示例#13
0
		/// <summary>
		/// Reads every event from <c>eventReader</c>, writes each one to
		/// <c>eventStoreFile</c>, and counts how often each context predicate is
		/// seen. A predicate is given the next free integer index in
		/// <c>predicatesInOut</c> as soon as its running count reaches <c>cutoff</c>.
		/// </summary>
		/// <param name="eventReader">
		/// Source of training events; it is read until <c>HasNext()</c> returns false.
		/// </param>
		/// <param name="eventStoreFile">
		/// Name of the file to which the events are written for later processing.
		/// </param>
		/// <param name="predicatesInOut">
		/// Dictionary that receives each accepted predicate together with its index.
		/// Predicates already present on entry are skipped and not counted.
		/// </param>
		/// <param name="cutoff">
		/// Number of occurrences required before a predicate is accepted. Every
		/// occurrence counts, including repeats within a single event.
		/// </param>
		/// <returns>
		/// The number of events read from <c>eventReader</c>.
		/// </returns>
		private int ComputeEventCounts(ITrainingEventReader eventReader, string eventStoreFile, Dictionary<string, int> predicatesInOut, int cutoff)
		{
			// Running counts for predicates that have not reached the cutoff yet.
			var pendingCounts = new Dictionary<string, int>();
			int nextPredicateIndex = 0;
			int eventCount = 0;

			using (var eventStoreWriter = new StreamWriter(eventStoreFile))
			{
				while (eventReader.HasNext())
				{
					TrainingEvent currentTrainingEvent = eventReader.ReadNextEvent();
					eventCount++;
					eventStoreWriter.Write(FileEventReader.ToLine(currentTrainingEvent));

					foreach (string predicate in currentTrainingEvent.Context)
					{
						if (predicatesInOut.ContainsKey(predicate))
						{
							continue;
						}

						// A single lookup: a missing key leaves the count at 0.
						int occurrences;
						pendingCounts.TryGetValue(predicate, out occurrences);
						occurrences++;

						if (occurrences >= cutoff)
						{
							predicatesInOut.Add(predicate, nextPredicateIndex++);
							// The predicate is accepted, so its pending count is no longer needed.
							pendingCounts.Remove(predicate);
						}
						else
						{
							pendingCounts[predicate] = occurrences;
						}
					}
				}
			}

			return eventCount;
		}
示例#14
0
		/// <summary>
		/// Drains <c>eventReader</c> into a list while counting how often each
		/// context predicate is seen. A predicate is given the next free integer
		/// index in <c>predicatesInOut</c> as soon as its running count reaches
		/// <c>cutoff</c>.
		/// </summary>
		/// <param name="eventReader">
		/// Source of training events; it is read until <c>HasNext()</c> returns false.
		/// </param>
		/// <param name="predicatesInOut">
		/// Dictionary that receives each accepted predicate together with its index.
		/// Predicates already present on entry are skipped and not counted; indices
		/// handed out by this method start at 0.
		/// </param>
		/// <param name="cutoff">
		/// Number of occurrences required before a predicate is accepted. Every
		/// occurrence counts, including repeats within a single event, so a value
		/// of 1 or less accepts each predicate the first time it is seen.
		/// </param>
		/// <returns>
		/// Every event read from <c>eventReader</c>, in reading order.
		/// </returns>
		private List<TrainingEvent> ComputeEventCounts(ITrainingEventReader eventReader, Dictionary<string, int> predicatesInOut, int cutoff)
		{
			// Running counts for predicates that have not reached the cutoff yet.
			var pendingCounts = new Dictionary<string, int>();
			var events = new List<TrainingEvent>();
			int nextPredicateIndex = 0;

			while (eventReader.HasNext())
			{
				TrainingEvent trainingEvent = eventReader.ReadNextEvent();
				events.Add(trainingEvent);

				foreach (string predicate in trainingEvent.Context)
				{
					if (predicatesInOut.ContainsKey(predicate))
					{
						continue;
					}

					// A single lookup: a missing key leaves the count at 0.
					int occurrences;
					pendingCounts.TryGetValue(predicate, out occurrences);
					occurrences++;

					if (occurrences >= cutoff)
					{
						predicatesInOut.Add(predicate, nextPredicateIndex++);
						// The predicate is accepted, so its pending count is no longer needed.
						pendingCounts.Remove(predicate);
					}
					else
					{
						pendingCounts[predicate] = occurrences;
					}
				}
			}

			return events;
		}