Пример #1
0
        /// <summary>
        /// Trains a model from the events in the specified event file and writes it to disk.
        /// The model file name is the event file name with ".bin.gz" appended. </summary>
        /// <param name="args"> eventfile [iterations cutoff] </param>
        /// <exception cref="IOException"> when the eventfile can not be read or the model file can not be written. </exception>

        public static void Main(string[] args)
        {
            // The event file is the only mandatory argument; without it there is nothing to train on.
            if (args.Length == 0)
            {
                Console.Error.WriteLine("Usage: FileEventStream eventfile [iterations cutoff]");
                Environment.Exit(1);
            }
            int    ai         = 0;
            string eventFile  = args[ai++];
            int    iterations = 100; // default used when no iteration count is supplied
            int    cutoff     = 5;   // default used when no cutoff is supplied

            // Each optional argument is read only if it is actually present.
            // Reading both unconditionally indexed past the end of args when the
            // caller supplied an iteration count but no cutoff.
            if (ai < args.Length)
            {
                iterations = Convert.ToInt32(args[ai++]);
            }
            if (ai < args.Length)
            {
                cutoff = Convert.ToInt32(args[ai++]);
            }
            AbstractModel   model;
            FileEventStream es = new FileEventStream(eventFile);

            // Close the event stream whether or not training succeeds.
            try
            {
                model = GIS.trainModel(es, iterations, cutoff);
            }
            finally
            {
                es.close();
            }
            // The model is persisted next to the event file, with ".bin.gz" appended to its name.
            (new SuffixSensitiveGISModelWriter(model, new Jfile(eventFile + ".bin.gz"))).persist();
        }
Пример #2
0
        /// <summary>
        /// Reads events from <c>eventStream</c> and writes each one to
        /// <c>eventStore</c>.  The predicates associated with each event are
        /// counted and any which occur at least <c>cutoff</c> times are added to
        /// the <c>predicatesInOut</c> map along with a unique integer index.
        /// </summary>
        /// <param name="eventStream"> an <code>EventStream</code> value </param>
        /// <param name="eventStore"> a writer to which the events are written to for later processing. </param>
        /// <param name="predicatesInOut"> a <code>TObjectIntHashMap</code> value </param>
        /// <param name="cutoff"> an <code>int</code> value </param>
        private int computeEventCounts(EventStream eventStream, Writer eventStore,
                                       IDictionary <string, int?> predicatesInOut, int cutoff)
        {
            IDictionary <string, int?> counter      = new Dictionary <string, int?>();
            HashSet <string>           predicateSet = new HashSet <string>();
            int eventCount = 0;

            // This method closes eventStore when it is done, so the close must also
            // happen when reading, writing or counting throws; otherwise the writer
            // is leaked on every failure path.
            try
            {
                // Drain the stream: write each event to the store as one line and
                // hand its context predicates to update() for counting.
                while (eventStream.hasNext())
                {
                    Event ev = eventStream.next();
                    eventCount++;
                    eventStore.write(FileEventStream.toLine(ev));
                    // NOTE(review): update() is presumably what applies the cutoff and
                    // fills predicateSet/counter - confirm against its definition.
                    update(ev.Context, predicateSet, counter, cutoff);
                }

                // Give every retained predicate a dense index (its position in the
                // set's enumeration order) and record its count at that index.
                predCounts = new int[predicateSet.Count];
                int index = 0;

                foreach (string predicate in predicateSet)
                {
                    predCounts[index]          = counter[predicate].GetValueOrDefault();
                    predicatesInOut[predicate] = index;
                    index++;
                }
                return eventCount;
            }
            finally
            {
                eventStore.close();
            }
        }
Пример #3
0
        /// <summary>
        /// Three argument constructor for TwoPassDataIndexer.
        /// </summary>
        /// <param name="eventStream"> An EventStream supplying all the events
        ///               seen in the training data. </param>
        /// <param name="cutoff"> The minimum number of times a predicate must have been
        ///               observed in order to be included in the model. </param>
        /// <param name="sort"> Passed to <c>sortAndMerge</c>; when true the events are reported as sorted and merged, otherwise as collected. </param>
        public TwoPassDataIndexer(EventStream eventStream, int cutoff, bool sort)
        {
            IDictionary <string, int?> predicateIndex = new Dictionary <string, int?>();

            Console.WriteLine("Indexing events using cutoff of " + cutoff + "\n");
            Console.Write("\tComputing event counts...  ");

            try
            {
                // Pass one: computeEventCounts consumes the incoming stream, spooling
                // through a UTF8 writer over a temp file and filling predicateIndex.
                Jfile spoolFile = Jfile.createTempFile("events", null);
                spoolFile.deleteOnExit();
                FileOutputStream spoolBytes  = new FileOutputStream(spoolFile);
                Writer           spoolWriter = new BufferedWriter(new OutputStreamWriter(spoolBytes, "UTF8"));
                int              totalEvents = computeEventCounts(eventStream, spoolWriter, predicateIndex, cutoff);
                Console.WriteLine("done. " + totalEvents + " events");

                // Pass two: re-read the spooled events from the temp file and index them.
                Console.Write("\tIndexing...  ");
                List <ComparableEvent> indexedEvents;
                FileEventStream        spooledEvents = new FileEventStream(spoolFile);
                try
                {
                    indexedEvents = index(totalEvents, spooledEvents, predicateIndex);
                }
                finally
                {
                    // The spool stream is closed even when indexing fails.
                    spooledEvents.close();
                }

                // The predicate map is not used past this point; drop the reference
                // and remove the temp file right away instead of waiting for exit.
                predicateIndex = null;
                spoolFile.delete();
                Console.WriteLine("done.");

                Console.Write(sort ? "Sorting and merging events... " : "Collecting events... ");
                sortAndMerge(indexedEvents, sort);
                Console.WriteLine("Done indexing.");
            }
            catch (IOException e)
            {
                // I/O failures are reported on stderr and not rethrown.
                Console.Error.WriteLine(e);
            }
        }