public static void Process(string SearchName)
        {
            /* Determine which data acquirer and data indexer to instantiate, passing the configured search element as the constructor argument */
            SearchElement search = Config.Searches[SearchName];

            Type AcquirerType = Type.GetType(search.DataAcquirer);
            Type IndexerType = Type.GetType(search.DataIndexer);

            dataAcquirer = (IDataAcquirer)Activator.CreateInstance(AcquirerType, new object[] { search });
            dataIndexer = (IDataIndexer)Activator.CreateInstance(IndexerType, new object[] { search });
            /* end */

            List<ISearchEntity> data = dataAcquirer.GetData();

            List<ISearchEntity> CreateEntities = new List<ISearchEntity>();
            List<ISearchEntity> DeleteEntities = new List<ISearchEntity>();

            foreach (ISearchEntity entity in data)
            {
                if (entity.ProcessType == EntityProcessType.insert)
                    AddCreateIndex(CreateEntities, entity);
                else if (entity.ProcessType == EntityProcessType.update)
                    AddCreateIndex(CreateEntities, entity);
                else if (entity.ProcessType == EntityProcessType.delete)
                    DeleteEntities.Add(entity);
                else
                    continue;
            }

            dataIndexer.CreateIndex(CreateEntities);
            dataIndexer.DeleteIndex(DeleteEntities);

            Console.Read();
        }
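A minimal calling sketch for the dispatcher above; the method name and the search name "Products" are hypothetical placeholders for whatever is defined in the configuration file:

        // Hypothetical usage: "Products" stands in for a <search> element name from configuration.
        // The call is unqualified because the class declaring Process is not shown here.
        public static void RunProductSearch()
        {
            Process("Products");
        }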
        /// <summary>
        /// Retrieves the entity of the specified type from the data store.
        /// </summary>
        /// <param name="type">The type of entity to retrieve.</param>
        /// <param name="propertyName">Name of the property to match.</param>
        /// <param name="propertyValue">The value of the property to match.</param>
        /// <returns>The entity of the specified type found in the data store.</returns>
        public override IEntity GetEntity(Type type, string propertyName, object propertyValue)
        {
            IEntity entity = null;

            using (LogGroup logGroup = LogGroup.StartDebug("Retrieving the entity of the specified type with a property matching the provided property name and value."))
            {
                IDataIndexer indexer = Provider.InitializeDataIndexer();
                indexer.AutoRelease = AutoRelease;
                indexer.DataStore   = DataStore;

                IEntity[] entities = indexer.GetEntities(type, propertyName, propertyValue);

                if (entities != null && entities.Length > 0)
                {
                    entity = entities[0];
                }

                if (entity == null)
                {
                    LogWriter.Debug("Entity: [null]");
                }
                else
                {
                    LogWriter.Debug("Entity ID: " + entity.ID.ToString());
                }
            }

            return(entity);
        }
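A usage sketch for the lookup above, written as if it lived in the same provider class; the "User" entity and its "Username" property are illustrative names, not part of the original code. The cast is needed because the method returns the IEntity base type.

        // Sketch: fetch a single entity by property name and value.
        public User GetUserByUsername(string username)
        {
            return (User)GetEntity(typeof(User), "Username", username);
        }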
Example n. 3
        /// <summary>
        /// Initializes a new instance of the <see cref="NegLogLikelihood"/> class.
        /// </summary>
        /// <param name="indexer">The data indexer.</param>
        public NegLogLikelihood(IDataIndexer indexer)
        {
            if (indexer == null)
            {
                throw new ArgumentNullException(nameof(indexer));
            }

            if (!indexer.Completed)
            {
                indexer.Execute();
            }

            values = indexer is OnePassRealValueDataIndexer ? indexer.Values : null;

            contexts           = indexer.GetContexts();
            outcomeList        = indexer.GetOutcomeList();
            numTimesEventsSeen = indexer.GetNumTimesEventsSeen();

            numOutcomes = indexer.GetOutcomeLabels().Length;
            numFeatures = indexer.GetPredLabels().Length;
            numContexts = contexts.Length;
            dimension   = numOutcomes * numFeatures;

            expectation = new double[numOutcomes];
            tempSums    = new double[numOutcomes];
            gradient    = new double[dimension];
        }
        /// <summary>
        /// Retrieves the entity of the specified type matching the specified values.
        /// </summary>
        /// <param name="type">The type of entity to retrieve.</param>
        /// <param name="parameters">The parameters to query with.</param>
        /// <returns>The matching entity, or null if no match is found.</returns>
        public override IEntity GetEntity(Type type, Dictionary <string, object> parameters)
        {
            IEntity entity = null;

            using (LogGroup logGroup = LogGroup.StartDebug("Retrieving the entity of the specified type matching the provided parameters."))
            {
                foreach (string key in parameters.Keys)
                {
                    LogWriter.Debug("Parameter: " + key + " = " + parameters[key].ToString());
                }

                IDataIndexer indexer = Provider.InitializeDataIndexer();
                indexer.AutoRelease = AutoRelease;
                indexer.DataStore   = DataStore;

                IEntity[] entities = indexer.GetEntities(type, parameters);

                if (entities == null || entities.Length == 0)
                {
                    entity = null;
                }
                else
                {
                    // TODO: Check if this should be ignored.
                    if (entities.Length > 1)
                    {
                        throw new Exception("More than one match found when there should only be one.");
                    }

                    entity = entities[0];
                }
            }
            return(entity);
        }
        /// <summary>
        /// Execute the training operation.
        /// </summary>
        /// <param name="indexer">The data indexer.</param>
        /// <returns>The trained <see cref="IMaxentModel"/> model.</returns>
        protected override IMaxentModel DoTrain(IDataIndexer indexer)
        {
            Display("Incorporating indexed data for training...");

            indexer.Execute();

            contexts           = indexer.GetContexts();
            values             = indexer.Values;
            numTimesEventsSeen = indexer.GetNumTimesEventsSeen();
            numEvents          = indexer.GetNumEvents();
            numUniqueEvents    = contexts.Length;

            outcomeLabels = indexer.GetOutcomeLabels();
            outcomeList   = indexer.GetOutcomeList();

            predLabels  = indexer.GetPredLabels();
            numPreds    = predLabels.Length;
            numOutcomes = outcomeLabels.Length;

            Display("done.");

            Display("\tNumber of Event Tokens: " + numUniqueEvents);
            Display("\t    Number of Outcomes: " + numOutcomes);
            Display("\t  Number of Predicates: " + numPreds);

            Display("Computing model parameters...");

            // ReSharper disable once CoVariantArrayConversion - we read the parameters ;)
            Context[] finalParameters = FindParameters();

            Display("...done.\n");


            return(new NaiveBayesModel(finalParameters, predLabels, outcomeLabels));
        }
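Once trained, the model can be queried one event context at a time. The sketch below assumes the OpenNLP-style IMaxentModel surface (Eval over a string context plus GetBestOutcome); the exact member names may differ between SharpNL versions.

        // Sketch: classify one context with a trained maxent / naive Bayes model.
        // Eval and GetBestOutcome follow the OpenNLP naming convention and are assumptions here.
        public static string Classify(IMaxentModel model, string[] context)
        {
            double[] outcomes = model.Eval(context);   // score per outcome label
            return model.GetBestOutcome(outcomes);     // label with the highest score
        }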
Example n. 6
        /// <summary>
        /// Initializes a new instance of the <see cref="QNModelEvaluator"/> class.
        /// </summary>
        /// <param name="indexer">The data indexer.</param>
        /// <exception cref="System.ArgumentNullException">The <paramref name="indexer"/> is null.</exception>
        public QNModelEvaluator(IDataIndexer indexer)
        {
            if (indexer == null)
            {
                throw new ArgumentNullException(nameof(indexer));
            }

            this.indexer = indexer;
        }
Example n. 7
File: GIS.cs Project: qooba/SharpNL
        /// <summary>
        /// Train a model using the GIS algorithm.
        /// </summary>
        /// <param name="iterations">The number of GIS iterations to perform.</param>
        /// <param name="indexer">The object which will be used for event compilation.</param>
        /// <param name="smoothing">Defines whether the created trainer will use smoothing while training the model.</param>
        /// <param name="modelPrior">The prior distribution for the model.</param>
        /// <param name="cutoff">The number of times a predicate must occur to be used in a model.</param>
        /// <param name="threads">The number of threads to use during the training.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to messages during training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The newly trained model, which can be used immediately or saved to disk using a <see cref="GISModelWriter"/> object.</returns>
        public static GISModel TrainModel(int iterations, IDataIndexer indexer, bool smoothing, IPrior modelPrior, int cutoff, int threads, Monitor monitor)
        {
            var trainer = new GISTrainer(monitor)
            {
                Smoothing            = smoothing,
                SmoothingObservation = SmoothingObservation
            };

            if (modelPrior == null)
            {
                modelPrior = new UniformPrior();
            }

            return(trainer.TrainModel(iterations, indexer, modelPrior, cutoff, threads));
        }
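A hedged usage sketch for the overload above, assuming the containing static class is GIS (per the file name): passing null for the prior falls back to UniformPrior, null is accepted for the monitor, and the iteration, cutoff and thread counts are arbitrary example values.

        // Sketch: train a GIS model from an already-built data indexer.
        // 100 iterations, no smoothing, default (uniform) prior, cutoff 0, one thread, no monitor.
        public static GISModel TrainDefaultGisModel(IDataIndexer indexer)
        {
            return GIS.TrainModel(100, indexer, false, null, 0, 1, null);
        }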
        /// <summary>
        /// Initializes a new instance of the <see cref="NegLogLikelihood"/> class.
        /// </summary>
        /// <param name="indexer">The data indexer.</param>
        /// <param name="threads">The number of threads.</param>
        public ParallelNegLogLikelihood(IDataIndexer indexer, int threads) : base(indexer)
        {
            if (threads <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(threads), "The number of threads must be 1 or larger.");
            }

            this.threads = threads;

            negLogLikelihoodThread = new double[threads];
            gradientThread         = new double[threads][];

            for (var i = 0; i < threads; i++)
            {
                gradientThread[i] = new double[Dimension];
            }
        }
        /// <summary>
        /// Retrieves the first/only entity that has a reference matching the provided parameters.
        /// </summary>
        /// <param name="propertyName">The name of the property to query for.</param>
        /// <param name="referencedEntityType">The type of entity being referenced.</param>
        /// <param name="referencedEntityID">The ID of the referenced entity to match.</param>
        /// <returns>The first matching entity, or the default value of <typeparamref name="T"/> if none is found.</returns>
        public override T GetEntityWithReference <T>(string propertyName, Type referencedEntityType, Guid referencedEntityID)
        {
            IDataIndexer indexer = Provider.InitializeDataIndexer();

            indexer.AutoRelease = AutoRelease;
            indexer.DataStore   = DataStore;

            T[] entities = indexer.GetEntitiesWithReference <T>(propertyName, referencedEntityType, referencedEntityID);

            if (entities.Length == 0)
            {
                return(default(T));
            }
            else
            {
                return(entities[0]);
            }
        }
Example n. 10
        /// <summary>
        /// Retrieves the entity matching the filter group.
        /// </summary>
        /// <param name="group">The group of filters to apply to the query.</param>
        /// <returns>The matching entity.</returns>
        public override IEntity GetEntity(FilterGroup group)
        {
            IDataIndexer indexer = Provider.InitializeDataIndexer();

            indexer.AutoRelease = AutoRelease;
            indexer.DataStore   = DataStore;

            IEntity[] entities = indexer.GetEntities(group);

            if (entities != null && entities.Length > 0)
            {
                return(entities[0]);
            }
            else
            {
                return(null);
            }
        }
Example n. 11
        /// <summary>
        /// Train a model using the Perceptron algorithm.
        /// </summary>
        /// <param name="iterations">The number of Perceptron iterations to perform.</param>
        /// <param name="indexer">The object which will be used for event compilation.</param>
        /// <param name="cutoff">The number of times a predicate must occur to be used in a model.</param>
        /// <param name="useAverage"></param>
        /// <returns>The newly trained model, which can be used immediately or saved to disk using a <see cref="IO.PerceptronModelWriter"/> object.</returns>
        public AbstractModel TrainModel(int iterations, IDataIndexer indexer, int cutoff, bool useAverage)
        {
            Display("Incorporating indexed data for training...");

            info.Append("Trained using Perceptron algorithm.\n\n");

            // Executes the data indexer
            indexer.Execute();

            contexts           = indexer.GetContexts();
            values             = indexer.Values;
            numTimesEventsSeen = indexer.GetNumTimesEventsSeen();
            numEvents          = indexer.GetNumEvents();
            numUniqueEvents    = contexts.Length;

            outcomeLabels = indexer.GetOutcomeLabels();
            outcomeList   = indexer.GetOutcomeList();

            predLabels  = indexer.GetPredLabels();
            numPreds    = predLabels.Length;
            numOutcomes = outcomeLabels.Length;

            Display("\ndone.\n");

            info.Append("Number of Event Tokens: {0}\n" +
                        "    Number of Outcomes: {1}\n" +
                        "  Number of Predicates: {2}\n", numEvents, numOutcomes, numPreds);

            Display("\tNumber of Event Tokens: " + numUniqueEvents);
            Display("\t    Number of Outcomes: " + numOutcomes);
            Display("\t  Number of Predicates: " + numPreds);

            Display("Computing model parameters.");

            var finalParameters = FindParameters(iterations, useAverage);

            Display("\ndone.\n");

            // ReSharper disable once CoVariantArrayConversion
            return(new PerceptronModel(finalParameters, predLabels, outcomeLabels)
            {
                info = info
            });
        }
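A brief usage sketch for the method above; the trainer instance is passed in because its construction is not shown here, the class name PerceptronTrainer is assumed from the surrounding code, and the iteration count and cutoff are arbitrary example values.

        // Sketch: train an averaged perceptron model from an indexer using the method above.
        public static AbstractModel TrainAveragedPerceptron(PerceptronTrainer trainer, IDataIndexer indexer)
        {
            // 100 iterations, predicates must occur at least 5 times, averaged parameters enabled.
            return trainer.TrainModel(100, indexer, 5, useAverage: true);
        }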
        public static void Process(string SearchName)
        {
            /* Determine which data acquirer and data indexer to instantiate, passing the configured search element as the constructor argument */
            SearchElement search = Config.Searches[SearchName];

            Type AcquirerType = Type.GetType(search.DataAcquirer);
            Type IndexerType  = Type.GetType(search.DataIndexer);


            dataAcquirer = (IDataAcquirer)Activator.CreateInstance(AcquirerType, new object[] { search });
            dataIndexer  = (IDataIndexer)Activator.CreateInstance(IndexerType, new object[] { search });
            /* end */

            List <ISearchEntity> data = dataAcquirer.GetData();

            List <ISearchEntity> CreateEntities = new List <ISearchEntity>();
            List <ISearchEntity> DeleteEntities = new List <ISearchEntity>();

            foreach (ISearchEntity entity in data)
            {
                if (entity.ProcessType == EntityProcessType.insert)
                {
                    AddCreateIndex(CreateEntities, entity);
                }
                else if (entity.ProcessType == EntityProcessType.update)
                {
                    AddCreateIndex(CreateEntities, entity);
                }
                else if (entity.ProcessType == EntityProcessType.delete)
                {
                    DeleteEntities.Add(entity);
                }
                else
                {
                    continue;
                }
            }

            dataIndexer.CreateIndex(CreateEntities);
            dataIndexer.DeleteIndex(DeleteEntities);

            Console.Read();
        }
Example n. 13
        // Compares two parameter arrays within the given tolerance, after aligning the actual
        // values to the predicate/outcome ordering used by the expected test data.
        private bool CompareDoubleArray(double[] expected, double[] actual, IDataIndexer indexer, double tolerance)
        {
            var alignedActual = AlignDoubleArrayForTestData(actual, indexer.GetPredLabels(), indexer.GetOutcomeLabels());

            if (expected.Length != alignedActual.Length)
            {
                return(false);
            }

            for (var i = 0; i < alignedActual.Length; i++)
            {
                if (Math.Abs(alignedActual[i] - expected[i]) > tolerance)
                {
                    return(false);
                }
            }

            return(true);
        }
Example n. 14
        /// <summary>
        /// Retrieves the entity of the specified type matching the specified values.
        /// </summary>
        /// <param name="parameters">The parameters to query with.</param>
        /// <returns>The matching entity, or the default value of <typeparamref name="T"/> if no match is found.</returns>
        public override T GetEntity <T>(Dictionary <string, object> parameters)
        {
            T entity = default(T);

            using (LogGroup logGroup = LogGroup.StartDebug("Retrieving the entity of the specified type matching the provided entities."))
            {
                IDataIndexer indexer = Provider.InitializeDataIndexer();
                indexer.AutoRelease = AutoRelease;
                indexer.DataStore   = DataStore;

                T[] entities = indexer.GetEntities <T>(parameters);
                if (entities == null || entities.Length == 0)
                {
                    entity = default(T);
                }
                else
                {
                    entity = entities[0];
                }
            }
            return(entity);
        }
Example n. 15
        protected override IMaxentModel DoTrain(IDataIndexer indexer)
        {
            if (!IsValid())
            {
                throw new InvalidOperationException("trainParams are not valid!");
            }

            var useAverage = GetBoolParam(Parameters.UseAverage, true);

            UseSkippedAveraging = GetBoolParam(Parameters.UseSkippedAveraging, false);

            // overwrite: skipped averaging only takes effect when averaging is enabled
            if (UseSkippedAveraging)
            {
                useAverage = true;
            }

            StepSizeDecrease = GetDoubleParam(Parameters.StepSizeDecrease, 0d);

            Tolerance = GetDoubleParam(Parameters.Tolerance, DefaultTolerance);

            return(TrainModel(Iterations, indexer, Cutoff, useAverage));
        }
Example n. 16
        /// <summary>
        /// Retrieves the entity of the specified type from the data store.
        /// </summary>
        /// <param name="propertyName">Name of the property to match.</param>
        /// <param name="propertyValue">The value of the property to match.</param>
        /// <returns>The entity of the specified type found in the data store.</returns>
        public override T GetEntity <T>(string propertyName, object propertyValue)
        {
            T entity = default(T);

            using (LogGroup logGroup = LogGroup.StartDebug("Retrieving the entity of the specified type matching the provided property value."))
            {
                IDataIndexer indexer = Provider.InitializeDataIndexer();
                indexer.AutoRelease = AutoRelease;
                indexer.DataStore   = DataStore;

                T[] entities = indexer.GetEntities <T>(propertyName, propertyValue);

                if (entities == null || entities.Length == 0)
                {
                    entity = default(T);
                }
                else
                {
                    entity = entities[0];
                }
            }
            return(entity);
        }
Example n. 17
 /// <summary>
 /// Train a model using the Perceptron algorithm.
 /// </summary>
 /// <param name="iterations">The number of Perceptron iterations to perform.</param>
 /// <param name="indexer">The object which will be used for event compilation.</param>
 /// <param name="cutoff">The number of times a predicate must occur to be used in a model.</param>
 /// <returns>The newly trained model, which can be used immediately or saved to disk using a <see cref="IO.PerceptronModelWriter"/> object.</returns>
 public AbstractModel TrainModel(int iterations, IDataIndexer indexer, int cutoff) {
     return TrainModel(iterations, indexer, cutoff, true);
 }
Example n. 18
        /// <summary>
        /// Train a model using the Perceptron algorithm.
        /// </summary>
        /// <param name="iterations">The number of Perceptron iterations to perform.</param>
        /// <param name="indexer">The object which will be used for event compilation.</param>
        /// <param name="cutoff">The number of times a predicate must occur to be used in a model.</param>
        /// <param name="useAverage"></param>
        /// <returns>The newly trained model, which can be used immediately or saved to disk using a <see cref="IO.PerceptronModelWriter"/> object.</returns>
        public AbstractModel TrainModel(int iterations, IDataIndexer indexer, int cutoff, bool useAverage) {
            Display("Incorporating indexed data for training...");

            info.Append("Trained using Perceptron algorithm.\n\n");

            // Executes the data indexer
            indexer.Execute();

            contexts = indexer.GetContexts();
            values = indexer.Values;
            numTimesEventsSeen = indexer.GetNumTimesEventsSeen();
            numEvents = indexer.GetNumEvents();
            numUniqueEvents = contexts.Length;

            outcomeLabels = indexer.GetOutcomeLabels();
            outcomeList = indexer.GetOutcomeList();

            predLabels = indexer.GetPredLabels();
            numPreds = predLabels.Length;
            numOutcomes = outcomeLabels.Length;

            Display("\ndone.\n");

            info.Append("Number of Event Tokens: {0}\n" +
                        "    Number of Outcomes: {1}\n" +
                        "  Number of Predicates: {2}\n", numEvents, numOutcomes, numPreds);

            Display("\tNumber of Event Tokens: " + numUniqueEvents);
            Display("\t    Number of Outcomes: " + numOutcomes);
            Display("\t  Number of Predicates: " + numPreds);

            Display("Computing model parameters.");

            var finalParameters = FindParameters(iterations, useAverage);

            Display("\ndone.\n");

            // ReSharper disable once CoVariantArrayConversion
            return new PerceptronModel(finalParameters, predLabels, outcomeLabels) {
                info = info
            };
        }
Example n. 19
 /// <summary>
 /// Execute the training operation.
 /// </summary>
 /// <param name="indexer">The data indexer.</param>
 /// <returns>The trained <see cref="IMaxentModel"/> model.</returns>
 protected abstract IMaxentModel DoTrain(IDataIndexer indexer);
Example n. 20
 /// <summary>
 /// Execute the training operation.
 /// </summary>
 /// <param name="indexer">The data indexer.</param>
 /// <returns>The trained <see cref="IMaxentModel"/> model.</returns>
 protected override IMaxentModel DoTrain(IDataIndexer indexer)
 {
     return(TrainModel(Iterations, indexer));
 }
Example n. 21
 /// <summary>
 /// Execute the training operation.
 /// </summary>
 /// <param name="indexer">The data indexer.</param>
 /// <returns>The trained <see cref="IMaxentModel"/> model.</returns>
 protected abstract IMaxentModel DoTrain(IDataIndexer indexer);
Example n. 22
        /// <summary>
        /// Train a model using the GIS algorithm.
        /// </summary>
        /// <param name="iterations">The number of GIS iterations to perform.</param>
        /// <param name="indexer">The object which will be used for event compilation.</param>
        /// <param name="smoothing">Defines whether the created trainer will use smoothing while training the model.</param>
        /// <param name="modelPrior">The prior distribution for the model.</param>
        /// <param name="cutoff">The number of times a predicate must occur to be used in a model.</param>
        /// <param name="threads">The number of threads to use during the training.</param>
        /// <param name="monitor">
        /// An evaluation monitor that can be used to listen to messages during training or to cancel the training operation.
        /// This argument can be a <c>null</c> value.
        /// </param>
        /// <returns>The newly trained model, which can be used immediately or saved to disk using a <see cref="GISModelWriter"/> object.</returns>
        public static GISModel TrainModel(int iterations, IDataIndexer indexer, bool smoothing, IPrior modelPrior, int cutoff, int threads, Monitor monitor) {
            var trainer = new GISTrainer(monitor) {
                Smoothing = smoothing,
                SmoothingObservation = SmoothingObservation
            };

            if (modelPrior == null) {
                modelPrior = new UniformPrior();
            }

            return trainer.TrainModel(iterations, indexer, modelPrior, cutoff, threads);
        }
Example n. 23
        /// <summary>
        /// Perform the training process using the specified <paramref name="indexer"/> object.
        /// </summary>
        /// <param name="indexer">The indexer.</param>
        /// <returns>The trained <see cref="IMaxentModel"/> model.</returns>
        protected override IMaxentModel DoTrain(IDataIndexer indexer) {
            var threads = GetIntParam("Threads", 1);

            return TrainModel(Iterations, indexer, false, null, 0, threads, Monitor);
        }
Example n. 24
 /// <summary>
 /// Train a model using the GIS algorithm.
 /// </summary>
 /// <param name="iterations">The number of GIS iterations to perform.</param>
 /// <param name="indexer">The object which will be used for event compilation.</param>
 /// <returns>The newly trained model, which can be used immediately or saved to disk using a <see cref="GISModelWriter"/> object.</returns>
 public static GISModel TrainModel(int iterations, IDataIndexer indexer) {
     return TrainModel(iterations, indexer, false, null, 0);
 }
Example n. 25
File: GIS.cs Project: qooba/SharpNL
 /// <summary>
 /// Train a model using the GIS algorithm.
 /// </summary>
 /// <param name="iterations">The number of GIS iterations to perform.</param>
 /// <param name="indexer">The object which will be used for event compilation.</param>
 /// <param name="smoothing">Defines whether the created trainer will use smoothing while training the model.</param>
 /// <param name="monitor">
 /// An evaluation monitor that can be used to listen to messages during training or to cancel the training operation.
 /// This argument can be a <c>null</c> value.
 /// </param>
 /// <returns>The newly trained model, which can be used immediately or saved to disk using a <see cref="GISModelWriter"/> object.</returns>
 public static GISModel TrainModel(int iterations, IDataIndexer indexer, bool smoothing, Monitor monitor)
 {
     return(TrainModel(iterations, indexer, smoothing, null, 0, monitor));
 }
Example n. 26
 /// <summary>
 /// Train a model using the Perceptron algorithm.
 /// </summary>
 /// <param name="iterations">The number of Perceptron iterations to perform.</param>
 /// <param name="indexer">The object which will be used for event compilation.</param>
 /// <param name="cutoff">The number of times a predicate must occur to be used in a model.</param>
 /// <returns>The newly trained model, which can be used immediately or saved to disk using a <see cref="IO.PerceptronModelWriter"/> object.</returns>
 public AbstractModel TrainModel(int iterations, IDataIndexer indexer, int cutoff)
 {
     return(TrainModel(iterations, indexer, cutoff, true));
 }
Example n. 27
 /// <summary>
 /// Train a model using the GIS algorithm.
 /// </summary>
 /// <param name="iterations">The number of GIS iterations to perform.</param>
 /// <param name="di">The data indexer used to compress events in memory.</param>
 /// <param name="modelCutoff">The number of times a feature must occur to be included.</param>
 /// <returns>The newly trained model, which can be used immediately or saved to disk using an <see cref="GISModelWriter"/> object.</returns>
 public GISModel TrainModel(int iterations, IDataIndexer di, int modelCutoff) {
     return TrainModel(iterations, di, new UniformPrior(), modelCutoff, 1);
 }
Example n. 28
        /// <summary>
        /// Train a model using the GIS algorithm.
        /// </summary>
        /// <param name="iterations">The number of GIS iterations to perform.</param>
        /// <param name="di">The data indexer used to compress events in memory.</param>
        /// <param name="modelPrior">The prior distribution used to train this model.</param>
        /// <param name="modelCutoff">The number of times a feature must occur to be included.</param>
        /// <param name="threads">The number of threads used to train this model.</param>
        /// <returns>The newly trained model, which can be used immediately or saved to disk using an <see cref="GISModelWriter"/> object.</returns>
        public GISModel TrainModel(int iterations, IDataIndexer di, IPrior modelPrior, int modelCutoff, int threads) {

            if (threads <= 0)
                throw new ArgumentOutOfRangeException("threads", threads, @"Threads must be at least one or greater.");

            modelExpects = new MutableContext[threads][];

            info.Append("Trained using GIS algorithm.\n\n");

            // Executes the data indexer
            di.Execute();

            // Incorporate all of the needed info.
            Display("Incorporating indexed data for training...");
            contexts = di.GetContexts();
            values = di.Values;
            cutoff = modelCutoff;
            predicateCounts = di.GetPredCounts();
            numTimesEventsSeen = di.GetNumTimesEventsSeen();
            numUniqueEvents = contexts.Length;
            prior = modelPrior;

            // determine the correction constant and its inverse
            double correctionConstant = 0;
            for (int ci = 0; ci < contexts.Length; ci++) {
                if (values == null || values[ci] == null) {
                    if (contexts[ci].Length > correctionConstant) {
                        correctionConstant = contexts[ci].Length;
                    }
                } else {
                    var cl = values[ci][0];
                    for (var vi = 1; vi < values[ci].Length; vi++) {
                        cl += values[ci][vi];
                    }

                    if (cl > correctionConstant) {
                        correctionConstant = cl;
                    }
                }
            }

            Display("done.");

            outcomeLabels = di.GetOutcomeLabels();
            outcomeList = di.GetOutcomeList();
            numOutcomes = outcomeLabels.Length;

            predLabels = di.GetPredLabels();
            prior.SetLabels(outcomeLabels, predLabels);
            numPreds = predLabels.Length;

            info.Append("Number of Event Tokens: {0}\n", numUniqueEvents);
            info.Append("    Number of Outcomes: {0}\n", numOutcomes);
            info.Append("  Number of Predicates: {0}\n", numPreds);

            Display("\tNumber of Event Tokens: " + numUniqueEvents);
            Display("\t    Number of Outcomes: " + numOutcomes);
            Display("\t  Number of Predicates: " + numPreds);

            // set up feature arrays
            //var predCount = new float[numPreds][numOutcomes];

            var predCount = new float[numPreds][];

            for (int ti = 0; ti < numUniqueEvents; ti++) {
                for (int j = 0; j < contexts[ti].Length; j++) {

                    if (predCount[contexts[ti][j]] == null) {
                        predCount[contexts[ti][j]] = new float[numOutcomes];
                    }

                    if (values != null && values[ti] != null) {
                        predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti]*values[ti][j];
                    } else {
                        predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti];
                    }
                }
            }

            // ReSharper disable once RedundantAssignment
            di = null;

            // Get the observed expectations of the features. Strictly speaking,
            // we should divide the counts by the number of Tokens, but because of
            // the way the model's expectations are approximated in the
            // implementation, this is canceled out when we compute the next
            // iteration of a parameter, making the extra divisions wasteful.
            param = new MutableContext[numPreds];
            for (var i = 0; i < modelExpects.Length; i++)
                modelExpects[i] = new MutableContext[numPreds];

            observedExpects = new MutableContext[numPreds];


            // The model does not need the correction constant and the correction feature. The correction constant
            // is only needed during training, and the correction feature is not necessary.
            // For compatibility reasons the model contains from now on a correction constant of 1,
            // and a correction param of 0.
            // ReSharper disable once CoVariantArrayConversion
            evalParams = new EvalParameters(param, 0, 1, numOutcomes);

            var activeOutcomes = new int[numOutcomes];
            var allOutcomesPattern = new int[numOutcomes];
            for (var oi = 0; oi < numOutcomes; oi++) {
                allOutcomesPattern[oi] = oi;
            }
            for (var pi = 0; pi < numPreds; pi++) {
                var numActiveOutcomes = 0;
                int[] outcomePattern;
                if (Smoothing) {
                    numActiveOutcomes = numOutcomes;
                    outcomePattern = allOutcomesPattern;
                } else {
                    //determine active outcomes
                    for (var oi = 0; oi < numOutcomes; oi++) {
                        if (predCount[pi][oi] > 0 && predicateCounts[pi] >= cutoff) {
                            activeOutcomes[numActiveOutcomes] = oi;
                            numActiveOutcomes++;
                        }
                    }
                    if (numActiveOutcomes == numOutcomes) {
                        outcomePattern = allOutcomesPattern;
                    } else {
                        outcomePattern = new int[numActiveOutcomes];
                        for (var aoi = 0; aoi < numActiveOutcomes; aoi++) {
                            outcomePattern[aoi] = activeOutcomes[aoi];
                        }
                    }
                }
                param[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);

                foreach (MutableContext[] me in modelExpects)
                    me[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);

                observedExpects[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
                for (var aoi = 0; aoi < numActiveOutcomes; aoi++) {
                    var oi = outcomePattern[aoi];
                    param[pi].SetParameter(aoi, 0.0);

                    foreach (var modelExpect in modelExpects) {
                        modelExpect[pi].SetParameter(aoi, 0.0);
                    }

                    if (predCount[pi][oi] > 0) {
                        observedExpects[pi].SetParameter(aoi, predCount[pi][oi]);
                    } else if (Smoothing) {
                        observedExpects[pi].SetParameter(aoi, SmoothingObservation);
                    }
                }
            }

            Display("...done.");

            /***************** Find the parameters ************************/
            if (threads == 1)
                Display("Computing model parameters ...");
            else
                Display("Computing model parameters in " + threads + " threads...");

            FindParameters(iterations, correctionConstant);

            /*************** Create and return the model ******************/

            // To be compatible with old models the correction constant is always 1

            // ReSharper disable once CoVariantArrayConversion
            return new GISModel(param, predLabels, outcomeLabels, 1, evalParams.CorrectionParam) {
                info = TrainingInfo
            };
        }
Example n. 29
 /// <summary>
 /// Train a model using the GIS algorithm.
 /// </summary>
 /// <param name="iterations">The number of GIS iterations to perform.</param>
 /// <param name="indexer">The object which will be used for event compilation.</param>
 /// <param name="smoothing">Defines whether the created trainer will use smoothing while training the model.</param>
 /// <param name="modelPrior">The prior distribution for the model.</param>
 /// <param name="cutoff">The number of times a predicate must occur to be used in a model.</param>
 /// <returns>The newly trained model, which can be used immediately or saved to disk using a <see cref="GISModelWriter"/> object.</returns>
 public static GISModel TrainModel(int iterations, IDataIndexer indexer, bool smoothing, IPrior modelPrior, int cutoff) {
     return TrainModel(iterations, indexer, smoothing, modelPrior, cutoff, 1);
 }
Example n. 30
        protected override IMaxentModel DoTrain(IDataIndexer indexer) {
            if (!IsValid())
                throw new InvalidOperationException("trainParams are not valid!");

            var useAverage = GetBoolParam(Parameters.UseAverage, true);

            UseSkippedAveraging = GetBoolParam(Parameters.UseSkippedAveraging, false);

            // overwrite: skipped averaging only takes effect when averaging is enabled
            if (UseSkippedAveraging)
                useAverage = true;

            StepSizeDecrease = GetDoubleParam(Parameters.StepSizeDecrease, 0d);

            Tolerance = GetDoubleParam(Parameters.Tolerance, DefaultTolerance);

            return TrainModel(Iterations, indexer, Cutoff, useAverage);
        }
Example n. 31
 /// <summary>
 /// Train a model using the GIS algorithm.
 /// </summary>
 /// <param name="iterations">The number of GIS iterations to perform.</param>
 /// <param name="indexer">The object which will be used for event compilation.</param>
 /// <param name="smoothing">Defines whether the created trainer will use smoothing while training the model.</param>
 /// <param name="monitor">
 /// An evaluation monitor that can be used to listen to messages during training or to cancel the training operation.
 /// This argument can be a <c>null</c> value.
 /// </param>
 /// <returns>The newly trained model, which can be used immediately or saved to disk using a <see cref="GISModelWriter"/> object.</returns>
 public static GISModel TrainModel(int iterations, IDataIndexer indexer, bool smoothing, Monitor monitor) {
     return TrainModel(iterations, indexer, smoothing, null, 0, monitor);
 }
Example n. 32
File: GIS.cs Project: qooba/SharpNL
 /// <summary>
 /// Train a model using the GIS algorithm.
 /// </summary>
 /// <param name="iterations">The number of GIS iterations to perform.</param>
 /// <param name="indexer">The object which will be used for event compilation.</param>
 /// <param name="monitor">
 /// An evaluation monitor that can be used to listen to messages during training or to cancel the training operation.
 /// This argument can be a <c>null</c> value.
 /// </param>
 /// <returns>The newly trained model, which can be used immediately or saved to disk using a <see cref="GISModelWriter"/> object.</returns>
 public static GISModel TrainModel(int iterations, IDataIndexer indexer, Monitor monitor)
 {
     return(TrainModel(iterations, indexer, false, null, 0, monitor));
 }
Example n. 33
        /// <summary>
        /// Execute the training operation.
        /// </summary>
        /// <param name="iterations">The number of iterations.</param>
        /// <param name="indexer">The data indexer.</param>
        /// <returns>The trained <see cref="IMaxentModel" /> model.</returns>
        /// <exception cref="System.ArgumentOutOfRangeException">iterations</exception>
        /// <exception cref="System.ArgumentNullException">indexer</exception>
        /// <exception cref="System.InvalidOperationException">The number of threads is invalid.</exception>
        public QNModel TrainModel(int iterations, IDataIndexer indexer)
        {
            if (iterations < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(iterations));
            }

            if (indexer == null)
            {
                throw new ArgumentNullException(nameof(indexer));
            }

            IFunction function;

            if (threads == 1)
            {
                Display("Computing model parameters ...");
                function = new NegLogLikelihood(indexer);
            }
            else if (threads > 1)
            {
                Display("Computing model parameters in " + threads + " threads ...");
                function = new ParallelNegLogLikelihood(indexer, threads);
            }
            else
            {
                throw new InvalidOperationException("The number of threads is invalid.");
            }

            if (!indexer.Completed)
            {
                indexer.Execute();
            }

            var minimizer = new QNMinimizer(l1Cost, l2Cost, iterations, updates, maxFctEval, Monitor)
            {
                Evaluator = new QNModelEvaluator(indexer)
            };

            // minimized parameters
            var mp = minimizer.Minimize(function);

            // construct model with trained parameters

            var predLabels  = indexer.GetPredLabels();
            var nPredLabels = predLabels.Length;

            var outcomeNames = indexer.GetOutcomeLabels();
            var nOutcomes    = outcomeNames.Length;

            var parameters = new Context[nPredLabels];

            for (var ci = 0; ci < parameters.Length; ci++)
            {
                var outcomePattern = new List <int>(nOutcomes);
                var alpha          = new List <double>(nOutcomes);
                for (var oi = 0; oi < nOutcomes; oi++)
                {
                    var val = mp[oi * nPredLabels + ci];
                    outcomePattern.Add(oi);
                    alpha.Add(val);
                }
                parameters[ci] = new Context(outcomePattern.ToArray(), alpha.ToArray());
            }
            return(new QNModel(parameters, predLabels, outcomeNames));
        }
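A usage sketch for the quasi-Newton trainer above; the trainer instance is taken as a parameter since its constructor is not shown, the class name QNTrainer is assumed, and the iteration count is an arbitrary example value.

        // Sketch: run quasi-Newton (L-BFGS style) training on an indexer and return the model.
        public static QNModel TrainQuasiNewton(QNTrainer trainer, IDataIndexer indexer)
        {
            return trainer.TrainModel(100, indexer);
        }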
Example n. 34
File: GIS.cs Project: qooba/SharpNL
 /// <summary>
 /// Train a model using the GIS algorithm.
 /// </summary>
 /// <param name="iterations">The number of GIS iterations to perform.</param>
 /// <param name="indexer">The object which will be used for event compilation.</param>
 /// <param name="smoothing">Defines whether the created trainer will use smoothing while training the model.</param>
 /// <param name="modelPrior">The prior distribution for the model.</param>
 /// <param name="cutoff">The number of times a predicate must occur to be used in a model.</param>
 /// <returns>The newly trained model, which can be used immediately or saved to disk using a <see cref="GISModelWriter"/> object.</returns>
 public static GISModel TrainModel(int iterations, IDataIndexer indexer, bool smoothing, IPrior modelPrior, int cutoff)
 {
     return(TrainModel(iterations, indexer, smoothing, modelPrior, cutoff, 1));
 }
Example n. 35
 /// <summary>
 /// Train a model using the GIS algorithm.
 /// </summary>
 /// <param name="iterations">The number of GIS iterations to perform.</param>
 /// <param name="di">The data indexer used to compress events in memory.</param>
 /// <param name="modelCutoff">The number of times a feature must occur to be included.</param>
 /// <returns>The newly trained model, which can be used immediately or saved to disk using an <see cref="GISModelWriter"/> object.</returns>
 public GISModel TrainModel(int iterations, IDataIndexer di, int modelCutoff)
 {
     return(TrainModel(iterations, di, new UniformPrior(), modelCutoff, 1));
 }
Example n. 36
File: GIS.cs Project: qooba/SharpNL
        /// <summary>
        /// Perform the training process using the specified <paramref name="indexer"/> object.
        /// </summary>
        /// <param name="indexer">The indexer.</param>
        /// <returns>The trained <see cref="IMaxentModel"/> model.</returns>
        protected override IMaxentModel DoTrain(IDataIndexer indexer)
        {
            var threads = GetIntParam("Threads", 1);

            return(TrainModel(Iterations, indexer, false, null, 0, threads, Monitor));
        }
Example n. 37
        /// <summary>
        /// Train a model using the GIS algorithm.
        /// </summary>
        /// <param name="iterations">The number of GIS iterations to perform.</param>
        /// <param name="di">The data indexer used to compress events in memory.</param>
        /// <param name="modelPrior">The prior distribution used to train this model.</param>
        /// <param name="modelCutoff">The number of times a feature must occur to be included.</param>
        /// <param name="threads">The number of threads used to train this model.</param>
        /// <returns>The newly trained model, which can be used immediately or saved to disk using an <see cref="GISModelWriter"/> object.</returns>
        public GISModel TrainModel(int iterations, IDataIndexer di, IPrior modelPrior, int modelCutoff, int threads)
        {
            if (threads <= 0)
            {
                throw new ArgumentOutOfRangeException("threads", threads, @"Threads must be at least one or greater.");
            }

            modelExpects = new MutableContext[threads][];

            info.Append("Trained using GIS algorithm.\n\n");

            // Executes the data indexer
            di.Execute();

            // Incorporate all of the needed info.
            Display("Incorporating indexed data for training...");
            contexts           = di.GetContexts();
            values             = di.Values;
            cutoff             = modelCutoff;
            predicateCounts    = di.GetPredCounts();
            numTimesEventsSeen = di.GetNumTimesEventsSeen();
            numUniqueEvents    = contexts.Length;
            prior = modelPrior;

            // determine the correction constant and its inverse
            double correctionConstant = 0;

            for (int ci = 0; ci < contexts.Length; ci++)
            {
                if (values == null || values[ci] == null)
                {
                    if (contexts[ci].Length > correctionConstant)
                    {
                        correctionConstant = contexts[ci].Length;
                    }
                }
                else
                {
                    var cl = values[ci][0];
                    for (var vi = 1; vi < values[ci].Length; vi++)
                    {
                        cl += values[ci][vi];
                    }

                    if (cl > correctionConstant)
                    {
                        correctionConstant = cl;
                    }
                }
            }

            Display("done.");

            outcomeLabels = di.GetOutcomeLabels();
            outcomeList   = di.GetOutcomeList();
            numOutcomes   = outcomeLabels.Length;

            predLabels = di.GetPredLabels();
            prior.SetLabels(outcomeLabels, predLabels);
            numPreds = predLabels.Length;

            info.Append("Number of Event Tokens: {0}\n", numUniqueEvents);
            info.Append("    Number of Outcomes: {0}\n", numOutcomes);
            info.Append("  Number of Predicates: {0}\n", numPreds);

            Display("\tNumber of Event Tokens: " + numUniqueEvents);
            Display("\t    Number of Outcomes: " + numOutcomes);
            Display("\t  Number of Predicates: " + numPreds);

            // set up feature arrays
            //var predCount = new float[numPreds][numOutcomes];

            var predCount = new float[numPreds][];

            for (int ti = 0; ti < numUniqueEvents; ti++)
            {
                for (int j = 0; j < contexts[ti].Length; j++)
                {
                    if (predCount[contexts[ti][j]] == null)
                    {
                        predCount[contexts[ti][j]] = new float[numOutcomes];
                    }

                    if (values != null && values[ti] != null)
                    {
                        predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti] * values[ti][j];
                    }
                    else
                    {
                        predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti];
                    }
                }
            }

            // ReSharper disable once RedundantAssignment
            di = null;

            // Get the observed expectations of the features. Strictly speaking,
            // we should divide the counts by the number of Tokens, but because of
            // the way the model's expectations are approximated in the
            // implementation, this is canceled out when we compute the next
            // iteration of a parameter, making the extra divisions wasteful.
            param = new MutableContext[numPreds];
            for (var i = 0; i < modelExpects.Length; i++)
            {
                modelExpects[i] = new MutableContext[numPreds];
            }

            observedExpects = new MutableContext[numPreds];


            // The model does not need the correction constant and the correction feature. The correction constant
            // is only needed during training, and the correction feature is not necessary.
            // For compatibility reasons the model contains from now on a correction constant of 1,
            // and a correction param of 0.
            // ReSharper disable once CoVariantArrayConversion
            evalParams = new EvalParameters(param, 0, 1, numOutcomes);

            var activeOutcomes     = new int[numOutcomes];
            var allOutcomesPattern = new int[numOutcomes];

            for (var oi = 0; oi < numOutcomes; oi++)
            {
                allOutcomesPattern[oi] = oi;
            }
            for (var pi = 0; pi < numPreds; pi++)
            {
                var   numActiveOutcomes = 0;
                int[] outcomePattern;
                if (Smoothing)
                {
                    numActiveOutcomes = numOutcomes;
                    outcomePattern    = allOutcomesPattern;
                }
                else
                {
                    //determine active outcomes
                    for (var oi = 0; oi < numOutcomes; oi++)
                    {
                        if (predCount[pi][oi] > 0 && predicateCounts[pi] >= cutoff)
                        {
                            activeOutcomes[numActiveOutcomes] = oi;
                            numActiveOutcomes++;
                        }
                    }
                    if (numActiveOutcomes == numOutcomes)
                    {
                        outcomePattern = allOutcomesPattern;
                    }
                    else
                    {
                        outcomePattern = new int[numActiveOutcomes];
                        for (var aoi = 0; aoi < numActiveOutcomes; aoi++)
                        {
                            outcomePattern[aoi] = activeOutcomes[aoi];
                        }
                    }
                }
                param[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);

                foreach (MutableContext[] me in modelExpects)
                {
                    me[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
                }

                observedExpects[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
                for (var aoi = 0; aoi < numActiveOutcomes; aoi++)
                {
                    var oi = outcomePattern[aoi];
                    param[pi].SetParameter(aoi, 0.0);

                    foreach (var modelExpect in modelExpects)
                    {
                        modelExpect[pi].SetParameter(aoi, 0.0);
                    }

                    if (predCount[pi][oi] > 0)
                    {
                        observedExpects[pi].SetParameter(aoi, predCount[pi][oi]);
                    }
                    else if (Smoothing)
                    {
                        observedExpects[pi].SetParameter(aoi, SmoothingObservation);
                    }
                }
            }

            Display("...done.");

            /***************** Find the parameters ************************/
            if (threads == 1)
            {
                Display("Computing model parameters ...");
            }
            else
            {
                Display("Computing model parameters in " + threads + " threads...");
            }

            FindParameters(iterations, correctionConstant);

            /*************** Create and return the model ******************/

            // To be compatible with old models the correction constant is always 1

            // ReSharper disable once CoVariantArrayConversion
            return(new GISModel(param, predLabels, outcomeLabels, 1, evalParams.CorrectionParam)
            {
                info = TrainingInfo
            });
        }