public static void Process(string SearchName) { /* determine which IDataAcquirer and IDataIndexer implementations to instantiate, passing the search configuration element as the constructor argument */ SearchElement search = Config.Searches[SearchName]; Type AcquirerType = Type.GetType(search.DataAcquirer); Type IndexerType = Type.GetType(search.DataIndexer); dataAcquirer = (IDataAcquirer)Activator.CreateInstance(AcquirerType, new object[] { search }); dataIndexer = (IDataIndexer)Activator.CreateInstance(IndexerType, new object[] { search }); /* end */ List<ISearchEntity> data = dataAcquirer.GetData(); List<ISearchEntity> CreateEntities = new List<ISearchEntity>(); List<ISearchEntity> DeleteEntities = new List<ISearchEntity>(); foreach (ISearchEntity entity in data) { if (entity.ProcessType == EntityProcessType.insert) AddCreateIndex(CreateEntities, entity); else if (entity.ProcessType == EntityProcessType.update) AddCreateIndex(CreateEntities, entity); else if (entity.ProcessType == EntityProcessType.delete) DeleteEntities.Add(entity); else continue; } dataIndexer.CreateIndex(CreateEntities); dataIndexer.DeleteIndex(DeleteEntities); Console.Read(); }
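The Process example above resolves its acquirer and indexer types from configuration by name, so the interfaces themselves never appear in the snippet. The following is a minimal sketch of the contracts implied by the calls above; the member shapes are inferred from usage and may differ from the actual project, and this search-oriented IDataIndexer is unrelated to the machine-learning IDataIndexer used in the trainer examples further down.

// Sketch only: contracts inferred from how Process uses its two dependencies.
public interface IDataAcquirer
{
    // Returns the entities to be indexed, each flagged with a ProcessType
    // of insert, update or delete.
    List<ISearchEntity> GetData();
}

public interface IDataIndexer
{
    // Adds or updates index entries for the supplied entities.
    void CreateIndex(List<ISearchEntity> entities);

    // Removes index entries for the supplied entities.
    void DeleteIndex(List<ISearchEntity> entities);
}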
/// <summary> /// Retrieves the entity of the specified type from the data store. /// </summary> /// <param name="type">The type of entity to retrieve.</param> /// <param name="propertyName">Name of the property to match.</param> /// <param name="propertyValue">The value of the property to match.</param> /// <returns>The entity of the specified type found in the data store, or <c>null</c> if no match is found.</returns> public override IEntity GetEntity(Type type, string propertyName, object propertyValue) { IEntity entity = null; using (LogGroup logGroup = LogGroup.StartDebug("Retrieving the entity of the specified type with a property matching the provided property name and value.")) { IDataIndexer indexer = Provider.InitializeDataIndexer(); indexer.AutoRelease = AutoRelease; indexer.DataStore = DataStore; IEntity[] entities = indexer.GetEntities(type, propertyName, propertyValue); if (entities != null && entities.Length > 0) { entity = entities[0]; } if (entity == null) { LogWriter.Debug("Entity: [null]"); } else { LogWriter.Debug("Entity ID: " + entity.ID.ToString()); } } return(entity); }
/// <summary> /// Initializes a new instance of the <see cref="NegLogLikelihood"/> class. /// </summary> /// <param name="indexer">The data indexer.</param> public NegLogLikelihood(IDataIndexer indexer) { if (indexer == null) { throw new ArgumentNullException(nameof(indexer)); } if (!indexer.Completed) { indexer.Execute(); } values = indexer is OnePassRealValueDataIndexer ? indexer.Values : null; contexts = indexer.GetContexts(); outcomeList = indexer.GetOutcomeList(); numTimesEventsSeen = indexer.GetNumTimesEventsSeen(); numOutcomes = indexer.GetOutcomeLabels().Length; numFeatures = indexer.GetPredLabels().Length; numContexts = contexts.Length; dimension = numOutcomes * numFeatures; expectation = new double[numOutcomes]; tempSums = new double[numOutcomes]; gradient = new double[dimension]; }
/// <summary> /// Retrieves the entity of the specified type matching the specified values. /// </summary> /// <param name="type">The type of entity to retrieve.</param> /// <param name="parameters">The parameters to query with.</param> /// <returns>The matching entity, or <c>null</c> if no match is found.</returns> public override IEntity GetEntity(Type type, Dictionary <string, object> parameters) { IEntity entity = null; using (LogGroup logGroup = LogGroup.StartDebug("Retrieving the entity of the specified type matching the provided parameters.")) { foreach (string key in parameters.Keys) { LogWriter.Debug("Parameter: " + key + " = " + parameters[key].ToString()); } IDataIndexer indexer = Provider.InitializeDataIndexer(); indexer.AutoRelease = AutoRelease; indexer.DataStore = DataStore; IEntity[] entities = indexer.GetEntities(type, parameters); // TODO: Check if this should be ignored. if (entities != null && entities.Length > 1) { throw new Exception("More than one match found when there should only be one."); } if (entities == null || entities.Length == 0) { entity = null; } else { entity = entities[0]; } } return(entity); }
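A call to the parameter-dictionary overload above might look like the following; the retriever instance and the User entity type are hypothetical placeholders for whatever the hosting application provides.

// Hypothetical usage of the dictionary overload shown above.
var parameters = new Dictionary<string, object>
{
    { "FirstName", "Jane" },
    { "LastName", "Smith" }
};

// "retriever" stands for an instance of the class that defines GetEntity.
IEntity entity = retriever.GetEntity(typeof(User), parameters);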
/// <summary> /// Execute the training operation. /// </summary> /// <param name="indexer">The data indexer.</param> /// <returns>The trained <see cref="IMaxentModel"/> model.</returns> protected override IMaxentModel DoTrain(IDataIndexer indexer) { Display("Incorporating indexed data for training..."); indexer.Execute(); contexts = indexer.GetContexts(); values = indexer.Values; numTimesEventsSeen = indexer.GetNumTimesEventsSeen(); numEvents = indexer.GetNumEvents(); numUniqueEvents = contexts.Length; outcomeLabels = indexer.GetOutcomeLabels(); outcomeList = indexer.GetOutcomeList(); predLabels = indexer.GetPredLabels(); numPreds = predLabels.Length; numOutcomes = outcomeLabels.Length; Display("done."); Display("\tNumber of Event Tokens: " + numUniqueEvents); Display("\t Number of Outcomes: " + numOutcomes); Display("\t Number of Predicates: " + numPreds); Display("Computing model parameters..."); // ReSharper disable once CoVariantArrayConversion - we read the parameters ;) Context[] finalParameters = FindParameters(); Display("...done.\n"); return(new NaiveBayesModel(finalParameters, predLabels, outcomeLabels)); }
/// <summary> /// Initializes a new instance of the <see cref="QNModelEvaluator"/> class. /// </summary> /// <param name="indexer">The data indexer.</param> /// <exception cref="System.ArgumentNullException">The <paramref name="indexer"/> is null.</exception> public QNModelEvaluator(IDataIndexer indexer) { if (indexer == null) { throw new ArgumentNullException(nameof(indexer)); } this.indexer = indexer; }
/// <summary> /// Train a model using the GIS algorithm. /// </summary> /// <param name="iterations">The number of GIS iterations to perform.</param> /// <param name="indexer">The object which will be used for event compilation.</param> /// <param name="smoothing">Defines whether the created trainer will use smoothing while training the model.</param> /// <param name="modelPrior">The prior distribution for the model.</param> /// <param name="cutoff">The number of times a predicate must occur to be used in a model.</param> /// <param name="threads">The number of threads to use during the training.</param> /// <param name="monitor"> /// An evaluation monitor that can be used to listen to messages during training or to cancel the training operation. /// This argument can be a <c>null</c> value. /// </param> /// <returns>The newly trained model, which can be used immediately or saved to disk using a <see cref="GISModelWriter"/> object.</returns> public static GISModel TrainModel(int iterations, IDataIndexer indexer, bool smoothing, IPrior modelPrior, int cutoff, int threads, Monitor monitor) { var trainer = new GISTrainer(monitor) { Smoothing = smoothing, SmoothingObservation = SmoothingObservation }; if (modelPrior == null) { modelPrior = new UniformPrior(); } return(trainer.TrainModel(iterations, indexer, modelPrior, cutoff, threads)); }
/// <summary> /// Initializes a new instance of the <see cref="ParallelNegLogLikelihood"/> class. /// </summary> /// <param name="indexer">The data indexer.</param> /// <param name="threads">The number of threads.</param> public ParallelNegLogLikelihood(IDataIndexer indexer, int threads) : base(indexer) { if (threads <= 0) { throw new ArgumentOutOfRangeException(nameof(threads), "The number of threads must be 1 or larger."); } this.threads = threads; negLogLikelihoodThread = new double[threads]; gradientThread = new double[threads][]; for (var i = 0; i < threads; i++) { gradientThread[i] = new double[Dimension]; } }
/// <summary> /// Retrieves the first/only entity that has a reference matching the provided parameters. /// </summary> /// <param name="propertyName">The name of the property to query for.</param> /// <param name="referencedEntityType">The type of entity being referenced.</param> /// <param name="referencedEntityID">The ID of the referenced entity to match.</param> /// <returns>The first matching entity, or the default value of <typeparamref name="T"/> if no match is found.</returns> public override T GetEntityWithReference <T>(string propertyName, Type referencedEntityType, Guid referencedEntityID) { IDataIndexer indexer = Provider.InitializeDataIndexer(); indexer.AutoRelease = AutoRelease; indexer.DataStore = DataStore; T[] entities = indexer.GetEntitiesWithReference <T>(propertyName, referencedEntityType, referencedEntityID); if (entities.Length == 0) { return(default(T)); } else { return(entities[0]); } }
/// <summary> /// Retrieves the entity matching the filter group. /// </summary> /// <param name="group">The group of filters to apply to the query.</param> /// <returns>The matching entity.</returns> public override IEntity GetEntity(FilterGroup group) { IDataIndexer indexer = Provider.InitializeDataIndexer(); indexer.AutoRelease = AutoRelease; indexer.DataStore = DataStore; IEntity[] entities = indexer.GetEntities(group); if (entities != null && entities.Length > 0) { return(entities[0]); } else { return(null); } }
/// <summary> /// Train a model using the Perceptron algorithm. /// </summary> /// <param name="iterations">The number of Perceptron iterations to perform.</param> /// <param name="indexer">The object which will be used for event compilation.</param> /// <param name="cutoff">The number of times a predicate must occur to be used in a model.</param> /// <param name="useAverage">Indicates whether parameter averaging should be used when producing the final model.</param> /// <returns>The newly trained model, which can be used immediately or saved to disk using a <see cref="IO.PerceptronModelWriter"/> object.</returns> public AbstractModel TrainModel(int iterations, IDataIndexer indexer, int cutoff, bool useAverage) { Display("Incorporating indexed data for training..."); info.Append("Trained using Perceptron algorithm.\n\n"); // Executes the data indexer indexer.Execute(); contexts = indexer.GetContexts(); values = indexer.Values; numTimesEventsSeen = indexer.GetNumTimesEventsSeen(); numEvents = indexer.GetNumEvents(); numUniqueEvents = contexts.Length; outcomeLabels = indexer.GetOutcomeLabels(); outcomeList = indexer.GetOutcomeList(); predLabels = indexer.GetPredLabels(); numPreds = predLabels.Length; numOutcomes = outcomeLabels.Length; Display("\ndone.\n"); info.Append("Number of Event Tokens: {0}\n" + " Number of Outcomes: {1}\n" + " Number of Predicates: {2}\n", numEvents, numOutcomes, numPreds); Display("\tNumber of Event Tokens: " + numUniqueEvents); Display("\t Number of Outcomes: " + numOutcomes); Display("\t Number of Predicates: " + numPreds); Display("Computing model parameters."); var finalParameters = FindParameters(iterations, useAverage); Display("\ndone.\n"); // ReSharper disable once CoVariantArrayConversion return(new PerceptronModel(finalParameters, predLabels, outcomeLabels) { info = info }); }
private bool CompareDoubleArray(double[] expected, double[] actual, IDataIndexer indexer, double tolerance) { var alignedActual = AlignDoubleArrayForTestData(actual, indexer.GetPredLabels(), indexer.GetOutcomeLabels()); if (expected.Length != alignedActual.Length) { return(false); } for (var i = 0; i < alignedActual.Length; i++) { if (Math.Abs(alignedActual[i] - expected[i]) > tolerance) { return(false); } } return(true); }
/// <summary> /// Retrieves the entity of the specified type matching the specified values. /// </summary> /// <param name="parameters">The parameters to query with.</param> /// <returns>The matching entity, or the default value of <typeparamref name="T"/> if no match is found.</returns> public override T GetEntity <T>(Dictionary <string, object> parameters) { T entity = default(T); using (LogGroup logGroup = LogGroup.StartDebug("Retrieving the entity of the specified type matching the provided parameters.")) { IDataIndexer indexer = Provider.InitializeDataIndexer(); indexer.AutoRelease = AutoRelease; indexer.DataStore = DataStore; T[] entities = indexer.GetEntities <T>(parameters); if (entities == null || entities.Length == 0) { entity = default(T); } else { entity = entities[0]; } } return(entity); }
protected override IMaxentModel DoTrain(IDataIndexer indexer) { if (!IsValid()) { throw new InvalidOperationException("trainParams are not valid!"); } var useAverage = GetBoolParam(Parameters.UseAverage, true); UseSkippedAveraging = GetBoolParam(Parameters.UseSkippedAveraging, false); // skipped averaging implies averaging, so force it on if (UseSkippedAveraging) { useAverage = true; } StepSizeDecrease = GetDoubleParam(Parameters.StepSizeDecrease, 0d); Tolerance = GetDoubleParam(Parameters.Tolerance, DefaultTolerance); return(TrainModel(Iterations, indexer, Cutoff, useAverage)); }
/// <summary> /// Retrieves the entity of the specified type from the data store. /// </summary> /// <param name="propertyName">Name of the property to match.</param> /// <param name="propertyValue">The value of the property to match.</param> /// <returns>The entity of the specified type found in the data store.</returns> public override T GetEntity <T>(string propertyName, object propertyValue) { T entity = default(T); using (LogGroup logGroup = LogGroup.StartDebug("Retrieving the entity of the specified type matching the provided property value.")) { IDataIndexer indexer = Provider.InitializeDataIndexer(); indexer.AutoRelease = AutoRelease; indexer.DataStore = DataStore; T[] entities = indexer.GetEntities <T>(propertyName, propertyValue); if (entities == null || entities.Length == 0) { entity = default(T); } else { entity = entities[0]; } } return(entity); }
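A call to the generic overload above might look like the following; the retriever instance and the User entity type are hypothetical placeholders for whatever the hosting application provides.

// Hypothetical usage: retrieve a single entity by property name and value.
User user = retriever.GetEntity<User>("Username", "jsmith");

if (user == null)
    LogWriter.Debug("No matching user found.");
else
    LogWriter.Debug("User ID: " + user.ID);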
/// <summary> /// Train a model using the Perceptron algorithm. /// </summary> /// <param name="iterations">The number of Perceptron iterations to perform.</param> /// <param name="indexer">The object which will be used for event compilation.</param> /// <param name="cutoff">The number of times a predicate must occur to be used in a model.</param> /// <returns>The newly trained model, which can be used immediately or saved to disk using a <see cref="IO.PerceptronModelWriter"/> object.</returns> public AbstractModel TrainModel(int iterations, IDataIndexer indexer, int cutoff) { return TrainModel(iterations, indexer, cutoff, true); }
/// <summary> /// Execute the training operation. /// </summary> /// <param name="indexer">The data indexer.</param> /// <returns>The trained <see cref="IMaxentModel"/> model.</returns> protected abstract IMaxentModel DoTrain(IDataIndexer indexer);
/// <summary> /// Execute the training operation. /// </summary> /// <param name="indexer">The data indexer.</param> /// <returns>The trained <see cref="IMaxentModel"/> model.</returns> protected override IMaxentModel DoTrain(IDataIndexer indexer) { return(TrainModel(Iterations, indexer)); }
/// <summary> /// Perform the training process using the specified <paramref name="indexer"/> object. /// </summary> /// <param name="indexer">The indexer.</param> /// <returns>The trained <see cref="IMaxentModel"/> model.</returns> protected override IMaxentModel DoTrain(IDataIndexer indexer) { var threads = GetIntParam("Threads", 1); return TrainModel(Iterations, indexer, false, null, 0, threads, Monitor); }
/// <summary> /// Train a model using the GIS algorithm. /// </summary> /// <param name="iterations">The number of GIS iterations to perform.</param> /// <param name="indexer">The object which will be used for event compilation.</param> /// <returns>The newly trained model, which can be used immediately or saved to disk using a <see cref="GISModelWriter"/> object.</returns> public static GISModel TrainModel(int iterations, IDataIndexer indexer) { return TrainModel(iterations, indexer, false, null, 0); }
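For orientation, a training call through the two-argument overload above might look like the following sketch. Several names here are assumptions: the containing static class is assumed to be GIS as in OpenNLP, ReadTrainingEvents() is a hypothetical event source, and the OnePassRealValueDataIndexer constructor (event stream plus predicate cutoff) is taken from the OpenNLP original and may differ in this port.

// Hedged sketch: build a data indexer over training events and hand it to the trainer.
var events = ReadTrainingEvents();                              // hypothetical helper
IDataIndexer indexer = new OnePassRealValueDataIndexer(events, 5); // assumed constructor

// The GIS trainer executes the data indexer itself, then runs 100 iterations.
GISModel model = GIS.TrainModel(100, indexer);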
/// <summary> /// Train a model using the GIS algorithm. /// </summary> /// <param name="iterations">The number of GIS iterations to perform.</param> /// <param name="indexer">The object which will be used for event compilation.</param> /// <param name="smoothing">Defines whether the created trainer will use smoothing while training the model.</param> /// <param name="monitor"> /// An evaluation monitor that can be used to listen to messages during training or to cancel the training operation. /// This argument can be a <c>null</c> value. /// </param> /// <returns>The newly trained model, which can be used immediately or saved to disk using a <see cref="GISModelWriter"/> object.</returns> public static GISModel TrainModel(int iterations, IDataIndexer indexer, bool smoothing, Monitor monitor) { return(TrainModel(iterations, indexer, smoothing, null, 0, monitor)); }
/// <summary> /// Train a model using the GIS algorithm. /// </summary> /// <param name="iterations">The number of GIS iterations to perform.</param> /// <param name="di">The data indexer used to compress events in memory.</param> /// <param name="modelCutoff">The number of times a feature must occur to be included.</param> /// <returns>The newly trained model, which can be used immediately or saved to disk using an <see cref="GISModelWriter"/> object.</returns> public GISModel TrainModel(int iterations, IDataIndexer di, int modelCutoff) { return TrainModel(iterations, di, new UniformPrior(), modelCutoff, 1); }
/// <summary> /// Train a model using the GIS algorithm. /// </summary> /// <param name="iterations">The number of GIS iterations to perform.</param> /// <param name="di">The data indexer used to compress events in memory.</param> /// <param name="modelPrior">The prior distribution used to train this model.</param> /// <param name="modelCutoff">The number of times a feature must occur to be included.</param> /// <param name="threads">The number of threads used to train this model.</param> /// <returns>The newly trained model, which can be used immediately or saved to disk using an <see cref="GISModelWriter"/> object.</returns> public GISModel TrainModel(int iterations, IDataIndexer di, IPrior modelPrior, int modelCutoff, int threads) { if (threads <= 0) throw new ArgumentOutOfRangeException("threads", threads, @"Threads must be at least one or greater."); modelExpects = new MutableContext[threads][]; info.Append("Trained using GIS algorithm.\n\n"); // Executes the data indexer di.Execute(); // Incorporate all of the needed info. Display("Incorporating indexed data for training..."); contexts = di.GetContexts(); values = di.Values; cutoff = modelCutoff; predicateCounts = di.GetPredCounts(); numTimesEventsSeen = di.GetNumTimesEventsSeen(); numUniqueEvents = contexts.Length; prior = modelPrior; // determine the correction constant and its inverse double correctionConstant = 0; for (int ci = 0; ci < contexts.Length; ci++) { if (values == null || values[ci] == null) { if (contexts[ci].Length > correctionConstant) { correctionConstant = contexts[ci].Length; } } else { var cl = values[ci][0]; for (var vi = 1; vi < values[ci].Length; vi++) { cl += values[ci][vi]; } if (cl > correctionConstant) { correctionConstant = cl; } } } Display("done."); outcomeLabels = di.GetOutcomeLabels(); outcomeList = di.GetOutcomeList(); numOutcomes = outcomeLabels.Length; predLabels = di.GetPredLabels(); prior.SetLabels(outcomeLabels, predLabels); numPreds = predLabels.Length; info.Append("Number of Event Tokens: {0}\n", numUniqueEvents); info.Append(" Number of Outcomes: {0}\n", numOutcomes); info.Append(" Number of Predicates: {0}\n", numPreds); Display("\tNumber of Event Tokens: " + numUniqueEvents); Display("\t Number of Outcomes: " + numOutcomes); Display("\t Number of Predicates: " + numPreds); // set up feature arrays //var predCount = new float[numPreds][numOutcomes]; var predCount = new float[numPreds][]; for (int ti = 0; ti < numUniqueEvents; ti++) { for (int j = 0; j < contexts[ti].Length; j++) { if (predCount[contexts[ti][j]] == null) { predCount[contexts[ti][j]] = new float[numOutcomes]; } if (values != null && values[ti] != null) { predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti]*values[ti][j]; } else { predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti]; } } } // ReSharper disable once RedundantAssignment di = null; // Get the observed expectations of the features. Strictly speaking, // we should divide the counts by the number of Tokens, but because of // the way the model's expectations are approximated in the // implementation, this is canceled out when we compute the next // iteration of a parameter, making the extra divisions wasteful. param = new MutableContext[numPreds]; for (var i = 0; i < modelExpects.Length; i++) modelExpects[i] = new MutableContext[numPreds]; observedExpects = new MutableContext[numPreds]; // The model does need the correction constant and the correction feature. 
The correction constant // is only needed during training, and the correction feature is not necessary. // For compatibility reasons the model now contains a correction constant of 1, // and a correction param of 0. // ReSharper disable once CoVariantArrayConversion evalParams = new EvalParameters(param, 0, 1, numOutcomes); var activeOutcomes = new int[numOutcomes]; var allOutcomesPattern = new int[numOutcomes]; for (var oi = 0; oi < numOutcomes; oi++) { allOutcomesPattern[oi] = oi; } for (var pi = 0; pi < numPreds; pi++) { var numActiveOutcomes = 0; int[] outcomePattern; if (Smoothing) { numActiveOutcomes = numOutcomes; outcomePattern = allOutcomesPattern; } else { //determine active outcomes for (var oi = 0; oi < numOutcomes; oi++) { if (predCount[pi][oi] > 0 && predicateCounts[pi] >= cutoff) { activeOutcomes[numActiveOutcomes] = oi; numActiveOutcomes++; } } if (numActiveOutcomes == numOutcomes) { outcomePattern = allOutcomesPattern; } else { outcomePattern = new int[numActiveOutcomes]; for (var aoi = 0; aoi < numActiveOutcomes; aoi++) { outcomePattern[aoi] = activeOutcomes[aoi]; } } } param[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]); foreach (MutableContext[] me in modelExpects) me[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]); observedExpects[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]); for (var aoi = 0; aoi < numActiveOutcomes; aoi++) { var oi = outcomePattern[aoi]; param[pi].SetParameter(aoi, 0.0); foreach (var modelExpect in modelExpects) { modelExpect[pi].SetParameter(aoi, 0.0); } if (predCount[pi][oi] > 0) { observedExpects[pi].SetParameter(aoi, predCount[pi][oi]); } else if (Smoothing) { observedExpects[pi].SetParameter(aoi, SmoothingObservation); } } } Display("...done."); /***************** Find the parameters ************************/ if (threads == 1) Display("Computing model parameters ..."); else Display("Computing model parameters in " + threads + " threads..."); FindParameters(iterations, correctionConstant); /*************** Create and return the model ******************/ // To be compatible with old models the correction constant is always 1 // ReSharper disable once CoVariantArrayConversion return new GISModel(param, predLabels, outcomeLabels, 1, evalParams.CorrectionParam) { info = TrainingInfo }; }
/// <summary> /// Train a model using the GIS algorithm. /// </summary> /// <param name="iterations">The number of GIS iterations to perform.</param> /// <param name="indexer">The object which will be used for event compilation.</param> /// <param name="smoothing">Defines whether the created trainer will use smoothing while training the model.</param> /// <param name="modelPrior">The prior distribution for the model.</param> /// <param name="cutoff">The number of times a predicate must occur to be used in a model.</param> /// <returns>The newly trained model, which can be used immediately or saved to disk using a <see cref="GISModelWriter"/> object.</returns> public static GISModel TrainModel(int iterations, IDataIndexer indexer, bool smoothing, IPrior modelPrior, int cutoff) { return TrainModel(iterations, indexer, smoothing, modelPrior, cutoff, 1); }
/// <summary> /// Train a model using the GIS algorithm. /// </summary> /// <param name="iterations">The number of GIS iterations to perform.</param> /// <param name="indexer">The object which will be used for event compilation.</param> /// <param name="monitor"> /// An evaluation monitor that can be used to listen to messages during training or to cancel the training operation. /// This argument can be a <c>null</c> value. /// </param> /// <returns>The newly trained model, which can be used immediately or saved to disk using a <see cref="GISModelWriter"/> object.</returns> public static GISModel TrainModel(int iterations, IDataIndexer indexer, Monitor monitor) { return(TrainModel(iterations, indexer, false, null, 0, monitor)); }
/// <summary> /// Execute the training operation. /// </summary> /// <param name="iterations">The number of iterations.</param> /// <param name="indexer">The data indexer.</param> /// <returns>The trained <see cref="IMaxentModel" /> model.</returns> /// <exception cref="System.ArgumentOutOfRangeException">iterations</exception> /// <exception cref="System.ArgumentNullException">indexer</exception> /// <exception cref="System.InvalidOperationException">The number of threads is invalid.</exception> public QNModel TrainModel(int iterations, IDataIndexer indexer) { if (iterations < 0) { throw new ArgumentOutOfRangeException(nameof(iterations)); } if (indexer == null) { throw new ArgumentNullException(nameof(indexer)); } IFunction function; if (threads == 1) { Display("Computing model parameters ..."); function = new NegLogLikelihood(indexer); } else if (threads > 1) { Display("Computing model parameters in " + threads + " threads ..."); function = new ParallelNegLogLikelihood(indexer, threads); } else { throw new InvalidOperationException("The number of threads is invalid."); } if (!indexer.Completed) { indexer.Execute(); } var minimizer = new QNMinimizer(l1Cost, l2Cost, iterations, updates, maxFctEval, Monitor) { Evaluator = new QNModelEvaluator(indexer) }; // minimized parameters var mp = minimizer.Minimize(function); // construct model with trained parameters var predLabels = indexer.GetPredLabels(); var nPredLabels = predLabels.Length; var outcomeNames = indexer.GetOutcomeLabels(); var nOutcomes = outcomeNames.Length; var parameters = new Context[nPredLabels]; for (var ci = 0; ci < parameters.Length; ci++) { var outcomePattern = new List <int>(nOutcomes); var alpha = new List <double>(nOutcomes); for (var oi = 0; oi < nOutcomes; oi++) { var val = mp[oi * nPredLabels + ci]; outcomePattern.Add(oi); alpha.Add(val); } parameters[ci] = new Context(outcomePattern.ToArray(), alpha.ToArray()); } return(new QNModel(parameters, predLabels, outcomeNames)); }
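For reference, the minimized vector mp in the example above is laid out outcome-major: the weight for outcome oi and predicate ci is read from index oi * nPredLabels + ci. A tiny helper, illustrative only and not part of the library, makes that indexing explicit.

// Illustrative only: index into the flat parameter vector returned by the
// QN minimizer, using the same outcome-major layout as the loop above.
private static double GetParameter(double[] mp, int nPredLabels, int oi, int ci)
{
    return mp[oi * nPredLabels + ci];
}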