/// <summary>
/// Computes the hash code this state would have if <paramref name="withFeature"/>
/// were part of its feature set. The state itself is not modified.
/// </summary>
/// <param name="withFeature">The feature to include hypothetically.</param>
/// <returns>The hash code of the id generated from the extended feature set.</returns>
public int GetHashCodeWith(FeatureValuePair withFeature)
{
    // Work on a copy so the state's own Features collection stays untouched.
    var extendedFeatures = new HashSet<FeatureValuePair>(Features) { withFeature };

    return GenerateId(extendedFeatures).GetHashCode();
}
/// <summary>
/// Determines whether <paramref name="obj"/> is a feature/value pair with the same
/// name and value as this instance.
/// </summary>
/// <param name="obj">The object to compare against; may be null or of any type.</param>
/// <returns>
/// True when <paramref name="obj"/> is a <see cref="FeatureValuePair"/> (or derived type)
/// whose Name and Value both equal this instance's; otherwise false.
/// </returns>
public override bool Equals(object obj)
{
    // A safe cast instead of a hard cast: Equals must return false (not throw)
    // for null or for objects of an unrelated type.
    FeatureValuePair that = obj as FeatureValuePair;
    if (ReferenceEquals(that, null))
    {
        return false;
    }

    // The static object.Equals handles null members on either side and otherwise
    // defers to the instance Equals, so non-null comparisons behave as before.
    // NOTE(review): GetHashCode is not visible in this chunk - confirm it is
    // overridden consistently with this definition of equality.
    return object.Equals(this.Name, that.Name)
        && object.Equals(this.Value, that.Value);
}
/// <summary>
/// Builds a training data vector from parallel arrays of headers, values and
/// importance weights, then moves the feature named by
/// <paramref name="labelFeatureName"/> out of the feature list and into the label.
/// </summary>
/// <param name="headers">The names of the features.</param>
/// <param name="dataobjects">The value of each feature, in the same order as <paramref name="headers"/>.</param>
/// <param name="importance">The relative importance of each feature (-1 to 1), in the same order as <paramref name="headers"/>.</param>
/// <param name="labelFeatureName">The header of the feature to use as the label.</param>
/// <exception cref="FormatException">The three arrays do not all have the same length.</exception>
/// <exception cref="ArgumentException"><paramref name="labelFeatureName"/> is not one of the headers.</exception>
public DataVectorTraining(string[] headers, object[] dataobjects, double[] importance, string labelFeatureName)
{
    // Every value needs both a header and an importance weight.
    bool lengthsAgree = (headers.Length == dataobjects.Length) && (headers.Length == importance.Length);
    if (!lengthsAgree)
    {
        throw new FormatException("Number of headers, importance, and data per line do not match. Ensure there is a header and importance for each value.");
    }

    // The label has to be one of the supplied columns.
    if (!headers.Contains(labelFeatureName))
    {
        throw new ArgumentException("'labelFeatureName' must exist in the list of headers.");
    }

    // Start from an empty feature list and add one feature per column.
    Features = new List<FeatureValuePair>();
    for (int column = 0; column < headers.Length; column++)
    {
        AddFeature(headers[column], dataobjects[column], importance[column]);
    }

    // Promote the chosen feature to the label.
    FeatureValuePair chosenLabel = Features.Find(feature => feature.Name == labelFeatureName);
    SetLabel(chosenLabel.Name, chosenLabel.Value);

    // Make sure the label no longer appears among the ordinary features.
    Features.RemoveAll(feature => feature.Name == Label.Name);
}
/// <summary>
/// Removes the given label from every state in the state space.
/// The whole pass runs while holding <c>processLock</c>.
/// </summary>
/// <param name="label">The label to remove from each state.</param>
public void RemoveLabel(FeatureValuePair label)
{
    lock (processLock)
    {
        foreach (State member in StateSpace.Values)
        {
            member.RemoveLabel(label);
        }
    }
}
//Constructors

/// <summary>
/// Creates a new state from an existing state plus one extra feature.
/// Construction is first delegated to the (original, dataVector) constructor;
/// the extra feature is then added on top.
/// </summary>
/// <param name="original">The state to build upon.</param>
/// <param name="additionalFeature">The feature to add to the new state.</param>
/// <param name="dataVector">The training data vector passed on to the delegated constructor.</param>
public State(State original, FeatureValuePair additionalFeature, DataVectorTraining dataVector)
    : this(original, dataVector)
{
    // No null/disposed validation is performed on additionalFeature here.
    this.AddFeature(additionalFeature);
}
/// <summary>
/// Removes from the state space every state whose feature collection contains
/// <paramref name="theFeature"/>. Runs while holding <c>processLock</c>.
/// </summary>
/// <param name="theFeature">The feature that marks a state for removal.</param>
public void RemoveStatesWithFeature(FeatureValuePair theFeature)
{
    lock (processLock)
    {
        // Collect the matches first so the dictionary is not modified while
        // its values are being enumerated.
        var statesToDrop = this.StateSpace.Values
            .Where(candidate => candidate.Features.Contains(theFeature))
            .ToList();

        foreach (State doomed in statesToDrop)
        {
            // States are keyed in the state space by their hash code.
            this.StateSpace.Remove(doomed.GetHashCode());
        }
    }
}
/// <summary>
/// Computes the hash code this state would have if the specified feature
/// were removed from its feature set. The state itself is not modified.
/// </summary>
/// <param name="WithoutFeature">The feature to leave out hypothetically.</param>
/// <returns>
/// Zero when no features remain after the removal; otherwise the hash code of
/// the id generated from the remaining features.
/// </returns>
public int GetHashCodeWithout(FeatureValuePair WithoutFeature)
{
    // Operate on a copy so the state's own Features collection stays untouched.
    var remainingFeatures = new HashSet<FeatureValuePair>(Features);
    remainingFeatures.Remove(WithoutFeature);

    // An empty feature set maps to the fixed hash code 0.
    return remainingFeatures.Count == 0
        ? 0
        : GenerateId(remainingFeatures).GetHashCode();
}
/// <summary>
/// Removes, from every state in the state space, each query whose feature equals
/// <paramref name="theFeature"/>. Runs while holding <c>processLock</c>.
/// </summary>
/// <param name="theFeature">The feature whose queries should be removed.</param>
public void RemoveQueriesWithFeature(FeatureValuePair theFeature)
{
    lock (processLock)
    {
        foreach (State current in this.StateSpace.Values.ToList())
        {
            // Snapshot the matching keys before removing, so the query
            // collection is never changed during its own enumeration.
            var matchingQueries = current.Queries.Keys
                .Where(candidate => candidate.Feature.Equals(theFeature))
                .ToList();

            foreach (Query stale in matchingQueries)
            {
                current.Queries.Remove(stale);
            }
        }
    }
}
//Methods

/// <summary>
/// Adds a copy of the given feature to this state, records its name, and removes
/// every query that asks about a feature with the same name.
/// </summary>
/// <remarks>
/// NOTE(review): earlier documentation said the feature is also marked as the
/// "MostRecentFeature"; this method does not set any such member.
/// </remarks>
/// <param name="theFeature">The feature to add.</param>
private void AddFeature(FeatureValuePair theFeature)
{
    // Store a plain FeatureValuePair so that derived types (such as
    // FeatureValuePairWithImportance) are never kept in the feature list.
    Features.Add(new FeatureValuePair(theFeature.Name, theFeature.Value));
    FeatureNames.Add(theFeature.Name);

    // Queries about a feature the state now contains are no longer needed.
    var answeredQueries = Queries.Keys
        .Where(candidate => candidate.Feature.Name == theFeature.Name)
        .ToList();

    foreach (var answered in answeredQueries)
    {
        Queries.Remove(answered);
    }
}
/// <summary>
/// Records one more observation of <paramref name="correctLabel"/> and recomputes,
/// for every label of this state, its share of all observations (0.0 to 1.0) as
/// well as the state's normalised Gini impurity.
/// </summary>
/// <param name="correctLabel">The label that was observed to be correct.</param>
public void AdjustLabels(FeatureValuePair correctLabel)
{
    // With a very high impurity, discard what has been learned so far.
    if (GiniImpurity > 0.99)
    {
        foreach (var knownLabel in Labels.Select(entry => entry.Key).ToList())
        {
            Labels[knownLabel] = 0;
            LabelsCount[knownLabel] = 0;
        }
    }

    // Scale the counters down occasionally so they cannot grow without bound.
    if (LabelsCount.Sum(entry => entry.Value) > 10000)
    {
        foreach (FeatureValuePair knownLabel in Labels.Select(entry => entry.Key))
        {
            LabelsCount[knownLabel] /= 10;
        }
    }

    // Register the label the first time it is seen.
    if (!Labels.ContainsKey(correctLabel))
    {
        Labels.Add(correctLabel, 0.0);
        LabelsCount.Add(correctLabel, 0);
    }

    // Count this observation.
    LabelsCount[correctLabel] += 1;

    // Recompute every label's share and accumulate the sum of squared shares.
    double totalCount = LabelsCount.Sum(entry => entry.Value);
    double sumOfSquares = 0;
    foreach (var entry in Labels.ToList())
    {
        double share = LabelsCount[entry.Key] / totalCount; // 0.0 to 1.0
        Labels[entry.Key] = share;
        sumOfSquares += Math.Pow(share, 2.0);
    }

    // Normalise by the largest impurity possible for this number of labels.
    // The small excess over 1.0 keeps the divisor non-zero when there is only one label.
    double largestImpurity = 1.00000001 - (1.0 / Labels.Count);
    GiniImpurity = (1 - sumOfSquares) / largestImpurity;
}
//Constructors

/// <summary>
/// Creates a query that pairs a data-vector feature with a label.
/// Both arguments are copied into plain <see cref="FeatureValuePair"/> instances.
/// </summary>
/// <param name="datavectorFeature">The feature the query refers to; its Name and Value must be non-null.</param>
/// <param name="label">The label the query is associated with; its Name and Value must be non-null.</param>
/// <exception cref="ArgumentException">
/// An argument is null, or the Name or Value of an argument is null.
/// </exception>
public Query(FeatureValuePair datavectorFeature, FeatureValuePair label)
{
    // Reject missing arguments before touching their members.
    bool argumentMissing = datavectorFeature == null || label == null;
    if (argumentMissing)
    {
        throw new ArgumentException("Parameters cannot be null.");
    }

    bool featureIncomplete = datavectorFeature.Name == null || datavectorFeature.Value == null;
    if (featureIncomplete)
    {
        throw new ArgumentException("DatavectorFeature's Name and Value parameters cannot be null.");
    }

    bool labelIncomplete = label.Name == null || label.Value == null;
    if (labelIncomplete)
    {
        throw new ArgumentException("Labels's Name and Value parameters cannot be null.");
    }

    // Copy into base-type instances so extra details carried by a derived
    // object are not stored with the query.
    Feature = new FeatureValuePair(datavectorFeature.Name, datavectorFeature.Value);
    Label = new FeatureValuePair(label.Name, label.Value);
}
/// <summary>
/// Removes the given label from this state: both its entry in <c>Labels</c>
/// and its counter in <c>LabelsCount</c>.
/// </summary>
/// <param name="label">The label to remove.</param>
public void RemoveLabel(FeatureValuePair label)
{
    Labels.Remove(label);
    LabelsCount.Remove(label);
}
/// <summary>
/// Chooses the query to ask next for the given data vector, if asking is worthwhile.
/// For every label, the best matching query is compared with the value stored for
/// that label (treated as the label's expected reward). Among the queries that beat
/// their label, the one with the highest expected reward is returned.
/// </summary>
/// <param name="dataVector">
/// The data vector being classified. When it is exactly a <see cref="DataVectorTraining"/>,
/// missing queries and labels are added to this state first.
/// </param>
/// <returns>
/// The best query, or null when no query matches the data vector or no query beats
/// its label - meaning querying is not the recommended action.
/// </returns>
public Query GetBestQuery(DataVector dataVector)
{
    // Training vectors may carry details this state does not know about yet.
    if (dataVector.GetType() == typeof(DataVectorTraining))
    {
        AddMissingQueriesAndLabels((DataVectorTraining)dataVector);
    }

    // From the averaged query groups, keep those whose feature occurs in the data vector.
    var candidateQueries = GetAverageGroupQueries()
        .Where(q => dataVector.Features.Find(f => q.Key.Feature.Equals(f)) != null)
        .ToList();

    // Nothing can be asked about this data vector.
    if (candidateQueries.Count == 0)
    {
        return null;
    }

    // For each label, remember its best query if that query beats the label itself.
    // (A single pass over all candidates was once sketched as a possibly faster
    // alternative, but this per-label comparison is the version in use.)
    var queriesBeatingLabel = new List<KeyValuePair<Query, double>>();
    foreach (var labelEntry in Labels.ToList())
    {
        FeatureValuePair currentLabel = labelEntry.Key;
        double labelReward = labelEntry.Value;

        var queriesForLabel = candidateQueries
            .Where(q => q.Key.Label.Equals(currentLabel))
            .ToList();
        if (queriesForLabel.Count == 0)
        {
            continue;
        }

        var topForLabel = queriesForLabel.OrderByDescending(p => p.Value).First();
        if (topForLabel.Value > labelReward)
        {
            queriesBeatingLabel.Add(topForLabel);
        }
    }

    // Default is null: don't query, because the labels provide the best reward.
    return queriesBeatingLabel.Count > 0
        ? queriesBeatingLabel.OrderByDescending(q => q.Value).First().Key
        : null;
}
/// <summary>
/// Sets the label of this data vector to a new feature/value pair and removes
/// every feature with the same name from the feature list.
/// </summary>
/// <param name="featureName">The name of the label feature.</param>
/// <param name="value">The value of the label.</param>
public void SetLabel(string featureName, object value)
{
    Label = new FeatureValuePair(featureName, value);

    // A feature cannot be both the label and an ordinary feature.
    Features.RemoveAll(existing => existing.Name == featureName);
}
//Methods

/// <summary>
/// Appends a new feature/value pair to the feature list.
/// </summary>
/// <param name="featureName">The name of the feature.</param>
/// <param name="value">The value of the feature.</param>
public void AddFeature(string featureName, object value)
{
    Features.Add(new FeatureValuePair(featureName, value));
}