/// <summary>
///   Learns a model that can map the given inputs to the desired outputs.
/// </summary>
/// 
/// <param name="x">The model inputs.</param>
/// <param name="weights">The weight of importance for each input sample.</param>
/// 
/// <returns>A model that has learned how to produce suitable outputs
///   given the input data <paramref name="x"/>.</returns>
/// 
public TModel Learn(TFeature[] x, double[] weights = null)
{
    if (weights != null && x.Length != weights.Length)
    {
        throw new DimensionMismatchException("weights",
            "The weights vector should have the same length as x.");
    }

    if (x.Length <= NumberOfWords)
    {
        throw new InvalidOperationException("Not enough data points to cluster. Please try "
            + "to adjust the feature extraction algorithm to generate more points.");
    }

    this.Statistics = new BagOfWordsStatistics()
    {
        TotalNumberOfDescriptors = x.Length
    };

    return learn(x, weights);
}
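
// Usage sketch (illustrative only, not part of the original source): a concrete
// descendant such as Accord.NET's BagOfVisualWords exposes Learn overloads built
// on the methods in this file. A minimal example, assuming the framework's usual
// imaging types and a hypothetical LoadTrainingImages() helper:
//
//     using System.Drawing;
//     using Accord.MachineLearning;
//
//     Bitmap[] images = LoadTrainingImages();           // hypothetical helper
//     var bow = new BagOfVisualWords(numberOfWords: 36);
//     var codebook = bow.Learn(images);                 // clusters the extracted descriptors
//     double[][] features = codebook.Transform(images); // one histogram of words per image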
/// <summary>
///   Generic learn method implementation that should work for any input type.
///   This method is useful for re-using code between methods that accept Bitmap,
///   BitmapData, UnmanagedImage, filenames as strings, etc.
/// </summary>
/// 
/// <typeparam name="T">The input type.</typeparam>
/// 
/// <param name="x">The inputs.</param>
/// <param name="weights">The weights.</param>
/// <param name="extractor">A function that knows how to process an input
///   and extract features from it.</param>
/// 
/// <returns>The trained model.</returns>
/// 
protected TModel InnerLearn<T>(T[] x, double[] weights,
    Func<T, TExtractor, IEnumerable<TPoint>> extractor)
{
    var descriptorsPerInstance = new TFeature[x.Length][];
    var totalDescriptorCounts = new double[x.Length];
    int takenDescriptorCount = 0;

    // Extract descriptors from every instance, in parallel
    For(0, x.Length, (i, detector) =>
    {
        // Stop taking descriptors once the requested total has been reached
        if (NumberOfDescriptors > 0 && takenDescriptorCount >= NumberOfDescriptors)
            return;

        TFeature[] desc = extractor(x[i], detector).Select(p => p.Descriptor).ToArray();
        totalDescriptorCounts[i] = desc.Length;

        // Optionally limit how many descriptors a single instance can contribute
        if (MaxDescriptorsPerInstance > 0)
            desc = desc.Sample(MaxDescriptorsPerInstance);

        Interlocked.Add(ref takenDescriptorCount, desc.Length);
        descriptorsPerInstance[i] = desc;
    });

    if (NumberOfDescriptors > 0 && takenDescriptorCount < NumberOfDescriptors)
    {
        throw new InvalidOperationException("There were not enough descriptors to sample the desired "
            + "number of samples ({0}). Please either increase the number of instances, or increase the ".Format(NumberOfDescriptors)
            + "number of descriptors sampled from each instance by adjusting the MaxDescriptorsPerInstance property ({0}).".Format(MaxDescriptorsPerInstance));
    }

    // Flatten the per-instance descriptors into a single pool, remembering
    // which instance each descriptor came from
    var totalDescriptors = new TFeature[takenDescriptorCount];
    var totalWeights = weights != null ? new double[takenDescriptorCount] : null;
    int[] instanceIndices = new int[takenDescriptorCount];

    int c = 0;
    for (int i = 0; i < descriptorsPerInstance.Length; i++)
    {
        if (descriptorsPerInstance[i] != null)
        {
            for (int j = 0; j < descriptorsPerInstance[i].Length; j++)
            {
                if (weights != null)
                    totalWeights[c] = weights[i]; // each descriptor inherits its instance's weight

                totalDescriptors[c] = descriptorsPerInstance[i][j];
                instanceIndices[c] = i;
                c++;
            }
        }
    }

    // If an exact number of descriptors was requested, sub-sample the pool
    if (NumberOfDescriptors > 0)
    {
        int[] idx = Vector.Sample(NumberOfDescriptors);
        totalDescriptors = totalDescriptors.Get(idx);
        instanceIndices = instanceIndices.Get(idx);
        if (totalWeights != null)
            totalWeights = totalWeights.Get(idx); // keep weights aligned with the sampled descriptors
    }

    int[] hist = instanceIndices.Histogram();

    Debug.Assert(hist.Sum() == (NumberOfDescriptors > 0 ? NumberOfDescriptors : takenDescriptorCount));

    this.Statistics = new BagOfWordsStatistics()
    {
        TotalNumberOfInstances = x.Length,
        TotalNumberOfDescriptors = (int)totalDescriptorCounts.Sum(),
        TotalNumberOfDescriptorsPerInstance = NormalDistribution.Estimate(
            totalDescriptorCounts, new NormalOptions { Robust = true }),
        TotalNumberOfDescriptorsPerInstanceRange = new IntRange(
            (int)totalDescriptorCounts.Min(), (int)totalDescriptorCounts.Max()),
        NumberOfInstancesTaken = hist.Length,
        NumberOfDescriptorsTaken = totalDescriptors.Length,
        NumberOfDescriptorsTakenPerInstance = NormalDistribution.Estimate(
            hist.ToDouble(), new NormalOptions { Robust = true }),
        NumberOfDescriptorsTakenPerInstanceRange = new IntRange(hist.Min(), hist.Max())
    };

    return learn(totalDescriptors, totalWeights);
}
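
// Sketch (illustrative only): a descendant would typically expose one public
// Learn overload per supported input type and forward each to InnerLearn with a
// matching extractor delegate. The Bitmap overload below is an assumption based
// on this method's documentation; it presumes TExtractor exposes a Transform
// method accepting a Bitmap and returning IEnumerable<TPoint>, which should be
// checked against the actual extractor interface:
//
//     public TModel Learn(Bitmap[] x, double[] weights = null)
//     {
//         // Each parallel worker receives its own detector instance, so the
//         // delegate only needs to run the extractor on a single image:
//         return InnerLearn(x, weights, (image, detector) => detector.Transform(image));
//     }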