Code example #1
        /// <summary>
        /// Learns a model that can map the given inputs to the desired outputs.
        /// </summary>
        /// <param name="x">The model inputs.</param>
        /// <param name="weights">The weight of importance for each input sample.</param>
        /// <returns>A model that has learned how to produce suitable outputs
        /// given the input data <paramref name="x" />.</returns>
        public TModel Learn(TFeature[] x, double[] weights = null)
        {
            if (weights != null && x.Length != weights.Length)
            {
                throw new DimensionMismatchException("weights", "The weights vector should have the same length as x.");
            }

            if (x.Length <= NumberOfWords)
            {
                throw new InvalidOperationException("Not enough data points to cluster. Please try "
                                                    + "to adjust the feature extraction algorithm to generate more points.");
            }

            this.Statistics = new BagOfWordsStatistics()
            {
                TotalNumberOfDescriptors = x.Length,
            };

            return learn(x, weights);
        }
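For context, a minimal usage sketch. It assumes Accord.NET's BagOfVisualWords as a concrete subclass of the class these methods belong to; the image-loading helper is a hypothetical placeholder, not a real API:

        // Hypothetical usage: learn a visual codebook, then quantize an image.
        // Learn throws InvalidOperationException if too few descriptors are found.
        var bow = new BagOfVisualWords(10);              // 10 visual words
        Bitmap[] images = LoadTrainingImages();          // placeholder helper
        var codebook = bow.Learn(images);
        double[] histogram = codebook.Transform(images[0]);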
Code example #2
        /// <summary>
        ///   Generic learn method implementation that should work for any input type.
        ///   This method is useful for reusing code between methods that accept Bitmap,
        ///   BitmapData, UnmanagedImage, file names given as strings, etc.
        /// </summary>
        ///
        /// <typeparam name="T">The input type.</typeparam>
        ///
        /// <param name="x">The inputs.</param>
        /// <param name="weights">The weights.</param>
        /// <param name="extractor">A function that knows how to process the input
        ///   and extract features from them.</param>
        ///
        /// <returns>The trained model.</returns>
        ///
        protected TModel InnerLearn<T>(T[] x, double[] weights,
                                       Func<T, TExtractor, IEnumerable<TPoint>> extractor)
        {
            var descriptorsPerInstance = new TFeature[x.Length][];
            var totalDescriptorCounts  = new double[x.Length];
            int takenDescriptorCount   = 0;

            // Extract descriptors from every instance. The For helper runs the body
            // in parallel, handing each worker its own detector instance.
            For(0, x.Length, (i, detector) =>
            {
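                // Stop collecting once the requested number of descriptors has already been taken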
                if (NumberOfDescriptors > 0 && takenDescriptorCount >= NumberOfDescriptors)
                {
                    return;
                }

                TFeature[] desc = extractor(x[i], detector).Select(p => p.Descriptor).ToArray();

                totalDescriptorCounts[i] = desc.Length;

                if (MaxDescriptorsPerInstance > 0)
                {
                    desc = desc.Sample(MaxDescriptorsPerInstance);
                }

                Interlocked.Add(ref takenDescriptorCount, desc.Length);

                descriptorsPerInstance[i] = desc;
            });

            if (NumberOfDescriptors > 0 && takenDescriptorCount < NumberOfDescriptors)
            {
                throw new InvalidOperationException(String.Format(
                    "There were not enough descriptors to sample the desired number of samples ({0}). " +
                    "Please either increase the number of instances, or increase the number of descriptors " +
                    "sampled from each instance by adjusting the MaxDescriptorsPerInstance property ({1}).",
                    NumberOfDescriptors, MaxDescriptorsPerInstance));
            }

            var totalDescriptors = new TFeature[takenDescriptorCount];
            var totalWeights     = weights != null ? new double[takenDescriptorCount] : null;

            int[] instanceIndices = new int[takenDescriptorCount];

            int c = 0;

            // Flatten the per-instance descriptor arrays into a single vector, recording
            // which instance produced each descriptor. Each descriptor inherits the weight
            // of its originating instance, so that totalWeights lines up one-to-one with
            // totalDescriptors when both are passed to learn below.
            for (int i = 0; i < descriptorsPerInstance.Length; i++)
            {
                if (descriptorsPerInstance[i] != null)
                {
                    for (int j = 0; j < descriptorsPerInstance[i].Length; j++)
                    {
                        if (weights != null)
                        {
                            totalWeights[c] = weights[i];
                        }
                        totalDescriptors[c] = descriptorsPerInstance[i][j];
                        instanceIndices[c]  = i;
                        c++;
                    }
                }
            }

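            // If a target number of descriptors was requested, keep only that many,
            // chosen through a random index permutation (Vector.Sample).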
            if (NumberOfDescriptors > 0)
            {
                int[] idx = Vector.Sample(NumberOfDescriptors);
                totalDescriptors = totalDescriptors.Get(idx);
                instanceIndices  = instanceIndices.Get(idx);
                if (totalWeights != null)
                {
                    totalWeights = totalWeights.Get(idx);
                }
            }

            int[] hist = instanceIndices.Histogram();

            Debug.Assert(hist.Sum() == (NumberOfDescriptors > 0 ? NumberOfDescriptors : takenDescriptorCount));

            this.Statistics = new BagOfWordsStatistics()
            {
                TotalNumberOfInstances = x.Length,
                TotalNumberOfDescriptors = (int)totalDescriptorCounts.Sum(),
                TotalNumberOfDescriptorsPerInstance = NormalDistribution.Estimate(
                    totalDescriptorCounts, new NormalOptions { Robust = true }),
                TotalNumberOfDescriptorsPerInstanceRange = new IntRange(
                    (int)totalDescriptorCounts.Min(), (int)totalDescriptorCounts.Max()),

                NumberOfInstancesTaken = hist.Length,
                NumberOfDescriptorsTaken = totalDescriptors.Length,
                NumberOfDescriptorsTakenPerInstance = NormalDistribution.Estimate(
                    hist.ToDouble(), new NormalOptions { Robust = true }),
                NumberOfDescriptorsTakenPerInstanceRange = new IntRange(hist.Min(), hist.Max())
            };

            return learn(totalDescriptors, totalWeights);
        }
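For reference, a sketch of how a public overload might delegate to InnerLearn, passing a lambda that runs the feature detector on one input. The Bitmap overload and the detector's Transform call follow Accord.NET's general pattern, but the exact signatures may differ:

        // Hedged sketch: a Bitmap-based Learn overload built on InnerLearn.
        // The lambda receives one input plus the thread-local detector and
        // must yield the feature points extracted from that input.
        public TModel Learn(Bitmap[] x, double[] weights = null)
        {
            return InnerLearn(x, weights, (image, detector) => detector.Transform(image));
        }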