Exemplo n.º 1
0
        /// <summary>
        /// Measures the supplied bag of words against every document currently held in
        /// <c>vectorTable</c> and returns the results ordered by descending measured value.
        /// </summary>
        /// <param name="bag">The bag of words to compare against the registered documents.</param>
        /// <returns>
        /// A lazily evaluated sequence with one <see cref="SimilarityResult"/> per registered
        /// document for which a value was produced, highest value first.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="bag"/> is <c>null</c>.</exception>
        public IEnumerable<SimilarityResult> FindSimilar(IBagOfWords bag)
        {
            if (bag == null)
            {
                throw new ArgumentNullException(nameof(bag));
            }

            logger.LogDebug("Searching for similar documents");
            var vector = encoder.GetFullVector(bag.Words.Select(item => item.Text).ToArray());

            // Snapshot the registered documents and their cached vectors up front so that
            // the shared table is never touched from inside the parallel region.
            var documents = vectorTable.Keys.ToArray();
            var vectors   = documents.Select(document => vectorTable[document]).ToArray();
            var computed  = new bool[documents.Length];
            var distances = new double?[documents.Length];

            // Each iteration reads and writes only its own array slots, so no two worker
            // threads ever write to the same location (the original wrote into a plain
            // Dictionary from several threads at once). The encoder and the distance
            // measurer are still invoked concurrently, exactly as before.
            Parallel.For(0, documents.Length, index =>
            {
                if (vectors[index] == null)
                {
                    // No cached vector yet: encode the document now and remember to store it.
                    vectors[index]  = encoder.GetFullVector(documents[index].Words.Select(item => item.Text).ToArray());
                    computed[index] = true;
                }

                distances[index] = distanceMeasurer.Measure(vector, vectors[index]);
            });

            // Write the freshly encoded vectors back to the cache from a single thread.
            for (var index = 0; index < documents.Length; index++)
            {
                if (computed[index])
                {
                    vectorTable[documents[index]] = vectors[index];
                }
            }

            // Filter before sorting so entries without a value are never compared.
            return Enumerable.Range(0, documents.Length)
                   .Where(index => distances[index].HasValue)
                   .OrderByDescending(index => distances[index])
                   .Select(index => new SimilarityResult(documents[index], distances[index].Value));
        }
Exemplo n.º 2
0
 /// <summary>
 /// Creates a context manager and stores the supplied collaborators in the
 /// corresponding instance fields. No validation is performed on the arguments.
 /// </summary>
 /// <param name="contextBuilder">Stored in the <c>contextBuilder</c> field.</param>
 /// <param name="repositoryManager">Stored in the <c>repositoryManager</c> field.</param>
 /// <param name="tagger">Stored in the <c>tagger</c> field.</param>
 /// <param name="bagOfWords">Stored in the <c>bagOfWords</c> field.</param>
 public ContextManager(
     IContextBuilder contextBuilder,
     IRepositoryManager repositoryManager,
     ITagger tagger,
     IBagOfWords bagOfWords)
 {
     // Plain field captures; the assignments are independent of one another.
     this.bagOfWords        = bagOfWords;
     this.tagger            = tagger;
     this.repositoryManager = repositoryManager;
     this.contextBuilder    = contextBuilder;
 }
Exemplo n.º 3
0
        /// <summary>
        /// Loads the training and testing images from disk, learns a bag-of-visual-words
        /// codebook from the training images, and stores one feature vector per image in
        /// <c>trainingFeatures</c> and <c>testingFeatures</c>, keyed by file path.
        /// </summary>
        private static void CreateBoW()
        {
            // Number of codewords handed to the clustering algorithm.
            var numberOfWords = 36;

            foreach (var file in Directory.EnumerateFiles(@"C:\Temp\TLLCamerasTestData\37_Training", "*.jpg"))
            {
                var trainingImage = (Bitmap)Bitmap.FromFile(file);

                trainingImages.Add(file, trainingImage);
            }

            foreach (var file in Directory.EnumerateFiles(@"C:\Temp\TLLCamerasTestData\37_Testing", "*.jpg"))
            {
                var testImage = (Bitmap)Bitmap.FromFile(file);

                testingImages.Add(file, testImage);
            }

            // We will use SURF, so we can use a standard clustering
            // algorithm that is based on Euclidean distances. A good
            // algorithm for clustering codewords is the Binary Split
            // variant of the K-Means algorithm.

            // Create a Binary-Split clustering algorithm
            BinarySplit binarySplit = new BinarySplit(numberOfWords);

            // Create bag-of-words (BoW) with the given algorithm
            BagOfVisualWords surfBow = new BagOfVisualWords(binarySplit);

            // Compute the BoW codebook using training images only
            IBagOfWords<Bitmap> bow = surfBow.Learn(trainingImages.Values.ToArray());

            // Resolve the transform view of the codebook once. A direct cast fails
            // immediately with InvalidCastException if the learned model does not
            // support it, instead of a NullReferenceException inside the loops below.
            var transform = (ITransform<Bitmap, double[]>)bow;

            // Now that the codebook exists, use it to create a feature-vector
            // representation of each training and test image.
            foreach (var entry in trainingImages)
            {
                var featureVector = transform.Transform((Bitmap)entry.Value);

                trainingFeatures.Add(entry.Key, featureVector);
            }

            foreach (var entry in testingImages)
            {
                var featureVector = transform.Transform((Bitmap)entry.Value);

                testingFeatures.Add(entry.Key, featureVector);
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Adds every word of the given bag to the encoder and records the bag in
        /// <c>vectorTable</c> with a <c>null</c> entry.
        /// </summary>
        /// <param name="bag">The bag of words to register.</param>
        /// <exception cref="ArgumentNullException"><paramref name="bag"/> is <c>null</c>.</exception>
        public void Register(IBagOfWords bag)
        {
            if (bag == null)
            {
                throw new ArgumentNullException(nameof(bag));
            }

            // Feed the text of each word to the encoder, in enumeration order.
            foreach (var text in bag.Words.Select(word => word.Text))
            {
                encoder.AddWord(text);
            }

            // The entry is stored as null here; FindSimilar fills in null entries on demand.
            vectorTable[bag] = null;
        }
Exemplo n.º 5
0
 /// <summary>
 /// Creates a result that pairs a document with its similarity value.
 /// </summary>
 /// <param name="document">The document the value refers to; must not be <c>null</c>.</param>
 /// <param name="similarity">The similarity value stored for the document.</param>
 /// <exception cref="ArgumentNullException"><paramref name="document"/> is <c>null</c>.</exception>
 public SimilarityResult(IBagOfWords document, double similarity)
 {
     // Reject a missing document before any member is assigned.
     if (document == null)
     {
         throw new ArgumentNullException(nameof(document));
     }

     Document   = document;
     Similarity = similarity;
 }
Exemplo n.º 6
0
 /// <summary>
 /// Transforms a bitmap into its feature vector using the given bag-of-words model.
 /// </summary>
 /// <param name="bitmap">The image to transform.</param>
 /// <param name="bow">The bag-of-words model, used through its bitmap-to-double[] transform interface.</param>
 /// <returns>The array returned by the model's <c>Transform</c> call.</returns>
 /// <exception cref="InvalidCastException">
 /// <paramref name="bow"/> does not implement the bitmap-to-double[] transform interface.
 /// </exception>
 private double[] GetData(Bitmap bitmap, IBagOfWords<Bitmap> bow)
     // Direct cast instead of "as": an unsupported model fails with a descriptive
     // InvalidCastException rather than a NullReferenceException on the call.
     => ((ITransform<Bitmap, double[]>)bow).Transform(bitmap);