/// <summary>
/// Measures the supplied bag of words against every document currently held in the vector table.
/// </summary>
/// <param name="bag">Bag of words whose word texts are encoded and compared with the registered documents.</param>
/// <returns>
/// One <see cref="SimilarityResult"/> per registered document that produced a measurement,
/// ordered by descending measured value.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="bag"/> is null.</exception>
public IEnumerable<SimilarityResult> FindSimilar(IBagOfWords bag)
{
    if (bag == null)
    {
        throw new ArgumentNullException(nameof(bag));
    }

    logger.LogDebug("Searching for similar documents");
    var vector = encoder.GetFullVector(bag.Words.Select(item => item.Text).ToArray());

    // Snapshot the registered documents and their cached vectors up front so the
    // parallel section below only reads and writes per-index array slots.
    var documents = vectorTable.Keys.ToArray();
    var vectors = documents.Select(document => vectorTable[document]).ToArray();
    var distances = new double?[documents.Length];

    // Each iteration touches only its own index, so no shared collection is mutated
    // concurrently (the previous implementation wrote to a Dictionary from several
    // threads, which Dictionary does not support).
    Parallel.For(0, documents.Length, index =>
    {
        var existing = vectors[index];
        if (existing == null)
        {
            // The vector for this document has not been computed yet: build it now.
            existing = encoder.GetFullVector(documents[index].Words.Select(item => item.Text).ToArray());
            vectors[index] = existing;
        }

        distances[index] = distanceMeasurer.Measure(vector, existing);
    });

    // Store freshly computed vectors back into the cache on the calling thread.
    // NOTE(review): the type of vectorTable is not visible here; writing sequentially
    // is safe whether or not it is a concurrent collection.
    for (var index = 0; index < documents.Length; index++)
    {
        if (vectorTable[documents[index]] == null)
        {
            vectorTable[documents[index]] = vectors[index];
        }
    }

    // Filter before sorting so entries without a measurement are never compared.
    return documents
        .Select((document, index) => new { Document = document, Distance = distances[index] })
        .Where(item => item.Distance.HasValue)
        .OrderByDescending(item => item.Distance.Value)
        .Select(item => new SimilarityResult(item.Document, item.Distance.Value))
        .ToList();
}
/// <summary>
/// Creates a context manager and stores the supplied collaborators in its fields.
/// </summary>
/// <param name="contextBuilder">Stored in the <c>contextBuilder</c> field.</param>
/// <param name="repositoryManager">Stored in the <c>repositoryManager</c> field.</param>
/// <param name="tagger">Stored in the <c>tagger</c> field.</param>
/// <param name="bagOfWords">Stored in the <c>bagOfWords</c> field.</param>
public ContextManager(
    IContextBuilder contextBuilder,
    IRepositoryManager repositoryManager,
    ITagger tagger,
    IBagOfWords bagOfWords)
{
    // The assignments are independent of each other; no validation is performed here.
    this.bagOfWords = bagOfWords;
    this.tagger = tagger;
    this.repositoryManager = repositoryManager;
    this.contextBuilder = contextBuilder;
}
/// <summary>
/// Loads the training and testing JPEG images from disk, learns a bag-of-visual-words
/// codebook from the training images, and stores a feature vector for every training
/// and testing image in <c>trainingFeatures</c> / <c>testingFeatures</c>.
/// </summary>
private static void CreateBoW()
{
    const int numberOfWords = 36;

    // Load every *.jpg from the training directory, keyed by file path.
    foreach (var file in Directory.EnumerateFiles(@"C:\Temp\TLLCamerasTestData\37_Training", "*.jpg"))
    {
        var trainingImage = (Bitmap)Bitmap.FromFile(file);
        trainingImages.Add(file, trainingImage);
    }

    // Load every *.jpg from the testing directory, keyed by file path.
    foreach (var file in Directory.EnumerateFiles(@"C:\Temp\TLLCamerasTestData\37_Testing", "*.jpg"))
    {
        var testImage = (Bitmap)Bitmap.FromFile(file);
        testingImages.Add(file, testImage);
    }

    // We will use SURF, so we can use a standard clustering
    // algorithm that is based on Euclidean distances. A good
    // algorithm for clustering codewords is the Binary Split
    // variant of the K-Means algorithm.

    // Create a Binary-Split clustering algorithm
    BinarySplit binarySplit = new BinarySplit(numberOfWords);

    // Create bag-of-words (BoW) with the given algorithm
    BagOfVisualWords surfBow = new BagOfVisualWords(binarySplit);

    // Compute the BoW codebook using training images only
    IBagOfWords<Bitmap> bow = surfBow.Learn(trainingImages.Values.ToArray());

    // Resolve the double[] transform once. A direct cast fails loudly if the learned
    // model does not expose it, instead of yielding null and a later NullReferenceException.
    var transform = (ITransform<Bitmap, double[]>)bow;

    // Now that the codebook exists, use it to create a feature representation of
    // each training and test image. Iterating the pairs avoids a second lookup per key.
    foreach (var entry in trainingImages)
    {
        var featureVector = transform.Transform((Bitmap)entry.Value);
        trainingFeatures.Add(entry.Key, featureVector);
    }

    foreach (var entry in testingImages)
    {
        var featureVector = transform.Transform((Bitmap)entry.Value);
        testingFeatures.Add(entry.Key, featureVector);
    }
}
/// <summary>
/// Registers a bag of words: every word text is added to the encoder and the bag is
/// entered into the vector table with a null vector.
/// </summary>
/// <param name="bag">Bag of words to register.</param>
/// <exception cref="ArgumentNullException"><paramref name="bag"/> is null.</exception>
public void Register(IBagOfWords bag)
{
    if (bag == null)
    {
        throw new ArgumentNullException(nameof(bag));
    }

    foreach (var word in bag.Words)
    {
        var text = word.Text;
        encoder.AddWord(text);
    }

    // A null entry marks the bag as known but without a stored vector.
    vectorTable[bag] = null;
}
/// <summary>
/// Creates a result pairing a document with its similarity value.
/// </summary>
/// <param name="document">Document the value refers to.</param>
/// <param name="similarity">Similarity value stored for the document.</param>
/// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
public SimilarityResult(IBagOfWords document, double similarity)
{
    if (document == null)
    {
        throw new ArgumentNullException(nameof(document));
    }

    Document = document;
    Similarity = similarity;
}
/// <summary>
/// Transforms a bitmap into a <c>double[]</c> using the supplied bag-of-words model.
/// </summary>
/// <param name="bitmap">Image passed to the model's transform.</param>
/// <param name="bow">Bag-of-words model; it is used through its <c>ITransform&lt;Bitmap, double[]&gt;</c> view.</param>
/// <returns>The array returned by the model's <c>Transform</c> call.</returns>
/// <exception cref="ArgumentNullException"><paramref name="bow"/> is null.</exception>
private double[] GetData(Bitmap bitmap, IBagOfWords<Bitmap> bow)
{
    if (bow == null)
    {
        throw new ArgumentNullException(nameof(bow));
    }

    // Direct cast instead of 'as': a model that does not expose the double[] transform
    // fails here with a descriptive InvalidCastException rather than a NullReferenceException.
    var transform = (ITransform<Bitmap, double[]>)bow;
    return transform.Transform(bitmap);
}