/// <summary>
/// Searches the documents sharing <paramref name="sourceHash"/> for one whose
/// content is identical to the blob identified by <paramref name="sourceBlobId"/>.
/// </summary>
/// <param name="sourceDocumentId">Id of the document being checked; a candidate with this same id is ignored.</param>
/// <param name="sourceHash">Hash used to look up candidate documents.</param>
/// <param name="sourceBlobId">Blob whose descriptor and content are compared against each candidate.</param>
/// <returns>
/// The id of the first candidate that has the same content type, the same length
/// and equal stream contents; <c>null</c> when deduplication is not active or no
/// candidate qualifies.
/// </returns>
public DocumentDescriptorId FindDuplicateDocumentId(
    DocumentDescriptorId sourceDocumentId,
    FileHash sourceHash,
    BlobId sourceBlobId)
{
    if (!_config.IsDeduplicationActive)
    {
        return null;
    }

    var sourceDescriptor = _blobStore.GetDescriptor(sourceBlobId);
    var sameHashDocuments = _hashReader.FindDocumentByHash(sourceHash);
    Logger.DebugFormat("Deduplicating document {0}", sourceDocumentId);

    foreach (var other in sameHashDocuments)
    {
        // A document is never a duplicate of itself.
        if (other.DocumentDescriptorId == sourceDocumentId)
        {
            continue;
        }

        Logger.DebugFormat("Checking document {0}", other.DocumentDescriptorId);
        var otherDescriptor = _blobStore.GetDescriptor(other.BlobId);

        // Cheap metadata checks first: the content type has to be the same...
        if (otherDescriptor.ContentType != sourceDescriptor.ContentType)
        {
            Logger.DebugFormat(
                "document {0} has different ContentType ({1}), skipping",
                other.DocumentDescriptorId,
                otherDescriptor.ContentType);
            continue;
        }

        // ...and so does the length.
        if (otherDescriptor.Length != sourceDescriptor.Length)
        {
            Logger.DebugFormat(
                "document {0} has different length ({1}), skipping",
                other.DocumentDescriptorId,
                otherDescriptor.Length);
            continue;
        }

        // Only now pay for the full stream comparison.
        using (var otherStream = otherDescriptor.OpenRead())
        using (var sourceStream = sourceDescriptor.OpenRead())
        {
            if (StreamHelper.StreamsContentsAreEqual(otherStream, sourceStream))
            {
                Logger.DebugFormat(
                    "{0} has same content of {1}: match found!",
                    other.DocumentDescriptorId,
                    sourceDocumentId);
                return other.DocumentDescriptorId;
            }

            Logger.DebugFormat(
                "{0} has different content of {1}, skipping",
                other.DocumentDescriptorId,
                sourceDocumentId);
        }
    }

    return null;
}
/// <summary>
/// Queries the read-model collection for document descriptors with the given hash.
/// </summary>
/// <param name="hash">Hash value the read model's <c>Hash</c> must equal.</param>
/// <returns>
/// One <see cref="Match"/> per read model found, built from its <c>Id</c> and
/// <c>GetOriginalBlobId()</c>, in ascending <c>SequenceNumber</c> order.
/// </returns>
public IEnumerable<Match> FindDocumentByHash(FileHash hash)
{
    var sameHash = Builders<DocumentDescriptorReadModel>.Filter.Eq(x => x.Hash, hash);
    var bySequenceNumber = Builders<DocumentDescriptorReadModel>.Sort.Ascending(x => x.SequenceNumber);

    // The results are materialized into a list before returning;
    // only the projection to Match below is evaluated lazily.
    var readModels = _reader.Collection
        .Find(sameHash)
        .Sort(bySequenceNumber)
        .ToList();

    return readModels.Select(model => new Match(model.Id, model.GetOriginalBlobId()));
}
/// <summary>
/// Creates the instance and stores the supplied blob id, handle information and hash.
/// </summary>
/// <param name="blobId">Value assigned to <c>BlobId</c>.</param>
/// <param name="handleInfo">Value assigned to <c>HandleInfo</c>.</param>
/// <param name="hash">Value assigned to <c>Hash</c>.</param>
public DocumentDescriptorInitialized(
    BlobId blobId,
    DocumentHandleInfo handleInfo,
    FileHash hash)
{
    BlobId = blobId;
    HandleInfo = handleInfo;
    Hash = hash;
}
/// <summary>
/// Creates the instance for the given aggregate and stores the blob id,
/// handle information, hash and file name.
/// </summary>
/// <param name="aggregateId">Descriptor id forwarded to the base constructor.</param>
/// <param name="blobId">Value assigned to <c>BlobId</c>.</param>
/// <param name="handleInfo">Value assigned to <c>HandleInfo</c>.</param>
/// <param name="hash">Value assigned to <c>Hash</c>.</param>
/// <param name="fileName">Value assigned to <c>FileName</c>.</param>
public InitializeDocumentDescriptor(
    DocumentDescriptorId aggregateId,
    BlobId blobId,
    DocumentHandleInfo handleInfo,
    FileHash hash,
    FileNameWithExtension fileName)
    : base(aggregateId)
{
    BlobId = blobId;
    HandleInfo = handleInfo;
    Hash = hash;
    FileName = fileName;
}
/// <summary>
/// Creates the instance, forwarding the common arguments to the base constructor
/// and storing the father handle and father document descriptor id.
/// </summary>
/// <param name="aggregateId">Forwarded to the base constructor.</param>
/// <param name="blobId">Forwarded to the base constructor.</param>
/// <param name="handleInfo">Forwarded to the base constructor.</param>
/// <param name="fatherHandle">Value assigned to <c>FatherHandle</c>.</param>
/// <param name="fatherDocumentDescriptorId">Value assigned to <c>FatherDocumentDescriptorId</c>.</param>
/// <param name="hash">Forwarded to the base constructor.</param>
/// <param name="fileName">Forwarded to the base constructor.</param>
public InitializeDocumentDescriptorAsAttach(
    DocumentDescriptorId aggregateId,
    BlobId blobId,
    DocumentHandleInfo handleInfo,
    DocumentHandle fatherHandle,
    DocumentDescriptorId fatherDocumentDescriptorId,
    FileHash hash,
    FileNameWithExtension fileName)
    : base(aggregateId, blobId, handleInfo, hash, fileName)
{
    this.FatherHandle = fatherHandle;
    this.FatherDocumentDescriptorId = fatherDocumentDescriptorId;
}