コード例 #1
0
        /// <summary>
        /// Reads every entity of <c>table</c>, then reports each set of two or more entities whose
        /// <c>indexPropertyName</c> values are equal according to <c>equalityComparer</c>.
        /// </summary>
        /// <param name="onCollision">
        /// Called once per group of duplicates. The first argument is a Guid built from the MD5 hash of
        /// the JSON-serialized duplicated value; the second enumerates the ids (partition keys parsed as
        /// Guids) of the entities that share that value.
        /// </param>
        /// <exception cref="OutOfMemoryException">
        /// A newly allocated <c>IdHashBuffer</c> rejected its first write.
        /// </exception>
        public void SearchForDuplicates(Action <Guid, IEnumerable <Guid> > onCollision)
        {
            var scanQuery = new TableQuery
            {
                SelectColumns = columns
            };

            var current = new IdHashBuffer(InitialBufferSize);
            var buffers = new List <IdHashBuffer> { current };

            // Pass 1: walk the table segment by segment and record an (id, hash) pair for every
            // entity that carries the indexed property.
            TableContinuationToken token = null;
            do
            {
                var segment = table.ExecuteQuerySegmented(scanQuery, token);
                foreach (var entity in segment.Results)
                {
                    // Parsed before the property lookup, exactly as before: a malformed partition
                    // key fails the scan even for entities that lack the indexed property.
                    var id = Guid.Parse(entity.PartitionKey);

                    EntityProperty property;
                    if (!entity.Properties.TryGetValue(indexPropertyName, out property))
                    {
                        continue;
                    }

                    var hash = hashingTransformer(property.PropertyAsObject);
                    if (current.TryWrite(id, hash))
                    {
                        continue;
                    }

                    // The current buffer refused the write (presumably it is full - confirm against
                    // IdHashBuffer); chain a new buffer of twice the size and retry once.
                    current = new IdHashBuffer(current.Size * 2);
                    buffers.Add(current);

                    if (!current.TryWrite(id, hash))
                    {
                        throw new OutOfMemoryException("A newly allocated id/hash buffer rejected its first entry.");
                    }
                }

                token = segment.ContinuationToken;
            } while (token != null);

            foreach (var filled in buffers)
            {
                filled.Seal();
            }

            // Pass 2: let each buffer look for equal hashes in itself and in every later buffer,
            // then confirm the candidates against the real property values stored in the table.
            for (var i = 0; i < buffers.Count; i++)
            {
                buffers[i].FindHashCollisions(buffers.Skip(i), (hash, ids) =>
                {
                    var duplicateGroups = ids
                        .Select(id => new TableQuery
                        {
                            FilterString  = TableQuery.GenerateFilterCondition("PartitionKey", QueryComparisons.Equal, id.ToString()),
                            SelectColumns = columns
                        })
                        .Select(q => table.ExecuteQuery(q).SingleOrDefault())
                        // Entities may have been deleted or lost the property since pass 1.
                        .Where(dte => dte != null && dte.Properties.ContainsKey(indexPropertyName))
                        .GroupBy(dte => dte.Properties[indexPropertyName].PropertyAsObject, dte => Guid.Parse(dte.PartitionKey), equalityComparer)
                        // An equal hash alone is not a duplicate; keep only values shared by 2+ entities.
                        .Where(g => g.Count() > 1)
                        .ToArray();

                    foreach (var duplicates in duplicateGroups)
                    {
                        onCollision(ComputeCollisionId(duplicates.Key), duplicates);
                    }
                });
            }
        }

        /// <summary>
        /// Builds the Guid reported for a duplicated value: the MD5 hash of the value's JSON text.
        /// </summary>
        /// <param name="key">The duplicated property value.</param>
        /// <returns>A Guid constructed from the 16-byte MD5 digest.</returns>
        private static Guid ComputeCollisionId(object key)
        {
            // An expandable stream replaces the former fixed 4096-byte scratch array, which made
            // the writer throw NotSupportedException for values whose JSON exceeded 4096 bytes.
            using (var ms = new MemoryStream())
            using (var sw = new StreamWriter(ms))
            using (var md5 = MD5.Create())
            {
                JsonSerializer.Create().Serialize(sw, key);
                sw.Flush();

                // GetBuffer avoids copying; only the first ms.Length bytes hold the JSON text.
                return new Guid(md5.ComputeHash(ms.GetBuffer(), 0, (int)ms.Length));
            }
        }
コード例 #2
0
        /// <summary>
        /// Finds entities in <c>table</c> that share the same <c>indexPropertyName</c> value (as judged
        /// by <c>equalityComparer</c>) and reports every such group through <paramref name="onCollision"/>.
        /// </summary>
        /// <param name="onCollision">
        /// Receives, for each duplicated value, a Guid derived from the MD5 hash of the value's JSON
        /// serialization together with the ids (partition keys as Guids) of the entities holding it.
        /// </param>
        /// <exception cref="OutOfMemoryException">
        /// Thrown when a newly created <c>IdHashBuffer</c> cannot accept a single entry.
        /// </exception>
        public void SearchForDuplicates(Action<Guid, IEnumerable<Guid>> onCollision)
        {
            var query = new TableQuery
            {
                SelectColumns = columns
            };

            TableContinuationToken token = null;

            var buffers = new List<IdHashBuffer>();
            var buffer = new IdHashBuffer(InitialBufferSize);
            buffers.Add(buffer);

            // Phase 1: page through the whole table, storing (id, hash) for each entity that has
            // the indexed property.
            do
            {
                var executeQuerySegmented = table.ExecuteQuerySegmented(query, token);
                foreach (var dte in executeQuerySegmented.Results)
                {
                    var id = Guid.Parse(dte.PartitionKey);
                    EntityProperty property;
                    if (dte.Properties.TryGetValue(indexPropertyName, out property))
                    {
                        var hash = hashingTransformer(property.PropertyAsObject);

                        if (buffer.TryWrite(id, hash) == false)
                        {
                            // Write rejected (presumably the buffer is full - confirm against
                            // IdHashBuffer): continue in a new buffer twice as large.
                            buffer = new IdHashBuffer(buffer.Size*2);
                            buffers.Add(buffer);

                            if (buffer.TryWrite(id, hash) == false)
                            {
                                throw new OutOfMemoryException();
                            }
                        }
                    }
                }
                token = executeQuerySegmented.ContinuationToken;
            } while (token != null);

            foreach (var b in buffers)
            {
                b.Seal();
            }

            // Phase 2: each buffer is compared with itself and all later buffers; candidate ids
            // that share a hash are re-read from the table and grouped by their actual value.
            for (var i = 0; i < buffers.Count; i++)
            {
                var b = buffers[i];
                b.FindHashCollisions(buffers.Skip(i), (hash, ids) =>
                {
                    var collisions = ids.Select(id => new TableQuery
                    {
                        FilterString = TableQuery.GenerateFilterCondition("PartitionKey", QueryComparisons.Equal, id.ToString()),
                        SelectColumns = columns
                    })
                        .Select(q => table.ExecuteQuery(q).SingleOrDefault())
                        .Where(dte => dte != null && dte.Properties.ContainsKey(indexPropertyName))
                        .GroupBy(dte => dte.Properties[indexPropertyName].PropertyAsObject, dte => Guid.Parse(dte.PartitionKey), equalityComparer)
                        .Where(g => g.Count() > 1)
                        .ToArray();

                    foreach (var collision in collisions)
                    {
                        Guid guid;

                        // A growable stream is used per collision: the previous shared, fixed
                        // 4096-byte stream threw NotSupportedException when a value's JSON was
                        // larger than the array. The using blocks also release the MD5 instance,
                        // which used to be created per collision and never disposed.
                        using (var ms = new MemoryStream())
                        using (var sw = new StreamWriter(ms))
                        using (var md5 = MD5.Create())
                        {
                            JsonSerializer.Create().Serialize(sw, collision.Key);

                            sw.Flush();

                            // Hash exactly the bytes written; GetBuffer may be longer than Length.
                            guid = new Guid(md5.ComputeHash(ms.GetBuffer(), 0, (int) ms.Length));
                        }

                        onCollision(guid, collision);
                    }
                });
            }
        }