示例#1
0
        /// <summary>
        /// Builds a Bloom filter over the document asset ids of the selected depots under the
        /// "docs.microsoft.com/dotnet/" site base path and serializes the collected filters to BSON.
        /// </summary>
        /// <remarks>
        /// Only depots whose system metadata flag <c>MetadataConstants.ActiveKey</c> is true are processed.
        /// Progress is reported on the console and the method blocks on <see cref="Console.ReadLine"/> before returning.
        /// </remarks>
        /// <param name="clientName">Client name passed to the <c>DocumentHostingServiceClient</c> constructor.</param>
        /// <param name="accessKey">Access key passed to the <c>DocumentHostingServiceClient</c> constructor.</param>
        /// <exception cref="InvalidOperationException">
        /// A page of documents could not be loaded after all attempts.
        /// </exception>
        public static void GenerateDotnetApiBloomFilter(string clientName, string accessKey)
        {
            const int maxAttempts = 3;
            double falsePositiveRate = 0.00001;

            // Alternative (sandbox) endpoint: "https://op-dhs-sandbox-read.azurewebsites.net/"
            string baseUri = "https://op-dhs-prod-read-nus.azurewebsites.net/";

            IDocumentHostingService dhsClient = new DocumentHostingServiceClient(new Uri(baseUri), clientName, accessKey);
            IList<DepotBloomFilter> result = new List<DepotBloomFilter>();
            IList<GetDepotResponse> depots = dhsClient.GetAllDepotsBySiteBasePath("docs", "docs.microsoft.com/dotnet/", null, CancellationToken.None).Result;

            // NOTE(review): Skip(20).Take(1) restricts the run to the 21st depot only; this looks like a
            // debugging leftover - confirm whether every depot should be processed instead.
            foreach (GetDepotResponse depot in depots.Skip(20).Take(1))
            {
                if (!depot.SystemMetadata.GetValueOrDefault<bool>(MetadataConstants.ActiveKey))
                {
                    continue;
                }

                string depotName = depot.DepotName;
                string continueAt = null;
                Console.WriteLine($"{depotName} Start.");

                // Pages are materialized into a list so the count and the contents are computed once,
                // instead of re-walking a chain of lazy Concat calls on every Count()/foreach.
                var allDocuments = new List<GetDocumentResponse>();
                int pageNumber = 0;
                do
                {
                    bool pageLoaded = false;
                    Exception lastError = null;

                    // The attempt counter (not the page counter) must advance here, otherwise a
                    // persistently failing request would be retried forever.
                    for (int retry = 0; retry < maxAttempts; retry++)
                    {
                        try
                        {
                            GetDocumentsResponse documents = dhsClient.GetDocumentsPaginated(depot.DepotName, "en-us", "live", true, continueAt, null, null, CancellationToken.None).Result;

                            // Append before advancing the cursor so a failure here cannot skip a page on retry.
                            allDocuments.AddRange(documents.Documents);
                            continueAt = documents.ContinueAt;
                            pageLoaded = true;
                            break;
                        }
                        catch (Exception e)
                        {
                            lastError = e;
                            Console.WriteLine(e);
                            Console.WriteLine($"Retry for {retry + 1} times");
                        }
                    }

                    if (!pageLoaded)
                    {
                        // Continuing would either re-request the same page endlessly or build a filter
                        // from an incomplete document set, so fail explicitly.
                        throw new InvalidOperationException($"Failed to load documents for depot '{depotName}' after {maxAttempts} attempts.", lastError);
                    }

                    pageNumber++;
                    Console.WriteLine($"{pageNumber:000} ..................");
                }
                while (!string.IsNullOrEmpty(continueAt));

                Console.WriteLine($"{depotName} Size: {allDocuments.Count}.");
                var bloomFilter = new BloomFilter(allDocuments.Count, falsePositiveRate);
                foreach (var document in allDocuments)
                {
                    bloomFilter.Add(document.AssetId);
                }

                Console.WriteLine($"{depotName} Bloom Filter Size: {bloomFilter.BitLength / 1024 / 8} KB.");
                result.Add(new DepotBloomFilter
                {
                    DepotName = depotName,
                    BloomFilter = bloomFilter.BitArray,
                    Count = allDocuments.Count,
                    FalsePositiveRate = falsePositiveRate
                });
                Console.WriteLine($"{depotName} Done.");
            }

            /*
             * using (StreamWriter file = new StreamWriter(@"output.json", true))
             * {
             *  file.WriteLine(JsonConvert.SerializeObject(result));
             * }
             */

            // NOTE(review): the BSON payload is written to this in-memory stream only and is not
            // persisted or returned - confirm whether it should be saved somewhere.
            using (MemoryStream ms = new MemoryStream())
            using (BsonWriter writer = new BsonWriter(ms))
            {
                JsonSerializer serializer = new JsonSerializer();
                serializer.Serialize(writer, result);
            }

            Console.ReadLine();
        }
示例#2
0
        /// <summary>
        /// Builds one Bloom filter per depot under the "docs.microsoft.com/dotnet/" site base path, then
        /// checks every collected asset id against all filters and reports how many filters claim it.
        /// </summary>
        /// <remarks>
        /// Only depots whose system metadata flag <c>MetadataConstants.ActiveKey</c> is true are processed.
        /// The serialized filters are appended to "output.json" and the per-match-count histogram is
        /// appended to "conflictCount.txt". The method blocks on <see cref="Console.ReadLine"/> before returning.
        /// </remarks>
        /// <param name="clientName">Client name passed to the <c>DocumentHostingServiceClient</c> constructor.</param>
        /// <param name="accessKey">Access key passed to the <c>DocumentHostingServiceClient</c> constructor.</param>
        /// <exception cref="InvalidOperationException">
        /// A page of documents could not be loaded after all attempts.
        /// </exception>
        public static void TestDotnerApiBloomFilters(string clientName, string accessKey)
        {
            const int maxAttempts = 3;
            double falsePositiveRate = 0.00001;

            // Alternative (sandbox) endpoint: "https://op-dhs-sandbox-read.azurewebsites.net/"
            string baseUri = "https://op-dhs-prod-read-nus.azurewebsites.net/";

            IDocumentHostingService dhsClient = new DocumentHostingServiceClient(new Uri(baseUri), clientName, accessKey);
            IList<BloomFilter> result = new List<BloomFilter>();
            IList<string> assetIds = new List<string>();
            IList<GetDepotResponse> depots = dhsClient.GetAllDepotsBySiteBasePath("docs", "docs.microsoft.com/dotnet/", null, CancellationToken.None).Result;

            foreach (GetDepotResponse depot in depots)
            {
                if (!depot.SystemMetadata.GetValueOrDefault<bool>(MetadataConstants.ActiveKey))
                {
                    continue;
                }

                string depotName = depot.DepotName;
                string continueAt = null;
                Console.WriteLine($"{depotName} Start.");

                // Pages are materialized into a list so the count and the contents are computed once,
                // instead of re-walking a chain of lazy Concat calls on every Count()/foreach.
                var allDocuments = new List<GetDocumentResponse>();
                int pageNumber = 0;
                do
                {
                    bool pageLoaded = false;
                    Exception lastError = null;

                    // The attempt counter (not the page counter) must advance here, otherwise a
                    // persistently failing request would be retried forever.
                    for (int retry = 0; retry < maxAttempts; retry++)
                    {
                        try
                        {
                            GetDocumentsResponse documents = dhsClient.GetDocumentsPaginated(depot.DepotName, "en-us", "live", true, continueAt, null, null, CancellationToken.None).Result;

                            // Append before advancing the cursor so a failure here cannot skip a page on retry.
                            allDocuments.AddRange(documents.Documents);
                            continueAt = documents.ContinueAt;
                            pageLoaded = true;
                            break;
                        }
                        catch (Exception e)
                        {
                            lastError = e;
                            Console.WriteLine(e);
                            Console.WriteLine($"Retry for {retry + 1} times");
                        }
                    }

                    if (!pageLoaded)
                    {
                        // Continuing would either re-request the same page endlessly or test against
                        // an incomplete document set, so fail explicitly.
                        throw new InvalidOperationException($"Failed to load documents for depot '{depotName}' after {maxAttempts} attempts.", lastError);
                    }

                    pageNumber++;
                    Console.WriteLine($"{pageNumber:000} ..................");
                }
                while (!string.IsNullOrEmpty(continueAt));

                Console.WriteLine($"{depotName} Size: {allDocuments.Count}.");
                var bloomFilter = new BloomFilter(allDocuments.Count, falsePositiveRate);
                foreach (var document in allDocuments)
                {
                    bloomFilter.Add(document.AssetId);
                    assetIds.Add(document.AssetId);
                }

                Console.WriteLine($"{depotName} Bloom Filter Size: {bloomFilter.BitLength / 1024 / 8} KB.");
                result.Add(bloomFilter);
                Console.WriteLine($"{depotName} Done.");
            }

            using (StreamWriter file = new StreamWriter(@"output.json", true))
            {
                file.WriteLine(JsonConvert.SerializeObject(result));
            }

            // onlyOneCount: asset ids matched by exactly one filter.
            // conflictHashDict: for every other match count, how many asset ids produced it.
            int onlyOneCount = 0;
            var conflictHashDict = new Dictionary<int, int>();

            foreach (var assetId in assetIds)
            {
                int conflictCount = result.Count(r => r.Contains(assetId));
                if (conflictCount == 1)
                {
                    onlyOneCount++;
                    continue;
                }

                // A missing key yields 0 from TryGetValue, so this covers both first and repeated sightings.
                int seen;
                conflictHashDict.TryGetValue(conflictCount, out seen);
                conflictHashDict[conflictCount] = seen + 1;
            }

            Console.WriteLine($"Only one count: {onlyOneCount}, total count: {assetIds.Count}");
            Console.WriteLine($"Duplicate count: {conflictHashDict.Keys.Count}");

            // OrderBy + Reverse is kept (rather than OrderByDescending) so entries with equal values
            // are written in the same order as before.
            var output = conflictHashDict.OrderBy(p => p.Value).Reverse();

            using (StreamWriter file = new StreamWriter(@"conflictCount.txt", true))
            {
                foreach (var item in output)
                {
                    file.WriteLine($"{item.Key}: {item.Value}");
                }
            }

            Console.ReadLine();
        }
示例#3
0
        /// <summary>
        /// Loads the documents of the first <paramref name="top"/> depots (ordered by ascending
        /// <c>Priority</c>) under <paramref name="basePath"/>, uploads each document's content as a page
        /// through <c>Writer.UploadPage</c>, and then uploads the resulting <c>Document</c> records
        /// through <c>Writer.UploadDocuments</c>.
        /// </summary>
        /// <param name="basePath">Site base path used to look up depots; also prefixed to each asset id to form the document URL.</param>
        /// <param name="branch">Branch passed to the paginated document query and stored on each record.</param>
        /// <param name="locale">Locale passed to the paginated document query and stored on each record.</param>
        /// <param name="top">Number of depots to take after ordering by <c>Priority</c>.</param>
        /// <returns>A task that completes when all selected depots have been processed.</returns>
        public static async Task Migrate(string basePath, string branch, string locale, int top = 3)
        {
            Console.WriteLine($"Get depots for {basePath}");
            var allDepots = await s_dhsClient.GetAllDepotsBySiteBasePath("Docs", basePath, null, CancellationToken.None);

            // A single etag is generated per run and stamped on every uploaded record.
            var activeEtag = Guid.NewGuid().ToString();
            var selectedDepots = allDepots.OrderBy(d => d.Priority).Take(top);

            await ParallelUtility.ParallelForEach(selectedDepots, async depot =>
            {
                // Page through the depot's documents, 1000 per request, until no continuation token is returned.
                var loaded = new List<GetDocumentResponse>();
                var cursor = string.Empty;
                var pageNumber = 0;
                while (true)
                {
                    pageNumber++;
                    Console.WriteLine($"Load {1000 * pageNumber} documents for {depot.DepotName}");
                    var page = await s_dhsClient.GetDocumentsPaginated(depot.DepotName, locale, branch, false, cursor, null, 1000, CancellationToken.None);
                    loaded.AddRange(page.Documents);
                    cursor = page.ContinueAt;
                    if (string.IsNullOrEmpty(cursor))
                    {
                        break;
                    }
                }

                Console.WriteLine($"Convert {loaded.Count} documents for {depot.DepotName}");

                // Records are collected in a concurrent bag because the conversions below run in parallel.
                var records = new ConcurrentBag<Document>();
                await ParallelUtility.ParallelForEach(
                    loaded,
                    async source =>
                    {
                        using (Stream content = await HttpHelper.DownloadAsStream(source.ContentUri, CancellationToken.None))
                        {
                            var metadata = source.CombinedMetadata;
                            var isDynamicRendering = metadata.GetValueOrDefault<bool>("is_dynamic_rendering");
                            var contentType = metadata.GetValueOrDefault<string>("content_type");

                            var (pageUrl, pageHash) = await Writer.UploadPage(content, isDynamicRendering, contentType);

                            records.Add(new Document
                            {
                                Docset = depot.DepotName,
                                Url = $"{basePath}{source.AssetId}",
                                Locale = locale,
                                Branch = branch,
                                Monikers = metadata.GetValueOrDefault<JArray>("monikers")?.ToObject<List<string>>(),
                                ActiveEtag = activeEtag,
                                PageHash = pageHash,
                                PageUrl = pageUrl,
                                PageType = metadata.GetValueOrDefault<string>("page_type"),
                                Title = metadata.GetValueOrDefault<string>("title"),
                                Layout = metadata.GetValueOrDefault<string>("layout"),
                                IsDynamicRendering = isDynamicRendering,
                                ContentType = contentType
                            });
                        }
                    },
                    400,
                    200,
                    (done, total) =>
                    {
                        // Clamp to 100% and guard against a zero total.
                        var fraction = Math.Min(1.0, done / Math.Max(1.0, total));
                        var percentText = ((int)(100 * fraction)).ToString().PadLeft(3);
                        Console.WriteLine($"Uploading Page Content for {depot.DepotName}: {percentText}% {done}/{total}");
                    });

                await Writer.UploadDocuments(
                    records.ToList(),
                    activeEtag,
                    (done, total) =>
                    {
                        var fraction = Math.Min(1.0, done / Math.Max(1.0, total));
                        var percentText = ((int)(100 * fraction)).ToString().PadLeft(3);
                        Console.WriteLine($"Uploading Page Document for {depot.DepotName}: {percentText}% {done}/{total}");
                    });
            }, 10, 5);
        }