void TransferDocuments()
{
    // set up the index clients
    _legacyIndexClient = new SearchIndexClient(Settings.LegacySearchServiceName, Settings.LegacySearchIndex, _legacySearchClient.Credentials);
    _targetIndexClient = new SearchIndexClient(Settings.TargetSearchServiceName, Settings.TargetSearchIndex, _targetSearchClient.Credentials);

    long totaldocs = _legacyIndexClient.Documents.Count().DocumentCount;
    Console.WriteLine(String.Format("Found {0} documents to transfer", totaldocs));
    Console.WriteLine("Indexing");

    // collect the index actions that will push the copied docs into the new index
    var actions = new List<Microsoft.Azure.Search.Models.IndexAction>();

    // Now get all the docs, enumerate them and push them into the new index.
    // Liam tells us that things could go wrong if docs are added during this, so pause indexing first.
    // We get FULL docs back in pages of PAGE_SIZE (50 by default), but that could change -
    // it would be good to have a queryable "safe" page size.

    // store anything that fails along the way
    List<string> failures = new List<string>();

    // fetch and push in pages of PAGE_SIZE (ceiling division so a partial last page is included)
    int maxpages = (int)((totaldocs + PAGE_SIZE - 1) / PAGE_SIZE);
    for (int i = 0; i < maxpages; i++)
    {
        Console.Write("*");

        // page through the legacy index PAGE_SIZE documents at a time
        var sparams = new Microsoft.Azure.Search.Models.SearchParameters();
        sparams.Top = PAGE_SIZE;
        sparams.Skip = i * PAGE_SIZE;

        // get everything in that page
        var documents = _legacyIndexClient.Documents.Search("*", sparams).Results;

        // copy each old document's fields across into a new document
        foreach (var olddoc in documents)
        {
            var document = new Microsoft.Azure.Search.Models.Document();
            foreach (var key in olddoc.Document.Keys)
            {
                object value;
                if (olddoc.Document.TryGetValue(key, out value))
                {
                    document.Add(key, value);
                }
                else
                {
                    failures.Add(key);
                }
            }

            if (FULL_TRACE)
            {
                Console.WriteLine(String.Format("Indexed {0} ({1})", document[TITLE_FIELD], document[KEY_FIELD]));
            }

            // queue the new document as an Upload action
            actions.Add(new Microsoft.Azure.Search.Models.IndexAction(
                Microsoft.Azure.Search.Models.IndexActionType.Upload, document));
        }

        if (!DO_NOT_COMMIT_DOCUMENTS)
        {
            // batch-import the page into the target index
            _targetIndexClient.Documents.Index(new Microsoft.Azure.Search.Models.IndexBatch(actions));
        }

        // reset and go again
        actions.Clear();
    }

    Console.WriteLine(String.Empty);
    Console.WriteLine("Done. Short delay to let the indexing complete <cue music>");

    // wait so the indexing can complete before we check the count
    System.Threading.Thread.Sleep(FINAL_CHECK_INDEXING_TIMEOUT_SECONDS * 1000);

    // were all documents indexed?
    long newcount = _targetIndexClient.Documents.Count().DocumentCount;
    if (totaldocs == newcount)
    {
        Console.WriteLine(String.Format("ALL DOCUMENTS INDEXED! Found {0} documents in the new index.", newcount));
    }
    else
    {
        Console.WriteLine(String.Format("Found {0} documents in the new index", newcount));
        if (failures.Count > 0)
        {
            Console.WriteLine("The following were not indexed:");
            foreach (var item in failures)
            {
                Console.WriteLine(item);
            }
        }
    }
}
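For context, the method leans on a handful of fields and constants declared elsewhere in the class, plus a Settings object exposing the service and index names. Below is a minimal sketch of those supporting declarations, assuming the v1.x Microsoft.Azure.Search SDK that the method's calls match; the names come from the code above, but the containing class, types, and every value here (page size, timeout, trace field names) are assumptions to replace with your own settings.

using System;
using System.Collections.Generic;
using Microsoft.Azure.Search;

class IndexTransfer // hypothetical containing class
{
    // service clients assumed to be created earlier with admin credentials
    private SearchServiceClient _legacySearchClient;
    private SearchServiceClient _targetSearchClient;

    // index clients assigned inside TransferDocuments()
    private SearchIndexClient _legacyIndexClient;
    private SearchIndexClient _targetIndexClient;

    private const int PAGE_SIZE = 50;                             // documents fetched per search page (assumed)
    private const bool FULL_TRACE = false;                        // log each document as it is copied
    private const bool DO_NOT_COMMIT_DOCUMENTS = false;           // dry-run switch: skip the Index() call
    private const int FINAL_CHECK_INDEXING_TIMEOUT_SECONDS = 10;  // wait before the final count check (assumed)
    private const string TITLE_FIELD = "title";                   // field names used for tracing (assumed)
    private const string KEY_FIELD = "id";
}

DO_NOT_COMMIT_DOCUMENTS is worth keeping as a first run: with it set to true the loop still pages through the legacy index and reports any failures, without writing anything to the target index.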