static void Main(string[] args)
        {
            // Ingest data from Mahout outout and merge into Azure Search
            string line;
            int currentID = -1;
            int docCounter = 0;
            List<Recommendation> recList = new List<Recommendation>();
            List<SearchIndexSchema> sisList = new List<SearchIndexSchema>();
            List<string> recs = new List<string>();

            // Configure cloud storage and search connections
            StorageCredentials credentials = new StorageCredentials(StorageAccountName, StorageApiKey);
            CloudStorageAccount storageAccount = new CloudStorageAccount(credentials, true);
            CloudBlobClient blobClient = storageAccount.CreateCloudBlobClient();
            CloudBlobContainer container = blobClient.GetContainerReference(StorageContainer);
            CloudBlockBlob blob = container.GetBlockBlobReference("output\\part-r-00000");  // In large implementations there may be multiple part files
            serviceClient = new SearchServiceClient(SearchServiceName, new SearchCredentials(SearchApiKey));
            indexClient = serviceClient.Indexes.GetClient(IndexName);

            // Open and parse mahout output file
            using (var stream = blob.OpenRead())
            {
                using (StreamReader file = new StreamReader(stream))
                {
                    while ((line = file.ReadLine()) != null)
                    {
                        Recommendation rec = new Recommendation();
                        char[] delimiters = new char[] { '\t' };
                        string[] parts = line.Split(delimiters, StringSplitOptions.RemoveEmptyEntries);
                        rec.itemID = Convert.ToInt32(parts[0]);
                        rec.recItemID = Convert.ToInt32(parts[1]);
                        rec.percentSimilar = Convert.ToDouble(parts[2]);
                        if (currentID != rec.itemID)
                        {
                            docCounter++;
                            if (recList.Count > 0)
                            {
                                recList = recList.OrderByDescending(w => w.percentSimilar).Take(5).ToList();    // Take the 5 most similar items
                                foreach (var item in recList)
                                    recs.Add(item.recItemID.ToString());
                                recList.Clear();
                                sisList.Add(new SearchIndexSchema { id = currentID.ToString(), recommendations = recs.ToArray() });
                                recs.Clear();
                                if (sisList.Count == 500)
                                {
                                    MergeDocument(sisList);
                                    sisList.Clear();
                                    Console.WriteLine("{0} Docs written to Index...", docCounter - 1);
                                }
                            }
                            currentID = rec.itemID;
                        }
                        recList.Add(rec);
                    }
                    file.Close();
                    if (sisList.Count > 0)
                    {
                        MergeDocument(sisList);
                        sisList.Clear();
                        Console.WriteLine("{0} Docs written to Index...", docCounter - 1);
                    }
                }
            }
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }
Exemple #2
0
        static void Main(string[] args)
        {
            // Ingest data from Mahout outout and merge into Azure Search
            string line;
            int    currentID  = -1;
            int    docCounter = 0;
            List <Recommendation>    recList = new List <Recommendation>();
            List <SearchIndexSchema> sisList = new List <SearchIndexSchema>();
            List <string>            recs    = new List <string>();

            // Configure cloud storage and search connections
            StorageCredentials  credentials    = new StorageCredentials(StorageAccountName, StorageApiKey);
            CloudStorageAccount storageAccount = new CloudStorageAccount(credentials, true);
            CloudBlobClient     blobClient     = storageAccount.CreateCloudBlobClient();
            CloudBlobContainer  container      = blobClient.GetContainerReference(StorageContainer);
            CloudBlockBlob      blob           = container.GetBlockBlobReference("output\\part-r-00000"); // In large implementations there may be multiple part files

            serviceClient = new SearchServiceClient(SearchServiceName, new SearchCredentials(SearchApiKey));
            indexClient   = serviceClient.Indexes.GetClient(IndexName);

            // Open and parse mahout output file
            using (var stream = blob.OpenRead())
            {
                using (StreamReader file = new StreamReader(stream))
                {
                    while ((line = file.ReadLine()) != null)
                    {
                        Recommendation rec        = new Recommendation();
                        char[]         delimiters = new char[] { '\t' };
                        string[]       parts      = line.Split(delimiters, StringSplitOptions.RemoveEmptyEntries);
                        rec.itemID         = Convert.ToInt32(parts[0]);
                        rec.recItemID      = Convert.ToInt32(parts[1]);
                        rec.percentSimilar = Convert.ToDouble(parts[2]);
                        if (currentID != rec.itemID)
                        {
                            docCounter++;
                            if (recList.Count > 0)
                            {
                                recList = recList.OrderByDescending(w => w.percentSimilar).Take(5).ToList();    // Take the 5 most similar items
                                foreach (var item in recList)
                                {
                                    recs.Add(item.recItemID.ToString());
                                }
                                recList.Clear();
                                sisList.Add(new SearchIndexSchema {
                                    id = currentID.ToString(), recommendations = recs.ToArray()
                                });
                                recs.Clear();
                                if (sisList.Count == 500)
                                {
                                    MergeDocument(sisList);
                                    sisList.Clear();
                                    Console.WriteLine("{0} Docs written to Index...", docCounter - 1);
                                }
                            }
                            currentID = rec.itemID;
                        }
                        recList.Add(rec);
                    }
                    file.Close();
                    if (sisList.Count > 0)
                    {
                        MergeDocument(sisList);
                        sisList.Clear();
                        Console.WriteLine("{0} Docs written to Index...", docCounter - 1);
                    }
                }
            }
            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }