/// <summary>
/// Runs <c>Optimize</c> once for every cluster count in the calculation's configured
/// range, then saves and returns the best-scoring candidate.
/// </summary>
/// <param name="context">Entity context passed on to the status update and to <c>Save</c>.</param>
/// <param name="clusterCalculationEntity">
/// Calculation record supplying the job id, the minimum/maximum cluster counts,
/// the iterations per cluster and the maximum optimizations count.
/// </param>
/// <returns>
/// The candidate with the highest <c>GlobalSi</c>; ties are broken by the highest
/// <c>GlobalClusterSiAverage</c>.
/// </returns>
/// <remarks>
/// Any exception raised along the way sets the calculation status to <c>Failed</c>
/// and is then rethrown unchanged.
/// </remarks>
private static Cluster OptimizeRange(SvdEntities context, ClusterCalculation clusterCalculationEntity)
{
    try
    {
        // A single generator is shared by every Optimize call of this run.
        var random = new Random();

        Cluster.SetCalculationStatus(context, clusterCalculationEntity, Contracts.ClusterCalculationStatus.Clustering);

        // Cluster counts to try: minimum through maximum, both inclusive.
        var firstClusterCount = clusterCalculationEntity.MinimumClusterCount;
        var clusterCountSpan = (clusterCalculationEntity.MaximumClusterCount - clusterCalculationEntity.MinimumClusterCount) + 1;

        var candidates = Enumerable
            .Range(firstClusterCount, clusterCountSpan)
            .Select(clusterCount => Optimize(
                random,
                clusterCalculationEntity.JobId,
                clusterCount,
                clusterCalculationEntity.IterationsPerCluster,
                clusterCalculationEntity.MaximumOptimizationsCount))
            .ToList();

        // Highest GlobalSi wins; GlobalClusterSiAverage is the tie-breaker.
        var ranked = candidates
            .OrderByDescending(candidate => candidate.GlobalSi)
            .ThenByDescending(candidate => candidate.GlobalClusterSiAverage);
        var best = ranked.First();

        best.Save(context, clusterCalculationEntity);

        return best;
    }
    catch (Exception)
    {
        // Record the failure on the calculation, then let the original exception propagate.
        Cluster.SetCalculationStatus(context, clusterCalculationEntity, Contracts.ClusterCalculationStatus.Failed);
        throw;
    }
}
/// <summary>
/// Assigns <paramref name="status"/> to the calculation, stamps the matching
/// timestamp where applicable, and saves the context.
/// </summary>
/// <param name="context">Entity context whose <c>SaveChanges</c> is called.</param>
/// <param name="clusterCalculationEntity">Calculation record to update.</param>
/// <param name="status">
/// The new status. <c>New</c> sets <c>Created</c> to the current local time;
/// <c>Completed</c> and <c>Failed</c> set <c>Completed</c> to the current local time.
/// </param>
public static void SetCalculationStatus(SvdEntities context, ClusterCalculation clusterCalculationEntity, Contracts.ClusterCalculationStatus status)
{
    var isNew = status == Contracts.ClusterCalculationStatus.New;
    var isFinished = status == Contracts.ClusterCalculationStatus.Completed
                     || status == Contracts.ClusterCalculationStatus.Failed;

    if (isNew)
    {
        clusterCalculationEntity.Created = DateTime.Now;
    }

    if (isFinished)
    {
        // Both terminal states (success and failure) record an end time.
        clusterCalculationEntity.Completed = DateTime.Now;
    }

    clusterCalculationEntity.Status = status;

    context.SaveChanges();
}
/// <summary>
/// Persists this clustering result for the given calculation: the summary scores on
/// the calculation record, one cluster row per cluster (with its serialized center
/// vector), the document-to-cluster assignments, and the terms closest to each
/// cluster center. Finally marks the calculation as <c>Completed</c>.
/// </summary>
/// <param name="context">Entity context used for the queries, <c>SaveChanges</c> and bulk inserts.</param>
/// <param name="clusterCalculationEntity">Calculation record that receives the scores and owns the inserted rows.</param>
/// <remarks>
/// Documents and terms that have no matching job document / job term row are skipped.
/// </remarks>
public void Save(SvdEntities context, ClusterCalculation clusterCalculationEntity)
{
    // Number of terms stored per cluster, taken in ascending order of distance to the center.
    const int closestTermCount = 20;

    // NOTE(review): BinaryFormatter is obsolete and unsafe on untrusted input. It is kept
    // because changing it would change the stored CenterVectorSerialized format, which is
    // presumably read back elsewhere with the same formatter; confirm before migrating.
    var binaryFormatter = new BinaryFormatter();

    var jobDocs = context.JobDocuments.Where(jd => jd.JobId == JobId).ToLookup(jd => jd.OrdinalIndex);
    var jobTerms = context.JobTerms.Where(jt => jt.JobId == JobId).ToLookup(jt => jt.Term.Value);
    var clusterEntities = new Dictionary<int, Engine.Cluster>();

    clusterCalculationEntity.ClusterCount = Clusters;
    clusterCalculationEntity.GlobalSi = GlobalSi;
    clusterCalculationEntity.ClusterSi = GlobalClusterSiAverage;

    // Update Cluster Calculation
    context.SaveChanges();

    // Build one cluster entity per cluster index, carrying the serialized center vector.
    foreach (var cluster in Enumerable.Range(0, Clusters))
    {
        using (var centerVectorStream = new MemoryStream())
        {
            binaryFormatter.Serialize(centerVectorStream, Centers[cluster]);

            // ToArray copies the whole buffer regardless of Position, so no rewind is needed.
            clusterEntities.Add(cluster, new Engine.Cluster()
            {
                JobId = JobId,
                ClusterCalculationId = clusterCalculationEntity.Id,
                Si = ClusterSiAverages[cluster],
                CenterVectorSerialized = centerVectorStream.ToArray()
            });
        }
    }

    // Insert Clusters
    // NOTE(review): the code below reads clusterEntity.Value.Id, so it relies on BulkInsert
    // having assigned Id on the inserted entities; confirm the bulk-insert provider does this.
    context.BulkInsert(clusterEntities.Select(kvp => kvp.Value));

    var clusterJobDocumentEntities = new ConcurrentBag<ClusterJobDocument>();
    var clusterJobTermEntities = new ConcurrentBag<ClusterJobTerm>();

    // Clusters are processed in parallel; results are collected in thread-safe bags.
    clusterEntities.AsParallel().ForAll(clusterEntity =>
    {
        var termDistanceMap = new Dictionary<string, float>();
        var centerVector = Centers[clusterEntity.Key];

        // Documents assigned to this cluster.
        foreach (var assignment in ClusterMap.Where(entry => entry.Value == clusterEntity.Key))
        {
            var docIndex = assignment.Key;

            // A lookup returns an empty sequence (never null) for a missing key, so the
            // presence check has to be made on the first element rather than on the group.
            var jobDocument = jobDocs[docIndex].FirstOrDefault();

            if (jobDocument != null)
            {
                clusterJobDocumentEntities.Add(new ClusterJobDocument()
                {
                    ClusterCalculationId = clusterCalculationEntity.Id,
                    ClusterId = clusterEntity.Value.Id,
                    JobId = JobId,
                    Si = DocumentSi.ContainsKey(docIndex) ? DocumentSi[docIndex] : 0,
                    JobDocumentId = jobDocument.Id
                });
            }
        }

        // Distance from every term row of the U matrix to this cluster's center.
        for (var i = 0; i < LSA.MatrixContainer.UMatrix.RowCount; i++)
        {
            termDistanceMap[LSA.MatrixContainer.Terms[i]] = Distance.Cosine(centerVector, LSA.MatrixContainer.UMatrix.Row(i).ToArray());
        }

        // Keep only the terms with the smallest distance to the center.
        foreach (var term in termDistanceMap.OrderBy(t => t.Value).Take(closestTermCount))
        {
            // Same empty-sequence semantics as above: skip terms with no job term row.
            var jobTerm = jobTerms[term.Key].FirstOrDefault();

            if (jobTerm != null)
            {
                clusterJobTermEntities.Add(new ClusterJobTerm()
                {
                    ClusterCalculationId = clusterCalculationEntity.Id,
                    ClusterId = clusterEntity.Value.Id,
                    JobId = JobId,
                    JobTermId = jobTerm.Id,
                    DistanceToClusterCenter = term.Value
                });
            }
        }
    });

    // Insert Cluster Documents & Terms
    context.BulkInsert(clusterJobTermEntities);
    context.BulkInsert(clusterJobDocumentEntities);

    SetCalculationStatus(context, clusterCalculationEntity, Contracts.ClusterCalculationStatus.Completed);
}