Esempio n. 1
0
        private static Cluster OptimizeRange(SvdEntities context, ClusterCalculation clusterCalculationEntity)
        {
            try
            {
                var randGen = new Random();

                Cluster.SetCalculationStatus(context, clusterCalculationEntity, Contracts.ClusterCalculationStatus.Clustering);

                var clusters = (from k in Enumerable.Range(clusterCalculationEntity.MinimumClusterCount, (clusterCalculationEntity.MaximumClusterCount - clusterCalculationEntity.MinimumClusterCount) + 1)
                                select Optimize(randGen, clusterCalculationEntity.JobId, k, clusterCalculationEntity.IterationsPerCluster, clusterCalculationEntity.MaximumOptimizationsCount)).ToList();

                var optimizedCluster = clusters
                                       .OrderByDescending(c => c.GlobalSi)
                                       .ThenByDescending(c => c.GlobalClusterSiAverage).First();

                optimizedCluster.Save(context, clusterCalculationEntity);

                return(optimizedCluster);
            }
            catch (Exception)
            {
                Cluster.SetCalculationStatus(context, clusterCalculationEntity, Contracts.ClusterCalculationStatus.Failed);
                throw;
            }
        }
Esempio n. 2
0
        public static void SetCalculationStatus(SvdEntities context, ClusterCalculation clusterCalculationEntity, Contracts.ClusterCalculationStatus status)
        {
            if (status == Contracts.ClusterCalculationStatus.New)
            {
                clusterCalculationEntity.Created = DateTime.Now;
            }

            if (status == Contracts.ClusterCalculationStatus.Completed || status == Contracts.ClusterCalculationStatus.Failed)
            {
                clusterCalculationEntity.Completed = DateTime.Now;
            }

            clusterCalculationEntity.Status = status;
            context.SaveChanges();
        }
Esempio n. 3
0
        public void Save(SvdEntities context, ClusterCalculation clusterCalculationEntity)
        {
            var binaryFormatter = new BinaryFormatter();

            var jobDocs         = context.JobDocuments.Where(jd => jd.JobId == JobId).ToLookup(jd => jd.OrdinalIndex);
            var jobTerms        = context.JobTerms.Where(jd => jd.JobId == JobId).ToLookup(jt => jt.Term.Value);
            var clusterEntities = new Dictionary <int, Engine.Cluster>();

            clusterCalculationEntity.ClusterCount = Clusters;
            clusterCalculationEntity.GlobalSi     = GlobalSi;
            clusterCalculationEntity.ClusterSi    = GlobalClusterSiAverage;

            // Update Cluster Calculation
            context.SaveChanges();

            Enumerable.Range(0, Clusters).ToList().ForEach(cluster =>
            {
                using (var memoryStreamCenterVector = new MemoryStream())
                {
                    binaryFormatter.Serialize(memoryStreamCenterVector, Centers[cluster]);

                    memoryStreamCenterVector.Position = 0;

                    clusterEntities.Add(cluster, new Engine.Cluster()
                    {
                        JobId = JobId,
                        ClusterCalculationId = clusterCalculationEntity.Id,
                        Si = ClusterSiAverages[cluster],
                        CenterVectorSerialized = memoryStreamCenterVector.ToArray()
                    });
                }
            });

            // Insert Clusters
            context.BulkInsert(clusterEntities.Select(kvp => kvp.Value));

            var clusterJobDocumentEntities = new ConcurrentBag <ClusterJobDocument>();
            var clusterJobTermEntities     = new ConcurrentBag <ClusterJobTerm>();

            clusterEntities.AsParallel().ForAll(clusterEntity =>
            {
                using (var memoryStreamCenterVector = new MemoryStream())
                {
                    var termDistanceMap = new Dictionary <string, float>();
                    var centerVector    = Centers[clusterEntity.Key];

                    foreach (var kvp in ClusterMap.Where(kvp => kvp.Value == clusterEntity.Key))
                    {
                        var docIndex    = kvp.Key;
                        var jobDocument = jobDocs[docIndex];

                        if (jobDocument != null)
                        {
                            clusterJobDocumentEntities.Add(new ClusterJobDocument()
                            {
                                ClusterCalculationId = clusterCalculationEntity.Id,
                                ClusterId            = clusterEntity.Value.Id,
                                JobId         = JobId,
                                Si            = DocumentSi.ContainsKey(docIndex) ? DocumentSi[docIndex] : 0,
                                JobDocumentId = jobDocument.First().Id
                            });
                        }
                    }

                    for (var i = 0; i < LSA.MatrixContainer.UMatrix.RowCount; i++)
                    {
                        termDistanceMap[LSA.MatrixContainer.Terms[i]] = Distance.Cosine(centerVector, LSA.MatrixContainer.UMatrix.Row(i).ToArray());
                    }

                    foreach (var term in termDistanceMap.OrderBy(t => t.Value).Take(20))
                    {
                        var jobTermLookup = jobTerms[term.Key];

                        if (jobTermLookup != null)
                        {
                            clusterJobTermEntities.Add(new ClusterJobTerm()
                            {
                                ClusterCalculationId = clusterCalculationEntity.Id,
                                ClusterId            = clusterEntity.Value.Id,
                                JobId     = JobId,
                                JobTermId = jobTermLookup.First().Id,
                                DistanceToClusterCenter = term.Value
                            });
                        }
                    }
                }
            });

            // Insert Cluster Documents & Terms
            context.BulkInsert(clusterJobTermEntities);
            context.BulkInsert(clusterJobDocumentEntities);

            SetCalculationStatus(context, clusterCalculationEntity, Contracts.ClusterCalculationStatus.Completed);
        }