Example #1
0
        /// <summary>
        /// Processes unique vectors by distance in parallel: the vocabulary read from
        /// <paramref name="path"/> is split into index ranges and one thread running
        /// <c>Core.UniqueVectors.GetUniqueVectors</c> is created and started per range.
        /// </summary>
        /// <param name="dataSets">Data sets to process; a copy of this list is stored in <c>Data.NewDataSets</c> before the threads start.</param>
        /// <param name="path">Path passed to <c>Hash2VecTextReader.Read</c> to load the vocabulary.</param>
        /// <param name="flows">Number of threads (ranges) to use; must be at least 1.</param>
        /// <returns>The value of <c>Data.NewDataSets</c> after <c>Wait</c> has returned.</returns>
        /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="flows"/> is less than 1.</exception>
        public IEnumerable <DataSets> CheckUniqueVectorsParallel(List <DataSets> dataSets, string path, int flows = 2)
        {
            // flows == 0 would divide by zero below; a negative value makes the range bounds
            // move away from wordsLength, so the partitioning loop would keep creating threads.
            if (flows < 1)
            {
                throw new System.ArgumentOutOfRangeException(nameof(flows), flows, "The number of threads must be at least 1.");
            }

            var vocabulary  = new Hash2VecTextReader().Read(path);
            var wordsLength = vocabulary.Words.Length;

            // Shared result list; NOTE(review): presumably the worker threads write into it - confirm in Core.UniqueVectors.
            Data.NewDataSets = new List <DataSets>(dataSets);
            var threadList = new List <Thread>();

            // Base size of every range, and the remainder that is added to the last range.
            var offset     = wordsLength / flows;
            var difference = wordsLength - offset * flows;

            // i is the range start, bias the range end handed to Core.UniqueVectors,
            // index is the 1-based number of the range/thread.
            for (int i = 0, index = 1, bias = 0; bias < wordsLength; i = i + offset, index++)
            {
                // From the second range on, the start is shifted by one (the shift persists for later ranges).
                // NOTE(review): presumably avoids re-processing the end index of the first range -
                // confirm against how Core.UniqueVectors treats its start/end arguments.
                if (index == 2)
                {
                    i++;
                }

                // The last range also absorbs the remainder so that bias reaches wordsLength and ends the loop.
                bias = index != flows ? offset * index : offset * index + difference;
                var uniqueVectors = new Core.UniqueVectors(vocabulary, dataSets, i, bias);
                threadList.Add(new Thread(uniqueVectors.GetUniqueVectors)
                {
                    Name = $"Thread {index}"
                });
            }

            threadList.ForEach(thread => thread.Start());

            // NOTE(review): Wait is defined elsewhere; presumably it blocks until all threads have finished.
            Wait(threadList);

            return(Data.NewDataSets);
        }
Example #2
0
        /// <summary>
        /// Walks every word of the vocabulary read from <paramref name="path"/> and, for each word,
        /// collects the data sets whose vectors match a vocabulary entry with a distance value of at
        /// least 0.95 to that word. The data set matching the word itself has each of its
        /// <c>Ideals</c> raised to the maximum found among the collected data sets, and the collected
        /// data sets are then removed from the result list.
        /// </summary>
        /// <param name="dataSets">Source data sets. The list itself is not modified, but the result list
        /// holds the same elements, so <c>Ideals</c> updates are made on those shared elements.</param>
        /// <param name="path">Path passed to <c>Hash2VecTextReader.Read</c> to load the vocabulary.</param>
        /// <returns>A new list based on <paramref name="dataSets"/> with the collected duplicates removed.</returns>
        public List <DataSets> CheckUniqueVectors(List <DataSets> dataSets, string path)
        {
            // Minimum distance value for a vocabulary entry to be treated as a duplicate of the current word.
            const double duplicateThreshold = 0.95;

            var vocabulary  = new Hash2VecTextReader().Read(path);
            var newDataSets = new List <DataSets>(dataSets);
            int count       = 0;

            using (var progressBar = new ProgressBar())
            {
                foreach (var representation in vocabulary.Words)
                {
                    // NOTE(review): the meaning of the arguments 3 and 2 is defined by vocabulary.Distance - not visible here.
                    var distanceList     = vocabulary.Distance(representation, 3, 2).ToList();
                    var vectorsDuplicats = distanceList.AsParallel().Where(dis => dis.DistanceValue >= duplicateThreshold);

                    // All source data sets whose vectors equal the vector of one of the close vocabulary entries.
                    var vectorsEnumerable = vectorsDuplicats.SelectMany(vec =>
                                                                        dataSets.Where(data => EqualsVectors(data.Vectors, vec.Representation.NumericVector))).ToList();

                    // The data set that corresponds to the current word itself (if it is still in the result list).
                    var vector = newDataSets.AsParallel()
                                 .FirstOrDefault(vec => EqualsVectors(vec.Vectors, representation.NumericVector));

                    if (vector != null)
                    {
                        newDataSets.Remove(vector);

                        // Element-wise maximum over all duplicates. This is done sequentially on purpose:
                        // the check-then-write on the shared vector.Ideals array is not atomic, so running it
                        // from several threads at once could let a smaller value overwrite a larger one.
                        foreach (var duplicate in vectorsEnumerable)
                        {
                            for (var i = 0; i < vector.Ideals.Length; i++)
                            {
                                if (vector.Ideals[i] < duplicate.Ideals[i])
                                {
                                    vector.Ideals[i] = duplicate.Ideals[i];
                                }
                            }
                        }

                        // Re-added at the end of the list, before the duplicates are removed below.
                        newDataSets.Add(vector);
                    }

                    foreach (var data in vectorsEnumerable)
                    {
                        newDataSets.Remove(data);
                    }

                    count++;
                    progressBar.Report((double)count / vocabulary.Words.Length);
                }
            }

            return(newDataSets);
        }