Exemplo n.º 1
0
        /// <summary>
        /// Merges data sets whose vectors duplicate the vectors of close vocabulary neighbours.
        /// For every word in the vocabulary read from <paramref name="path"/>, the data sets matching
        /// the word's high-scoring neighbours are folded into the data set matching the word itself
        /// (element-wise maximum of <c>Ideals</c>) and then removed from the result.
        /// </summary>
        /// <param name="dataSets">Source data sets. The list itself is not modified, but the
        /// <c>Ideals</c> arrays of its elements may be updated in place, because the returned list
        /// shares the same element instances.</param>
        /// <param name="path">Path handed to <c>Hash2VecTextReader.Read</c> to load the vocabulary.</param>
        /// <returns>A new list containing the surviving (merged) data sets.</returns>
        public List <DataSets> CheckUniqueVectors(List <DataSets> dataSets, string path)
        {
            // Neighbours scoring at least this much are treated as duplicates.
            // NOTE(review): presumably DistanceValue is a similarity score (higher = closer) - confirm.
            const double duplicateThreshold = 0.95;

            var vocabulary  = new Hash2VecTextReader().Read(path);
            var newDataSets = new List <DataSets>(dataSets);
            var totalWords  = vocabulary.Words.Length;
            int count       = 0;

            using (var progressBar = new ProgressBar())
            {
                foreach (var representation in vocabulary.Words)
                {
                    var distanceList = vocabulary.Distance(representation, 3, 2).ToList();

                    // Read-only scan of the untouched input list, so running it in parallel is safe.
                    var duplicates = distanceList.AsParallel()
                                     .Where(dis => dis.DistanceValue >= duplicateThreshold)
                                     .SelectMany(dis =>
                                                 dataSets.Where(data => EqualsVectors(data.Vectors, dis.Representation.NumericVector)))
                                     .ToList();

                    // AsOrdered makes FirstOrDefault return the first match in list order;
                    // an unordered parallel query may return any matching element.
                    var vector = newDataSets.AsParallel()
                                 .AsOrdered()
                                 .FirstOrDefault(vec => EqualsVectors(vec.Vectors, representation.NumericVector));

                    if (vector != null)
                    {
                        // Remove + Add moves the merged entry to the end of the result list.
                        newDataSets.Remove(vector);

                        // Merge sequentially: a parallel compare-then-write on the shared Ideals
                        // array is a data race that can lose the larger value.
                        foreach (var duplicate in duplicates)
                        {
                            for (var i = 0; i < vector.Ideals.Length; i++)
                            {
                                if (vector.Ideals[i] < duplicate.Ideals[i])
                                {
                                    vector.Ideals[i] = duplicate.Ideals[i];
                                }
                            }
                        }

                        newDataSets.Add(vector);
                    }

                    // Drop the duplicates from the result, whether or not a merge target was found.
                    foreach (var duplicate in duplicates)
                    {
                        newDataSets.Remove(duplicate);
                    }

                    count++;
                    progressBar.Report((double)count / totalWords);
                }
            }

            return(newDataSets);
        }