/// <summary>
/// Processes unique vectors by distance in parallel: the vocabulary words are split
/// into index ranges and each range is handed to its own worker thread.
/// </summary>
/// <param name="dataSets">
/// Data sets to process. A copy of this list is stored in <c>Data.NewDataSets</c>,
/// and the list itself is passed to every <c>Core.UniqueVectors</c> worker.
/// </param>
/// <param name="path">Path given to <c>Hash2VecTextReader.Read</c> to load the vocabulary.</param>
/// <param name="flows">Number of threads to use; must be at least 1.</param>
/// <returns><c>Data.NewDataSets</c> as it stands once <c>Wait</c> has returned.</returns>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="flows"/> is less than 1.
/// </exception>
public IEnumerable<DataSets> CheckUniqueVectorsParallel(List<DataSets> dataSets, string path, int flows = 2)
{
    // Zero would divide by zero below, and a negative count does not reach the
    // "index == flows" iteration that closes the last range, so reject both up front.
    if (flows < 1)
    {
        throw new System.ArgumentOutOfRangeException(nameof(flows), flows, "The number of threads must be at least 1.");
    }

    var vocabulary = new Hash2VecTextReader().Read(path);
    var wordsLength = vocabulary.Words.Length;

    // Shared result list; presumably updated by the workers - verify against Core.UniqueVectors.
    Data.NewDataSets = new List<DataSets>(dataSets);

    var threadList = new List<Thread>();

    // Every thread gets "offset" words; the remainder of the integer division
    // is added to the end bound of the last thread.
    var offset = wordsLength / flows;
    var difference = wordsLength - offset * flows;

    for (int start = 0, index = 1, end = 0; end < wordsLength; start += offset, index++)
    {
        // NOTE(review): the start is bumped by one from the second range onwards (the +1
        // carries into all later ranges). Whether the bounds are meant to be inclusive
        // depends on Core.UniqueVectors - confirm before changing this arithmetic.
        if (index == 2)
        {
            start++;
        }

        end = index != flows
            ? offset * index
            : offset * index + difference;

        var uniqueVectors = new Core.UniqueVectors(vocabulary, dataSets, start, end);
        threadList.Add(new Thread(uniqueVectors.GetUniqueVectors) { Name = $"Thread {index}" });
    }

    // Start all workers only after every range has been prepared.
    threadList.ForEach(thread => thread.Start());

    // Presumably blocks until every worker has finished - see Wait.
    Wait(threadList);

    return Data.NewDataSets;
}
/// <summary>
/// Builds a de-duplicated copy of <paramref name="dataSets"/>. For every vocabulary word,
/// the neighbours returned by <c>vocabulary.Distance</c> whose <c>DistanceValue</c> is at
/// least 0.95 are treated as duplicates: the data sets matching those neighbours are
/// merged into the data set matching the word itself (element-wise maximum of
/// <c>Ideals</c>) and then removed from the result.
/// </summary>
/// <param name="dataSets">
/// Source data sets. The list itself is not modified, but the result list holds the same
/// <c>DataSets</c> instances, so <c>Ideals</c> updates are presumably visible through both
/// lists (assumes <c>DataSets</c> is a reference type).
/// </param>
/// <param name="path">Path given to <c>Hash2VecTextReader.Read</c> to load the vocabulary.</param>
/// <returns>A new list containing the data sets that remain after merging and removal.</returns>
public List<DataSets> CheckUniqueVectors(List<DataSets> dataSets, string path)
{
    // Neighbours at or above this distance value are considered duplicates of the word.
    const double duplicateThreshold = 0.95;

    var vocabulary = new Hash2VecTextReader().Read(path);
    var newDataSets = new List<DataSets>(dataSets);
    var totalWords = vocabulary.Words.Length;
    var count = 0;

    using (var progressBar = new ProgressBar())
    {
        foreach (var representation in vocabulary.Words)
        {
            // NOTE(review): the meaning of the arguments 3 and 2 is defined by
            // vocabulary.Distance, which is not visible here - confirm.
            var distanceList = vocabulary.Distance(representation, 3, 2).ToList();

            // Data sets (from the original input) whose vectors equal a duplicate neighbour.
            var vectorsEnumerable = distanceList
                .AsParallel()
                .Where(dis => dis.DistanceValue >= duplicateThreshold)
                .SelectMany(vec => dataSets.Where(
                    data => EqualsVectors(data.Vectors, vec.Representation.NumericVector)))
                .ToList();

            // AsOrdered makes FirstOrDefault return the first match in list order;
            // an unordered parallel query may return any matching element.
            var vector = newDataSets
                .AsParallel()
                .AsOrdered()
                .FirstOrDefault(vec => EqualsVectors(vec.Vectors, representation.NumericVector));

            if (vector != null)
            {
                newDataSets.Remove(vector);

                // Merge sequentially: every duplicate writes to the same Ideals array, so
                // running this compare-and-assign on several threads at once could lose
                // the larger value.
                foreach (var duplicate in vectorsEnumerable)
                {
                    for (var i = 0; i < vector.Ideals.Length; i++)
                    {
                        if (vector.Ideals[i] < duplicate.Ideals[i])
                        {
                            vector.Ideals[i] = duplicate.Ideals[i];
                        }
                    }
                }

                // Re-adding moves the merged data set to the end of the list.
                newDataSets.Add(vector);
            }

            // NOTE(review): if a duplicate is the very same instance as "vector", the
            // merged data set is removed again here - confirm whether that is intended.
            foreach (var data in vectorsEnumerable)
            {
                newDataSets.Remove(data);
            }

            count++;
            progressBar.Report((double)count / totalWords);
        }
    }

    return newDataSets;
}