/// <summary>
/// Loads one sparse vector per file found in <paramref name="srcPath"/>, then, for every
/// loaded item, writes a CSV file into <paramref name="dstPath"/> that lists all items
/// (the item itself included) ordered by descending similarity to it.
/// </summary>
/// <remarks>
/// <para>
/// Each output file is named <c>&lt;item&gt;.csv</c>, where the item name is the source file
/// name without its extension. The file has an <c>Item;Similarity</c> header row followed
/// by one <c>name;value</c> row per item.
/// </para>
/// <para>
/// An item whose output file already exists is skipped, so an interrupted run can be
/// restarted against the same destination directory.
/// </para>
/// </remarks>
/// <param name="srcPath">Directory whose files are loaded as item vectors.</param>
/// <param name="dstPath">Directory that receives the CSV files; created when missing.</param>
/// <param name="writeLine">
/// Callback that receives status and progress messages. It is invoked from the worker
/// threads of the parallel loops, so it has to tolerate concurrent calls.
/// </param>
/// <exception cref="AggregateException">
/// Raised by the parallel loops when a worker fails, for example when two source files
/// map to the same item name (inner <see cref="InvalidOperationException"/>).
/// </exception>
public static void ComputeKNN(string srcPath, string dstPath, Action<string> writeLine)
{
    writeLine("Preparing output directory structure ...");
    Directory.CreateDirectory(dstPath);

    // Output paths are built with Path.Combine further down. The previous code appended a
    // backslash by hand behind a condition (last != '\\' || last != '/') that is always
    // true, which yields a wrong file name wherever '\\' is not a directory separator.

    writeLine("Loading ...");
    var files = Directory.GetFiles(srcPath);
    var remaining = files.Length;
    var threadNr = Environment.ProcessorCount;
    var itemVectors = new ConcurrentDictionary<string, SparseVector>(threadNr, 100_000);

    Parallel.ForEach(
        files,
        new ParallelOptions { MaxDegreeOfParallelism = threadNr },
        file =>
        {
            // Report progress every 100 files, counting down towards zero.
            var rem = Interlocked.Decrement(ref remaining);
            if (rem % 100 == 0)
            {
                writeLine(rem.ToString());
            }

            // The anonymous object is only a shape template for the loader:
            // I = element index, V = element value.
            var data = file.LoadJSONArray(new { I = default(int), V = default(float) });
            var indices = new int[data.Length];
            var buffer = new float[data.Length];
            for (int i = 0; i < data.Length; i++)
            {
                buffer[i] = data[i].V;
                indices[i] = data[i].I;
            }

            var itemName = Path.GetFileNameWithoutExtension(file);
            if (!itemVectors.TryAdd(itemName, new SparseVector(buffer, indices)))
            {
                // Two files that differ only by extension collapse to the same item name.
                throw new InvalidOperationException("Item already exists!");
            }
        });

    writeLine("Indexing ...");
    // Flatten the dictionary into two parallel arrays so the hot loop below can use
    // plain index access instead of dictionary lookups.
    var indexToItem = new string[itemVectors.Count];
    var indexToVector = new SparseVector[itemVectors.Count];
    Parallel.ForEach(itemVectors, (pair, state, index) =>
    {
        indexToItem[index] = pair.Key;
        indexToVector[index] = pair.Value;
    });
    itemVectors = null;

    // Collect garbage and compact the large object heap now so the GC is less likely to
    // kick in while computing.
    GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.CompactOnce;
    GC.Collect();

    writeLine("Computing ...");
    var N = indexToVector.Length;
    remaining = N;

    Parallel.For(
        0,
        N,
        // One scratch list per worker, reused across that worker's iterations to avoid
        // allocating an N-element list for every item.
        () => new List<(string, float)>(N),
        (thisItemIndex, loop, localList) =>
        {
            var rem = Interlocked.Decrement(ref remaining);
            if (rem % 100 == 0)
            {
                writeLine(rem.ToString());
            }

            var thisItem = indexToItem[thisItemIndex];
            var thisVector = indexToVector[thisItemIndex];
            var dst = Path.Combine(dstPath, thisItem + ".csv");
            if (File.Exists(dst))
            {
                // Already produced by an earlier run.
                return localList;
            }

            // Start from an empty list regardless of how the previous iteration ended.
            var similarities = localList;
            similarities.Clear();

            for (int i = 0; i < N; i++)
            {
                var sim = SparseVector.CosSimmilarity(thisVector, indexToVector[i]);
                similarities.Add((indexToItem[i], sim));
            }

            // Highest similarity first.
            similarities.Sort((x, y) => y.Item2.CompareTo(x.Item2));

            // NOTE(review): the similarity is formatted with the writer's default format
            // provider, so the decimal separator presumably follows the current culture -
            // confirm that this is what the CSV consumers expect.
            using (var file = File.Open(dst, FileMode.CreateNew))
            using (var sw = new StreamWriter(file))
            {
                sw.Write("Item");
                sw.Write(';');
                sw.Write("Similarity");
                sw.WriteLine();

                foreach (var similarity in similarities)
                {
                    sw.Write(similarity.Item1);
                    sw.Write(';');
                    sw.Write(similarity.Item2);
                    sw.WriteLine();
                }
            }

            return localList;
        },
        x => { });
}