/// <summary>
/// Runs k-means over the article embeddings for every cluster count from 2 to 25
/// and writes the resulting distortion per cluster count to a new
/// <c>report-{ticks}.csv</c> file in the results directory.
/// </summary>
/// <remarks>
/// Word embeddings are read from the most recently created file in the results
/// directory whose name starts with <c>wordEmbeddings-</c>. Articles are read
/// line by line from <c>InputFileLoc</c>; each line is split on commas, the first
/// field and the space-joined remainder being passed to <c>ArticleEmbedding</c>.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// Thrown when the results directory contains no <c>wordEmbeddings-*</c> file.
/// </exception>
public void GenerateDistortionReportForKMeans()
{
    var resultsPath = $@"{Directory.GetCurrentDirectory()}/{ResultsDirectory}";

    // Pick the newest embeddings file; LastOrDefault lets us report a useful
    // error instead of the generic "Sequence contains no elements".
    var embeddingsFile = new DirectoryInfo(resultsPath)
        .EnumerateFiles()
        .Where(f => Regex.IsMatch(f.Name, "^wordEmbeddings-.*$"))
        .OrderBy(f => f.CreationTime)
        .LastOrDefault();

    if (embeddingsFile is null)
    {
        throw new InvalidOperationException(
            $"No word embeddings file (wordEmbeddings-*) was found in '{resultsPath}'.");
    }

    var reportFileLoc = $@"{resultsPath}/report-{DateTime.Now.Ticks}.csv";

    var wordEmbeddings = new List<WordEmbedding>();

    // The file is only read, so open it with FileMode.Open, and release the
    // handle as soon as loading finishes rather than holding it during k-means.
    using (var fileStream = new FileStream(embeddingsFile.FullName, FileMode.Open, FileAccess.Read))
    using (var reader = new StreamReader(fileStream, Encoding.UTF8))
    {
        wordEmbeddings.PopulateWordEmbeddingsFromStream(reader);
    }

    // Normalise only after loading: calling this on the still-empty list gives it
    // nothing to act on. NOTE(review): presumably normalises the vectors currently
    // in the list - confirm against NormaliseEmbeddings.
    wordEmbeddings.NormaliseEmbeddings();

    var articleEmbeddings = new List<ArticleEmbedding>();
    foreach (var line in File.ReadLines(InputFileLoc))
    {
        var splitLine = line.Split(',');

        // Everything after the first comma-separated field is re-joined with
        // spaces and passed as the second constructor argument.
        articleEmbeddings.Add(new ArticleEmbedding(
            splitLine[0],
            string.Join(' ', splitLine.Skip(1)),
            maxContentsLength: 500));
    }

    articleEmbeddings.AssignVectorsFromWeightedWordEmbeddings(wordEmbeddings);

    var kMeans = new KMeans(articleEmbeddings);

    // Maps each cluster count to the distortion obtained with that many clusters.
    var distortions = new Dictionary<object, object>();
    for (var i = 2; i <= 25; i++)
    {
        kMeans.CalculateLabelClusterMap(numberOfClusters: i);
        distortions.Add(i, kMeans.CalculateDistortion());
    }

    var reportHandler = new ReportWriter(reportFileLoc);
    reportHandler.WriteMisc(distortions);
}