Ejemplo n.º 1
0
    /// <summary>
    /// Produces a K-Means distortion report.
    /// Loads the most recently created <c>wordEmbeddings-*</c> file from the results directory,
    /// builds article embeddings from the lines of <c>InputFileLoc</c>, computes the distortion for
    /// every cluster count from 2 to 25 (inclusive), and passes the results to a <c>ReportWriter</c>
    /// targeting a new <c>report-{ticks}.csv</c> path in the results directory.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// No file whose name starts with <c>wordEmbeddings-</c> exists in the results directory.
    /// </exception>
    public void GenerateDistortionReportForKMeans()
    {
        const int MinClusterCount = 2;
        const int MaxClusterCount = 25;

        // Both the embeddings lookup and the report path live under the same directory.
        var resultsPath = $@"{Directory.GetCurrentDirectory()}/{ResultsDirectory}";

        // Pick the newest embeddings file by creation time.
        var embeddingsFile = new DirectoryInfo(resultsPath).EnumerateFiles()
                             .Where(f => Regex.IsMatch(f.Name, "^wordEmbeddings-.*$"))
                             .OrderBy(f => f.CreationTime)
                             .LastOrDefault();

        if (embeddingsFile is null)
        {
            // Same exception type Last() would have thrown, but with an actionable message.
            throw new InvalidOperationException(
                $"No word embeddings file (wordEmbeddings-*) was found in '{resultsPath}'.");
        }

        var reportFileLoc = $@"{resultsPath}/report-{DateTime.Now.Ticks}.csv";

        // FileMode.Open: this is a read-only input, so never create it as a side effect.
        using var fileStream = new FileStream(embeddingsFile.FullName, FileMode.Open, FileAccess.Read);
        using var reader     = new StreamReader(fileStream, Encoding.UTF8);
        var wordEmbeddings = new List <WordEmbedding>();

        // Load first, then normalise: normalising before loading would operate on an empty list.
        // NOTE(review): assumes NormaliseEmbeddings mutates the list in place - confirm.
        wordEmbeddings.PopulateWordEmbeddingsFromStream(reader);
        wordEmbeddings.NormaliseEmbeddings();

        var articleEmbeddings = new List <ArticleEmbedding>();

        foreach (var line in File.ReadLines(InputFileLoc))
        {
            // First comma-separated field is passed as the first constructor argument; the remaining
            // fields are re-joined with spaces (so commas in the remainder become spaces).
            var splitLine = line.Split(',');
            articleEmbeddings.Add(new ArticleEmbedding(splitLine[0], string.Join(' ', splitLine.Skip(1)), maxContentsLength: 500));
        }
        articleEmbeddings.AssignVectorsFromWeightedWordEmbeddings(wordEmbeddings);

        var kMeans      = new KMeans(articleEmbeddings);
        // Kept as Dictionary<object, object>: that is the type handed to WriteMisc, whose signature
        // is not visible here.
        var distortions = new Dictionary <object, object>();

        // Re-cluster for each candidate cluster count and record the resulting distortion.
        for (var clusterCount = MinClusterCount; clusterCount <= MaxClusterCount; clusterCount++)
        {
            kMeans.CalculateLabelClusterMap(numberOfClusters: clusterCount);
            distortions.Add(clusterCount, kMeans.CalculateDistortion());
        }

        var reportHandler = new ReportWriter(reportFileLoc);

        reportHandler.WriteMisc(distortions);
    }