Ejemplo n.º 1
0
        /// <summary>
        /// Evaluates candidate (permissible, variation) threshold pairs through
        /// <c>ErrorOptimizer.Optimize</c> and appends one CSV row per evaluated pair
        /// to a freshly created, GUID-named <c>.csv</c> file in the working directory.
        /// </summary>
        /// <param name="programArgs">Arguments forwarded to <c>LoadCorrelationMatrix</c>.</param>
        private static void FindBestThresholds(ProgramArguments programArgs)
        {
            // Absolute average error at or below which the callback reports success.
            const double errorGoal = 0.01;

            IEnumerable<DocumentCluster> originalClusters = GetSimilarNewsTopicFiles();
            IEnumerable<Document> documents = Flatten(originalClusters);
            CorrelationMatrix correlationMatrix = LoadCorrelationMatrix(programArgs);

            string fileName = Guid.NewGuid().ToString() + ".csv";

            using (StreamWriter sw = new StreamWriter(fileName))
            {
                // The callback is invoked by the optimizer for each candidate pair;
                // presumably a 'true' result ends the search — confirm against ErrorOptimizer.
                ErrorOptimizer.Optimize(0, 1, 0, 1, (permissibleValue, variationValue) =>
                {
                    SimilarityAlgorithm similarityAlgorithm = new SimilarityAlgorithm(correlationMatrix, permissibleValue, variationValue);
                    DocumentCategorizer categorizer = new DocumentCategorizer(similarityAlgorithm);
                    IEnumerable<DocumentCluster> resultClusters = categorizer.Cluster(documents);
                    IEnumerable<DocumentClusterErrorScore> errorScores = CalculateErrorScore(originalClusters, resultClusters);

                    double average = (from score in errorScores select score.Value).Average();
                    Console.WriteLine("Average Error: " + average);

                    // Format with the invariant culture: on comma-decimal cultures the default
                    // formatting would emit "0,01", which collides with the ", " column separator.
                    sw.WriteLine(string.Format(
                        System.Globalization.CultureInfo.InvariantCulture,
                        "{0}, {1}, {2}",
                        permissibleValue,
                        variationValue,
                        average));

                    return Math.Abs(average) <= errorGoal;
                });
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Sweeps the P and V thresholds over the half-open range [0, 1) in steps of 0.01,
        /// clusters the documents for every (P, V) pair, and writes the absolute average
        /// error of each pair as a grid to a GUID-named <c>.csv</c> file.
        /// </summary>
        /// <remarks>
        /// File layout: the first row is a "0" corner cell followed by the V values; each
        /// following row starts with a P value followed by the errors for that P. Every cell,
        /// including the last one on a line, is followed by ", ".
        /// </remarks>
        /// <param name="programArgs">Arguments forwarded to <c>LoadCorrelationMatrix</c>.</param>
        private static void ExperimentPandVThresholds(ProgramArguments programArgs)
        {
            IEnumerable<DocumentCluster> originalClusters = GetSimilarNewsTopicFiles();
            IEnumerable<Document> documents = Flatten(originalClusters);
            CorrelationMatrix correlationMatrix = LoadCorrelationMatrix(programArgs);

            const double startP = 0;
            const double endP = 1;
            const double startV = 0;
            const double endV = 1;
            const double step = 0.01;

            // Compute each axis once so the evaluation loop, the header row and the data rows
            // all agree on the same sample points, and the matrix is sized to match exactly.
            double[] pValues = BuildThresholdSteps(startP, endP, step);
            double[] vValues = BuildThresholdSteps(startV, endV, step);

            double[,] errorValues = new double[pValues.Length, vValues.Length];
            for (int i = 0; i < pValues.Length; i++)
            {
                for (int j = 0; j < vValues.Length; j++)
                {
                    SimilarityAlgorithm similarityAlgorithm = new SimilarityAlgorithm(correlationMatrix, pValues[i], vValues[j]);
                    DocumentCategorizer categorizer = new DocumentCategorizer(similarityAlgorithm);
                    IEnumerable<DocumentCluster> resultClusters = categorizer.Cluster(documents);

                    // NOTE(review): arguments are passed as (result, original) here, while the other
                    // callers in this file pass (original, result) — confirm the order is intentional.
                    IEnumerable<DocumentClusterErrorScore> errorScores = CalculateErrorScore(resultClusters, originalClusters);

                    double average = (from score in errorScores select score.Value).Average();
                    Console.WriteLine("Average Error: " + average);

                    errorValues[i, j] = Math.Abs(average);
                }
            }

            string fileName = Guid.NewGuid().ToString() + ".csv";

            // Invariant culture keeps '.' as the decimal separator so that values never
            // contain the ',' used to delimit columns.
            IFormatProvider csvFormat = System.Globalization.CultureInfo.InvariantCulture;

            using (StreamWriter sw = new StreamWriter(fileName))
            {
                sw.Write("0, ");
                foreach (double v in vValues)
                {
                    sw.Write(string.Format(csvFormat, "{0}, ", v));
                }

                sw.WriteLine();

                for (int i = 0; i < pValues.Length; i++)
                {
                    sw.Write(string.Format(csvFormat, "{0}, ", pValues[i]));
                    for (int j = 0; j < vValues.Length; j++)
                    {
                        sw.Write(string.Format(csvFormat, "{0}, ", errorValues[i, j]));
                    }

                    sw.WriteLine();
                }
            }

            Logger.Log("Saved experiment to file: " + fileName);
        }

        /// <summary>
        /// Returns the sample points <c>start + k * step</c> (k = 0, 1, 2, ...) that are
        /// strictly less than <paramref name="end"/>.
        /// </summary>
        /// <param name="start">First sample point.</param>
        /// <param name="end">Exclusive upper bound.</param>
        /// <param name="step">Distance between consecutive points; must be positive.</param>
        /// <returns>The sample points in ascending order; empty when <paramref name="start"/> is not below <paramref name="end"/>.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="step"/> is not positive.</exception>
        private static double[] BuildThresholdSteps(double start, double end, double step)
        {
            if (!(step > 0))
            {
                throw new ArgumentOutOfRangeException("step", "Step must be greater than zero.");
            }

            List<double> values = new List<double>();

            // Multiply by an integer index instead of repeatedly adding the step, so that
            // rounding error does not accumulate from one point to the next.
            for (int k = 0; start + (k * step) < end; k++)
            {
                values.Add(start + (k * step));
            }

            return values.ToArray();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Clusters the labeled news documents with a <c>DocumentCategorizer</c> built from the
        /// loaded correlation matrix, outputs the resulting clusters, and scores them against
        /// the original clusters.
        /// </summary>
        /// <param name="programArgs">Arguments forwarded to <c>LoadCorrelationMatrix</c>.</param>
        /// <returns>The error scores produced by <c>CalculateErrorScore</c> for (original, result).</returns>
        private static IEnumerable<DocumentClusterErrorScore> CategorizeLabeledNewsArticles(ProgramArguments programArgs)
        {
            // Reference clusters, and the same documents with the grouping removed.
            IEnumerable<DocumentCluster> labeledClusters = GetSimilarNewsTopicFiles();
            IEnumerable<Document> allDocuments = Flatten(labeledClusters);

            CorrelationMatrix matrix = LoadCorrelationMatrix(programArgs);
            DocumentCategorizer documentCategorizer = new DocumentCategorizer(matrix);

            IEnumerable<DocumentCluster> computedClusters = documentCategorizer.Cluster(allDocuments);
            OutputClusters(computedClusters);

            return CalculateErrorScore(labeledClusters, computedClusters);
        }