Beispiel #1
0
 /// <summary>
 /// Runs a <c>GoogleNewsTrainer</c>: filters documents using the program
 /// arguments and calculates a correlation matrix from them.
 /// </summary>
 /// <param name="programArgs">Program arguments handed to the trainer's <c>Filter</c> call.</param>
 private static void TrainUsingGoogleNews(ProgramArguments programArgs)
 {
     ITrainer newsTrainer = new GoogleNewsTrainer();
     IEnumerable <string> filteredDocuments = newsTrainer.Filter(programArgs);

     // The returned matrix is not consumed by this method; only the call itself is made.
     newsTrainer.CalculateCorrelationMatrix(filteredDocuments);
 }
Beispiel #2
0
        /// <summary>
        /// Writes a CSV threshold report for the given statement pairs and prints
        /// the name of the generated report file to the console.
        /// </summary>
        /// <remarks>
        /// Each row contains four columns: the first statement, the second statement,
        /// the smaller of the two directional similarity scores, and the absolute
        /// difference between those two scores. Commas inside the statement text are
        /// replaced with dots so they cannot break the column layout.
        /// </remarks>
        /// <param name="pairs">Statement pairs to compare; both items are stemmed before scoring.</param>
        /// <param name="programArgs">Program arguments used to load the correlation matrix.</param>
        private static void OutputThresholdReport(List <Tuple <Statement, Statement> > pairs, ProgramArguments programArgs)
        {
            CorrelationMatrix   correlationMatrix = LoadCorrelationMatrix(programArgs);
            SimilarityAlgorithm sim = new SimilarityAlgorithm(correlationMatrix);
            StringBuilder       sb  = new StringBuilder();

            foreach (Tuple <Statement, Statement> pair in pairs)
            {
                Statement s1 = StemStatement(pair.Item1);
                Statement s2 = StemStatement(pair.Item2);

                // Similarity is computed in both directions; the report shows the
                // weaker direction and how far apart the two directions are.
                double s12 = sim.StatementSimilarityToStatement(s1, s2);
                double s21 = sim.StatementSimilarityToStatement(s2, s1);

                // Invariant culture keeps '.' as the decimal separator so numeric
                // columns never introduce extra commas into the CSV row.
                sb.AppendFormat(
                    System.Globalization.CultureInfo.InvariantCulture,
                    "{0},{1},{2},{3}\r\n",
                    pair.Item1.ToString().Replace(',', '.'),
                    pair.Item2.ToString().Replace(',', '.'),
                    Math.Min(s12, s21),
                    Math.Abs(s12 - s21));
            }

            // A fresh GUID per run gives every report a unique file name.
            string reportName = "autoRSS_thresholdReport_" + Guid.NewGuid().ToString() + ".csv";

            using (StreamWriter sw = new StreamWriter(reportName))
            {
                sw.WriteLine(sb.ToString());
            }

            Console.WriteLine("Report: " + reportName);
        }
Beispiel #3
0
        /// <summary>
        /// Re-clusters the documents of the labeled news-topic clusters with a
        /// <c>DocumentCategorizer</c>, outputs the resulting clusters, and scores
        /// them against the original clusters.
        /// </summary>
        /// <param name="programArgs">Program arguments used to load the correlation matrix.</param>
        /// <returns>The error scores calculated from the original and the resulting clusters.</returns>
        private static IEnumerable <DocumentClusterErrorScore> CategorizeLabeledNewsArticles(ProgramArguments programArgs)
        {
            // Reference clustering and the flat document list derived from it.
            IEnumerable <DocumentCluster> labeledClusters = GetSimilarNewsTopicFiles();
            IEnumerable <Document> allDocuments = Flatten(labeledClusters);

            // Cluster the flattened documents using the loaded correlation matrix.
            CorrelationMatrix matrix = LoadCorrelationMatrix(programArgs);
            DocumentCategorizer documentCategorizer = new DocumentCategorizer(matrix);
            IEnumerable <DocumentCluster> computedClusters = documentCategorizer.Cluster(allDocuments);

            OutputClusters(computedClusters);

            return CalculateErrorScore(labeledClusters, computedClusters);
        }