/// <summary>
/// Runs the Google News training pipeline: creates a <see cref="GoogleNewsTrainer"/>,
/// asks it to filter documents based on the program arguments, and computes a
/// correlation matrix from the filtered documents.
/// </summary>
/// <param name="programArgs">Program arguments forwarded to the trainer's filter step.</param>
private static void TrainUsingGoogleNews(ProgramArguments programArgs)
{
    ITrainer googleNewsTrainer = new GoogleNewsTrainer();

    IEnumerable<string> filteredDocuments = googleNewsTrainer.Filter(programArgs);

    // NOTE(review): the resulting matrix is not used after this point in this method;
    // presumably the trainer persists it itself - confirm against ITrainer implementations.
    CorrelationMatrix trainedMatrix = googleNewsTrainer.CalculateCorrelationMatrix(filteredDocuments);
}
/// <summary>
/// Writes a CSV threshold report for the given statement pairs and prints the
/// report file name to the console.
/// </summary>
/// <remarks>
/// Each row contains: the first statement, the second statement (commas in both are
/// replaced by periods so they cannot break the CSV columns), the smaller of the two
/// directional similarity scores, and the absolute difference between those scores.
/// The report is written to a file named <c>autoRSS_thresholdReport_&lt;guid&gt;.csv</c>
/// using a relative path.
/// </remarks>
/// <param name="pairs">Statement pairs to score; each pair is stemmed before scoring.</param>
/// <param name="programArgs">Program arguments used to load the correlation matrix.</param>
private static void OutputThresholdReport(List<Tuple<Statement, Statement>> pairs, ProgramArguments programArgs)
{
    CorrelationMatrix correlationMatrix = LoadCorrelationMatrix(programArgs);
    SimilarityAlgorithm sim = new SimilarityAlgorithm(correlationMatrix);
    StringBuilder sb = new StringBuilder();

    foreach (Tuple<Statement, Statement> pair in pairs)
    {
        Statement s1 = StemStatement(pair.Item1);
        Statement s2 = StemStatement(pair.Item2);

        // Similarity is computed in both directions; the report shows the weaker
        // direction and how far apart the two directions are.
        double s12 = sim.StatementSimilarityToStatement(s1, s2);
        double s21 = sim.StatementSimilarityToStatement(s2, s1);

        // NOTE(review): this result is never reported. The call is kept because it is
        // not visible here whether StatementEqualsToStatement has side effects - confirm
        // and either add it as a column or remove it.
        bool areEqual = sim.StatementEqualsToStatement(s1, s2);

        // The last placeholder must be {3}: the original repeated {2}, which printed the
        // minimum twice and silently dropped the absolute difference.
        // Invariant culture keeps '.' as the decimal separator so that numeric values
        // never introduce extra commas into the CSV.
        sb.AppendFormat(
            System.Globalization.CultureInfo.InvariantCulture,
            "{0},{1},{2},{3}\r\n",
            pair.Item1.ToString().Replace(',', '.'),
            pair.Item2.ToString().Replace(',', '.'),
            Math.Min(s12, s21),
            Math.Abs(s12 - s21));
    }

    // A fresh GUID per run means earlier reports are never overwritten.
    string reportName = "autoRSS_thresholdReport_" + Guid.NewGuid().ToString() + ".csv";
    using (StreamWriter sw = new StreamWriter(reportName))
    {
        sw.WriteLine(sb.ToString());
    }

    Console.WriteLine("Report: " + reportName);
}
/// <summary>
/// Re-clusters the labeled news articles with a <see cref="DocumentCategorizer"/> and
/// scores the produced clusters against the original ones.
/// </summary>
/// <remarks>
/// The original clusters come from <c>GetSimilarNewsTopicFiles</c>; they are flattened
/// into a single document sequence, clustered using a correlation matrix loaded from the
/// program arguments, passed to <c>OutputClusters</c>, and finally compared with the
/// original clusters via <c>CalculateErrorScore</c>.
/// </remarks>
/// <param name="programArgs">Program arguments used to load the correlation matrix.</param>
/// <returns>The error scores produced by comparing the original and resulting clusters.</returns>
private static IEnumerable<DocumentClusterErrorScore> CategorizeLabeledNewsArticles(ProgramArguments programArgs)
{
    // Reference labeling and its flattened document sequence.
    IEnumerable<DocumentCluster> labeledClusters = GetSimilarNewsTopicFiles();
    IEnumerable<Document> allDocuments = Flatten(labeledClusters);

    // Cluster the documents using the loaded correlation matrix.
    CorrelationMatrix loadedMatrix = LoadCorrelationMatrix(programArgs);
    DocumentCategorizer documentCategorizer = new DocumentCategorizer(loadedMatrix);
    IEnumerable<DocumentCluster> computedClusters = documentCategorizer.Cluster(allDocuments);

    OutputClusters(computedClusters);

    return CalculateErrorScore(labeledClusters, computedClusters);
}