/// <summary>
/// Marks the rows of the selected tables for removal and then persists the
/// change through <c>SaveChanges(context)</c>, logging progress to the console.
/// </summary>
/// <param name="context">Database context whose sets are cleared.</param>
/// <param name="clearEmailAddresses">When true, every entry of <c>context.EmailAddresses</c> is removed.</param>
/// <param name="clearEmailEntities">When true, every entry of <c>context.Emails</c> is removed.</param>
/// <param name="clearEmailDestinations">When true, every entry of <c>context.DestinationEmails</c> is removed.</param>
private static void ClearDatabase(
    EnronContext context,
    bool clearEmailAddresses,
    bool clearEmailEntities,
    bool clearEmailDestinations)
{
    Console.WriteLine("Preparing the clearing query...");

    // Each set is passed to its own RemoveRange, i.e. "remove everything in this set".
    if (clearEmailAddresses)
    {
        var addressSet = context.EmailAddresses;
        addressSet.RemoveRange(addressSet);
    }

    if (clearEmailEntities)
    {
        var emailSet = context.Emails;
        emailSet.RemoveRange(emailSet);
    }

    if (clearEmailDestinations)
    {
        var destinationSet = context.DestinationEmails;
        destinationSet.RemoveRange(destinationSet);
    }

    // Nothing is written to the database until the pending removals are saved.
    Console.WriteLine("Removing elements from database...");
    SaveChanges(context);

    Console.WriteLine("Database cleared");
}
/// <summary>
/// Builds a directed graph of e-mail traffic between addresses flagged as
/// belonging to Enron.
/// </summary>
/// <remarks>
/// Vertices are the address strings of every sender and recipient that
/// survives the filter. For each (sender, recipient) pair an edge is added
/// whose weight is the number of collected e-mails for that pair; e-mails
/// sharing a <c>SendDate</c> with an already collected one are counted once.
/// </remarks>
/// <param name="context">Database context providing <c>DestinationEmails</c> and <c>EmailAddresses</c>.</param>
/// <param name="emails">Query selecting the e-mails to build the graph from.</param>
/// <returns>The graph constructed with <c>directed: true</c>.</returns>
public static Graph<string, double, double> GetGraph(EnronContext context, IQueryable<EmailObject> emails)
{
    var graph = new Graph<string, double, double>(directed: true);
    var enronAddresses = new HashSet<string>();
    var directedEmails = new Dictionary<(string, string), List<EmailObject>>();

    // One row per (e-mail, recipient address, sender address), restricted to
    // rows where both the recipient and the sender belong to Enron.
    var enronToEnron = emails
        .Join(
            context.DestinationEmails,
            eo => eo.Id,
            de => de.EmailId,
            (eo, de) => new { eo, de }
        )
        .Join(
            context.EmailAddresses,
            pair => pair.de.EmailAddressId,
            ea => ea.Id,
            (pair, ea) => new { pair.eo, ea } // e-mail object and one of its recipients
        )
        .Where(pair => pair.ea.BelongsToEnron)
        .Join(
            context.EmailAddresses,
            pair => pair.eo.FromId,
            ea => ea.Id,
            (pair, ea) => new { pair.eo, pair.ea, eaFrom = ea } // ... plus the sender
        )
        .Where(row => row.eaFrom.BelongsToEnron);

    foreach (var row in enronToEnron)
    {
        var email = row.eo;
        var fromAddress = row.eaFrom;
        var emailTo = row.ea;

        enronAddresses.Add(fromAddress.Address);
        enronAddresses.Add(emailTo.Address);

        var key = (fromAddress.Address, emailTo.Address);

        // Single lookup instead of ContainsKey followed by repeated indexing.
        if (!directedEmails.TryGetValue(key, out var collected))
        {
            directedEmails.Add(key, new List<EmailObject>() { email });
        }
        else if (!collected.Any(em => em.SendDate == email.SendDate))
        {
            // the same email could have been cloned to a different directory,
            // so an e-mail with an already seen send date is not counted again
            collected.Add(email);
        }
    }

    foreach (var enronAddress in enronAddresses)
    {
        graph.AddVertex(enronAddress);
    }

    foreach (var directedEmailKVP in directedEmails)
    {
        var edge = directedEmailKVP.Key;
        var emailCount = directedEmailKVP.Value.Count;

        // The graph is created with directed: true above, so this branch is
        // only taken if graph.Directed nevertheless reports false; it keeps
        // the smaller of the two counts on an existing edge.
        if (!graph.Directed && graph.ContainsEdge(edge))
        {
            graph[edge] = Math.Min(graph[edge], emailCount);
        }
        else
        {
            graph.AddEdge(edge, emailCount);
        }
    }

    return graph;
}
static void Main(string[] args) { using var context = new EnronContext(); Console.WriteLine($"{context.Emails.Count()} emails in total."); // var resultsFilePath = "./results.txt"; var topResultsFilePath = "./top_results.txt"; var topTopResultsFilePath = "./top_top_results.txt"; var matchingFeatureSelectors = new List <(string, Func <VertexPartialMatchingNode <string, double, double>, double>)>() { ("Upper bound", matching => matching.BestUpperBound), ("Lower bound", matching => matching.BestLowerBound) }; var edgeCostTypes = new List <(string, CostType, List <double>, List <double>)>() { // ("Unit cost Wojciechowski .5", CostType.UnitCost, new List<double>(){.5}, new List<double>(){.5}), // ("Unit cost Wojciechowski (1.) .5", CostType.UnitCost, new List<double>(){1}, new List<double>(){.5}), // ("Unit cost Wojciechowski .5 Riesen Bunke 1.", CostType.UnitCost, new List<double>(){.5, 1}, new List<double>(){1, .5}), // ("Unit cost Riesen Bunke 1.", CostType.UnitCost, new List<double>(){1}, new List<double>(){1}), ("Absolute value Wojciechowski .5", CostType.AbsoluteValue, new List <double>() { .5 }, new List <double>() { .5 }), // ("Absolute value Wojciechowski (1.) .5", CostType.AbsoluteValue, new List<double>(){1}, new List<double>(){.5}), // ("Absolute value Wojciechowski .5 Riesen Bunke 1.", CostType.AbsoluteValue, new List<double>(){.5, 1}, new List<double>(){1, .5}), ("Absolute value Riesen Bunke 1.", CostType.AbsoluteValue, new List <double>() { 1 }, new List <double>() { 1 }), // ("Absolute value bounded Wojciechowski .5", CostType.AbsoluteValueBounded, new List<double>(){.5}, new List<double>(){.5}), // ("Absolute value bounded Wojciechowski (1.) 
.5", CostType.AbsoluteValueBounded, new List<double>(){1}, new List<double>(){.5}), // ("Absolute value bounded Wojciechowski .5 Riesen Bunke 1.", CostType.AbsoluteValueBounded, new List<double>(){.5, 1}, new List<double>(){1, .5}), // ("Absolute value bounded Riesen Bunke 1.", CostType.AbsoluteValueBounded, new List<double>(){1}, new List<double>(){1}), }; var results = new Dictionary <(int vertexUpperBound, int k, string distanceScorerName, string edgeCostType, string matchingFeatureSelectorName), (double testAccuracy, double validationAccuracy)>(); var topResults = new Dictionary <(int vertexUpperBound, int k, string distanceScorerName, string edgeCostType, string matchingFeatureSelectorName), (double testAccuracy, double validationAccuracy)>(); for (int iteration = 0; iteration < 10; iteration++) { for (int vertexUpperBound = 3; vertexUpperBound < 8; vertexUpperBound++) { var dataset = GenerateDataSet( context, trainingProportion: 8, validatingProportion: 1, testingProportion: 1, vertexUpperBound: vertexUpperBound, randomSeed: iteration ); foreach (var(matchingFeatureSelectorName, matchingFeatureSelector) in matchingFeatureSelectors) { foreach (var(edgeCostTypeName, edgeCostType, aCollection, bCollection) in edgeCostTypes) { var matchingParameters = GraphMatchingParameters <string, double, double> .DoubleCostComposer( CostType.UnitCost, edgeCostType ); matchingParameters.aCollection = aCollection; matchingParameters.bCollection = bCollection; // determine closest neighbours for each test and validation graph var testMatchingClassPairsList = new List <(List <(VertexPartialMatchingNode <string, double, double>, bool)>, bool)>(); var validationMatchingClassPairsList = new List <(List <(VertexPartialMatchingNode <string, double, double>, bool)>, bool)>(); foreach (var(pair, i) in dataset.testSet.Zip(Enumerable.Range(0, int.MaxValue))) { var pairToAdd = ( KNNClassifier.KNNClassifier.FindClosest(pair.Item1, dataset.trainingSet, matchingParameters, 
matchingFeatureSelector), pair.Item2 ); testMatchingClassPairsList.Add(pairToAdd); // System.Console.WriteLine($"Test set: computed distance: {i * 100d / dataset.testSet.Count:f2}%."); } foreach (var(pair, i) in dataset.validationSet.Zip(Enumerable.Range(0, int.MaxValue))) { var pairToAdd = ( KNNClassifier.KNNClassifier.FindClosest(pair.Item1, dataset.trainingSet, matchingParameters, matchingFeatureSelector), pair.Item2 ); validationMatchingClassPairsList.Add(pairToAdd); // System.Console.WriteLine($"Validation set: computed distance: {i * 100d / dataset.validationSet.Count:f2}%."); } Func <int, Func <int, VertexPartialMatchingNode <string, double, double>, double>, List <(List <(VertexPartialMatchingNode <string, double, double>, bool)>, bool)>, double> getAccuracy = (k, distanceScorer, matchingClassPairsList) => { var truePositives = 0; var falsePositives = 0; var trueNegatives = 0; var falseNegatives = 0; foreach (var(matchingClassPairs, testGraphLabel) in matchingClassPairsList) { var classificationResult = KNNClassifier.KNNClassifier.Classify <string, double, double, bool>( matchingClassPairs, distanceScorer, k: k ); var result = (expected : testGraphLabel, received : classificationResult.graphClass); if (result.expected) { if (result.received) { truePositives += 1; } else { falseNegatives += 1; } } else { if (result.received) { falsePositives += 1; } else { trueNegatives += 1; } } } return((truePositives + trueNegatives) * 1d / matchingClassPairsList.Count); }; var ks = new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1 }; var distanceScorers = new (string, Func <int, VertexPartialMatchingNode <string, double, double>, double>)[]