Ejemplo n.º 1
0
        /// <summary>
        /// Stages the contents of the selected entity sets for removal and then
        /// hands the context to <c>SaveChanges</c>, reporting progress on the console.
        /// </summary>
        /// <param name="context">Context whose entity sets are cleared.</param>
        /// <param name="clearEmailAddresses">When true, everything in <c>EmailAddresses</c> is staged for removal.</param>
        /// <param name="clearEmailEntities">When true, everything in <c>Emails</c> is staged for removal.</param>
        /// <param name="clearEmailDestinations">When true, everything in <c>DestinationEmails</c> is staged for removal.</param>
        private static void ClearDatabase(EnronContext context, bool clearEmailAddresses, bool clearEmailEntities, bool clearEmailDestinations)
        {
            Console.WriteLine("Preparing the clearing query...");

            // Each set is passed to its own RemoveRange, i.e. the whole set is staged.
            if (clearEmailAddresses)
            {
                var addressSet = context.EmailAddresses;
                addressSet.RemoveRange(addressSet);
            }

            if (clearEmailEntities)
            {
                var emailSet = context.Emails;
                emailSet.RemoveRange(emailSet);
            }

            if (clearEmailDestinations)
            {
                var destinationSet = context.DestinationEmails;
                destinationSet.RemoveRange(destinationSet);
            }

            Console.WriteLine("Removing elements from database...");

            // SaveChanges is a helper defined elsewhere in this file; presumably it commits the staged removals.
            SaveChanges(context);

            Console.WriteLine("Database cleared");
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds a graph of e-mail traffic between addresses flagged <c>BelongsToEnron</c>.
        /// </summary>
        /// <remarks>
        /// Every sender and destination address that survives the filter becomes a vertex.
        /// For each (sender, destination) pair an edge is added whose weight is the number of
        /// collected e-mails for that pair; e-mails sharing a <c>SendDate</c> with one already
        /// collected for the same pair are counted once.
        /// </remarks>
        /// <param name="context">Context supplying the <c>DestinationEmails</c> and <c>EmailAddresses</c> sets joined against <paramref name="emails"/>.</param>
        /// <param name="emails">The e-mails to turn into edges.</param>
        /// <returns>The populated graph, constructed with <c>directed: true</c>.</returns>
        public static Graph <string, double, double> GetGraph(EnronContext context, IQueryable <EmailObject> emails)
        {
            var graph          = new Graph <string, double, double>(directed: true);
            var enronAddresses = new HashSet <string>();
            var directedEmails = new Dictionary <(string, string), List <EmailObject> >();

            // One row per (e-mail, destination address, sender address) where both
            // addresses are flagged as belonging to Enron.
            var rows =
                emails
                .Join(
                    context.DestinationEmails,
                    eo => eo.Id,
                    de => de.EmailId,
                    (eo, de) => new { eo, de }
                    )
                .Join(
                    context.EmailAddresses,
                    (pair) => pair.de.EmailAddressId,
                    ea => ea.Id,
                    (pair, ea) => new { pair.eo, ea } // email object and one of its destination addresses
                    )
                .Where(pair => pair.ea.BelongsToEnron)
                .Join(
                    context.EmailAddresses,
                    (pair) => pair.eo.FromId,
                    ea => ea.Id,
                    (pair, ea) => new { pair.eo, pair.ea, eaFrom = ea }
                    )
                .Where(row => row.eaFrom.BelongsToEnron);

            foreach (var triplet in rows)
            {
                var emailTo     = triplet.ea;
                var email       = triplet.eo;
                var fromAddress = triplet.eaFrom;

                enronAddresses.Add(fromAddress.Address);
                enronAddresses.Add(emailTo.Address);

                var key = (fromAddress.Address, emailTo.Address);

                // Single lookup instead of ContainsKey followed by repeated indexer access.
                if (!directedEmails.TryGetValue(key, out var emailsForPair))
                {
                    directedEmails.Add(key, new List <EmailObject>()
                    {
                        email
                    });
                }
                // the same email could have been cloned to a different directory
                else if (!emailsForPair.Any(em => em.SendDate == email.SendDate))
                {
                    emailsForPair.Add(email);
                }
            }

            foreach (var enronAddress in enronAddresses)
            {
                graph.AddVertex(enronAddress);
            }

            foreach (var directedEmailKVP in directedEmails)
            {
                var edge = directedEmailKVP.Key;

                // NOTE(review): the graph above is created with directed: true, so this
                // branch presumably only matters if the graph is ever built undirected - confirm.
                if (!graph.Directed && graph.ContainsEdge(edge))
                {
                    graph[edge] = Math.Min(graph[edge], directedEmailKVP.Value.Count);
                }
                else
                {
                    graph.AddEdge(edge, directedEmailKVP.Value.Count);
                }
            }

            return graph;
        }
        static void Main(string[] args)
        {
            using var context = new EnronContext();
            Console.WriteLine($"{context.Emails.Count()} emails in total.");

            // var resultsFilePath = "./results.txt";
            var topResultsFilePath    = "./top_results.txt";
            var topTopResultsFilePath = "./top_top_results.txt";

            var matchingFeatureSelectors = new List <(string, Func <VertexPartialMatchingNode <string, double, double>, double>)>()
            {
                ("Upper bound", matching => matching.BestUpperBound),
                ("Lower bound", matching => matching.BestLowerBound)
            };

            var edgeCostTypes = new List <(string, CostType, List <double>, List <double>)>()
            {
                // ("Unit cost Wojciechowski .5", CostType.UnitCost, new List<double>(){.5}, new List<double>(){.5}),
                // ("Unit cost Wojciechowski (1.) .5", CostType.UnitCost, new List<double>(){1}, new List<double>(){.5}),
                // ("Unit cost Wojciechowski .5 Riesen Bunke 1.", CostType.UnitCost, new List<double>(){.5, 1}, new List<double>(){1, .5}),
                // ("Unit cost Riesen Bunke 1.", CostType.UnitCost, new List<double>(){1}, new List<double>(){1}),

                ("Absolute value Wojciechowski .5", CostType.AbsoluteValue, new List <double>()
                {
                    .5
                }, new List <double>()
                {
                    .5
                }),
                // ("Absolute value Wojciechowski (1.) .5", CostType.AbsoluteValue, new List<double>(){1}, new List<double>(){.5}),
                // ("Absolute value Wojciechowski .5 Riesen Bunke 1.", CostType.AbsoluteValue, new List<double>(){.5, 1}, new List<double>(){1, .5}),
                ("Absolute value Riesen Bunke 1.", CostType.AbsoluteValue, new List <double>()
                {
                    1
                }, new List <double>()
                {
                    1
                }),

                // ("Absolute value bounded Wojciechowski .5", CostType.AbsoluteValueBounded, new List<double>(){.5}, new List<double>(){.5}),
                // ("Absolute value bounded Wojciechowski (1.) .5", CostType.AbsoluteValueBounded, new List<double>(){1}, new List<double>(){.5}),
                // ("Absolute value bounded Wojciechowski .5 Riesen Bunke 1.", CostType.AbsoluteValueBounded, new List<double>(){.5, 1}, new List<double>(){1, .5}),
                // ("Absolute value bounded Riesen Bunke 1.", CostType.AbsoluteValueBounded, new List<double>(){1}, new List<double>(){1}),
            };

            var results    = new Dictionary <(int vertexUpperBound, int k, string distanceScorerName, string edgeCostType, string matchingFeatureSelectorName), (double testAccuracy, double validationAccuracy)>();
            var topResults = new Dictionary <(int vertexUpperBound, int k, string distanceScorerName, string edgeCostType, string matchingFeatureSelectorName), (double testAccuracy, double validationAccuracy)>();

            for (int iteration = 0; iteration < 10; iteration++)
            {
                for (int vertexUpperBound = 3; vertexUpperBound < 8; vertexUpperBound++)
                {
                    var dataset = GenerateDataSet(
                        context,
                        trainingProportion: 8,
                        validatingProportion: 1,
                        testingProportion: 1,
                        vertexUpperBound: vertexUpperBound,
                        randomSeed: iteration
                        );
                    foreach (var(matchingFeatureSelectorName, matchingFeatureSelector) in matchingFeatureSelectors)
                    {
                        foreach (var(edgeCostTypeName, edgeCostType, aCollection, bCollection) in edgeCostTypes)
                        {
                            var matchingParameters = GraphMatchingParameters <string, double, double> .DoubleCostComposer(
                                CostType.UnitCost,
                                edgeCostType
                                );

                            matchingParameters.aCollection = aCollection;
                            matchingParameters.bCollection = bCollection;

                            // determine closest neighbours for each test and validation graph
                            var testMatchingClassPairsList       = new List <(List <(VertexPartialMatchingNode <string, double, double>, bool)>, bool)>();
                            var validationMatchingClassPairsList = new List <(List <(VertexPartialMatchingNode <string, double, double>, bool)>, bool)>();

                            foreach (var(pair, i) in dataset.testSet.Zip(Enumerable.Range(0, int.MaxValue)))
                            {
                                var pairToAdd = (
                                    KNNClassifier.KNNClassifier.FindClosest(pair.Item1, dataset.trainingSet, matchingParameters, matchingFeatureSelector),
                                    pair.Item2
                                    );
                                testMatchingClassPairsList.Add(pairToAdd);
                                // System.Console.WriteLine($"Test set: computed distance: {i * 100d / dataset.testSet.Count:f2}%.");
                            }

                            foreach (var(pair, i) in dataset.validationSet.Zip(Enumerable.Range(0, int.MaxValue)))
                            {
                                var pairToAdd = (
                                    KNNClassifier.KNNClassifier.FindClosest(pair.Item1, dataset.trainingSet, matchingParameters, matchingFeatureSelector),
                                    pair.Item2
                                    );
                                validationMatchingClassPairsList.Add(pairToAdd);
                                // System.Console.WriteLine($"Validation set: computed distance: {i * 100d / dataset.validationSet.Count:f2}%.");
                            }

                            Func <int, Func <int, VertexPartialMatchingNode <string, double, double>, double>, List <(List <(VertexPartialMatchingNode <string, double, double>, bool)>, bool)>, double> getAccuracy = (k, distanceScorer, matchingClassPairsList) =>
                            {
                                var truePositives  = 0;
                                var falsePositives = 0;
                                var trueNegatives  = 0;
                                var falseNegatives = 0;

                                foreach (var(matchingClassPairs, testGraphLabel) in matchingClassPairsList)
                                {
                                    var classificationResult = KNNClassifier.KNNClassifier.Classify <string, double, double, bool>(
                                        matchingClassPairs,
                                        distanceScorer,
                                        k: k
                                        );

                                    var result = (expected : testGraphLabel, received : classificationResult.graphClass);
                                    if (result.expected)
                                    {
                                        if (result.received)
                                        {
                                            truePositives += 1;
                                        }
                                        else
                                        {
                                            falseNegatives += 1;
                                        }
                                    }
                                    else
                                    {
                                        if (result.received)
                                        {
                                            falsePositives += 1;
                                        }
                                        else
                                        {
                                            trueNegatives += 1;
                                        }
                                    }
                                }

                                return((truePositives + trueNegatives) * 1d / matchingClassPairsList.Count);
                            };

                            var ks = new int[]
                            {
                                1,
                                2,
                                3,
                                4,
                                5,
                                6,
                                7,
                                8,
                                9,
                                10,
                                -1
                            };

                            var distanceScorers = new (string, Func <int, VertexPartialMatchingNode <string, double, double>, double>)[]