Пример #1
0
        /// <summary>
        /// Runs the whole pipeline for <paramref name="dataset"/>: precomputation, per-class
        /// transaction extraction, frequent-pattern (FP) computation, then MFP computation and
        /// PlantUML text generation for every threshold.
        /// </summary>
        /// <remarks>
        /// Each stage caches its result under <c>{conf.mainDir}/{dataset.Label}</c> and reuses the
        /// cached file when it already exists (<c>dataset.json</c>, <c>transactions.json</c>,
        /// <c>NO_TRANSACTIONS.txt</c>, <c>fp.json</c>). URIs of finished classes are written to
        /// <c>classesProcessed.txt</c>; classes listed there are skipped on the next run.
        /// In DEBUG builds only the class labelled "Film" and the single threshold 95 are
        /// processed, and <c>classesProcessed.txt</c> is deleted first.
        /// Asynchronous calls made outside the parallel query are awaited, so their failures
        /// surface as the original exception rather than wrapped in an
        /// <see cref="AggregateException"/>.
        /// </remarks>
        /// <param name="dataset">Dataset to process; its metadata members are filled from the cache or by precomputation.</param>
        /// <param name="conf">Supplies the working directory, the precomputation-only flag and the PlantUML / Graphviz paths.</param>
        /// <exception cref="InvalidOperationException">
        /// One of the dataset collections checked after precomputation is empty.
        /// </exception>
        public static async Task ComputeFpMfpImage(Dataset dataset, Conf conf)
        {
            string mainDirectory        = conf.mainDir;
            bool   precomputationOnly   = conf.precomputationOnly;
            string plantUmlJarPath      = conf.plantUmlJarPath;
            string localGraphvizDotPath = conf.LocalGraphvizDotPath;

            log.LogInformation("Precomputation...");
            var jsonDatasetPath = Path.Combine(mainDirectory, dataset.Label);
            var datasetJsonFile = Path.Combine(jsonDatasetPath, "dataset.json");

            Directory.CreateDirectory(jsonDatasetPath);
            if (File.Exists(datasetJsonFile))
            {
                // Reuse the cached precomputation result.
                var content = await File.ReadAllTextAsync(datasetJsonFile);

                var datasetTmp = JsonConvert.DeserializeObject <Dataset>(content);
                dataset.dataTypeProperties = datasetTmp.dataTypeProperties;
                dataset.objectProperties   = datasetTmp.objectProperties;
                dataset.classesDepths      = datasetTmp.classesDepths;
                dataset.classes            = datasetTmp.classes;
                dataset.properties         = datasetTmp.properties;
            }
            else
            {
                await dataset.Precomputation(jsonDatasetPath);
                log.LogInformation($"Saving after precomputation: {datasetJsonFile}");
                var json = JsonConvert.SerializeObject(dataset);
                await File.WriteAllTextAsync(datasetJsonFile, json);
            }
            if (!dataset.dataTypeProperties.Any() ||
                !dataset.objectProperties.Any() ||
                !dataset.classesDepths.Any() ||
                !dataset.classes.Any() ||
                !dataset.properties.Any())
            {
                throw new InvalidOperationException(@"Something went wrong during precomputation.
                    One of the dataset property is empty!");
            }

            if (precomputationOnly)
            {
                log.LogInformation("pre-computation only, the program stop here");
                return;
            }

            log.LogInformation("Getting classes...");
            List <InstanceLabel> classes = dataset.classes.Select(x => x.Value).ToList();

#if DEBUG
            classes = classes.Where(x => x.Label == "Film").ToList();
#endif
            var total = classes.Count;
            log.LogInformation($"# of classes: {total}");
            var classesProcessedPath = Path.Combine(mainDirectory, dataset.Label, "classesProcessed.txt");
#if DEBUG
            File.Delete(classesProcessedPath);
#endif
            var classesProcessed = new ConcurrentBag <string>();
            if (File.Exists(classesProcessedPath))
            {
                // Resume: drop every class whose URI was recorded by a previous run.
                var lines = await File.ReadAllLinesAsync(classesProcessedPath);

                foreach (var classProcessed in lines)
                {
                    classesProcessed.Add(classProcessed);
                }
                log.LogInformation($"# classes processed: {classesProcessed.Count}");
                classes = classes.Where(x => !classesProcessed.Contains(x.Uri)).ToList();
                total   = classes.Count;
                log.LogInformation($"new # of classes: {total}");
            }
            log.LogInformation("Looping on classes...");
            var count = 0;

            var listOfTrans = new List <(TransactionList <int>, InstanceLabel)>();

            log.LogInformation("Transactions computation...");
            foreach (var instanceClass in classes)
            {
                count++;
                log.LogInformation($"class: {instanceClass.Label} ({count}/{total})");

                // ex: ${workingdirectory}/DBpedia/Film
                var instancePath = Path.Combine(
                    mainDirectory,
                    dataset.Label,
                    instanceClass.Label
                    );
                Directory.CreateDirectory(instancePath);
                var transactionsFilePath   = Path.Combine(instancePath, "transactions.json");
                // Marker file: avoids computing transactions again when there are none.
                var notransactionsFilePath = Path.Combine(instancePath, "NO_TRANSACTIONS.txt");
                if (File.Exists(transactionsFilePath))
                {
                    // Transactions have already been computed: reload them.
                    var jsonContent  = await File.ReadAllTextAsync(transactionsFilePath);
                    var transactions = JsonConvert.DeserializeObject <TransactionList <int> >(jsonContent);
                    listOfTrans.Add((transactions, instanceClass));
                }
                else if (File.Exists(notransactionsFilePath))
                {
                    log.LogTrace($"There is no transactions for class {instanceClass.Label}, there is no need to continue.");
                    continue;
                }
                else
                {
                    log.LogDebug($"computation of transactions...");
                    var transactions = await TransactionList <int> .GetTransactions(dataset, instanceClass);

                    log.LogDebug($"transactions computed: {transactions.transactions.Count}");
                    if (!transactions.transactions.Any())
                    {
                        log.LogTrace($"There is no transactions for class {instanceClass.Label}, there is no need to continue.");
                        await File.WriteAllTextAsync(notransactionsFilePath, "");
                        continue;
                    }

                    await transactions.SaveToFiles(
                        Path.Combine(instancePath, "transactions.txt"),
                        Path.Combine(instancePath, "dictionary.txt")
                        );
                    listOfTrans.Add((transactions, instanceClass));
                    var jsonFP = JsonConvert.SerializeObject(transactions);
                    await File.WriteAllTextAsync(transactionsFilePath, jsonFP);
                }
            }

            var maxDegreeOfParallelism = 70;
#if DEBUG
            maxDegreeOfParallelism = 1;
#endif
            log.LogInformation("FPs computation...");
            count = 0;
            // Smallest transaction lists first. The lambda runs synchronously on PLINQ worker
            // threads, so it cannot await: the task waits inside it are intentionally blocking.
            var fps = listOfTrans.OrderBy(x => x.Item1.transactions.Count).AsParallel().AsOrdered().WithExecutionMode(ParallelExecutionMode.ForceParallelism).WithDegreeOfParallelism(maxDegreeOfParallelism).Select(tuple =>
            {
                Interlocked.Increment(ref count);
                var transactions  = tuple.Item1;
                var instanceClass = tuple.Item2;
                var instancePath  = Path.Combine(
                    mainDirectory,
                    dataset.Label,
                    instanceClass.Label
                    );
                var fpFilePath = Path.Combine(instancePath, "fp.json");
                var fp         = new FrequentPattern <int>(serviceProvider);
                if (File.Exists(fpFilePath))
                {
                    log.LogDebug($"FP already computed... {instanceClass.Label}: {count}/{listOfTrans.Count}");
                    // FP has already been computed: reload it from its JSON cache.
                    var jsonContent = File.ReadAllText(fpFilePath);
                    fp = JsonConvert.DeserializeObject <FrequentPattern <int> >(jsonContent);
                    fp.SetServiceProvider(serviceProvider);
                    // Copy each item set's TransactionIDList entries back into the item set itself.
                    for (int i = 0; i < fp.fis.Count; i++)
                    {
                        for (int j = 0; j < fp.fis[i].TransactionIDList.Count; j++)
                        {
                            fp.fis[i].Add(fp.fis[i].TransactionIDList[j]);
                        }
                    }
                    fp.transactions = transactions;
                }
                else
                {
                    log.LogDebug($"computation of fp... {instanceClass.Label}: {count}/{listOfTrans.Count}");
                    fp.GetFrequentPatternV2(transactions, 0.01);
                    fp.SaveFP(Path.Combine(instancePath, "fp.txt")).Wait();

                    // Mirror each item set's elements into TransactionIDList before serializing.
                    for (int i = 0; i < fp.fis.Count; i++)
                    {
                        for (int j = 0; j < fp.fis[i].Count; j++)
                        {
                            fp.fis[i].TransactionIDList.Add(fp.fis[i][j]);
                        }
                    }
                    // Detach the transactions while serializing so they are not written into
                    // fp.json, then re-attach them.
                    fp.transactions = null;
                    var jsonFP      = JsonConvert.SerializeObject(fp);
                    File.WriteAllTextAsync(fpFilePath, jsonFP).Wait();
                    fp.transactions = transactions;
                    log.LogDebug($"fp computed {instanceClass.Label}: {count}/{listOfTrans.Count}");
                }
                return(fp, instanceClass);
            }).Where(x => !string.IsNullOrWhiteSpace(x.instanceClass.Uri)).ToList();

            // Thresholds 100 down to 50; each is divided by 100 before use.
            var thresholdRange = Enumerable.Range(50, 51).OrderByDescending(x => x).AsEnumerable();
#if DEBUG
            thresholdRange = new[] { 95 };
#endif
            log.LogInformation("MFP and images computation...");
            count = 1;
            foreach (var(fp, instanceClass) in fps)
            {
                log.LogInformation($"MFP and images computation {count++}/{fps.Count}");
                foreach (var thresholdInt in thresholdRange)
                {
                    log.LogInformation($"class: {instanceClass.Label} // threshold: {thresholdInt})");
                    var threshold    = thresholdInt / 100d;
                    var instancePath = Path.Combine(
                        mainDirectory,
                        dataset.Label,
                        instanceClass.Label
                        );
                    var imageFilePath = Path.Combine(
                        instancePath,
                        thresholdInt.ToString());
                    Directory.CreateDirectory(imageFilePath);

                    var sw   = Stopwatch.StartNew();
                    var mfps = fp.ComputeMFP(threshold).ToList();
                    await fp.SaveMFP(Path.Combine(imageFilePath, "mfp.txt"), mfps);
                    sw.Stop();
                    log.LogInformation($"{mfps.Count} mfps computed in ({thresholdInt}): {sw.Elapsed.ToPrettyFormat()}");
                    sw.Restart();
                    var igs = ImageGenerator.GenerateTxtForUml(dataset,
                                                               instanceClass, threshold, fp, serviceProvider,
                                                               plantUmlJarPath, localGraphvizDotPath, mfps);

                    var counter = 0;
                    foreach (var ig in igs)
                    {
                        counter++;
                        // Save the content meant for PlantUML so that images can be
                        // regenerated later if a problem occurs.
                        await ig.SaveUsedClassesAndProperties(
                            Path.Combine(imageFilePath, $"usedClasses_{counter}.json"),
                            Path.Combine(imageFilePath, $"usedProperties_{counter}.json"));
                        await ig.SaveContentForPlantUML(Path.Combine(imageFilePath, $"plant_{counter}.txt"));
                    }
                    log.LogInformation($"images computed in ({thresholdInt}): {sw.Elapsed.ToPrettyFormat()}");
                }

                // Record the class only once ALL of its thresholds are done. Recording it inside
                // the threshold loop would mark the class as finished after the first threshold
                // (so an interrupted run would skip the rest on restart) and would write the same
                // URI once per threshold.
                classesProcessed.Add(instanceClass.Uri);
                log.LogInformation("saving processed classes...");
                await File.WriteAllLinesAsync(classesProcessedPath, classesProcessed);
                log.LogInformation("processed classes saved");
            }
            log.LogInformation("main loop ended!");
        }
Пример #2
0
        // public string GetSuperClass(string classUri)
        // {
        //     if ("http://www.w3.org/2002/07/owl#Thing".Equals(classUri))
        //         return string.Empty;
        //     else if (!fp.transactions.dataset.classesDepths.ContainsKey(classUri))
        //         return string.Empty;
        //     else
        //     {
        //         var set = fp.transactions.dataset.classesDepths[classUri];
        //         var setOne = fp.transactions.dataset.classesDepths[classUri].Values;
        //         if (!set.Any()) return string.Empty;
        //         else
        //         {
        //             var closestSuperClass = set.OrderBy(x => x.Value).Select(x => x.Key).First();
        //             var cIL = fp.transactions.dataset.classes.ContainsKey(classUri) ?
        //                     fp.transactions.dataset.classes[classUri] :
        //                     new InstanceLabel(classUri, null, null);
        //             usedClassInstanceLabel.Add(cIL);
        //             var c = cIL.Label;
        //             var scIL = fp.transactions.dataset.classes.ContainsKey(closestSuperClass) ?
        //                 fp.transactions.dataset.classes[closestSuperClass] :
        //                 new InstanceLabel(closestSuperClass, null, null);
        //             usedClassInstanceLabel.Add(scIL);
        //             var sc = scIL.Label;
        //             return sc + " <|-- " + c;
        //         }
        //     }
        // }



        /// <summary>
        /// Creates one <see cref="ImageGenerator"/> per item set in <paramref name="mfps"/>; each
        /// generator receives the PlantUML source text describing that item set, plus the sets
        /// of class and property labels that were used to build it.
        /// </summary>
        /// <param name="ds">Not read by this method.</param>
        /// <param name="instanceClass">Class whose label names the UML class body and whose URI is passed to <c>GetAllSuperClasses</c>.</param>
        /// <param name="threshold">Only converted to an integer percentage; the result is not used afterwards.</param>
        /// <param name="fp">Provides the id-to-predicate dictionary, the dataset metadata and the single-item supports.</param>
        /// <param name="serviceProvider">Used to resolve the logger assigned to every generator.</param>
        /// <param name="plantUmlJarPath">Copied onto every generator.</param>
        /// <param name="localGraphvizDotPath">Copied onto every generator.</param>
        /// <param name="mfps">Item sets to describe; handled by descending size, then descending transaction count.</param>
        /// <returns>The generators, in the order the item sets were handled.</returns>
        public static List <ImageGenerator> GenerateTxtForUml(Dataset ds,
                                                              InstanceLabel instanceClass, double threshold,
                                                              FrequentPattern <int> fp, ServiceProvider serviceProvider, string plantUmlJarPath, string localGraphvizDotPath,
                                                              IEnumerable <PatternDiscovery.ItemSet <int> > mfps)
        {
            // The value is never read below; the conversion is kept so the method still
            // performs exactly the same work as before.
            var thresholdPercent = Convert.ToInt32(threshold * 100);

            // Label record of a class, or a bare InstanceLabel when the dataset does not know it.
            InstanceLabel ClassLabelFor(string uri)
            {
                if (fp.transactions.dataset.classes.ContainsKey(uri))
                {
                    return fp.transactions.dataset.classes[uri];
                }
                return new InstanceLabel(uri, null, null);
            }

            // Support (as an integer percentage, in text form) of the first one-element
            // item set containing the given item; null when there is none.
            string SupportTextFor(int singleItem)
            {
                return fp.fis
                       .Where(x => x.Count == 1 && x.Contains(singleItem))
                       .Select(x => Convert.ToInt32(x.Support * 100d).ToString())
                       .FirstOrDefault();
            }

            var orderedSets = mfps.OrderByDescending(x => x.Count)
                              .ThenByDescending(x => x.TransactionCount)
                              .ToList();
            var generators = new List <ImageGenerator>();

            foreach (var itemSet in orderedSets)
            {
                var generator = new ImageGenerator
                {
                    log                  = serviceProvider.GetService <ILogger <ImageGenerator> >(),
                    fp                   = fp,
                    plantUmlJarPath      = plantUmlJarPath,
                    localGraphvizDotPath = localGraphvizDotPath
                };
                generators.Add(generator);

                var uml = new StringBuilder();
                uml.AppendLine("@startuml");
                uml.AppendLine("skinparam linetype ortho");

                var emitted        = new HashSet <string>(); // lines already written to the diagram
                var handledItems   = new HashSet <int>();    // items rendered by pass 1 or pass 2
                var relatedClasses = new HashSet <string>(); // domain/range classes seen in pass 1
                generator.usedPropInstanceLabel  = new HashSet <InstanceLabel>();
                generator.usedClassInstanceLabel = new HashSet <InstanceLabel>();

                // Pass 1: object properties become associations between domain and range.
                foreach (var itemId in itemSet)
                {
                    var predicate = fp.transactions.intToPredicateDict[itemId];
                    if (!fp.transactions.dataset.objectProperties.ContainsKey(predicate))
                    {
                        continue;
                    }

                    var propertyLabel = fp.transactions.dataset.properties.ContainsKey(predicate)
                                        ? fp.transactions.dataset.properties[predicate]
                                        : new InstanceLabel(predicate, null, null);
                    generator.usedPropInstanceLabel.Add(propertyLabel);
                    var propertyName = propertyLabel.Label;

                    var link        = fp.transactions.dataset.objectProperties[predicate];
                    var dashed      = link.dash;
                    var domainUri   = link.domain;
                    var domainLabel = ClassLabelFor(domainUri);
                    generator.usedClassInstanceLabel.Add(domainLabel);
                    var domainName = domainLabel.Label;

                    var rangeUri = link.range;
                    if (domainUri.Equals(rangeUri))
                    {
                        // Self-referencing property: not drawn here and not marked as handled,
                        // so pass 3 picks it up.
                        continue;
                    }

                    var rangeLabel = ClassLabelFor(rangeUri);
                    generator.usedClassInstanceLabel.Add(rangeLabel);
                    var rangeName = rangeLabel.Label;

                    var support  = SupportTextFor(itemId);
                    var relation = domainName + (dashed ? " .. " : " -- ") + rangeName + " : " + propertyName + " sup:" + support;
                    if (emitted.Add(relation))
                    {
                        uml.AppendLine(relation);
                    }

                    handledItems.Add(itemId);
                    relatedClasses.Add(domainUri);
                    relatedClasses.Add(rangeUri);
                }

                uml.AppendLine("class " + instanceClass.Label + "{");

                // Pass 2: datatype properties become attributes of the class body.
                foreach (var itemId in itemSet)
                {
                    var predicate = fp.transactions.intToPredicateDict[itemId];
                    if (!fp.transactions.dataset.dataTypeProperties.ContainsKey(predicate))
                    {
                        continue;
                    }

                    var propertyLabel = fp.transactions.dataset.properties.ContainsKey(predicate)
                                        ? fp.transactions.dataset.properties[predicate]
                                        : new InstanceLabel(predicate, null, null);
                    generator.usedPropInstanceLabel.Add(propertyLabel);
                    var propertyName = propertyLabel.Label;

                    var datatype     = fp.transactions.dataset.dataTypeProperties.GetValueOrDefault(predicate);
                    var datatypeName = datatype.GetUriFragment();
                    var support      = SupportTextFor(itemId);
                    var attribute    = propertyName + ":" + datatypeName + " sup=" + support;
                    if (emitted.Add(attribute))
                    {
                        uml.AppendLine(attribute);
                    }

                    handledItems.Add(itemId);
                }

                // Pass 3: whatever the two previous passes did not mark as handled.
                foreach (var itemId in itemSet.Except(handledItems))
                {
                    var predicate     = fp.transactions.intToPredicateDict[itemId];
                    var propertyLabel = fp.transactions.dataset.properties.ContainsKey(predicate)
                                        ? fp.transactions.dataset.properties[predicate]
                                        : new InstanceLabel(predicate, null, null);
                    generator.usedPropInstanceLabel.Add(propertyLabel);
                    var propertyName = propertyLabel.Label;

                    var support   = SupportTextFor(itemId);
                    var attribute = propertyName + " sup=" + support;
                    if (emitted.Add(attribute))
                    {
                        uml.AppendLine(attribute);
                    }
                }

                uml.AppendLine("}");

                // Hierarchy lines of the class being described.
                foreach (var hierarchyLine in generator.GetAllSuperClasses(instanceClass.Uri))
                {
                    if (!string.IsNullOrWhiteSpace(hierarchyLine) && emitted.Add(hierarchyLine))
                    {
                        uml.AppendLine(hierarchyLine);
                    }
                }

                // Hierarchy lines of every class related through an object property.
                foreach (var relatedClassUri in relatedClasses)
                {
                    BuildClassesHierarchy(relatedClassUri, generator, uml, emitted, relatedClasses, fp);
                }

                uml.AppendLine("@enduml");
                generator.contentForUml = uml.ToString();
            }

            return generators;
        }
Пример #3
0
        /// <summary>
        /// Writes the hierarchy of classes above the given class URI into the
        /// <paramref name="cModel"/> string builder: one "super &lt;|-- sub" line for the class and
        /// its closest super class, then recursively for that super class.
        /// </summary>
        /// <remarks>
        /// Nothing is written for <c>http://www.w3.org/2002/07/owl#Thing</c>, nor for a class
        /// that has no entry (or an empty entry) in <c>classesDepths</c>. The closest super
        /// class is the entry with the smallest value in <c>classesDepths[classUri]</c>.
        /// NOTE(review): a cycle in <c>classesDepths</c> among classes outside
        /// <paramref name="classes"/> would recurse without end — confirm the data is acyclic.
        /// </remarks>
        /// <param name="classUri">URI of the class whose super-class chain is written.</param>
        /// <param name="result">Generator whose <c>usedClassInstanceLabel</c> set collects every class encountered.</param>
        /// <param name="cModel">Builder receiving the PlantUML lines.</param>
        /// <param name="duplicate">Lines already written; used to avoid writing a line twice.</param>
        /// <param name="classes">Class URIs for which this method must not recurse.</param>
        /// <param name="fp">Gives access to the dataset's <c>classes</c> and <c>classesDepths</c>.</param>
        private static void BuildClassesHierarchy(string classUri, ImageGenerator result, StringBuilder cModel, HashSet <string> duplicate, HashSet <string> classes, FrequentPattern <int> fp)
        {
            if ("http://www.w3.org/2002/07/owl#Thing".Equals(classUri))
            {
                return;
            }
            var dataset = fp.transactions.dataset;

            var cIL = dataset.classes.ContainsKey(classUri) ?
                      dataset.classes[classUri] :
                      new InstanceLabel(classUri, null, null);
            result.usedClassInstanceLabel.Add(cIL);

            // A class without a known (non-empty) set of super classes has nothing to draw.
            // Checking BEFORE reading the entry avoids the KeyNotFoundException /
            // InvalidOperationException that indexing and First() would otherwise raise.
            if (!dataset.classesDepths.TryGetValue(classUri, out var superClassDepths) ||
                superClassDepths == null ||
                !superClassDepths.Any())
            {
                return;
            }

            // Only the closest super class is linked, so this is done once rather than once
            // per super class: repeating it added nothing (lines and labels are de-duplicated)
            // and multiplied the recursive calls.
            var closestSuperClass = superClassDepths.OrderBy(x => x.Value).Select(x => x.Key).First();
            var scIL = dataset.classes.ContainsKey(closestSuperClass) ?
                       dataset.classes[closestSuperClass] :
                       new InstanceLabel(closestSuperClass, null, null);
            result.usedClassInstanceLabel.Add(scIL);

            var strToAdd = scIL.Label + " <|-- " + cIL.Label;
            if (duplicate.Add(strToAdd))
            {
                cModel.AppendLine(strToAdd);
            }
            if (!classes.Contains(scIL.Uri))
            {
                BuildClassesHierarchy(scIL.Uri, result, cModel, duplicate, classes, fp);
            }
        }
Пример #4
0
        /// <summary>
        /// Appends to <paramref name="frequentPatterns"/> the patterns derived from a
        /// conditional item header for the given <paramref name="suffix"/>.
        /// </summary>
        /// <remarks>
        /// Two passes are made over the header. The first adds, for every header element, the
        /// pattern [itemID, suffix] with that element's support. The second follows each node
        /// link upwards through <c>parent</c> until a node whose <c>item</c> is null, collecting
        /// the items and the smallest support on the way; when more than one item was collected
        /// and that support reaches <c>min_sup</c>, the collected items (top-most first)
        /// followed by the suffix are added as a pattern.
        /// </remarks>
        /// <param name="conditionalItemHeader">Header elements to mine; only read.</param>
        /// <param name="frequentPatterns">List receiving the new patterns.</param>
        /// <param name="suffix">Item appended at the end of every generated pattern.</param>
        private void mineConditionalFPTree(ref List<ItemHeaderElement> conditionalItemHeader, ref List<FrequentPattern> frequentPatterns, string suffix)
        {
            // Pass 1: one two-item pattern per header element.
            foreach (ItemHeaderElement header in conditionalItemHeader)
            {
                var pair = new List<string> { header.itemID, suffix };
                frequentPatterns.Add(new FrequentPattern(pair, header.support));
            }

            // Pass 2: one pattern per sufficiently supported path above each linked node.
            foreach (ItemHeaderElement header in conditionalItemHeader)
            {
                foreach (FPNode start in header.nodeLinks)
                {
                    var path = new List<string>();
                    int pathSupport = start.support;

                    for (FPNode cursor = start; cursor.item != null; cursor = cursor.parent)
                    {
                        pathSupport = min(pathSupport, cursor.support);
                        path.Add(cursor.item);
                    }

                    if (path.Count > 1 && pathSupport >= min_sup)
                    {
                        // Items were gathered bottom-up; flip them before adding the suffix.
                        path.Reverse();
                        path.Add(suffix);
                        frequentPatterns.Add(new FrequentPattern(path, pathSupport));
                    }
                }
            }
        }