/// <summary>
/// Runs the whole pipeline for <paramref name="dataset"/>: precomputation (or reload of the
/// cached <c>dataset.json</c>), transactions per class, frequent patterns per class, then
/// maximal frequent patterns and PlantUML input files for every threshold.
/// Intermediate results are cached on disk under <c>conf.mainDir/dataset.Label</c>, so an
/// interrupted run can be resumed.
/// </summary>
/// <param name="dataset">Dataset to process; its <c>Label</c> names the working sub-directory.</param>
/// <param name="conf">Run configuration: working directory, precomputation-only switch, PlantUML jar path and Graphviz dot path.</param>
/// <returns>
/// A task that completes when all classes have been processed, or right after the
/// precomputation when <c>conf.precomputationOnly</c> is set.
/// </returns>
/// <exception cref="InvalidOperationException">
/// One of the precomputed dataset collections is empty after loading/precomputation.
/// </exception>
public static async Task ComputeFpMfpImage(Dataset dataset, Conf conf)
{
    string mainDirectory = conf.mainDir;
    bool precomputationOnly = conf.precomputationOnly;
    string plantUmlJarPath = conf.plantUmlJarPath;
    string localGraphvizDotPath = conf.LocalGraphvizDotPath;

    // ---- 1. Precomputation (cached in dataset.json) ----
    log.LogInformation("Precomputation...");
    var jsonDatasetPath = Path.Combine(mainDirectory, dataset.Label);
    Directory.CreateDirectory(jsonDatasetPath);
    var datasetFilePath = Path.Combine(jsonDatasetPath, "dataset.json");
    if (File.Exists(datasetFilePath))
    {
        var content = await File.ReadAllTextAsync(datasetFilePath);
        var datasetTmp = JsonConvert.DeserializeObject<Dataset>(content);
        dataset.dataTypeProperties = datasetTmp.dataTypeProperties;
        dataset.objectProperties = datasetTmp.objectProperties;
        dataset.classesDepths = datasetTmp.classesDepths;
        dataset.classes = datasetTmp.classes;
        dataset.properties = datasetTmp.properties;
    }
    else
    {
        await dataset.Precomputation(jsonDatasetPath);
        log.LogInformation($"Saving after precomputation: {datasetFilePath}");
        var json = JsonConvert.SerializeObject(dataset);
        await File.WriteAllTextAsync(datasetFilePath, json);
    }

    if (!dataset.dataTypeProperties.Any() ||
        !dataset.objectProperties.Any() ||
        !dataset.classesDepths.Any() ||
        !dataset.classes.Any() ||
        !dataset.properties.Any())
    {
        throw new InvalidOperationException(@"Something went wrong during precomputation. 
One of the dataset property is empty!");
    }

    if (precomputationOnly)
    {
        log.LogInformation("pre-computation only, the program stop here");
        return;
    }

    // ---- 2. Classes still to process ----
    log.LogInformation("Getting classes...");
    List<InstanceLabel> classes = dataset.classes.Select(x => x.Value).ToList();
#if DEBUG
    classes = classes.Where(x => x.Label == "Film").ToList();
#endif
    var total = classes.Count;
    log.LogInformation($"# of classes: {total}");

    var classesProcessedPath = Path.Combine(mainDirectory, dataset.Label, "classesProcessed.txt");
#if DEBUG
    File.Delete(classesProcessedPath);
#endif
    // A set gives O(1) membership tests and keeps the saved file free of duplicates.
    var classesProcessed = new HashSet<string>();
    if (File.Exists(classesProcessedPath))
    {
        var lines = await File.ReadAllLinesAsync(classesProcessedPath);
        classesProcessed.UnionWith(lines);
        log.LogInformation($"# classes processed: {classesProcessed.Count}");
        classes = classes.Where(x => !classesProcessed.Contains(x.Uri)).ToList();
        total = classes.Count;
        log.LogInformation($"new # of classes: {total}");
    }

    // ---- 3. Transactions per class (cached in transactions.json) ----
    log.LogInformation("Looping on classes...");
    var count = 0;
    var listOfTrans = new List<(TransactionList<int>, InstanceLabel)>();
    log.LogInformation("Transactions computation...");
    foreach (var instanceClass in classes)
    {
        count++;
        log.LogInformation($"class: {instanceClass.Label} ({count}/{total})");
        // ex: ${workingdirectory}/DBpedia/Film
        var instancePath = Path.Combine(mainDirectory, dataset.Label, instanceClass.Label);
        Directory.CreateDirectory(instancePath);
        var transactionsFilePath = Path.Combine(instancePath, "transactions.json");
        // Marker file: avoids computing transactions again when there are none to compute.
        var notransactionsFilePath = Path.Combine(instancePath, "NO_TRANSACTIONS.txt");

        if (File.Exists(transactionsFilePath))
        {
            // Transactions have already been computed: reload them.
            var jsonContent = await File.ReadAllTextAsync(transactionsFilePath);
            var transactions = JsonConvert.DeserializeObject<TransactionList<int>>(jsonContent);
            listOfTrans.Add((transactions, instanceClass));
        }
        else if (File.Exists(notransactionsFilePath))
        {
            log.LogTrace($"There is no transactions for class {instanceClass.Label}, there is no need to continue.");
            continue;
        }
        else
        {
            log.LogDebug($"computation of transactions...");
            var transactions = await TransactionList<int>.GetTransactions(dataset, instanceClass);
            log.LogDebug($"transactions computed: {transactions.transactions.Count}");
            if (!transactions.transactions.Any())
            {
                log.LogTrace($"There is no transactions for class {instanceClass.Label}, there is no need to continue.");
                await File.WriteAllTextAsync(notransactionsFilePath, "");
                continue;
            }

            await transactions.SaveToFiles(
                Path.Combine(instancePath, "transactions.txt"),
                Path.Combine(instancePath, "dictionary.txt"));
            listOfTrans.Add((transactions, instanceClass));
            var jsonFP = JsonConvert.SerializeObject(transactions);
            await File.WriteAllTextAsync(transactionsFilePath, jsonFP);
        }
    }

    // ---- 4. Frequent patterns per class, in parallel (cached in fp.json) ----
    var maxDegreeOfParallelism = 70;
#if DEBUG
    maxDegreeOfParallelism = 1;
#endif
    log.LogInformation("FPs computation...");
    count = 0;
    var fps = listOfTrans
        .OrderBy(x => x.Item1.transactions.Count)
        .AsParallel()
        .AsOrdered()
        .WithExecutionMode(ParallelExecutionMode.ForceParallelism)
        .WithDegreeOfParallelism(maxDegreeOfParallelism)
        .Select(tuple =>
        {
            // Capture the incremented value: re-reading the shared counter would race with other workers.
            var current = Interlocked.Increment(ref count);
            var transactions = tuple.Item1;
            var instanceClass = tuple.Item2;
            var instancePath = Path.Combine(mainDirectory, dataset.Label, instanceClass.Label);
            var fpFilePath = Path.Combine(instancePath, "fp.json");
            var fp = new FrequentPattern<int>(serviceProvider);
            if (File.Exists(fpFilePath))
            {
                // fp has already been computed
                log.LogDebug($"FP already computed... {instanceClass.Label}: {current}/{listOfTrans.Count}");
                var jsonContent = File.ReadAllText(fpFilePath);
                fp = JsonConvert.DeserializeObject<FrequentPattern<int>>(jsonContent);
                fp.SetServiceProvider(serviceProvider);
                // Refill each item set from the TransactionIDList that was written to fp.json.
                for (int i = 0; i < fp.fis.Count; i++)
                {
                    for (int j = 0; j < fp.fis[i].TransactionIDList.Count; j++)
                    {
                        fp.fis[i].Add(fp.fis[i].TransactionIDList[j]);
                    }
                }
                fp.transactions = transactions;
            }
            else
            {
                log.LogDebug($"computation of fp... {instanceClass.Label}: {current}/{listOfTrans.Count}");
                fp.GetFrequentPatternV2(transactions, 0.01);
                // This lambda is synchronous (PLINQ), so the tasks below are waited on rather than awaited.
                fp.SaveFP(Path.Combine(instancePath, "fp.txt")).Wait();
                // Copy each item set's elements into TransactionIDList before serialising.
                for (int i = 0; i < fp.fis.Count; i++)
                {
                    for (int j = 0; j < fp.fis[i].Count; j++)
                    {
                        fp.fis[i].TransactionIDList.Add(fp.fis[i][j]);
                    }
                }
                // Transactions are detached while serialising so they are not written into fp.json.
                fp.transactions = null;
                var jsonFP = JsonConvert.SerializeObject(fp);
                File.WriteAllTextAsync(fpFilePath, jsonFP).Wait();
                fp.transactions = transactions;
                log.LogDebug($"fp computed {instanceClass.Label}: {current}/{listOfTrans.Count}");
            }
            return (fp, instanceClass);
        })
        .Where(x => !string.IsNullOrWhiteSpace(x.instanceClass.Uri))
        .ToList();

    // ---- 5. Maximal frequent patterns and PlantUML inputs, per class and threshold ----
    // Thresholds 100 down to 50 (percent); materialised once instead of re-sorted for every class.
    IEnumerable<int> thresholdRange = Enumerable.Range(50, 51).OrderByDescending(x => x).ToList();
#if DEBUG
    thresholdRange = new[] { 95 };
#endif
    log.LogInformation("MFP and images computation...");
    count = 1;
    foreach (var (fp, instanceClass) in fps)
    {
        log.LogInformation($"MFP and images computation {count++}/{fps.Count}");
        var instancePath = Path.Combine(mainDirectory, dataset.Label, instanceClass.Label);

        foreach (var thresholdInt in thresholdRange)
        {
            log.LogInformation($"class: {instanceClass.Label} // threshold: {thresholdInt})");
            var threshold = thresholdInt / 100d;
            var imageFilePath = Path.Combine(instancePath, thresholdInt.ToString());
            Directory.CreateDirectory(imageFilePath);

            var sw = Stopwatch.StartNew();
            var mfps = fp.ComputeMFP(threshold).ToList();
            await fp.SaveMFP(Path.Combine(imageFilePath, "mfp.txt"), mfps);
            sw.Stop();
            log.LogInformation($"{mfps.Count} mfps computed in ({thresholdInt}): {sw.Elapsed.ToPrettyFormat()}");

            sw.Restart();
            var igs = ImageGenerator.GenerateTxtForUml(dataset, instanceClass, threshold, fp,
                serviceProvider, plantUmlJarPath, localGraphvizDotPath, mfps);
            var counter = 0;
            foreach (var ig in igs)
            {
                counter++;
                // The content sent to PlantUML is saved, so images can be regenerated if a problem occurs.
                await ig.SaveUsedClassesAndProperties(
                    Path.Combine(imageFilePath, $"usedClasses_{counter}.json"),
                    Path.Combine(imageFilePath, $"usedProperties_{counter}.json"));
                await ig.SaveContentForPlantUML(Path.Combine(imageFilePath, $"plant_{counter}.txt"));
            }
            log.LogInformation($"images computed in ({thresholdInt}): {sw.Elapsed.ToPrettyFormat()}");
        }

        // Mark the class as processed only once ALL thresholds are done; otherwise an interrupted
        // run would skip the remaining thresholds of this class on restart.
        classesProcessed.Add(instanceClass.Uri);
        log.LogInformation("saving processed classes...");
        await File.WriteAllLinesAsync(classesProcessedPath, classesProcessed);
        log.LogInformation("processed classes saved");
    }

    log.LogInformation("main loop ended!");
}
/// <summary>
/// Builds one <see cref="ImageGenerator"/> per maximal frequent pattern, each holding the
/// PlantUML class-diagram text (<c>contentForUml</c>) plus the property and class labels it used.
/// Patterns are handled from the largest to the smallest (ties broken by transaction count).
/// </summary>
/// <param name="ds">Not read by this method; the dataset reachable from <paramref name="fp"/> is used instead.</param>
/// <param name="instanceClass">Class the diagrams are centred on; its label names the PlantUML class block.</param>
/// <param name="threshold">Not read by this method; kept for signature compatibility.</param>
/// <param name="fp">Frequent patterns; provides the predicate dictionary, the dataset and single-item supports.</param>
/// <param name="serviceProvider">Used to resolve the logger given to each generator.</param>
/// <param name="plantUmlJarPath">Stored on each generator.</param>
/// <param name="localGraphvizDotPath">Stored on each generator.</param>
/// <param name="mfps">Maximal frequent patterns to render, one diagram each.</param>
/// <returns>The generators, in the order the patterns were processed.</returns>
public static List<ImageGenerator> GenerateTxtForUml(Dataset ds, InstanceLabel instanceClass, double threshold, FrequentPattern<int> fp, ServiceProvider serviceProvider, string plantUmlJarPath, string localGraphvizDotPath, IEnumerable<PatternDiscovery.ItemSet<int>> mfps)
{
    var dataset = fp.transactions.dataset;

    // Label of a property/class URI, falling back to a bare InstanceLabel when the URI is unknown.
    InstanceLabel PropertyLabelOf(string uri) =>
        dataset.properties.TryGetValue(uri, out var known) ? known : new InstanceLabel(uri, null, null);

    InstanceLabel ClassLabelOf(string uri) =>
        dataset.classes.TryGetValue(uri, out var known) ? known : new InstanceLabel(uri, null, null);

    // Support (as an integer percentage string) of the first single-item set containing the
    // property, or null when there is none.
    string SupportOf(int id) =>
        fp.fis.Where(x => x.Count == 1 && x.Contains(id))
              .Select(x => Convert.ToInt32(x.Support * 100d).ToString())
              .FirstOrDefault();

    var maximalSets = mfps
        .OrderByDescending(x => x.Count)
        .ThenByDescending(x => x.TransactionCount)
        .ToList();

    var finalResults = new List<ImageGenerator>();
    foreach (var mfp in maximalSets)
    {
        var result = new ImageGenerator();
        result.log = serviceProvider.GetService<ILogger<ImageGenerator>>();
        result.fp = fp;
        result.plantUmlJarPath = plantUmlJarPath;
        result.localGraphvizDotPath = localGraphvizDotPath;
        finalResults.Add(result);

        var cModel = new StringBuilder();
        cModel.AppendLine("@startuml");
        cModel.AppendLine("skinparam linetype ortho");

        var duplicate = new HashSet<string>();
        var usedProp = new HashSet<int>();
        result.usedPropInstanceLabel = new HashSet<InstanceLabel>();
        result.usedClassInstanceLabel = new HashSet<InstanceLabel>();
        var classes = new HashSet<string>();

        // Appends a line to the diagram unless the very same line was already emitted.
        void AppendOnce(string line)
        {
            if (duplicate.Add(line))
            {
                cModel.AppendLine(line);
            }
        }

        // first loop for object properties
        foreach (var id in mfp)
        {
            var property = fp.transactions.intToPredicateDict[id];
            if (!dataset.objectProperties.TryGetValue(property, out var domainAndRange))
            {
                continue;
            }

            var pIL = PropertyLabelOf(property);
            result.usedPropInstanceLabel.Add(pIL);

            var domain = domainAndRange.domain;
            var dIL = ClassLabelOf(domain);
            result.usedClassInstanceLabel.Add(dIL);

            var range = domainAndRange.range;
            if (domain.Equals(range))
            {
                // Self-referencing property: no link is drawn and the id is NOT marked as used,
                // so it can still be listed by the loops below.
                continue;
            }

            var rIL = ClassLabelOf(range);
            result.usedClassInstanceLabel.Add(rIL);

            var link = domainAndRange.dash ? " .. " : " -- ";
            AppendOnce($"{dIL.Label}{link}{rIL.Label} : {pIL.Label} sup:{SupportOf(id)}");

            usedProp.Add(id);
            classes.Add(domain);
            classes.Add(range);
        }

        cModel.AppendLine("class " + instanceClass.Label + "{");

        // second loop for datatype properties
        foreach (var id in mfp)
        {
            var property = fp.transactions.intToPredicateDict[id];
            if (!dataset.dataTypeProperties.TryGetValue(property, out var datatype))
            {
                continue;
            }

            var pIL = PropertyLabelOf(property);
            result.usedPropInstanceLabel.Add(pIL);

            AppendOnce($"{pIL.Label}:{datatype.GetUriFragment()} sup={SupportOf(id)}");
            usedProp.Add(id);
        }

        // third loop for properties without info
        foreach (var id in mfp.Except(usedProp))
        {
            var property = fp.transactions.intToPredicateDict[id];
            var pIL = PropertyLabelOf(property);
            result.usedPropInstanceLabel.Add(pIL);

            AppendOnce($"{pIL.Label} sup={SupportOf(id)}");
        }

        cModel.AppendLine("}");

        // loop for current class hierarchy
        foreach (var line in result.GetAllSuperClasses(instanceClass.Uri))
        {
            if (!string.IsNullOrWhiteSpace(line))
            {
                AppendOnce(line);
            }
        }

        // loop for related classes hierarchy
        foreach (var classUri in classes)
        {
            BuildClassesHierarchy(classUri, result, cModel, duplicate, classes, fp);
        }

        cModel.AppendLine("@enduml");
        result.contentForUml = cModel.ToString();
    }

    return finalResults;
}
/// <summary>
/// Builds the hierarchy of classes for the given class URI into the <paramref name="cModel"/>
/// string builder: appends the PlantUML generalization line
/// <c>superClassLabel &lt;|-- classLabel</c> for the closest super class (the one with the
/// smallest depth value), then repeats the operation for that super class.
/// </summary>
/// <param name="classUri">URI of the class to start from; <c>owl:Thing</c> stops the walk.</param>
/// <param name="result">Generator whose <c>usedClassInstanceLabel</c> set receives every class label met.</param>
/// <param name="cModel">PlantUML text being built.</param>
/// <param name="duplicate">Lines already emitted; a line is appended only when it is not in this set yet.</param>
/// <param name="classes">Classes for which the caller invokes this method itself; the walk does not recurse into them.</param>
/// <param name="fp">Frequent patterns giving access to the dataset (class labels and class depths).</param>
private static void BuildClassesHierarchy(string classUri, ImageGenerator result, StringBuilder cModel, HashSet<string> duplicate, HashSet<string> classes, FrequentPattern<int> fp)
{
    if ("http://www.w3.org/2002/07/owl#Thing".Equals(classUri))
    {
        return;
    }

    var dataset = fp.transactions.dataset;

    var cIL = dataset.classes.TryGetValue(classUri, out var knownClass)
        ? knownClass
        : new InstanceLabel(classUri, null, null);
    result.usedClassInstanceLabel.Add(cIL);

    // A class without any known super class has nothing to link to. The lookup is guarded so a
    // missing entry (or an empty one) ends the walk instead of throwing.
    if (!dataset.classesDepths.TryGetValue(classUri, out var superClassDepths) || !superClassDepths.Any())
    {
        return;
    }

    // Only the closest super class is linked, so the link is emitted once rather than once per
    // super-class entry (which repeated the same work and the same recursion each time).
    var closestSuperClass = superClassDepths.OrderBy(x => x.Value).Select(x => x.Key).First();
    var scIL = dataset.classes.TryGetValue(closestSuperClass, out var knownSuperClass)
        ? knownSuperClass
        : new InstanceLabel(closestSuperClass, null, null);
    result.usedClassInstanceLabel.Add(scIL);

    var strToAdd = scIL.Label + " <|-- " + cIL.Label;
    if (duplicate.Add(strToAdd))
    {
        cModel.AppendLine(strToAdd);
    }

    if (!classes.Contains(scIL.Uri))
    {
        BuildClassesHierarchy(scIL.Uri, result, cModel, duplicate, classes, fp);
    }
}
/// <summary>
/// Adds to <paramref name="frequentPatterns"/> the patterns derived from a conditional item
/// header for the given <paramref name="suffix"/>, in two passes:
/// first one two-item pattern (item, suffix) per header element, carrying the element's support;
/// then, for every node link, the path from the tree top down to that node followed by the
/// suffix, kept only when the path has more than one item and its minimal support reaches
/// <c>min_sup</c>.
/// </summary>
/// <param name="conditionalItemHeader">Header elements to mine; only read here.</param>
/// <param name="frequentPatterns">List receiving the new patterns; never reassigned here.</param>
/// <param name="suffix">Item appended at the end of every generated pattern.</param>
private void mineConditionalFPTree(ref List<ItemHeaderElement> conditionalItemHeader, ref List<FrequentPattern> frequentPatterns, string suffix)
{
    // Pass 1: pair every header item with the suffix.
    foreach (ItemHeaderElement header in conditionalItemHeader)
    {
        var pair = new List<string> { header.itemID, suffix };
        frequentPatterns.Add(new FrequentPattern(pair, header.support));
    }

    // Pass 2: one candidate pattern per node link, built by walking up the parent chain.
    foreach (ItemHeaderElement header in conditionalItemHeader)
    {
        foreach (FPNode link in header.nodeLinks)
        {
            var path = new List<string>();
            int pathSupport = link.support;

            // Climb until the node carrying no item is reached, tracking the smallest support met.
            for (FPNode node = link; node.item != null; node = node.parent)
            {
                pathSupport = min(pathSupport, node.support);
                path.Add(node.item);
            }

            if (path.Count > 1 && pathSupport >= min_sup)
            {
                // Items were collected bottom-up: put them back top-down, then close with the suffix.
                path.Reverse();
                path.Add(suffix);
                frequentPatterns.Add(new FrequentPattern(path, pathSupport));
            }
        }
    }
}