/// <summary>
/// Transforms the clouds of the matrix: applies the cloud-frequency cut-off (low-pass) filter,
/// demotes or zeroes repeating terms, rescales node weights by cloud frequency and removes the
/// terms whose weight fell below <c>settings.microWeightNoiseGate</c>.
/// </summary>
/// <remarks>
/// <c>MinCloudFrequency</c> and <c>MaxCloudFrequency</c> are refreshed from the counter even when
/// the matrix is disabled (<c>settings.isActive == false</c>); in that case no cloud is modified
/// and an empty report is returned.
/// </remarks>
/// <param name="settings">The settings.</param>
/// <param name="logger">The logger.</param>
/// <param name="reductionReportName">Name of the reduction report.</param>
/// <returns>
/// Notes about reduced weights
/// </returns>
public cloudMatrixReductionReport TransformClouds(cloudMatrixSettings settings, ILogBuilder logger, String reductionReportName = "")
{
    cloudMatrixReductionReport reductions = new cloudMatrixReductionReport();
    reductions.name = reductionReportName;

    instanceCountCollection<String> counter = GetCounter(false);

    List<String> passNames = new List<string>();
    List<String> removeNames = new List<string>();
    List<String> removeByLPFNames = new List<string>();
    List<String> setMiniNames = new List<string>();

    MinCloudFrequency = counter.minFreq;
    MaxCloudFrequency = counter.maxFreq;

    Double lowPass = settings.lowPassFilter;

    if (!settings.isActive)
    {
        logger.log("Cloud matrix disabled");
        return reductions;
    }

    if (settings.isFilterInAdaptiveMode)
    {
        // Adaptive mode: the configured value is an offset from the lowest observed cloud frequency,
        // clamped to the [1, MaxCloudFrequency] range.
        lowPass = (MinCloudFrequency - 1) + lowPass;
        if (lowPass > MaxCloudFrequency)
        {
            lowPass = MaxCloudFrequency;
        }
        if (lowPass < 1)
        {
            lowPass = 1;
        }
        logger.log(": Cloud matrix filter in adaptive mode - cut off frequency set: " + lowPass);
    }

    var sorted = counter.getSorted();

    // Terms that received the micro weight must not be rescaled by cloud frequency afterwards.
    HashSet<String> doNotReduceWeight = new HashSet<String>();

    // ---- Low-pass filter: sort every term into pass / set-to-micro-weight / remove lists.
    foreach (String n in sorted)
    {
        if (!settings.doCutOffByCloudFrequency)
        {
            passNames.Add(n);
            continue;
        }

        Boolean aboveCutOff = counter[n] > lowPass;
        if (!aboveCutOff)
        {
            passNames.AddUnique(n);
        }
        else if (settings.doAssignMicroWeightInsteadOfRemoval)
        {
            setMiniNames.AddUnique(n);
            doNotReduceWeight.Add(n);
        }
        else
        {
            removeByLPFNames.AddUnique(n);
        }
    }

    // ---- Initial statistics, taken before any node is modified.
    // The description suffix is appended only once, in the final loop of this method.
    foreach (lemmaSemanticCloud y in this.Get1stKeys())
    {
        y.RebuildIndex();
        reductions.Nodes += y.CountNodes();
        reductions.InitialWeight += y.nodes.Sum(x => x.weight);
    }

    // ---- Weight transformations, cloud by cloud.
    foreach (lemmaSemanticCloud cloud in this.Get1stKeys())
    {
        // Terms above the cut-off keep their node but get the micro weight.
        foreach (String setMini in setMiniNames)
        {
            var node = cloud.GetNode(setMini, true);
            if (node != null)
            {
                reductions.Add(cloud.name, node.name, node.weight, settings.microWeightNoiseGate, cloudMatrixReductionAction.LowPassFilter);
                node.weight = settings.microWeightNoiseGate;
            }
        }

        if (!(settings.doDivideWeightWithCloudFrequency || settings.doUseSquareFunctionOfCF))
        {
            continue;
        }

        foreach (String n in passNames)
        {
            var node = cloud.GetNode(n, true);
            if (node == null)
            {
                continue;
            }

            Double cf = counter[n];
            Boolean isRepeating = cf > 1;

            if (settings.doDemoteAnyRepeatingSecondaryTerm)
            {
                if (isRepeating && node.type == 1)
                {
                    node.type = 0;
                    reductions.Add(cloud.name, node.name, node.weight, node.weight, cloudMatrixReductionAction.Demotion);
                }
            }

            // Removal of a repeating primary term takes precedence over its demotion.
            if (settings.doRemoveAnyRepeatingPrimaryTerm)
            {
                if (isRepeating && node.type == 2)
                {
                    reductions.Add(cloud.name, node.name, node.weight, 0, cloudMatrixReductionAction.Demotion);
                    node.weight = 0;
                }
            }
            else if (settings.doDemoteAnyRepeatingPrimaryTerm)
            {
                if (isRepeating && node.type == 2)
                {
                    reductions.Add(cloud.name, node.name, node.weight, node.weight, cloudMatrixReductionAction.Demotion);
                    node.type = 1;
                }
            }

            if (doNotReduceWeight.Contains(n))
            {
                continue;
            }

            if (node.weight > 0 && isRepeating)
            {
                Double weightBefore = node.weight;
                Double divisor = settings.doUseSquareFunctionOfCF ? cf * cf : cf;
                node.weight = node.weight.GetRatio(divisor);

                if (weightBefore > node.weight)
                {
                    reductions.Add(cloud.name, node.name, weightBefore, node.weight, cloudMatrixReductionAction.CF_function);
                }
            }

            // A term below the noise gate in any cloud is scheduled for removal from every cloud.
            if (node.weight < settings.microWeightNoiseGate)
            {
                removeNames.AddUnique(n);
            }
        }
    }

    // ---- Removal of the scheduled terms.
    foreach (lemmaSemanticCloud y in this.Get1stKeys())
    {
        Int32 rem = RemoveTermsFromCloud(y, removeNames, cloudMatrixReductionAction.Microweight, reductions);
        rem += RemoveTermsFromCloud(y, removeByLPFNames, cloudMatrixReductionAction.LPFRemoval, reductions);

        if (rem > 0)
        {
            logger.log(y.className + ": Terms removed[" + rem.ToString("D6") + "] left[" + y.CountNodes().ToString("D6") + "]");
        }
    }

    // ---- Final statistics and description update.
    foreach (lemmaSemanticCloud y in this.Get1stKeys())
    {
        y.RebuildIndex();
        y.description = y.description + " filtered version of cloud";
        reductions.ReducedWeight += y.nodes.Sum(x => x.weight);
    }

    logger.log("Clouds transformation done.");
    return reductions;
}

/// <summary>
/// Removes the given terms from the cloud and records every successful removal in the report.
/// </summary>
/// <param name="cloud">The cloud to remove the terms from.</param>
/// <param name="terms">Names of the terms to remove.</param>
/// <param name="action">The reduction action to record for each removed term.</param>
/// <param name="reductions">The report receiving the removal entries.</param>
/// <returns>Number of terms for which <c>Remove</c> reported success.</returns>
private static Int32 RemoveTermsFromCloud(lemmaSemanticCloud cloud, IEnumerable<String> terms, cloudMatrixReductionAction action, cloudMatrixReductionReport reductions)
{
    Int32 removed = 0;
    foreach (String term in terms)
    {
        // The node is fetched before removal so its name and weight can still be reported.
        var node = cloud.GetNode(term);
        if (!cloud.Remove(term))
        {
            continue;
        }

        removed++;

        // GetNode may return null (it is null-checked elsewhere in this class); skip the report
        // entry instead of failing with a NullReferenceException.
        if (node != null)
        {
            reductions.Add(cloud.name, node.name, node.weight, 0, action);
        }
    }
    return removed;
}
/// <summary>
/// Builds a data table describing the cloud matrix: one row per cloud, one value column per cloud
/// (cells come from <c>GetCellNumber</c>, the diagonal is 0), followed by lemma counts and link
/// rates before and after reduction. When <paramref name="type"/> has the <c>overlapValue</c> flag,
/// an extra "Weight sums" row is appended.
/// </summary>
/// <remarks>
/// Clouds with an empty <c>className</c> or <c>name</c> are assigned default values here, so the
/// method modifies the clouds it reports on.
/// </remarks>
/// <param name="settings">The settings.</param>
/// <param name="type">The type.</param>
/// <returns>The populated table, including descriptive extra lines and additional info entries.</returns>
public DataTable BuildTable(cloudMatrixSettings settings, cloudMatrixDataTableType type)
{
    DataTable table = new DataTable();
    table.SetTitle("CloudMatrix_" + name);
    table.SetDescription(description.or("Semantic cloud matrix report"));

    List<lemmaSemanticCloud> clouds = this.Get1stKeys().ToList();

    Int32 ci = 0;
    foreach (lemmaSemanticCloud cl in clouds)
    {
        // Assign the fallback names first, so the info entry below never records an empty class name.
        if (cl.className.isNullOrEmpty())
        {
            cl.className = "C" + ci.ToString("D2");
        }
        if (cl.name.isNullOrEmpty())
        {
            cl.name = cl.className;
        }

        table.SetAdditionalInfoEntry("Cloud " + ci, cl.className);
        ci++;
    }

    instanceCountCollection<String> counter = GetCounter(type.HasFlag(cloudMatrixDataTableType.initialState));

    // Normalized values are shown with five decimals; absolute values use the default format.
    String format = type.HasFlag(cloudMatrixDataTableType.normalizedValues) ? "F5" : "";

    table.Add("Class", "Name of DocumentSetClass attached to the semantic clouds", "", typeof(String), imbSCI.Core.enums.dataPointImportance.normal);

    for (int i = 0; i < clouds.Count; i++)
    {
        table.Add(clouds[i].className, clouds[i].description, "C_" + i.ToString(), typeof(Double), imbSCI.Core.enums.dataPointImportance.normal, format, clouds[i].className);
    }

    table.Add("LemmasInitial", "Number of lemmas in the cloud, before reduction", "", typeof(Int32), imbSCI.Core.enums.dataPointImportance.important, "", "Lemmas - initial");
    table.Add("LinkRateInitial", "Link per node ratio, initial state", "", typeof(Double), imbSCI.Core.enums.dataPointImportance.normal, "F3", "Link rate initial");
    table.Add("LemmasAfter", "Number of lemmas in the cloud, after reduction", "", typeof(Int32), imbSCI.Core.enums.dataPointImportance.important, "", "Lemmas - after");
    // The link rate is a ratio, so the column is Double like LinkRateInitial.
    table.Add("LinkRateAfter", "Link per node ratio, after reduction", "", typeof(Double), imbSCI.Core.enums.dataPointImportance.normal, "F3", "Link rate after");

    for (int y = 0; y < clouds.Count; y++)
    {
        lemmaSemanticCloud rowCloud = clouds[y];

        DataRow dr = table.NewRow();
        dr["Class"] = rowCloud.className;

        for (int x = 0; x < clouds.Count; x++)
        {
            if (y == x)
            {
                dr[clouds[x].className] = 0;
            }
            else
            {
                dr[clouds[x].className] = GetCellNumber(clouds[x], rowCloud, type, counter);
            }
        }

        dr["LemmasInitial"] = numberOfLemmas[rowCloud];
        dr["LemmasAfter"] = rowCloud.CountNodes();
        dr["LinkRateInitial"] = numberOfLinks[rowCloud].GetRatio(numberOfLemmas[rowCloud]);
        dr["LinkRateAfter"] = rowCloud.CountLinks().GetRatio(rowCloud.CountNodes());

        table.Rows.Add(dr);
    }

    if (type.HasFlag(cloudMatrixDataTableType.overlapValue))
    {
        DataRow dr = table.NewRow();
        dr["Class"] = "Weight sums";

        for (int y = 0; y < clouds.Count; y++)
        {
            Double sum = 0;
            for (int x = 0; x < clouds.Count; x++)
            {
                sum += this[clouds[x], clouds[y]].Sum(c => c.weight);
            }
            dr[clouds[y].className] = sum;
        }

        dr["LemmasInitial"] = 0;
        dr["LemmasAfter"] = 0;
        dr["LinkRateInitial"] = 0;
        dr["LinkRateAfter"] = 0;

        table.Rows.Add(dr);
    }

    // Every flag set in the requested type is recorded as an additional info entry.
    var ty = type.getEnumListFromFlags<cloudMatrixDataTableType>();
    foreach (cloudMatrixDataTableType t in ty)
    {
        table.SetAdditionalInfoEntry(t.toStringSafe(), t.toStringSafe().imbTitleCamelOperation(true));
    }

    if (type.HasFlag(cloudMatrixDataTableType.initialState))
    {
        table.AddExtra("The table shows the state of the matrix before transformation (filtration).");
    }
    else
    {
        table.AddExtra("The table shows the state of the matrix after transformation (filtration).");
    }

    if (type.HasFlag(cloudMatrixDataTableType.overlapSize))
    {
        table.AddExtra("Values in the table are showing number of lemmas that are common to the clouds (of x and y axis).");
    }
    else if (type.HasFlag(cloudMatrixDataTableType.maxCloudFrequency))
    {
        table.AddExtra("Values in the table are showing highest Cloud Frequency for a term (at x and y axis).");
    }
    else if (type.HasFlag(cloudMatrixDataTableType.minCloudFrequency))
    {
        table.AddExtra("Values in the table are showing lowest Cloud Frequency for a term (at x and y axis).");
    }
    else if (type.HasFlag(cloudMatrixDataTableType.overlapValue))
    {
        table.AddExtra("Values in the table are showing sum of local weights for overlapping terms. The last row contains sum of weights for the class cloud.");
    }

    if (type.HasFlag(cloudMatrixDataTableType.normalizedValues))
    {
        if (type.HasFlag(cloudMatrixDataTableType.overlapSize))
        {
            table.AddExtra("The values are normalized to 0-1, where 1 is overlap size in initial state for each x,y cell.");
        }
        else
        {
            table.AddExtra("The values are normalized to 0-1.");
        }
    }
    else
    {
        table.AddExtra("The values are absolute.");
    }

    table.SetAdditionalInfoEntry("Max. CF", MaxCloudFrequency);
    table.SetAdditionalInfoEntry("Min. CF", MinCloudFrequency);
    table.SetAdditionalInfoEntry("Max. Overlap", MaxOverlap);
    table.SetAdditionalInfoEntry("Min. Overlap", MinOverlap);

    return table;
}