Beispiel #1
0
        /// <summary>
        /// Applies the cloud-frequency based filtration to all semantic clouds of the matrix: the low pass (cut-off) filter,
        /// demotion or removal of repeating terms, weight reduction by cloud frequency and the micro-weight noise gate.
        /// </summary>
        /// <remarks>
        /// <see cref="MinCloudFrequency"/> and <see cref="MaxCloudFrequency"/> are refreshed from the counter even when
        /// the matrix is disabled via <c>settings.isActive</c>; in that case the clouds themselves are left untouched.
        /// </remarks>
        /// <param name="settings">The settings controlling which filtration steps are performed.</param>
        /// <param name="logger">The logger receiving progress messages.</param>
        /// <param name="reductionReportName">Name assigned to the returned reduction report.</param>
        /// <returns>
        /// Report with an entry for every weight change, demotion and removal that was performed
        /// </returns>
        public cloudMatrixReductionReport TransformClouds(cloudMatrixSettings settings, ILogBuilder logger, String reductionReportName = "")
        {
            cloudMatrixReductionReport reductions = new cloudMatrixReductionReport();
            reductions.name = reductionReportName;

            instanceCountCollection <String> counter = GetCounter(false);

            List <String> passNames        = new List <string>(); // terms that passed the low pass filter
            List <String> removeNames      = new List <string>(); // terms that fell below the micro-weight noise gate
            List <String> removeByLPFNames = new List <string>(); // terms rejected by the low pass filter
            List <String> setMiniNames     = new List <string>(); // rejected terms that get the micro weight instead of removal

            MinCloudFrequency = counter.minFreq;
            MaxCloudFrequency = counter.maxFreq;

            Double lowPass = settings.lowPassFilter;

            if (!settings.isActive)
            {
                logger.log("Cloud matrix disabled");
                return(reductions);
            }

            if (settings.isFilterInAdaptiveMode)
            {
                // adaptive mode: the configured value is an offset above the lowest observed frequency,
                // clamped to the range [1, MaxCloudFrequency] (the lower bound is applied last, so it wins)
                lowPass = (MinCloudFrequency - 1) + lowPass;
                if (lowPass > MaxCloudFrequency)
                {
                    lowPass = MaxCloudFrequency;
                }
                if (lowPass < 1)
                {
                    lowPass = 1;
                }
                logger.log(": Cloud matrix filter in adaptive mode - cut off frequency set: " + lowPass);
            }

            // <------------------------------------------------------------------------------------------ LOW PASS FILTER LIST
            foreach (String n in counter.getSorted())
            {
                if (!settings.doCutOffByCloudFrequency)
                {
                    passNames.Add(n);
                    continue;
                }

                if (counter[n] > lowPass)
                {
                    // frequency above the cut-off: either keep the term at micro weight, or schedule its removal
                    if (settings.doAssignMicroWeightInsteadOfRemoval)
                    {
                        setMiniNames.AddUnique(n);
                    }
                    else
                    {
                        removeByLPFNames.AddUnique(n);
                    }
                }
                else
                {
                    passNames.AddUnique(n);
                }
            }

            // <------------------------------------------------------------------------------------------ INITIAL STATE
            foreach (lemmaSemanticCloud y in this.Get1stKeys())
            {
                y.RebuildIndex();

                reductions.Nodes         += y.CountNodes();
                reductions.InitialWeight += y.nodes.Sum(x => x.weight);
            }

            // <------------------------------------------------------------------------------------------ WEIGHT TRANSFORMATION
            foreach (lemmaSemanticCloud cloud in this.Get1stKeys())
            {
                // <--- apply LPF: terms above the cut-off are pushed down to the noise gate weight
                foreach (String setMini in setMiniNames)
                {
                    var node = cloud.GetNode(setMini, true);
                    if (node != null)
                    {
                        reductions.Add(cloud.name, node.name, node.weight, settings.microWeightNoiseGate, cloudMatrixReductionAction.LowPassFilter);
                        node.weight = settings.microWeightNoiseGate;
                    }
                }

                if (!(settings.doDivideWeightWithCloudFrequency || settings.doUseSquareFunctionOfCF))
                {
                    continue;
                }

                foreach (String n in passNames)
                {
                    var node = cloud.GetNode(n, true);
                    if (node == null)
                    {
                        continue;
                    }

                    Double cf = counter[n];

                    if (cf > 1)
                    {
                        // secondary terms are handled before primary ones, so a primary term demoted below
                        // is not demoted a second time within the same pass
                        if (settings.doDemoteAnyRepeatingSecondaryTerm && node.type == 1)
                        {
                            node.type = 0;
                            reductions.Add(cloud.name, node.name, node.weight, node.weight, cloudMatrixReductionAction.Demotion);
                        }

                        if (settings.doRemoveAnyRepeatingPrimaryTerm)
                        {
                            if (node.type == 2)
                            {
                                reductions.Add(cloud.name, node.name, node.weight, 0, cloudMatrixReductionAction.Demotion);
                                node.weight = 0;
                            }
                        }
                        else if (settings.doDemoteAnyRepeatingPrimaryTerm && node.type == 2)
                        {
                            reductions.Add(cloud.name, node.name, node.weight, node.weight, cloudMatrixReductionAction.Demotion);
                            node.type = 1;
                        }

                        if (node.weight > 0)
                        {
                            // divide the weight by CF, or by CF squared when the square function is selected
                            Double nw      = node.weight;
                            Double divisor = settings.doUseSquareFunctionOfCF ? cf * cf : cf;

                            node.weight = node.weight.GetRatio(divisor);

                            if (nw > node.weight)
                            {
                                reductions.Add(cloud.name, node.name, nw, node.weight, cloudMatrixReductionAction.CF_function);
                            }
                        }
                    }

                    // noise gate: the list is shared by all clouds, so a term that drops below the gate
                    // in any cloud is later removed from every cloud that contains it
                    if (node.weight < settings.microWeightNoiseGate)
                    {
                        removeNames.AddUnique(n);
                    }
                }
            }

            // <------------------------------------------------------------------------------------------ REMOVAL
            foreach (lemmaSemanticCloud y in this.Get1stKeys())
            {
                Int32 rem = RemoveTermsFromCloud(y, removeNames, cloudMatrixReductionAction.Microweight, reductions)
                          + RemoveTermsFromCloud(y, removeByLPFNames, cloudMatrixReductionAction.LPFRemoval, reductions);

                if (rem > 0)
                {
                    logger.log(y.className + ": Terms removed[" + rem.ToString("D6") + "] left[" + y.CountNodes().ToString("D6") + "]");
                }
            }

            // <------------------------------------------------------------------------------------------ FINAL STATE
            foreach (lemmaSemanticCloud y in this.Get1stKeys())
            {
                y.RebuildIndex();

                // the description is marked only once, after the filtration is complete
                y.description = y.description + " filtered version of cloud";

                reductions.ReducedWeight += y.nodes.Sum(x => x.weight);
            }

            logger.log("Clouds transformation done.");

            return(reductions);
        }

        /// <summary>
        /// Removes the specified terms from the cloud and records each successful removal in the reduction report.
        /// </summary>
        /// <param name="cloud">The cloud to remove the terms from.</param>
        /// <param name="terms">Names of the terms to remove.</param>
        /// <param name="action">The action under which the removals are reported.</param>
        /// <param name="reductions">The report receiving one entry per removed term.</param>
        /// <returns>Number of terms that were actually removed from the cloud</returns>
        private static Int32 RemoveTermsFromCloud(lemmaSemanticCloud cloud, List <String> terms, cloudMatrixReductionAction action, cloudMatrixReductionReport reductions)
        {
            Int32 removed = 0;

            foreach (String term in terms)
            {
                // the node is looked up before the removal, because the report needs its name and weight
                var node = cloud.GetNode(term);

                if (!cloud.Remove(term))
                {
                    continue;
                }

                removed++;

                if (node != null)
                {
                    reductions.Add(cloud.name, node.name, node.weight, 0, action);
                }
                else
                {
                    // defensive: the lookup found nothing although the removal succeeded; report by term name
                    reductions.Add(cloud.name, term, 0, 0, action);
                }
            }

            return(removed);
        }
Beispiel #2
0
        /// <summary>
        /// Builds a report table of the cloud matrix: one row and one column per semantic cloud, followed by columns with
        /// lemma counts and link-per-node ratios for the initial and the current state of each cloud.
        /// </summary>
        /// <remarks>
        /// Clouds without a class name get a generated one ("C" + two digit index), and clouds without a name get their
        /// class name; these assignments are written back to the cloud instances.
        /// </remarks>
        /// <param name="settings">The settings (not read by this method).</param>
        /// <param name="type">Flags selecting what the cells show and which explanatory notes are attached to the table.</param>
        /// <returns>The populated data table</returns>
        public DataTable BuildTable(cloudMatrixSettings settings, cloudMatrixDataTableType type)
        {
            DataTable table = new DataTable();

            table.SetTitle("CloudMatrix_" + name);
            table.SetDescription(description.or("Semantic cloud matrix report"));

            List <lemmaSemanticCloud> clouds = this.Get1stKeys().ToList();

            for (int ci = 0; ci < clouds.Count; ci++)
            {
                lemmaSemanticCloud cl = clouds[ci];

                // fallback names have to be assigned first, otherwise the info entry below and
                // the column names further down would be empty for unnamed clouds
                if (cl.className.isNullOrEmpty())
                {
                    cl.className = "C" + ci.ToString("D2");
                }
                if (cl.name.isNullOrEmpty())
                {
                    cl.name = cl.className;
                }

                table.SetAdditionalInfoEntry("Cloud " + ci, cl.className);
            }

            instanceCountCollection <String> counter = GetCounter(type.HasFlag(cloudMatrixDataTableType.initialState));

            // normalized values are shown with five decimals, absolute values with the default format
            String format = type.HasFlag(cloudMatrixDataTableType.normalizedValues) ? "F5" : "";

            table.Add("Class", "Name of DocumentSetClass attached to the semantic clouds", "", typeof(String), imbSCI.Core.enums.dataPointImportance.normal);

            for (int i = 0; i < clouds.Count; i++)
            {
                table.Add(clouds[i].className, clouds[i].description, "C_" + i.ToString(), typeof(Double), imbSCI.Core.enums.dataPointImportance.normal, format, clouds[i].className);
            }

            table.Add("LemmasInitial", "Number of lemmas in the cloud, before reduction", "", typeof(Int32), imbSCI.Core.enums.dataPointImportance.important, "", "Lemmas - initial");

            table.Add("LinkRateInitial", "Link per node ratio, initial state", "", typeof(Double), imbSCI.Core.enums.dataPointImportance.normal, "F3", "Link rate initial");
            table.Add("LemmasAfter", "Number of lemmas in the cloud, after reduction", "", typeof(Int32), imbSCI.Core.enums.dataPointImportance.important, "", "Lemmas - after");

            // the link rate is a ratio, so the column has to be Double (as LinkRateInitial) to keep the fraction
            table.Add("LinkRateAfter", "Link per node ratio, after reduction", "", typeof(Double), imbSCI.Core.enums.dataPointImportance.normal, "F3", "Link rate after");

            // one row per cloud; the diagonal (cloud against itself) is reported as zero
            for (int y = 0; y < clouds.Count; y++)
            {
                lemmaSemanticCloud rowCloud = clouds[y];
                DataRow dr = table.NewRow();

                dr["Class"] = rowCloud.className;

                for (int x = 0; x < clouds.Count; x++)
                {
                    if (y == x)
                    {
                        dr[clouds[x].className] = 0;
                    }
                    else
                    {
                        dr[clouds[x].className] = GetCellNumber(clouds[x], rowCloud, type, counter);
                    }
                }

                dr["LemmasInitial"] = numberOfLemmas[rowCloud];
                dr["LemmasAfter"]   = rowCloud.CountNodes();

                dr["LinkRateInitial"] = numberOfLinks[rowCloud].GetRatio(numberOfLemmas[rowCloud]);
                dr["LinkRateAfter"]   = rowCloud.CountLinks().GetRatio(rowCloud.CountNodes());

                table.Rows.Add(dr);
            }

            if (type.HasFlag(cloudMatrixDataTableType.overlapValue))
            {
                // closing row: for every column, the sum of weights over all cells of that column
                DataRow dr = table.NewRow();

                dr["Class"] = "Weight sums";

                for (int y = 0; y < clouds.Count; y++)
                {
                    Double sum = 0;
                    for (int x = 0; x < clouds.Count; x++)
                    {
                        sum += this[clouds[x], clouds[y]].Sum(c => c.weight);
                    }
                    dr[clouds[y].className] = sum;
                }

                dr["LemmasInitial"] = 0;
                dr["LemmasAfter"]   = 0;

                dr["LinkRateInitial"] = 0;
                dr["LinkRateAfter"]   = 0;

                table.Rows.Add(dr);
            }

            // every flag set in the type is listed in the additional info of the table
            var ty = type.getEnumListFromFlags <cloudMatrixDataTableType>();

            foreach (cloudMatrixDataTableType t in ty)
            {
                table.SetAdditionalInfoEntry(t.toStringSafe(), t.toStringSafe().imbTitleCamelOperation(true));
            }

            if (type.HasFlag(cloudMatrixDataTableType.initialState))
            {
                table.AddExtra("The table shows the state of the matrix before transformation (filtration).");
            }
            else
            {
                table.AddExtra("The table shows the state of the matrix after transformation (filtration).");
            }

            // only the first matching flag gets its explanation attached
            if (type.HasFlag(cloudMatrixDataTableType.overlapSize))
            {
                table.AddExtra("Values in the table are showing number of lemmas that are common to the clouds (of x and y axis).");
            }
            else if (type.HasFlag(cloudMatrixDataTableType.maxCloudFrequency))
            {
                table.AddExtra("Values in the table are showing highest Cloud Frequency for a term (at x and y axis).");
            }
            else if (type.HasFlag(cloudMatrixDataTableType.minCloudFrequency))
            {
                table.AddExtra("Values in the table are showing lowest Cloud Frequency for a term (at x and y axis).");
            }
            else if (type.HasFlag(cloudMatrixDataTableType.overlapValue))
            {
                table.AddExtra("Values in the table are showing sum of local weights for overlapping terms. The last row contains sum of weights for the class cloud.");
            }

            if (type.HasFlag(cloudMatrixDataTableType.normalizedValues))
            {
                if (type.HasFlag(cloudMatrixDataTableType.overlapSize))
                {
                    table.AddExtra("The values are normalized to 0-1, where 1 is overlap size in initial state for each x,y cell.");
                }
                else
                {
                    table.AddExtra("The values are normalized to 0-1.");
                }
            }
            else
            {
                table.AddExtra("The values are absolute.");
            }

            table.SetAdditionalInfoEntry("Max. CF", MaxCloudFrequency);
            table.SetAdditionalInfoEntry("Min. CF", MinCloudFrequency);
            table.SetAdditionalInfoEntry("Max. Overlap", MaxOverlap);
            table.SetAdditionalInfoEntry("Min. Overlap", MinOverlap);
            return(table);
        }