Ejemplo n.º 1
0
        /// <summary>
        /// Builds a placeholder report row that introduces an experiment: only the experiment
        /// name (also used as UID) and the fold count carry real values, the descriptive
        /// columns are filled with dashes and the comment marks the row as a separator.
        /// </summary>
        /// <param name="experimentName">Experiment name, stored in both <c>Experiment</c> and <c>UID</c>.</param>
        /// <param name="folds">Number of folds, stored in <c>Folds</c>.</param>
        /// <returns>A new <c>secondaryReportOnFVE</c> instance populated as described above.</returns>
        public static secondaryReportOnFVE GetExperimentIntroductionLine(String experimentName, Int32 folds)
        {
            // Filler used for the descriptive columns that have no meaning on this row.
            const String filler = "------";

            // Initializer entries run in the same order as the original assignments.
            return new secondaryReportOnFVE
            {
                Experiment     = experimentName,
                Folds          = folds,
                FVEHash        = "------------",
                FVEModel       = filler,
                FVPType        = filler,
                HTMLTagFactors = filler,
                UID            = experimentName,
                Comment        = "[separaptor row]"
            };
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds one <c>secondaryReportOnFVE</c> row for every semantic feature vector extractor
        /// of the experiment, adds each row to <c>items</c>, and remembers the row with the
        /// highest <c>F1Score</c> in <c>topPerformer</c>.
        /// </summary>
        /// <remarks>
        /// For each extractor the row receives: the experiment setup (name, folds, randomization,
        /// classifier count), the classifier and F1 score of the top-performer entries whose name
        /// contains the extractor name (the last matching entry wins), mean and standard deviation
        /// of F1 over all top-performer entries, and averaged semantic cloud metrics.
        /// </remarks>
        /// <param name="logger">Log builder handed to <c>LoadSemanticClouds</c> and used to report each processed extractor.</param>
        public void Process(ILogBuilder logger)
        {
            // Best F1 score seen so far; MinValue so the first row is always accepted.
            Double F1 = Double.MinValue;

            foreach (wlfClassifier.semanticFVExtractor fve in experiment.featureVectorExtractors_semantic)
            {
                secondaryReportOnFVE secReport = new secondaryReportOnFVE();
                secReport.Experiment = experiment.name;
                secReport.Folds      = experiment.validationSetup.k;

                // Report path with the root folder path removed from it.
                String p = folder.path.Remove(folderRoot.path);
                secReport.Path = p;

                LoadSemanticClouds(fve, logger);

                secReport.UpdateSecondaryRecord(fve);

                secReport.Randomized  = experiment.validationSetup.randomize;
                secReport.Classifiers = experiment.classifiers.Count;

                // Accumulates every F1 value so that mean and deviation can be reported below.
                rangeFinderWithData ranger = new rangeFinderWithData();

                if (useDataTable)
                {
                    var dataTable = topperformers.GetDataTable();
                    foreach (DataRow topReport in dataTable.Rows)
                    {
                        String name = topReport["Name"].toStringSafe();

                        // TryParse leaves f1 at 0 when the cell cannot be parsed; that 0 is still learned.
                        // NOTE(review): parsing uses the current culture - confirm it matches how the table was written.
                        Double f1 = 0;
                        Double.TryParse(topReport["F1measure"].ToString(), out f1);

                        if (name.Contains(fve.name))
                        {
                            secReport.Classifier = topReport["Classifier"].ToString();
                            secReport.F1Score    = f1;
                        }
                        ranger.Learn(f1);
                    }
                }
                else
                {
                    foreach (DocumentSetCaseCollectionReport topReport in topperformers.GetList())
                    {
                        ranger.Learn(topReport.F1measure);

                        if (topReport.Name.Contains(fve.name))
                        {
                            secReport.Classifier = topReport.Classifier;
                            secReport.F1Score    = topReport.F1measure;
                        }
                    }
                }

                secReport.F1ScoreDeviation = ranger.doubleEntries.GetStdDeviation(false);
                secReport.F1ScoreMean      = ranger.Average;

                // <------------------------------------ COMPUTING THE CLOUD METRICS
                Int32  cn         = 0; // number of semantic clouds visited for this extractor
                Double nodeCount  = 0;
                Double linkCount  = 0;
                Double pingLength = 0;
                if (semanticClouds.ContainsKey(fve.name))
                {
                    foreach (lemmaSemanticCloud sc in semanticClouds[fve.name])
                    {
                        nodeCount += sc.nodes.Count();
                        linkCount += sc.links.Count();

                        pingLength += freeGraphExtensions.PingGraphSize(sc, sc.primaryNodes, true, freeGraphPingType.maximumPingLength);
                        cn++;
                    }

                    // Turn the sums into per-cloud averages. All three use the same divisor:
                    // the number of clouds actually enumerated above.
                    nodeCount  = nodeCount.GetRatio(cn);
                    linkCount  = linkCount.GetRatio(cn);
                    pingLength = pingLength.GetRatio(cn);
                }

                // Link ratio and graph depth are reported relative to the average node count.
                secReport.NodeCount  = nodeCount;
                secReport.LinkRatio  = linkCount.GetRatio(nodeCount);
                secReport.GraphDepth = pingLength.GetRatio(nodeCount);

                // <------------------------------------------------------ FINISHED WITH CLOUD METRICS

                items.Add(secReport);

                // Strict comparison: on a tie the earlier row stays the top performer.
                if (secReport.F1Score > F1)
                {
                    F1           = secReport.F1Score;
                    topPerformer = secReport;
                }

                logger.log("Collected data on [" + fve.name + "] [" + fve.description + "]");
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Generates the readme file of the report set directory: a fixed description of the
        /// directory layout followed by a numbered list of the experiments in <c>items</c>.
        /// </summary>
        /// <remarks>
        /// After the text file is written, the method attaches the subfolders of <c>folder</c>,
        /// optionally (when <c>DoGenerateGraph</c> is set) saves a DGML graph of the directory
        /// structure as <c>directoryGraph.dgml</c>, and finally calls
        /// <c>generateReadmeFiles</c> on the folder node.
        /// </remarks>
        /// <param name="filename">Name of the readme file; its path is resolved through <c>folder.pathFor</c> in overwrite mode.</param>
        public void GenerateParentReadmeFile(String filename = "experiments_readme.txt")
        {
            // The readme is written for the report folder itself.
            folderNode node = folder;

            StringBuilder sb = new StringBuilder();

            // <--------- ---- ---- -- ------ DESCRIPTION GENERATION
            String ln = "# Report package [" + node.caption + "]";

            sb.AppendLine(ln);

            // Horizontal rule as wide as the title line; reused as a section separator below.
            String line = "-".Repeat(ln.Length);

            sb.AppendLine(line);

            sb.AppendLine("This directory contains integral reports on performed experiments on web site classification, done with imbWBI library - part of imbVeles Framework.");
            sb.AppendLine("Each subdirectory contains reports on one particular configuration, tested on a range of semantic term expansion (Stx) values.");
            sb.AppendLine(line);
            sb.AppendLine();
            sb.AppendLine("Main directory structure:");
            sb.AppendLine("-- exp_[experient code name]");
            sb.AppendLine("-- exp_[experient code name]");
            sb.AppendLine("--            ...           ");
            sb.AppendLine("-- Summary");
            sb.AppendLine();
            sb.AppendLine(line);
            sb.AppendLine();
            sb.AppendLine("## Experiment subdirectories");
            sb.AppendLine("In the subdirectories named exp_[experiment code name] you will find summary text, spreadsheet and XML data files - describing the experiment.");
            sb.AppendLine("Wherever an Excel spreadsheet file is generated (in XLSX format), you'll find a subfolder named [data] where the same data is exported in Comma Separated Values (CSV) format for easier consumption by 3rd party software.");
            sb.AppendLine("-- In Excel spreadsheet files you'll find second sheet called LEGEND, where each column in the report is explained. The same column descriptions are saved in plain text format within [data] subfolders.");
            sb.AppendLine("In each subdirectory of this data set, across the complete directory tree, you will find [directory_readme.txt] where content of the subdirectory is described.");
            sb.AppendLine("Exact content of the experiment report subdirectores varies depending on reporting options used and version of the software.");
            sb.AppendLine();
            sb.AppendLine("However, there is general directory tree structure:");
            sb.AppendLine("-- exp_[experient code name]");
            sb.AppendLine("-- -- [name of FVE]_[sample randomization tag]_E[Stx]                    <- directory with report on experiment version, performed with Stx number of semantic term expansion steps");
            sb.AppendLine("-- -- -- [name of FVE]_[sample randomization tag]_E[Stx]00[fold id]      <- directory with report on [fold id] fold of k-fold schema used");
            sb.AppendLine("-- -- -- -- cases                                                        <- XML serialized data on Category Knowledge, constructed in this fold (from the training sample subset)");
            sb.AppendLine("-- -- General                                                            <- general report on the complete data set, different in scope depending on reporting options used");
            sb.AppendLine("-- -- SharedKnowledge                                                    <- XML serialized data (Lemma Tables) on all Cases in the data set");
            sb.AppendLine("-- -- errors                                <- Here you might find logs on exceptions, if an error during execution happen. ");
            sb.AppendLine("                                            | However, if the note.txt file is not empty - it should have earlier time/date creation stamp then the rest of the report.");
            sb.AppendLine("                                            | Only reports with error notes that should exist in this data set are the ones where Semantic Cloud");
            sb.AppendLine("                                            | failed to be created because of small amount of training data in the fold.");
            sb.AppendLine();
            sb.AppendLine("## Summary directory");
            sb.AppendLine("In the [Summary] directory, you will find aggregated overview reports on all experiments in this group. The reports are created separately for each [k] number of k-fold schemas.");
            sb.AppendLine("These reports are given in spreadsheet format (Excel file, and CSV in [data] subfolder) and in form of native XML serialized objects.");
            sb.AppendLine("Additional remarks:");
            sb.AppendLine("-- Reports may contain some additional metrics, subreports and other records, not mentioned nor explained in the research article. Like: ModelMetrics.xml. ");
            sb.AppendLine("-- These are some unfinished ideas, models for FVE evaluation that, at the end, were not used for research conclusions nor system validation.");
            sb.AppendLine();

            sb.AppendLine(line);

            sb.AppendLine("## List of the experiments contained in this data set");

            // 1-based ordinal printed in front of every experiment, formatted with at least two digits.
            Int32 c = 1;

            foreach (var item in items)
            {
                ln = "[" + c.ToString("D2") + "] " + item.experiment.name + "               (sub experiments: " + item.items.Count.ToString() + ")";
                sb.AppendLine(ln);

                // The former "best F1" tracking was removed here: its result was never used,
                // and it dereferenced item.topPerformer, which would throw when that was null.

                c++;
            }

            sb.AppendLine(line);
            sb.AppendLine();
            // NOTE(review): the e-mail part of the next literal looks like a masked placeholder - confirm the intended text.
            sb.AppendLine("imbVeles Framework | imbWBI | GNU GPL v3.0 | http://blog.veles.rs | Goran Grubić | [email protected]");
            sb.AppendLine(line);

            // Sample the clock once so the date and time parts always describe the same instant.
            DateTime generated = DateTime.Now;
            sb.AppendLine("File generated: " + generated.ToLongDateString() + ", " + generated.ToLongTimeString());

            // <--------- WRITING THE FILE

            String p = node.pathFor(filename, imbSCI.Data.enums.getWritableFileMode.overwrite, "ReadMe file describing experiment reports contained in this report set.");

            File.WriteAllText(p, sb.ToString());

            node.AttachSubfolders();

            if (DoGenerateGraph)
            {
                var dgml = node.GetDirectedGraph(false, false, false, 3);
                dgml.Layout         = imbSCI.Graph.DGML.enums.GraphLayoutEnum.DependencyMatrix;
                dgml.GraphDirection = imbSCI.Graph.DGML.enums.GraphDirectionEnum.LeftToRight;
                dgml.Save(node.pathFor("directoryGraph.dgml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Directory structure Directed Graph in Microsoft DGML format - open in Visual Studio", true));
            }

            node.generateReadmeFiles(appManager.AppInfo);
        }