Ejemplo n.º 1
0
        /// <summary>
        /// Loads the files returned by <c>Tool.getFilenames(rootDir, language.fileRegex)</c>,
        /// keeps the documents whose <c>tree</c> is not null, and calls <c>validate</c> once
        /// per remaining document, collecting the three components of each result.
        /// </summary>
        /// <param name="injectWSFeatures">Passed through unchanged to <c>validate</c>.</param>
        /// <param name="alignmentFeatures">Passed through unchanged to <c>validate</c>.</param>
        /// <param name="computeEditDistance">Passed through unchanged to <c>validate</c>.</param>
        /// <param name="outputDir">Passed through unchanged to <c>validate</c>.</param>
        /// <returns>
        /// A triple of three parallel lists: the formatter, the edit distance and the error
        /// rate reported by <c>validate</c> for each document. A document whose validation
        /// throws is logged and contributes no entry to any of the lists. Never null.
        /// </returns>
        /// <remarks>
        /// A timing summary is written to the console from a <c>finally</c> block, so it is
        /// printed even when loading or validation fails. The summary reads the members
        /// <c>trainingTimes</c> and <c>formattingTokensPerMS</c>; presumably these are filled
        /// in by <c>validate</c> -- confirm against its implementation.
        /// </remarks>
        public virtual Triple <IList <Formatter>, IList <float>, IList <float> > validateDocuments(FeatureMetaData[] injectWSFeatures, FeatureMetaData[] alignmentFeatures, bool computeEditDistance, string outputDir)
        {
            IList <Formatter> formatters = new List <Formatter>();
            IList <float>     distances  = new List <float>();
            IList <float>     errors     = new List <float>();

            // Stopwatch is monotonic and yields an integral millisecond count, which is what
            // the "{0:D}" specifier in the summary below requires.
            System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();
            try
            {
                IList <string>        allFiles          = Tool.getFilenames(rootDir, language.fileRegex);
                IList <InputDocument> documents         = Tool.load(allFiles, language);
                IList <InputDocument> parsableDocuments = BuffUtils.filter(documents, d => d.tree != null);

                // Documents are validated one after another on the calling thread.
                for (int i = 0; i < parsableDocuments.Count; i++)
                {
                    string fileName = parsableDocuments[i].fileName;
                    try
                    {
                        Triple <Formatter, float, float> results = validate(language, parsableDocuments, fileName,
                                                                            Formatter.DEFAULT_K, injectWSFeatures, alignmentFeatures, outputDir, computeEditDistance, false);
                        formatters.Add(results.a);
                        distances.Add(results.b);
                        errors.Add(results.c);
                    }
                    catch (Exception t)
                    {
                        // Best effort: one failing document must not abort the whole run.
                        // Print the exception itself so the type and message are kept
                        // alongside the stack trace.
                        System.Console.WriteLine(t);
                    }
                }
            }
            finally
            {
                stopwatch.Stop();
                double medianTrainingTime    = BuffUtils.median(trainingTimes);
                double medianFormattingPerMS = BuffUtils.median(formattingTokensPerMS);
                Console.Write("Total time {0:D}ms\n", stopwatch.ElapsedMilliseconds);
                // "D" is only valid for integral types; the median is a double, so use "F0".
                Console.Write("Median training time {0:F0}ms\n", medianTrainingTime);
                Console.Write("Median formatting time tokens per ms {0,5:F4}ms, min {1,5:F4} max {2,5:F4}\n", medianFormattingPerMS, BuffUtils.min(formattingTokensPerMS), BuffUtils.max(formattingTokensPerMS));
            }
            return(new Triple <IList <Formatter>, IList <float>, IList <float> >(formatters, distances, errors));
        }