コード例 #1
0
        /// <summary>
        /// Builds the level 1 model from <paramref name="TrainData"/>, evaluates it on
        /// <paramref name="TestData"/> and publishes the per-field results and summary counters.
        /// </summary>
        /// <remarks>
        /// For every test page and every chosen field, the candidate with the highest
        /// <c>AccordConfidance</c> value for that field is wrapped in a <c>FieldReportItem</c>.
        /// Misses are written to the trace, one success line per page goes to the console, and the
        /// totals are stored in <c>ReportData</c> and in
        /// <c>AppDataCenter.Singleton.EntireModelStats.testresults</c>.
        /// </remarks>
        /// <param name="TrainData">Documents handed to <c>CreateModel</c> and <c>level1RunTestForLevel2</c>.</param>
        /// <param name="TestData">Documents handed to <c>CreateModel</c> and then evaluated page by page.</param>
        private void Level1Classification(List <DocumentData> TrainData, List <DocumentData> TestData)
        {
            // Positions of the entries in FeaturesSelected whose IsSelected flag is set.
            int[] selectedFeatures = AppDataCenter.Singleton.FeaturesSelected
                                     .Select((a, i) => new { item = a, index = i })
                                     .Where(a => a.item.IsSelected)
                                     .Select(a => a.index)
                                     .ToArray();

            ClassifierService.Service.InitSelectFeatures(selectedFeatures);

            AppDataCenter.Singleton.ChosenFeatures = ClassifierService.Service.LocalFeatures;
            AppDataCenter.Singleton.UpdateFeatureVertHorzSelection();
            AppDataCenter.Singleton.EntireModelStats = ClassifierService.Service.CreateModel(
                AppDataCenter.Singleton.ChosenFields, TrainData, TestData, Configuration, m_featuresCalculate,
                AppDataCenter.Singleton.AddConsoleMessage, TuneScale, AppDataCenter.Singleton.PathToScale);

            AppDataCenter.Singleton.RejectedFields.Clear();

            ClassifierService.Service.level1RunTestForLevel2(TrainData, AppDataCenter.Singleton.ChosenFields, AppDataCenter.Singleton.AddConsoleMessage);

            ConcurrentBag <FieldReportItem> fieldBag = new ConcurrentBag <FieldReportItem>();

            AppDataCenter.Singleton.AddConsoleMessage("Running Test on  : " + TestData.Count + " Pages");
            int index = 0;

            foreach (DocumentData doc1 in TestData)
            {
                // The filter list is passed empty.
                // NOTE(review): presumably it names the feature indexes to recalculate — confirm against getDocFeatures.
                List <int> featursIndexFilter = new List <int>();

                // Time the feature pass so the console line reports a real duration
                // (previously the reported span was never updated and always printed 0).
                Stopwatch featureTimer = Stopwatch.StartNew();
                ClassifierService.Service.getDocFeatures(doc1, null, null, AppDataCenter.Singleton.ChosenFields, featursIndexFilter, false, null, true, true);
                featureTimer.Stop();

                double NumGolden  = 0;
                double NumMatched = 0;

                foreach (var r in AppDataCenter.Singleton.ChosenFields.Select((x, i) => new { Value = x, Index = i }))
                {
                    NumGolden++;

                    // Highest confidence for this field wins; null when the page has no candidates at all.
                    CandidateData fieldCandidate = doc1.Candidates.OrderByDescending(a => a.AccordConfidance[r.Index]).FirstOrDefault();

                    FieldReportItem fieldReportItem = new FieldReportItem(r.Value, r.Index, fieldCandidate, doc1);

                    if (fieldReportItem.IsMatch)
                    {
                        NumMatched++;
                    }
                    else if (fieldCandidate != null)
                    {
                        Trace.WriteLine("Doc : " + doc1.DocumentName + " ,Confidance : " + fieldCandidate.AccordConfidance[r.Index] + ",Field : " + r.Value + " ,Recognized : " + fieldCandidate.NameFromTypist + " content : " + fieldCandidate.Content + " False");
                    }
                    else
                    {
                        // Nothing to dereference: report the miss without candidate details.
                        Trace.WriteLine("Doc : " + doc1.DocumentName + " ,Field : " + r.Value + " ,no candidate available False");
                    }

                    fieldBag.Add(fieldReportItem);
                }

                // Avoid printing "NaN%" when there are no chosen fields.
                double pageSuccess = NumGolden > 0 ? Math.Round(NumMatched / NumGolden * 100) : 0;
                AppDataCenter.Singleton.AddConsoleMessage(index++ + " Page : " + doc1.DocumentName + " Getting Features (sec) : " + featureTimer.Elapsed.TotalSeconds + " success : " + pageSuccess + "%");
            }

            AppDataCenter.Singleton.AddConsoleMessage("Update results");

            // Snapshot the bag once; every counter below reads the same list instead of re-enumerating the bag.
            List <FieldReportItem> results = fieldBag.ToList();

            AppDataCenter.Singleton.EntireModelStats.testresults = results;

            ReportData.noOfFields  = results.Count;
            ReportData.NoOfPages   = TestData.Count;
            ReportData.fpCount     = results.Count(a => a.IsFP);
            ReportData.matchCount  = results.Count(a => a.IsMatch);
            ReportData.rejectCount = results.Count(a => a.IsRejected);

            foreach (FieldReportItem fieldresult in results.Where(a => a.IsMatch))
            {
                ReportData.QualityMatch.Add(fieldresult.MatchQuality);
            }

            // Per-field success summary, written to the trace.
            foreach (var r in AppDataCenter.Singleton.ChosenFields)
            {
                // NOTE(review): Field is assumed to be null when the item was built without a candidate — confirm in FieldReportItem.
                List <FieldReportItem> itemsForField = results.Where(a => a.Field != null && a.Field.NameFromTypist == r).ToList();

                int    countNum   = itemsForField.Count;
                int    matchedNum = itemsForField.Count(a => a.IsMatch);
                double success    = countNum > 0 ? (double)matchedNum / countNum * 100 : 0;

                Trace.WriteLine("*********** The field : " + r + " success : " + Math.Round(success, 1) + "%   Count : " + countNum + "         *************");
            }

            AppDataCenter.Singleton.NotifyChange(NotifyGroup.ScatterData);
        }
コード例 #2
0
        /// <summary>
        /// Builds the level 3 model from <paramref name="TrainData"/>, re-scores the stage 3
        /// candidates of every document in <paramref name="TestData"/> and reports the success rate.
        /// </summary>
        /// <remarks>
        /// Before testing, each test document gets a <c>lastResultsCandidates</c> dictionary that maps
        /// every chosen field to the first candidate holding the maximum <c>AccordConfidance</c> value
        /// for that field. During testing, the best candidate for a field is picked by taking the top
        /// candidates by <c>AccordConfidance</c> and re-ranking that shortlist by <c>AccordConfidance3</c>.
        /// </remarks>
        /// <param name="TrainData">Documents handed to <c>buildModelLevel3</c>.</param>
        /// <param name="TestData">Documents that are re-scored and evaluated page by page.</param>
        private void Level3Classification(List <DocumentData> TrainData, List <DocumentData> TestData)
        {
            // How many of the best candidates by AccordConfidance are re-ranked by AccordConfidance3.
            const int Level1ShortlistSize = 7;

            AppDataCenter.Singleton.AddConsoleMessage("\nStarting Level3 ***************\n");

            ClassifierService.Service.InitFeatureslevel3(AppDataCenter.Singleton.ChosenFields);

            ClassifierService.Service.buildModelLevel3(AppDataCenter.Singleton.ChosenFields
                                                       , TrainData, Configuration, AppDataCenter.Singleton.AddConsoleMessage);

            ConcurrentBag <FieldReportItem> fieldBag = new ConcurrentBag <FieldReportItem>();

            AppDataCenter.Singleton.AddConsoleMessage("Getting results from level 1 to be used in level 3");

            TestData.AsParallel().ForAll(doc =>
            {
                doc.lastResultsCandidates = new Dictionary <string, CandidateData>();
                for (int i = 0; i < AppDataCenter.Singleton.ChosenFields.Length; i++)
                {
                    int           fieldIndex        = i;
                    CandidateData releventCandidate = null;

                    // Compute the maximum once per field instead of once per candidate.
                    // A document without candidates keeps a null entry, as before.
                    if (doc.Candidates.Any())
                    {
                        var best = doc.Candidates.Select(b => b.AccordConfidance[fieldIndex]).Max();
                        releventCandidate = doc.Candidates.Where(a => a.AccordConfidance[fieldIndex] == best).FirstOrDefault();
                    }

                    doc.lastResultsCandidates.Add(AppDataCenter.Singleton.ChosenFields[i], releventCandidate);
                }
            });

            AppDataCenter.Singleton.AddConsoleMessage("Running Test on  : " + TestData.Count + " Pages");
            int index = 0;

            foreach (DocumentData doc1 in TestData)
            {
                // Accumulates only the time spent inside GetFieldFeatures3 for this page.
                // (The previous code added "now - page start" once per candidate, which
                // over-counted and also included the classification time.)
                Stopwatch featureTimer = new Stopwatch();

                foreach (CandidateData field in doc1.CandidatesForStage3)
                {
                    featureTimer.Start();
                    field.Features3 = ClassifierService.Service.GetFieldFeatures3(doc1, field);
                    featureTimer.Stop();

                    // Only the confidences are kept; the returned decision value is not used here.
                    double[] confidenceOut;
                    ClassifierService.Service.GetDescition3(field.Features3, out confidenceOut);
                    field.AccordConfidance3 = confidenceOut;
                }

                double NumGolden  = 0;
                double NumMatched = 0;

                foreach (var r in AppDataCenter.Singleton.ChosenFields.Select((x, i) => new { Value = x, Index = i }))
                {
                    NumGolden++;

                    // Shortlist by AccordConfidance, then pick the shortlist entry with the best AccordConfidance3.
                    // Null when the page has no stage 3 candidates.
                    CandidateData fieldCandidate = doc1.CandidatesForStage3
                                                   .OrderByDescending(a => a.AccordConfidance[r.Index])
                                                   .Take(Level1ShortlistSize)
                                                   .OrderByDescending(a => a.AccordConfidance3[r.Index])
                                                   .FirstOrDefault();

                    FieldReportItem fieldReportItem = new FieldReportItem(r.Value, r.Index, fieldCandidate, doc1);

                    if (fieldReportItem.IsMatch)
                    {
                        NumMatched++;
                    }
                    else if (fieldCandidate != null)
                    {
                        Trace.WriteLine("Doc : " + doc1.DocumentName + " ,Confidance : " + fieldCandidate.AccordConfidance3[r.Index] + ",Field : " + r.Value + " ,Recognized : " + fieldCandidate.NameFromTypist + " content : " + fieldCandidate.Content + " False");
                    }
                    else
                    {
                        // Nothing to dereference: report the miss without candidate details.
                        Trace.WriteLine("Doc : " + doc1.DocumentName + " ,Field : " + r.Value + " ,no candidate available False");
                    }

                    fieldBag.Add(fieldReportItem);
                }

                // Avoid printing "NaN%" when there are no chosen fields.
                double pageSuccess = NumGolden > 0 ? Math.Round(NumMatched / NumGolden * 100, 1) : 0;
                AppDataCenter.Singleton.AddConsoleMessage(index++ + " Page : " + doc1.DocumentName + " Getting Features (sec) : " + featureTimer.Elapsed.TotalSeconds + " success : " + pageSuccess + "%");
            }

            AppDataCenter.Singleton.AddConsoleMessage("Update results");

            // Snapshot the bag once; the summaries below all read the same list.
            List <FieldReportItem> results = fieldBag.ToList();

            double noOfFields = results.Count;
            double matchCount = results.Count(a => a.IsMatch);

            // Per-field success summary, written to the trace.
            foreach (var r in AppDataCenter.Singleton.ChosenFields)
            {
                // NOTE(review): Field is assumed to be null when the item was built without a candidate — confirm in FieldReportItem.
                List <FieldReportItem> itemsForField = results.Where(a => a.Field != null && a.Field.NameFromTypist == r).ToList();

                int    countNum   = itemsForField.Count;
                int    matchedNum = itemsForField.Count(a => a.IsMatch);
                double success    = countNum > 0 ? (double)matchedNum / countNum * 100 : 0;

                Trace.WriteLine("*********** The field : " + r + " success : " + Math.Round(success, 1) + "%   Count : " + countNum + "         *************");
            }

            // Avoid printing "NaN%" when nothing was evaluated.
            double totalSuccess = noOfFields > 0 ? Math.Round(matchCount / noOfFields * 100, 2) : 0;
            AppDataCenter.Singleton.AddConsoleMessage("Total result level 3 , sucsss : " + totalSuccess + "%");
        }
コード例 #3
0
        /// <summary>
        /// Builds the level 2 model from <paramref name="TrainData"/>, picks the highest-confidence
        /// solution for every document in <paramref name="TestData"/> and reports the success rate.
        /// </summary>
        /// <remarks>
        /// Each solution candidate of a test document is scored with <c>GetDescitionLevel2</c>; the
        /// score is stored in its <c>Confidance</c> member and the best-scoring candidate becomes the
        /// document's <c>chosenSolution</c>. A candidate is only chosen when its score is above -100,
        /// so a document can end up without a solution; such a page is reported with every chosen
        /// field counted as a miss.
        /// </remarks>
        /// <param name="TrainData">Documents handed to <c>Prepareraindatafalsegrouping</c> and <c>buildModelLevel2</c>.</param>
        /// <param name="TestData">Documents handed to <c>PrepareCandidateslevel2Test</c> and then evaluated page by page.</param>
        public void Level2Classification(List <DocumentData> TrainData, List <DocumentData> TestData)
        {
            List <int[]> groups = ClassifierService.Service.BuildGroupsOfFeatures(AppDataCenter.Singleton.ChosenFields);

            // Flatten the group membership once so the loop below is a constant-time lookup per field.
            HashSet <int> groupedIndexes = new HashSet <int>(groups.SelectMany(a => a));

            // Field indexes that appear in any group get 2 options, all others get 1.
            int[] optionsPerSol = new int[AppDataCenter.Singleton.ChosenFields.Length];
            for (int i = 0; i < optionsPerSol.Length; i++)
            {
                optionsPerSol[i] = groupedIndexes.Contains(i) ? 2 : 1;
            }

            ClassifierService.Service.InitFeatureslevel2(groups);

            ClassifierService.Service.Prepareraindatafalsegrouping(TrainData, optionsPerSol, 0, AppDataCenter.Singleton.AddConsoleMessage);

            ClassifierService.Service.buildModelLevel2(AppDataCenter.Singleton.ChosenFields, TrainData, groups, Configuration, AppDataCenter.Singleton.AddConsoleMessage);

            ClassifierService.Service.PrepareCandidateslevel2Test(TestData, optionsPerSol, 0, AppDataCenter.Singleton.AddConsoleMessage);

            // Score every solution candidate and remember the best one per document.
            foreach (DocumentData doc in TestData)
            {
                solutionData chosenSolution = null;
                double       lastProb       = -100;

                foreach (var sol in doc.solutionCandidates)
                {
                    double conf;
                    ClassifierService.Service.GetDescitionLevel2(sol.features, out conf);
                    sol.Confidance = conf;

                    if (conf > lastProb)
                    {
                        chosenSolution = sol;
                        lastProb       = conf;
                    }
                }

                doc.chosenSolution = chosenSolution;
            }

            int index = 0;

            double TotalGolden  = 0;
            double TotalMatched = 0;

            foreach (DocumentData doc1 in TestData)
            {
                // No features are computed in this pass, so the reported time is always zero;
                // it is kept so the console line has the same shape as before.
                TimeSpan FeatureCalcSpan = TimeSpan.Zero;
                double   NumGolden       = 0;
                double   NumMatched      = 0;

                if (doc1.chosenSolution == null)
                {
                    // Nothing was chosen for this page: count every field as a miss instead of
                    // dereferencing the missing solution.
                    NumGolden = AppDataCenter.Singleton.ChosenFields.Length;
                    Trace.WriteLine("Doc : " + doc1.DocumentName + " ,no level 2 solution was chosen");
                }
                else
                {
                    foreach (var r in AppDataCenter.Singleton.ChosenFields.Select((x, i) => new { Value = x, Index = i }))
                    {
                        NumGolden++;

                        var offered = doc1.chosenSolution.offeredSolution[r.Index];

                        FieldReportItem fieldReportItem = new FieldReportItem(r.Value, r.Index, offered, doc1);

                        if (fieldReportItem.IsMatch)
                        {
                            NumMatched++;
                        }

                        Trace.WriteLine("Doc : " + doc1.DocumentName + ",Field : " + r.Value + " ,Recognized : " + fieldReportItem.MatchedName + " content : " + offered.Content + "Match : " + (fieldReportItem.IsMatch ? " True" : " False"));
                    }
                }

                TotalGolden  += NumGolden;
                TotalMatched += NumMatched;

                // Avoid printing "NaN%" when there are no chosen fields.
                double pageSuccess = NumGolden > 0 ? Math.Round(NumMatched / NumGolden * 100) : 0;
                AppDataCenter.Singleton.AddConsoleMessage(index++ + " Page : " + doc1.DocumentName + " Getting Features (sec) : " + FeatureCalcSpan.TotalSeconds + " success : " + pageSuccess + "%");
            }

            // Avoid printing "NaN%" when nothing was evaluated.
            double totalSuccess = TotalGolden > 0 ? Math.Round(TotalMatched / TotalGolden * 100, 1) : 0;
            AppDataCenter.Singleton.AddConsoleMessage(" Total Matched :  " + totalSuccess + "%");
        }