Пример #1
0
        /// <summary>
        /// Scores <paramref name="unlabledHeuristic"/> against the two library labels named
        /// "whitespace" and "AllLabels" and lazily yields one <see cref="RecognizedSegment"/>
        /// for each of them that has at least one sample.
        /// </summary>
        /// <remarks>
        /// Phase one computes, per heuristic index, the fraction of each label's samples whose
        /// heuristic value equals the unlabeled one. Phase two folds those fractions into a
        /// per-label probability that starts at 0.5.
        /// This is an iterator: no work is done and no exception is raised until the result is
        /// enumerated. While running it advances <paramref name="progress"/>, calls
        /// <c>heuristicsControl.buildHeuristicProbabilityHistorgram</c> and appends to each
        /// label's <c>Variances</c>.
        /// The debug assertions compare <c>progress.Progress</c> with absolute totals, so they
        /// presuppose the reporter starts at zero.
        /// </remarks>
        /// <param name="unlabledHeuristic">Heuristics of the segment to classify; its <c>Bounds</c> are copied into every result.</param>
        /// <param name="progress">Optional progress sink; an <c>EmptyProgressReporter</c> is substituted when null.</param>
        /// <returns>Up to two segments (whitespace first, then AllLabels) carrying the computed probability.</returns>
        /// <exception cref="System.InvalidOperationException">
        /// The library contains no "whitespace" label or no "AllLabels" label.
        /// </exception>
        public IEnumerable<RecognizedSegment> PerformWhitespaceLookup(HeuristicSet unlabledHeuristic, IProgressReporter progress = null)
        {
            progress = progress ?? new EmptyProgressReporter();

            int heuristicCount = unlabledHeuristic.Heuristics.Count;
            Dictionary<ReferenceLabel, double[]> lblComparisonResults = new Dictionary<ReferenceLabel, double[]>();
            Dictionary<ReferenceLabel, double> labelProbability = new Dictionary<ReferenceLabel, double>();

            // Locate the two special labels; stop scanning as soon as both are known.
            ReferenceLabel whitespace = null, allLabels = null;
            for (int i = 0; i < Library.Count; i++) {
                if (Library[i].Label == "whitespace")
                    whitespace = Library[i];
                else if (Library[i].Label == "AllLabels")
                    allLabels = Library[i];

                if (whitespace != null && allLabels != null)
                    break;
            }

            // Fail with a descriptive error instead of a NullReferenceException further down.
            if (whitespace == null || allLabels == null)
                throw new System.InvalidOperationException("Library must contain both a \"whitespace\" and an \"AllLabels\" label.");

            var candidates = new[] { whitespace, allLabels };
            var sampleCount = whitespace.Samples.Count + allLabels.Samples.Count;

            // One tick per (heuristic, sample) comparison plus one per (label, heuristic) evaluation.
            progress.Maximum = heuristicCount * sampleCount + 2 * heuristicCount;

            lblComparisonResults[whitespace] = new double[heuristicCount];
            lblComparisonResults[allLabels] = new double[heuristicCount];

            for (int heurIdx = 0; heurIdx < heuristicCount; heurIdx++) {
                // Count the samples of each label that agree with the unlabeled heuristic.
                foreach (var candidate in candidates) {
                    double[] matches = lblComparisonResults[candidate];
                    foreach (var item in candidate.Samples) {
                        progress.Progress++;
                        if (unlabledHeuristic.GetAtIndex(heurIdx) == item.Heuristics[heurIdx]) {
                            matches[heurIdx]++;
                        }
                    }
                }

                // Turn counts into fractions. A label without samples keeps 0: dividing 0 by 0
                // would yield NaN, which the Sum below would spread to the other label as well.
                foreach (var candidate in candidates) {
                    if (candidate.Samples.Count > 0)
                        lblComparisonResults[candidate][heurIdx] /= (double)candidate.Samples.Count;
                }
            }
            Debug.Assert(progress.Progress == heuristicCount * sampleCount);

            // Two candidate labels, hence a prior of 1/2 (which makes factorIncrease equal to 1).
            double aprioriProb = 1.0 / (double)2;
            double factorIncrease = (1.0 - aprioriProb) / aprioriProb;

            foreach (var label in candidates) {
                labelProbability[label] = aprioriProb;
                for (int heurIdx = 0; heurIdx < heuristicCount; heurIdx++) {
                    progress.Progress++;

                    double comparisonToThisLabel = lblComparisonResults[label][heurIdx];
                    double comparisonToOtherLabels = lblComparisonResults.Sum(h => h.Value[heurIdx]) - comparisonToThisLabel;

                    // No label matched this heuristic at all: it carries no information.
                    if (comparisonToThisLabel + comparisonToOtherLabels == 0)
                        continue;

                    double heuristicProbabilisticIndication = comparisonToThisLabel / (comparisonToThisLabel + comparisonToOtherLabels);
                    heuristicsControl.buildHeuristicProbabilityHistorgram(heuristicProbabilisticIndication, Library.IndexOf(label), heurIdx);
                    double multiplicativeOffset = label.Variances.Append(heuristicProbabilisticIndication);
                    multiplicativeOffset += aprioriProb / (double)label.Variances.Count;

                    // The comparison is false for +Infinity and NaN, so such offsets are ignored.
                    if (multiplicativeOffset < double.MaxValue)
                        labelProbability[label] *= (factorIncrease * (heuristicProbabilisticIndication + multiplicativeOffset)) / (1 - heuristicProbabilisticIndication + multiplicativeOffset);

                    // The product has saturated; account for the skipped ticks and stop early.
                    if (double.IsInfinity(labelProbability[label]) || labelProbability[label] == 0) {
                        progress.Progress += heuristicCount - heurIdx - 1;
                        break;
                    }
                }
                if (label.Samples.Count > 0)
                    yield return new RecognizedSegment(unlabledHeuristic.Bounds, label.Label, labelProbability[label]);
            }
            Debug.Assert(progress.Progress == progress.Maximum);
        }
Пример #2
0
        /// <summary>
        /// Scores <paramref name="unlabledHeuristic"/> against every library label except the
        /// special "whitespace" and "AllLabels" entries and lazily yields one
        /// <see cref="RecognizedSegment"/> per scored label that has at least one sample.
        /// </summary>
        /// <remarks>
        /// Phase one computes, per heuristic index and label, the fraction of the label's samples
        /// whose heuristic value equals the unlabeled one. Phase two folds those fractions into a
        /// per-label probability that starts at 1 / (number of scored labels).
        /// This is an iterator: nothing runs until the result is enumerated. While running it
        /// advances <paramref name="progress"/>, calls
        /// <c>heuristicsControl.buildHeuristicProbabilityHistorgram</c> and appends to each scored
        /// label's <c>Variances</c>.
        /// The debug assertions compare <c>progress.Progress</c> with absolute totals, so they
        /// presuppose the reporter starts at zero.
        /// </remarks>
        /// <param name="unlabledHeuristic">Heuristics of the segment to classify; its <c>Bounds</c> are copied into every result.</param>
        /// <param name="progress">Progress sink; an <c>EmptyProgressReporter</c> is substituted when null.</param>
        /// <returns>One segment per scored, non-empty label, in library order.</returns>
        private IEnumerable<RecognizedSegment> GetMatchesIterator(HeuristicSet unlabledHeuristic, IProgressReporter progress)
        {
            // Locate the two special labels; stop scanning as soon as both are known.
            int whitespaceIdx = -1,
                allLabelsIdx = -1;
            for (int i = 0; i < Library.Count; i++) {
                if (Library[i].Label == "whitespace")
                    whitespaceIdx = i;
                else if (Library[i].Label == "AllLabels")
                    allLabelsIdx = i;

                if (whitespaceIdx >= 0 && allLabelsIdx >= 0)
                    break;
            }

            // Only indices that were actually found are excluded, so the set size is exactly the
            // number of skipped labels even when a special label is absent from the library.
            HashSet<int> dontCheck = new HashSet<int>();
            if (whitespaceIdx >= 0)
                dontCheck.Add(whitespaceIdx);
            if (allLabelsIdx >= 0)
                dontCheck.Add(allLabelsIdx);
            int checkedLabelCount = Library.Count - dontCheck.Count;

            progress = progress ?? new EmptyProgressReporter();

            int heuristicCount = unlabledHeuristic.Heuristics.Count;
            double[][] lblComparisonResults = new double[Library.Count][];

            var totalSampleCount = Library.Where((_, i) => !dontCheck.Contains(i)).Sum(rl => rl.Samples.Count);

            // One tick per (heuristic, sample) comparison plus one per (scored label, heuristic) evaluation.
            progress.Maximum = heuristicCount * totalSampleCount + heuristicCount * checkedLabelCount;

            // Rows of excluded labels are allocated too and stay all-zero, so they do not
            // contribute to the per-heuristic sums taken below.
            for (int i = 0; i < Library.Count; i++) {
                lblComparisonResults[i] = new double[heuristicCount];
            }

            for (int heurIdx = 0; heurIdx < heuristicCount; heurIdx++) {
                // Count the samples of each scored label that agree with the unlabeled heuristic.
                for (int lblIdx = 0; lblIdx < Library.Count; lblIdx++) {
                    if (dontCheck.Contains(lblIdx))
                        continue;

                    var current = Library[lblIdx];
                    foreach (var item in current.Samples) {
                        progress.Progress++;
                        if (unlabledHeuristic.GetAtIndex(heurIdx) == item.Heuristics[heurIdx])
                            lblComparisonResults[lblIdx][heurIdx]++;
                    }
                }

                // Turn counts into fractions. A label without samples keeps 0: dividing 0 by 0
                // would yield NaN, which the Sum below would spread to every other label.
                for (int labelIndex = 0; labelIndex < Library.Count; labelIndex++) {
                    if (dontCheck.Contains(labelIndex))
                        continue;

                    int samplesInLabel = Library[labelIndex].Samples.Count;
                    if (samplesInLabel > 0)
                        lblComparisonResults[labelIndex][heurIdx] /= (double)samplesInLabel;
                }
            }
            Debug.Assert(progress.Progress == heuristicCount * totalSampleCount);

            double[] labelProbability = new double[Library.Count];
            // Uniform prior over the scored labels. When there are none the value is infinite,
            // but it is never used because the loop below has nothing to iterate over.
            double aprioriProb = 1.0 / (double)checkedLabelCount;
            double factorIncrease = (1.0 - aprioriProb) / aprioriProb;

            for (int inspectionLbl = 0; inspectionLbl < Library.Count; inspectionLbl++) {
                if (dontCheck.Contains(inspectionLbl))
                    continue;

                labelProbability[inspectionLbl] = aprioriProb;
                for (int heurIdx = 0; heurIdx < heuristicCount; heurIdx++) {
                    progress.Progress++;

                    double comparisonToThisLabel = lblComparisonResults[inspectionLbl][heurIdx];
                    double comparisonToOtherLabels = lblComparisonResults.Sum(h => h[heurIdx]) - comparisonToThisLabel;

                    // No label matched this heuristic at all: it carries no information.
                    if (comparisonToThisLabel + comparisonToOtherLabels == 0)
                        continue;

                    double heuristicProbabilisticIndication = comparisonToThisLabel / (comparisonToThisLabel + comparisonToOtherLabels);
                    heuristicsControl.buildHeuristicProbabilityHistorgram(heuristicProbabilisticIndication, inspectionLbl, heurIdx);
                    double multiplicativeOffset = Library[inspectionLbl].Variances.Append(heuristicProbabilisticIndication);
                    multiplicativeOffset += aprioriProb / (double)Library[inspectionLbl].Variances.Count;

                    // The comparison is false for +Infinity and NaN, so such offsets are ignored.
                    if (multiplicativeOffset < double.MaxValue)
                        labelProbability[inspectionLbl] *= (factorIncrease * heuristicProbabilisticIndication + multiplicativeOffset) / (1 - heuristicProbabilisticIndication + multiplicativeOffset);

                    // The product has saturated; account for the skipped ticks and stop early.
                    if (double.IsInfinity(labelProbability[inspectionLbl]) || labelProbability[inspectionLbl] == 0) {
                        progress.Progress += heuristicCount - heurIdx - 1;
                        break;
                    }
                }

                if (Library[inspectionLbl].Samples.Count > 0) {
                    yield return new RecognizedSegment(unlabledHeuristic.Bounds, Library[inspectionLbl].Label, labelProbability[inspectionLbl]);
                }
            }
            Debug.Assert(progress.Progress == progress.Maximum);
        }