Ejemplo n.º 1
0
        /// <summary>
        /// Rebuilds the cumulative probability distribution chart: re-initializes it, then plots the
        /// marked distribution followed by the unmarked distribution.
        /// </summary>
        private void RefreshCumulativeProbabilityChart()
        {
            // Presumably (re)creates the "MarkedProbabilities" and "UnmarkedProbabilities" series;
            // the by-name series lookups below rely on both existing - TODO confirm.
            InitializeCumulativeProbabilityDistributionChart();

            // Retrieve the cumulative histogram probabilities from the database
            ProcessCapturePackets pcp = new ProcessCapturePackets();

            // Marked data is fetched and plotted before the unmarked data is fetched.
            PlotCumulativeProbabilitySeries(
                "MarkedProbabilities",
                Color.CornflowerBlue,
                pcp.GetCumulativeProbabilityDistributionData(CaptureState.Marked));

            PlotCumulativeProbabilitySeries(
                "UnmarkedProbabilities",
                Color.Red,
                pcp.GetCumulativeProbabilityDistributionData(CaptureState.Unmarked));
        }

        /// <summary>
        /// Colors the named CdfChart series and adds one point per interval/probability row.
        /// </summary>
        /// <param name="seriesName">Name of a series that already exists in CdfChart.</param>
        /// <param name="seriesColor">Color assigned to the series.</param>
        /// <param name="distribution">Rows supplying the X (Interval) and Y (Probability) values.</param>
        /// <exception cref="ArgumentException">Two rows share the same Interval.</exception>
        private void PlotCumulativeProbabilitySeries(string seriesName, Color seriesColor, BindingList<CumulativeProbabilityDistribution> distribution)
        {
            // The sorted dictionary orders the points by interval; Add rejects a repeated interval.
            SortedDictionary<int, decimal> probabilitiesByInterval = new SortedDictionary<int, decimal>();
            foreach (CumulativeProbabilityDistribution cpd in distribution)
            {
                probabilitiesByInterval.Add(cpd.Interval, cpd.Probability);
            }

            var series = CdfChart.Series[seriesName];
            series.Color = seriesColor;

            foreach (KeyValuePair<int, decimal> pair in probabilitiesByInterval)
            {
                series.Points.AddXY(Convert.ToDouble(pair.Key), Convert.ToDouble(pair.Value));
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Compares the marked and unmarked cumulative probability distributions and reports whether
        /// their largest (scaled) difference exceeds the threshold computed in this method.
        /// </summary>
        /// <returns>
        /// A HypothesisTest holding the threshold (KsStatistic), the scaled maximum difference
        /// (MaxCpdVariance) and the outcome (KsTestResult; true means the null hypothesis is rejected).
        /// When either distribution is empty the values are 0, 0 and false.
        /// </returns>
        public HypothesisTest GetHypothesisTestResult()
        {
            HypothesisTest ht = new HypothesisTest();

            // Get cumulative probability distribution data for both capture states
            ProcessCapturePackets pcp = new ProcessCapturePackets();
            BindingList<CumulativeProbabilityDistribution> markedCPD = pcp.GetCumulativeProbabilityDistributionData(CaptureState.Marked);
            BindingList<CumulativeProbabilityDistribution> unmarkedCPD = pcp.GetCumulativeProbabilityDistributionData(CaptureState.Unmarked);

            if (markedCPD.Count == 0 || unmarkedCPD.Count == 0)
            {
                // Not enough data to perform the test
                ht.KsStatistic = 0;
                ht.MaxCpdVariance = 0;
                ht.KsTestResult = false;
                return ht;
            }

            // The threshold below is based on the size of the smaller distribution
            int intervalCount = Math.Min(markedCPD.Count, unmarkedCPD.Count);

            // Largest cumulative packet count (= interval) across both distributions.
            // Assumes each list is ordered by ascending Interval so its last row holds the maximum - TODO confirm.
            int maxPacketCount = Math.Max(
                markedCPD[markedCPD.Count - 1].Interval,
                unmarkedCPD[unmarkedCPD.Count - 1].Interval);

            // Expand each distribution into equal discrete steps so the cumulative probabilities can be
            // compared packet count by packet count. Per the original author's notes, the expansion
            // linearly interpolates probabilities between the recorded intervals, from packet count 0 up
            // to maxPacketCount.
            SortedDictionary<int, decimal> markedCPDExpanded = new ExpandPacketCountLinear(markedCPD, maxPacketCount).ExpandPacketCount();
            SortedDictionary<int, decimal> unmarkedCPDExpanded = new ExpandPacketCountLinear(unmarkedCPD, maxPacketCount).ExpandPacketCount();

            // Find the maximum absolute difference between the two expanded distributions.
            // NOTE(review): the loop stops before maxPacketCount itself; confirm that excluding the
            // final packet count is intended.
            decimal maxVariance = 0M;
            for (int i = 0; i < maxPacketCount; i++)
            {
                decimal unmarkedProbability = unmarkedCPDExpanded[i];
                decimal markedProbability = markedCPDExpanded[i];
                decimal difference = Math.Abs(unmarkedProbability - markedProbability);

                // Debug.WriteLine is only compiled into DEBUG builds, so no #if guard is needed.
                System.Diagnostics.Debug.WriteLine("unmarkedCPDExpanded[{0}]:[{1}] - markedCPDExpanded[{2}]:[{3}] = {4}", i, unmarkedProbability, i, markedProbability, difference);

                if (difference > maxVariance)
                {
                    maxVariance = difference;
                }
            }

            // Multiply by the square root of the sample size factor n*m / (n + m).
            // Computed in double: integer arithmetic would truncate the quotient before the square
            // root is taken (e.g. 3 and 3 give 1 instead of 1.5) and n*m could overflow int.
            double sampleSizeFactor = ((double)markedCPD.Count * unmarkedCPD.Count) / ((double)markedCPD.Count + unmarkedCPD.Count);
            maxVariance = maxVariance * Convert.ToDecimal(Math.Sqrt(sampleSizeFactor));

            // Compare the maximum variance with the hypothesis test threshold
            // For significance level alpha = 0.05, the K-S statistic is computed as 1.36/N^(1/2), where N is the number of samples
            // NOTE(review): the difference is scaled by sqrt(n*m/(n+m)) above and the threshold is also
            // divided by sqrt(N); confirm this combination is the intended form of the test.
            decimal ksStatistic = Convert.ToDecimal(1.36 / Math.Pow(intervalCount, 0.5));
            ht.KsStatistic = ksStatistic;
            ht.MaxCpdVariance = maxVariance;

            // True rejects the null hypothesis; assigned in both cases rather than relying on the
            // default value of a new HypothesisTest.
            ht.KsTestResult = maxVariance > ksStatistic;

            return ht;
        }