예제 #1
0
        //======================17========================
        //Log Overstemming results
        /// <summary>
        /// Reads the strong and weak over-stemming metrics from <paramref name="oc"/>
        /// and formats each one as text.
        /// </summary>
        /// <remarks>
        /// The formatted strings are not written anywhere by this method; no logging
        /// call is made, so the results are discarded when the method returns.
        /// </remarks>
        /// <param name="oc">Source of the over-stemming metrics.</param>
        private void OverCalcToLog(OverCalc oc)
        {
            // Metrics in the order: strong GDNT, GWMT, GAMT, OIG, OIL, DI, then the
            // same six for the weak variant. Array initializers evaluate left to
            // right, so the getters are invoked in exactly this order.
            double[] metrics =
            {
                oc.getStrongGDNT(),
                oc.getStrongGWMT(),
                oc.getStrongGAMT(),
                oc.getStrongOIG(),
                oc.getStrongOIL(),
                oc.getStrongDI(),
                oc.getWeakGDNT(),
                oc.getWeakGWMT(),
                oc.getWeakGAMT(),
                oc.getWeakOIG(),
                oc.getWeakOIL(),
                oc.getWeakDI()
            };

            // One format per metric kind; the pattern repeats for the weak group.
            string[] formatsPerGroup =
            {
                "0",          // GDNT
                "0",          // GWMT
                "0",          // GAMT
                "0.000000",   // OIG
                "0.000000",   // OIL
                "0.00"        // DI
            };

            string[] formatted = new string[metrics.Length];

            for (int k = 0; k < metrics.Length; k++)
            {
                formatted[k] = metrics[k].ToString(formatsPerGroup[k % formatsPerGroup.Length]);
            }
        }
예제 #2
0
        /// <summary>
        /// Compares <c>ListA</c> and <c>ListB</c> element by element, adds to
        /// <c>listViewResults</c> every pair whose Levenshtein distance lies within the
        /// range selected by <c>numericUpDownMin</c>/<c>numericUpDownMax</c>, and updates
        /// the summary labels (mean lengths, inverse mean MHD, SSM*, totals).
        /// When both inputs have barriers (<c>barrierA &amp;&amp; barrierB</c>) it additionally
        /// computes the under-/over-stemming indices and the ERRT figures and shows them.
        /// </summary>
        /// <remarks>
        /// Any exception raised while processing is reported in a message box rather than
        /// propagated. The elapsed time is written to the status bar and the progress bar
        /// is set to its maximum in every case, including failure and length mismatch.
        /// </remarks>
        void ListResults()
        {
            // Timer for diagnostics (added 14 Aug 2013).
            Stopwatch stopWatch = new Stopwatch();

            stopWatch.Start();

            try {
                int     editDistance    = 0;
                int     hammingDistance = 0;
                decimal relativeModifiedHammingDistance = 0;

                // Reset the running totals before anything else so they are cleared
                // even when the length check below fails.
                iSum     = 0;
                iSumR    = 0;
                iSumLenA = 0;
                iSumLenB = 0;

                int      j       = 1;
                int      i       = 0;
                int      count   = 1;
                double[] numbers = new double[4];   // stays all-zero if no row is ever added

                if (lengthA == lengthB)
                {
                    // Suspend list-view redraws while rows are added (added 13 Aug 2013).
                    listViewResults.BeginUpdate();

                    try
                    {
                        for (i = 0; i < ListA.Length; i++)
                        {
                            editDistance = LevenshteinDistance.Compute(ListA[i], ListB[i]);

                            if ((editDistance >= numericUpDownMin.Value) && (editDistance <= numericUpDownMax.Value))
                            {
                                hammingDistance = ClassHammingDistanceSimple.ModifiedHammingDistance(ListA[i], ListB[i]);
                                relativeModifiedHammingDistance = ClassHammingDistanceSimple.RelativeModifiedHammingDistance(ListA[i], ListB[i]);

                                // Entries containing "====", "----" or "//" are not listed.
                                bool isSkippedEntry = ListA[i].Contains("====")
                                                      || ListA[i].Contains("----")
                                                      || ListA[i].Contains("//");

                                if (!isSkippedEntry)
                                {
                                    // count is the 1-based number of the row being added.
                                    count = j++;

                                    numbers = AddItemsToListViewByRange(count, i, hammingDistance, relativeModifiedHammingDistance, editDistance);
                                }
                            }

                            backgroundWorker1.ReportProgress(count - 1);
                        }
                    }
                    finally
                    {
                        // Always resume redraws; otherwise an exception inside the loop
                        // (caught by the outer handler) would leave the list view frozen.
                        listViewResults.EndUpdate();
                    }

                    listViewResults.AutoResizeColumns(ColumnHeaderAutoResizeStyle.HeaderSize);

                    // Totals as returned by the last AddItemsToListViewByRange call.
                    iSumLenA = numbers[0];
                    iSumLenB = numbers[1];
                    iSum     = numbers[2];          // sum of MHD column
                    iSumR    = numbers[3];          // sum of RelMHD column

                    int itemsCount = listViewResults.Items.Count;

                    double meanLengthA = (double)iSumLenA / itemsCount;
                    double meanLengthB = (double)iSumLenB / itemsCount;
                    double meanMHD     = (double)iSum / itemsCount;
                    double meanRelMHD  = (double)(iSumR / itemsCount);

                    // FIXED 18 Aug 2013, but meanRelMHD is noted as not accurate.
                    double SSMLancs = (double)100 * (1 - meanRelMHD);

                    labelMeanLengthA.Text = "List A Mean Word Length = " + meanLengthA.ToString("N3");
                    labelMeanLengthB.Text = "List B Mean Word Length = " + meanLengthB.ToString("N3");

                    labelMeanCharsRemoved.Text = "Mean Characters Removed = " + (meanLengthA - meanLengthB).ToString("N3");

                    // Similarity metrics
                    this.labelInverseMeanMHD.Text = "Inverse Mean MHD = " + (1 / meanMHD).ToString("N3");
                    this.labelSSM.Text            = "SSM* = " + SSMLancs.ToString("N3") + "%";

                    // Percentage of listed items, shown to three decimal places (added 6 Aug 2013).
                    double PercentageDiffs = (double)(100 * itemsCount) / lengthA;

                    labelDiffCount.Text         = "Total = " + itemsCount + " from " + lengthA.ToString() + " [" + PercentageDiffs.ToString("N3") + "%]";
                    toolStripProgressBar1.Value = (toolStripProgressBar1.Maximum / 100) * 85;           // 85% progress..

                    // If both files have barriers then show the OI and UI metrics.
                    if (barrierA && barrierB)
                    {
                        StemmingErrorGroupBox.Enabled = true;

                        porterData sortedData = porterData.fileToMemory(FileBpath);

                        // NOTE(review): the calculators are constructed and then driven through
                        // static methods; presumably the constructors register their data in
                        // static state, so construction order must be preserved - confirm.
                        UnderCalc uc = new UnderCalc(sortedData);

                        UnderCalc.pd_calculateResults();

                        double strongUI = uc.getStrongUI();
                        double weakUI   = uc.getWeakUI();

                        labelUnderStemSOnly.Text = weakUI.ToString("N6");
                        labelUnderStemSW.Text    = strongUI.ToString("N6");

                        toolStripProgressBar1.Value = (toolStripProgressBar1.Maximum / 100) * 90; // 90% progress..
                        this.Update();                                                            // force update since ui thread is otherwise blocked

                        AssignNumber assignNumber = new AssignNumber(sortedData);

                        porterData weakNumberedData   = AssignNumber.pD_weakAssignNumber("WeakNumbered");
                        porterData strongNumberedData = AssignNumber.pD_strongAssignNumber("StrongNumbered");

                        int samplesize = AssignNumber.getSampleSize();

                        Regroup regroup = new Regroup(weakNumberedData);

                        porterData weakRegroupedData = regroup.pd_regroupfile("WeakRegrouped");

                        regroup = new Regroup(strongNumberedData);

                        porterData strongRegroupedData = regroup.pd_regroupfile("StrongRegrouped");

                        OverCalc oc = new OverCalc(strongRegroupedData, weakRegroupedData, strongNumberedData, weakNumberedData, samplesize);

                        OverCalc.pd_calculateResults();

                        toolStripProgressBar1.Value = (toolStripProgressBar1.Maximum / 100) * 92; // 92% progress..
                        this.Update();                                                            // force update since ui thread is otherwise blocked

                        toolStripProgressBar1.Value = (toolStripProgressBar1.Maximum / 100) * 96; // 96% progress..
                        this.Update();                                                            // force update since ui thread is otherwise blocked

                        // HACK MemoryBarrier Tom 21/10/2015 see http://stackoverflow.com/questions/5996267/c-sharp-enforcing-order-of-statement-execution
                        // Just added on suspicion that the below code somehow was getting executed before calculateResults finishes
                        System.Threading.Thread.MemoryBarrier();

                        double strongOI = oc.getStrongOIG();
                        double weakOIG  = oc.getWeakOIG();

                        // The strong index is shown with exactly eight decimals ("N8"); the
                        // other three use up to eight optional decimals.
                        labelOverStemSWG.Text    = strongOI.ToString("N8");
                        labelOverStemSOnlyG.Text = String.Format("{0:0.########}", weakOIG);

                        double strongOIL = oc.getStrongOIL();
                        double weakOIL   = oc.getWeakOIL();
                        labelOverStemSOnlyL.Text = String.Format("{0:0.########}", weakOIL);
                        labelOverStenSWL.Text    = String.Format("{0:0.########}", strongOIL);

                        // ERRT calculation (added 22 DEC 2013; logic by Tom 14/10/2015, using
                        // SwingInterface.java lines 743 - 795 of the Lancaster Univ program as reference).
                        // Slot 0 holds the values for the stemmer under test; slots 1..6 are
                        // filled by the truncation loop below (lengths 3..8).
                        double[] weakUIarray    = new double[7];
                        double[] weakOILarray   = new double[7];
                        double[] strongUIarray  = new double[7];
                        double[] strongOILarray = new double[7];

                        weakUIarray[0]    = uc.getWeakUI();
                        weakOILarray[0]   = oc.getWeakOIL();
                        strongUIarray[0]  = uc.getStrongUI();
                        strongOILarray[0] = oc.getStrongOIL();

                        toolStripProgressBar1.Value = (toolStripProgressBar1.Maximum / 100) * 97; // 97% progress..
                        this.Update();                                                            // force update since ui thread is otherwise blocked

                        // Java 743
                        sortedData = porterData.fileToMemory(FileApath);

                        for (i = 3; i < 9; i++)
                        {
                            Truncator truncator = new Truncator(sortedData);

                            porterData truncatedData = Truncator.pd_truncate("truncated", i);
                            uc = new UnderCalc(truncatedData);

                            UnderCalc.pd_calculateResults();

                            AssignNumber asn = new AssignNumber(truncatedData);
                            porterData   weakNumberedTData   = AssignNumber.pD_weakAssignNumber("WeakNumberedT");
                            porterData   strongNumberedTData = AssignNumber.pD_strongAssignNumber("StrongNumberedT");
                            samplesize = AssignNumber.getSampleSize();

                            regroup = new Regroup(weakNumberedTData);
                            porterData weakRegroupedTData = regroup.pd_regroupfile("WeakRegroupedT");
                            regroup = new Regroup(strongNumberedTData);
                            porterData strongRegroupedTData = regroup.pd_regroupfile("StrongRegroupedT");

                            oc = new OverCalc(strongRegroupedTData,
                                              weakRegroupedTData,
                                              strongNumberedTData,
                                              weakNumberedTData, samplesize);

                            OverCalc.pd_calculateResults();

                            weakUIarray[i - 2]    = uc.getWeakUI();
                            weakOILarray[i - 2]   = oc.getWeakOIL();
                            strongUIarray[i - 2]  = uc.getStrongUI();
                            strongOILarray[i - 2] = oc.getStrongOIL();

                            toolStripProgressBar1.Value = (toolStripProgressBar1.Maximum / 100) * 98; // 98% progress..
                            this.Update();                                                            // force update since ui thread is otherwise blocked
                        }

                        // The last ERRT constructor argument was changed (Tom 21/10/2015) to match
                        // line 800/801 of SwingInterface.java in the original Lancs program, which
                        // passes weakOILarray in the sOIL position; MODE_DTS passes strongOILarray.
                        // Constructor parameters: double[] wUI, wOIL, sUI, sOIL.
                        ERRT errt;

                        if (stemming_mode == MODE_DTS)
                        {
                            errt = new ERRT(weakUIarray, weakOILarray, strongUIarray, strongOILarray);
                        }
                        else
                        {
                            errt = new ERRT(weakUIarray, weakOILarray, strongUIarray, weakOILarray);
                        }

                        errt.CalculateResults();

                        // Tom 26/10/2015 see SwingInterface.java line 814 &
                        // the answer @ http://stackoverflow.com/questions/2924242/replicating-javas-decimalformat-in-c-sharp
                        labelSWSOnly.Text = (errt.getWeakSWL() * 100).ToString("N2");
                        labelSWSW.Text    = (errt.getStrongSWL() * 100).ToString("N2");

                        string errtSOnlytext = (errt.getWeakERRTL() * 100).ToString("N2");
                        string errtText      = (errt.getStrongERRTL() * 100.00).ToString("N2");
                        labelERRTSW.Text    = errtText;
                        labelERRTSOnly.Text = errtSOnlytext;

                        System.Diagnostics.Debug.WriteLine("MainForm 821: UnderCalc.numDMTCalls= " + UnderCalc.numDMTCalls);
                        System.Diagnostics.Debug.WriteLine("MainForm 822: OverCalc numStrongOILZero =  " + OverCalc.numStrongOILZero + ", numStrongOILNonZero = " + OverCalc.numStrongOILNonZero);
                    }
                    else
                    {
                        StemmingErrorGroupBox.Enabled = false;
                    }
                }
                else
                {
                    MessageBox.Show("Lengths of Lists are not the same", "ListResults");
                }
            } catch (Exception ex) {
                // Best effort: report the failure to the user instead of propagating it.
                MessageBox.Show(ex.Message + "\n \n" + ex.StackTrace, "ListResults");
            }

            stopWatch.Stop();
            TimeSpan ts = stopWatch.Elapsed;

            // Format and display the elapsed time as mm:ss.hh (hundredths of a second).
            string elapsedTime = String.Format("{0:00}:{1:00}.{2:00}", ts.Minutes, ts.Seconds, ts.Milliseconds / 10);

            toolStripStatusLabelElapsedTime.Text = elapsedTime;

            toolStripProgressBar1.Value = toolStripProgressBar1.Maximum; // finished
            this.Update();                                               // force update since ui thread is otherwise blocked
        }