} //end of null constructor
//================= end 4 ====================

//====================== 5 and 6 ======================
/// <summary>
/// Numbers each stem of a stemmed file (step 5) and then regroups the numbered
/// output by stem (step 6), once with weak barriers ignored and once with weak
/// barriers treated as strong.
/// </summary>
/// <remarks>
/// The numbering and regrouping calls are handed four paths of the form
/// <c>startPath + "\\" + Prefix + fileName</c>, where <c>fileName</c> is the file name
/// part of <paramref name="wordListFilePathName"/> and <c>Prefix</c> is one of
/// WeakNumbered, StrongNumbered, WeakRegrouped, StrongRegrouped.
/// An exception thrown by a numbering or regrouping call is shown in a message box
/// and the method returns without running the remaining steps. Exceptions thrown by
/// the <c>AssignNumber</c> / <c>Regroup</c> constructors are NOT caught here and reach
/// the caller. The <c>samplesize</c> member is updated from
/// <c>AssignNumber.getSampleSize()</c>.
/// Steps 7-15 of the original pipeline (overstemming/understemming calculation,
/// truncation and ERRT) were already disabled in this method ("Version 1.0 no
/// truncation for ERRT", 24 June 2013) and are not performed here.
/// </remarks>
/// <param name="startPath">Directory prefix for the four generated paths.</param>
/// <param name="wordListFilePathName">Path of the word list; only its file name is used, to name the outputs.</param>
/// <param name="stemmedFilePathName">Path of the stemmed file passed to the <c>AssignNumber</c> constructor.</param>
public static void CreateFiles(string startPath, string wordListFilePathName, string stemmedFilePathName)
{
    string wordListFileName = Path.GetFileName(wordListFilePathName);

    // Build each output path once; the same four names are needed by several steps.
    string weakNumberedPath = startPath + "\\WeakNumbered" + wordListFileName;
    string strongNumberedPath = startPath + "\\StrongNumbered" + wordListFileName;
    string weakRegroupedPath = startPath + "\\WeakRegrouped" + wordListFileName;
    string strongRegroupedPath = startPath + "\\StrongRegrouped" + wordListFileName;

    //====================== 5 ======================
    //number each stem to associate it with a group
    // The instance is never read: the numbering calls below are static.
    // Presumably the constructor prepares the state they work on - TODO confirm.
    AssignNumber assignnumber = new AssignNumber(stemmedFilePathName);

    //weak barriers are ignored
    try
    {
        AssignNumber.weakAssignNumber(weakNumberedPath);
    }
    catch (Exception e)
    {
        MessageBox.Show("Error: " + e, "5 - Error when performing weak numbering");
        return;
    }

    //when weak barriers are treated as strong
    try
    {
        AssignNumber.strongAssignNumber(strongNumberedPath);
    }
    catch (Exception e)
    {
        MessageBox.Show("Error: " + e, "5 - Error when performing strong numbering");
        return;
    }

    samplesize = AssignNumber.getSampleSize();
    //==================== end 5 ==================

    //=================== 6 =======================
    //sort numbered file where weak barriers were ignored by stem
    Regroup rg = new Regroup(weakNumberedPath);
    try
    {
        rg.regroupfile(weakRegroupedPath);
    }
    catch (Exception e)
    {
        MessageBox.Show("Error: " + e + " " + weakNumberedPath, "6a - Error when sorting stemmed file ");
        return;
    }

    //sort numbered file where weak barriers are strong by stem
    Regroup rg2 = new Regroup(strongNumberedPath);
    try
    {
        rg2.regroupfile(strongRegroupedPath);
    }
    catch (Exception e)
    {
        MessageBox.Show("Error: " + e, "6b - Error when sorting stemmed file");
        return;
    }
    //================= end 6 ===================

    // The original re-read the sample size at the (disabled) step 11. The call is kept
    // because nothing visible here shows getSampleSize() to be side-effect free;
    // presumably it returns the same value as above - TODO confirm and drop.
    samplesize = AssignNumber.getSampleSize();
}
/// <summary>
/// Compares <c>ListA</c> and <c>ListB</c> entry by entry and refreshes the results view,
/// the summary labels and, when both inputs contain barriers, the stemming error
/// metrics (UI, OI, SW and ERRT).
/// </summary>
/// <remarks>
/// For every index whose Levenshtein distance lies within
/// <c>numericUpDownMin</c>..<c>numericUpDownMax</c>, and whose <c>ListA</c> entry is not a
/// marker line (contains "====", "----" or "//"), the pair is handed to
/// <c>AddItemsToListViewByRange</c>. Any exception is reported in a message box rather
/// than propagated. The elapsed time label and the progress bar are always finalised.
/// </remarks>
void ListResults()
{
    //added timer for diagnostics 14 Aug 2013
    Stopwatch stopWatch = new Stopwatch();
    stopWatch.Start();

    try
    {
        // Reset the running totals (members of the enclosing class).
        iSum = 0;
        iSumR = 0;
        iSumLenA = 0;
        iSumLenB = 0;

        if (lengthA == lengthB)
        {
            int count = 1;
            int nextItemNumber = 1;
            double[] numbers = new double[4];

            //added 13 Aug 2013
            listViewResults.BeginUpdate();
            try
            {
                for (int i = 0; i < ListA.Length; i++)
                {
                    int editDistance = LevenshteinDistance.Compute(ListA[i], ListB[i]);

                    if ((editDistance >= numericUpDownMin.Value) && (editDistance <= numericUpDownMax.Value))
                    {
                        int hammingDistance = ClassHammingDistanceSimple.ModifiedHammingDistance(ListA[i], ListB[i]);
                        decimal relativeModifiedHammingDistance = ClassHammingDistanceSimple.RelativeModifiedHammingDistance(ListA[i], ListB[i]);

                        // Marker lines are never listed.
                        bool isMarkerLine = ListA[i].Contains("====")
                            || ListA[i].Contains("----")
                            || ListA[i].Contains("//");

                        if (!isMarkerLine)
                        {
                            count = nextItemNumber++;
                            // The latest returned array is what the totals below are read from.
                            numbers = AddItemsToListViewByRange(count, i, hammingDistance, relativeModifiedHammingDistance, editDistance);
                        }
                    }

                    backgroundWorker1.ReportProgress(count - 1);
                } //end of loop
            }
            finally
            {
                // Always re-enable drawing, even if a comparison throws; otherwise the
                // list stays with redraw suppressed after the error dialog is dismissed.
                listViewResults.EndUpdate();
            }

            listViewResults.AutoResizeColumns(ColumnHeaderAutoResizeStyle.HeaderSize);

            iSumLenA = numbers[0];
            iSumLenB = numbers[1];
            iSum = numbers[2]; //sum of MHD column (MHD)
            iSumR = numbers[3]; //sum of RelMHD column

            int itemsCount = listViewResults.Items.Count;

            double meanLengthA = (double)iSumLenA / itemsCount;
            double meanLengthB = (double)iSumLenB / itemsCount;
            double meanMHD = (double)iSum / itemsCount;
            double meanRelMHD = (double)(iSumR / itemsCount);
            //SSMLancs = (double)100 * (1 - (meanRelMHD/lengthA)); //TODO 13 AUG 2013 accuracy??
            double SSMLancs = (double)100 * (1 - meanRelMHD); //FIXED 18 Aug 2013 but meanRelMHD not accurate

            // The values are formatted with ToString("N3") directly: wrapping an already
            // formatted string in String.Format("{0:0.###}", ...) has no effect on it.
            labelMeanLengthA.Text = "List A Mean Word Length = " + meanLengthA.ToString("N3");
            labelMeanLengthB.Text = "List B Mean Word Length = " + meanLengthB.ToString("N3");
            labelMeanCharsRemoved.Text = "Mean Characters Removed = " + (meanLengthA - meanLengthB).ToString("N3");

            //Similarity metrics
            this.labelInverseMeanMHD.Text = "Inverse Mean MHD = " + (1 / meanMHD).ToString("N3");
            this.labelSSM.Text = "SSM* = " + SSMLancs.ToString("N3") + "%";

            //added 6 Aug 2013 to get % accurate to three decimal places
            double PercentageDiffs = (double)(100 * itemsCount) / lengthA;
            labelDiffCount.Text = "Total = " + itemsCount + " from " + lengthA.ToString() + " [" + PercentageDiffs.ToString("N3") + "%]";

            // Progress positions multiply before dividing (in 64-bit) so that a Maximum
            // that is not a multiple of 100 still lands on the intended percentage.
            toolStripProgressBar1.Value = (int)(toolStripProgressBar1.Maximum * 85L / 100); // 85% progress..

            //if both files have barriers then show OI and UI metrics
            if (barrierA && barrierB)
            {
                StemmingErrorGroupBox.Enabled = true;

                porterData sortedData = porterData.fileToMemory(FileBpath);

                UnderCalc uc = new UnderCalc(sortedData);
                UnderCalc.pd_calculateResults();
                double strongUI = uc.getStrongUI();
                double weakUI = uc.getWeakUI();
                labelUnderStemSOnly.Text = weakUI.ToString("N6");
                labelUnderStemSW.Text = strongUI.ToString("N6");

                toolStripProgressBar1.Value = (int)(toolStripProgressBar1.Maximum * 90L / 100); // 90% progress..
                this.Update(); // force update since ui thread is otherwise blocked

                // The instance is never read: the numbering calls below are static.
                // Presumably the constructor prepares the state they work on - TODO confirm.
                AssignNumber assignNumber = new AssignNumber(sortedData);
                porterData weakNumberedData = AssignNumber.pD_weakAssignNumber("WeakNumbered");
                porterData strongNumberedData = AssignNumber.pD_strongAssignNumber("StrongNumbered");
                int samplesize = AssignNumber.getSampleSize();

                Regroup regroup = new Regroup(weakNumberedData);
                porterData weakRegroupedData = regroup.pd_regroupfile("WeakRegrouped");
                regroup = new Regroup(strongNumberedData);
                porterData strongRegroupedData = regroup.pd_regroupfile("StrongRegrouped");

                OverCalc oc = new OverCalc(strongRegroupedData, weakRegroupedData, strongNumberedData, weakNumberedData, samplesize);
                OverCalc.pd_calculateResults();

                this.Update(); // force update since ui thread is otherwise blocked
                toolStripProgressBar1.Value = (int)(toolStripProgressBar1.Maximum * 96L / 100); // 96% progress..
                this.Update(); // force update since ui thread is otherwise blocked

                // HACK MemoryBarrier Tom 21/10/2015 see http://stackoverflow.com/questions/5996267/c-sharp-enforcing-order-of-statement-execution
                // Just added on suspicion that the below code somehow was getting executed before calculateResults finishes
                System.Threading.Thread.MemoryBarrier();

                double strongOI = oc.getStrongOIG();
                double weakOIG = oc.getWeakOIG();
                labelOverStemSWG.Text = strongOI.ToString("N8");
                labelOverStemSOnlyG.Text = String.Format("{0:0.########}", weakOIG);

                double strongOIL = oc.getStrongOIL();
                double weakOIL = oc.getWeakOIL();
                labelOverStemSOnlyL.Text = String.Format("{0:0.########}", weakOIL);
                labelOverStenSWL.Text = String.Format("{0:0.########}", strongOIL);

                //Added 22 DEC 2013 for ERRT Calc
                // Slot 0 holds the values for the stemmer output; slots 1..6 are filled
                // from the truncation lengths 3..8 in the loop below.
                double[] weakUIarray = new double[7];
                double[] weakOILarray = new double[7];
                double[] strongUIarray = new double[7];
                double[] strongOILarray = new double[7];

                //Tom 14/10/2015 - Adding logic for ERRT Calculation
                //Used SwingInterface.java lines 743 - 795
                //from Lancaster Univ program as reference
                weakUIarray[0] = uc.getWeakUI();
                weakOILarray[0] = oc.getWeakOIL();
                strongUIarray[0] = uc.getStrongUI();
                strongOILarray[0] = oc.getStrongOIL();

                toolStripProgressBar1.Value = (int)(toolStripProgressBar1.Maximum * 97L / 100); // 97% progress..
                this.Update(); // force update since ui thread is otherwise blocked

                //Java 743
                sortedData = porterData.fileToMemory(FileApath);
                for (int truncationLength = 3; truncationLength < 9; truncationLength++)
                {
                    // As above, the Truncator / AssignNumber instances are not read;
                    // the static calls that follow do the work.
                    Truncator truncator = new Truncator(sortedData);
                    porterData truncatedData = Truncator.pd_truncate("truncated", truncationLength);

                    UnderCalc truncatedUc = new UnderCalc(truncatedData);
                    UnderCalc.pd_calculateResults();

                    AssignNumber asn = new AssignNumber(truncatedData);
                    porterData weakNumberedTData = AssignNumber.pD_weakAssignNumber("WeakNumberedT");
                    porterData strongNumberedTData = AssignNumber.pD_strongAssignNumber("StrongNumberedT");
                    int truncatedSampleSize = AssignNumber.getSampleSize();

                    Regroup truncatedRegroup = new Regroup(weakNumberedTData);
                    porterData weakRegroupedTData = truncatedRegroup.pd_regroupfile("WeakRegroupedT");
                    truncatedRegroup = new Regroup(strongNumberedTData);
                    porterData strongRegroupedTData = truncatedRegroup.pd_regroupfile("StrongRegroupedT");

                    OverCalc truncatedOc = new OverCalc(strongRegroupedTData, weakRegroupedTData, strongNumberedTData, weakNumberedTData, truncatedSampleSize);
                    OverCalc.pd_calculateResults();

                    int slot = truncationLength - 2;
                    weakUIarray[slot] = truncatedUc.getWeakUI();
                    weakOILarray[slot] = truncatedOc.getWeakOIL();
                    strongUIarray[slot] = truncatedUc.getStrongUI();
                    strongOILarray[slot] = truncatedOc.getStrongOIL();

                    toolStripProgressBar1.Value = (int)(toolStripProgressBar1.Maximum * 98L / 100); // 98% progress..
                    this.Update(); // force update since ui thread is otherwise blocked
                }

                //Changed last Parameter of ERRT Constructor to match Line 800 SwingInterface.java of original Lancs program..
                //Although, parameter names do not seem to match to constructor (using weakOILarray in sOIL spot)
                // Tom 21/10/2015 - outside MODE_DTS the last argument is weakOILarray to match Java 801
                double[] lastErrtArgument = (stemming_mode == MODE_DTS) ? strongOILarray : weakOILarray;
                ERRT errt = new ERRT(weakUIarray, weakOILarray, strongUIarray, lastErrtArgument);
                errt.CalculateResults();

                // Tom 26/10/2015 see SwingInterface.java line 814 &
                // the answer @ http://stackoverflow.com/questions/2924242/replicating-javas-decimalformat-in-c-sharp
                labelSWSOnly.Text = (errt.getWeakSWL() * 100).ToString("N2");
                labelSWSW.Text = (errt.getStrongSWL() * 100).ToString("N2");

                string errtSOnlytext = (errt.getWeakERRTL() * 100).ToString("N2");
                string errtText = (errt.getStrongERRTL() * 100.00).ToString("N2");
                labelERRTSW.Text = errtText;
                labelERRTSOnly.Text = errtSOnlytext;

                System.Diagnostics.Debug.WriteLine("MainForm 821: UnderCalc.numDMTCalls= " + UnderCalc.numDMTCalls);
                System.Diagnostics.Debug.WriteLine("MainForm 822: OverCalc numStrongOILZero = " + OverCalc.numStrongOILZero + ", numStrongOILNonZero = " + OverCalc.numStrongOILNonZero);
            }
            else
            {
                StemmingErrorGroupBox.Enabled = false;
            }
        }
        else
        {
            MessageBox.Show("Lengths of Lists are not the same", "ListResults");
        }
    }
    catch (Exception ex)
    {
        // Report instead of rethrowing so the timing and progress clean-up below still runs.
        MessageBox.Show(ex.Message + "\n \n" + ex.StackTrace, "ListResults");
    }

    stopWatch.Stop();
    TimeSpan ts = stopWatch.Elapsed;

    // Format and display the TimeSpan value (minutes:seconds.hundredths).
    string elapsedTime = String.Format("{0:00}:{1:00}.{2:00}", ts.Minutes, ts.Seconds, ts.Milliseconds / 10);
    toolStripStatusLabelElapsedTime.Text = elapsedTime;

    toolStripProgressBar1.Value = toolStripProgressBar1.Maximum; // finished
    this.Update(); // force update since ui thread is otherwise blocked
}