/// <summary>
/// Computes a MinHash-based similarity score between two lists of word IDs.
/// </summary>
/// <param name="wordId1">Word IDs of the first document.</param>
/// <param name="wordId2">Word IDs of the second document.</param>
/// <returns>The value returned by <c>MinHash.Similarity</c> for the two signatures.</returns>
/// <exception cref="NullReferenceException">
/// Thrown when either of the class-level word lists (<c>lw1</c>, <c>lw2</c>) has not been filled yet.
/// </exception>
private static double CalculateSimilarity(List<int> wordId1, List<int> wordId2)
{
    // Guard: the sizing below reads the class-level word lists, so both must exist.
    if (lw1 == null || lw2 == null)
    {
        throw new NullReferenceException("Fill the word lists first before continuing to proceed similarity calculation.");
    }

    const int hashFunctionCount = 100;

    // Universe size is the (integer) average of the two word-list lengths.
    int universe = (lw1.Count + lw2.Count) / 2;

    // The MinHash instance is stored in the class-level member and disposed when the block exits.
    using (minHash = new MinHash(universe, hashFunctionCount))
    {
        List<uint> firstSignature = minHash.GetMinHash(wordId1);
        List<uint> secondSignature = minHash.GetMinHash(wordId2);

        return minHash.Similarity(firstSignature, secondSignature);
    }
}
/// <summary>
/// Click handler for the save button: tokenizes the two input texts, resolves their
/// semantic relations, computes MinHash signatures, and shows the Jaccard and Pearson
/// results together with the matched dictionary entries.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void saveButton_Click(object sender, EventArgs e)
{
    // Tokenize both input texts.
    var lw1 = TFIDF.Tokenize(teks1.Text);
    var lw2 = TFIDF.Tokenize(teks2.Text);

    // Number of hash functions: half of the combined token count, rounded up.
    // NOTE(review): when both texts yield no tokens this is 0; how MinHash behaves
    // with zero hash functions is not visible from here - confirm.
    int hashno = Convert.ToInt32(Math.Ceiling((decimal)(lw1.Count + lw2.Count) / 2));
    MinHash mh = new MinHash(Math.Min(lw1.Count, lw2.Count), hashno);

    // Vocabulary lists and (dictionary ID -> matched string) pairs; all four are
    // produced by GetSemanticRelation through its out parameters.
    List<string> vl1;
    List<string> vl2;
    Dictionary<int, string> res1;
    Dictionary<int, string> res2;

    // Plain lists of dictionary IDs for each text.
    List<int> sr1 = wn.GetSemanticRelation(lw1, 1, 2, out vl1, out res1, dictfile, stemmer);
    List<int> sr2 = wn.GetSemanticRelation(lw2, 2, 2, out vl2, out res2, dictfile, stemmer);

    List<uint> hash1 = mh.GetMinHash(sr1);
    List<uint> hash2 = mh.GetMinHash(sr2);

    // The Pearson calculation takes doubles, so widen the uint signatures.
    List<double> hashdb1 = hash1.ConvertAll(h => Convert.ToDouble(h));
    List<double> hashdb2 = hash2.ConvertAll(h => Convert.ToDouble(h));

    // Final Jaccard and Pearson figures.
    var jacc = (double)mh.Similarity(hash1, hash2);
    var jaccpercent = jacc * 100;
    var pear = pr.Calc(hashdb1, hashdb2);

    jaccardResult.Text = jaccpercent + "%";
    pearsonResult.Text = (double)(100 - pear) + "%";
    hashresult1.Text = BuildMatchListHtml(res1);
    hashresult2.Text = BuildMatchListHtml(res2);
    divresult.Visible = true;
}

/// <summary>
/// Renders each (dictionary ID, matched string) pair as a <c>&lt;p&gt;value: id&lt;/p&gt;</c> fragment.
/// </summary>
/// <param name="matches">Pairs of dictionary ID and matched string.</param>
/// <returns>The concatenated HTML fragments, or an empty string when there are no pairs.</returns>
private static string BuildMatchListHtml(Dictionary<int, string> matches)
{
    // NOTE(review): values are emitted without HTML encoding; if they can contain
    // text derived from user input, encode them before rendering - confirm.
    var html = new System.Text.StringBuilder();
    foreach (var item in matches)
    {
        html.Append("<p>")
            .Append(item.Value)
            .Append(": ")
            .Append(item.Key.ToString())
            .Append("</p>");
    }

    return html.ToString();
}
/// <summary>
/// Runs the fingerprint comparison for hash-function counts 100, 150, ..., 5000 and exports
/// the results: one worksheet row per count into <c>Out/naziefresult_{currentindex}.xlsx</c>,
/// and the collected series as <c>name&lt;-c(...)</c> lines (R vector-assignment syntax) into
/// <c>{path}\Out\naziefr_{currentindex}.txt</c>.
/// </summary>
/// <param name="currentindex">Index used as the suffix of both output file names.</param>
/// <remarks>
/// Reads the class-level members <c>lw1</c>, <c>lw2</c>, <c>sr1</c>, <c>sr2</c>, <c>vl1</c>,
/// <c>vl2</c> and <c>path</c>; overwrites <c>hash1</c> and <c>hash2</c>; and advances
/// <c>Wordnet.consoleln</c> three times per iteration. Each result line is also echoed to the console.
/// </remarks>
private static void ExportExcel(int currentindex)
{
    const int firstHashCount = 100;
    const int lastHashCount = 5000;
    const int hashCountStep = 50;

    Pearson pr = new Pearson();
    string workbookPath = "Out/naziefresult_" + currentindex.ToString() + ".xlsx";

    using (ExcelPackage ep = new ExcelPackage())
    {
        ep.Workbook.Properties.Title = "FINGERPRINT ANALYSIS";
        ep.Workbook.Properties.Author = "THEODORUS YOGA M";

        var ws = ep.Workbook.Worksheets.Add("Fingerprint Result");
        ws.Cells[1, 1].Value = "Hash No.";
        ws.Cells[1, 2].Value = "Jaccard";
        ws.Cells[1, 3].Value = "Pearson";
        ws.Cells[1, 4].Value = "Euclidean";

        List<double> jaccard = new List<double>();
        List<double> jaccardMin = new List<double>();
        List<double> pearson = new List<double>();
        List<double> euclidean = new List<double>();

        int row = 2; // row 1 holds the headers
        for (int hashno = firstHashCount; hashno <= lastHashCount; hashno += hashCountStep, row++)
        {
            MinHash mh = new MinHash(Math.Min(lw1.Count, lw2.Count), hashno);
            hash1 = mh.GetMinHash(sr1); // signature of the 1st semantic relation
            hash2 = mh.GetMinHash(sr2); // signature of the 2nd semantic relation

            // The Pearson calculation takes doubles, so widen both signatures.
            List<double> hashdb1 = new List<double>();
            foreach (uint hash in hash1)
            {
                hashdb1.Add(Convert.ToDouble(hash));
            }
            Wordnet.consoleln++;

            List<double> hashdb2 = new List<double>();
            foreach (uint hash in hash2)
            {
                hashdb2.Add(Convert.ToDouble(hash));
            }
            Wordnet.consoleln++;

            // TF-IDF vectors for the two vocabularies.
            List<List<string>> vocabsInDoc = new List<List<string>>();
            vocabsInDoc.Add(vl1);
            vocabsInDoc.Add(vl2);
            var tfidf = TFIDF.CalculateTFIDF(vocabsInDoc);
            Wordnet.consoleln++;

            // Compute each metric once and reuse it for the console, the sheet and the series.
            var jacc = (double)mh.Similarity(hash1, hash2);
            var pear = pr.Calc(hashdb1, hashdb2);
            var euc = TFIDF.EuclideanDist(tfidf[0], tfidf[1]);

            Console.WriteLine(hashno + "\t,\t" + jacc + "\t,\t" + pear + "\t,\t" + euc);

            ws.Cells[row, 1].Value = hashno.ToString();
            ws.Cells[row, 2].Value = jacc;
            ws.Cells[row, 3].Value = pear;
            ws.Cells[row, 4].Value = euc;

            jaccard.Add(jacc);
            jaccardMin.Add(Math.Abs(jacc - 1));
            pearson.Add(pear);
            euclidean.Add(euc);
        }

        string append = FormatRVector("JaccardMin", jaccardMin) + "\n"
            + FormatRVector("Jaccard", jaccard) + "\n"
            + FormatRVector("Pearson", pearson) + "\n"
            + FormatRVector("Euclidean", euclidean);

        // CreateDirectory does nothing when the directory already exists.
        Directory.CreateDirectory("Out");

        // FileMode.Create truncates a workbook left by an earlier run; writing into an
        // existing longer file would leave stale bytes after the new content. The using
        // block closes the stream even if SaveAs throws.
        using (FileStream file = File.Open(workbookPath, FileMode.Create))
        {
            ep.SaveAs(file);
        }

        WriteFile(path + @"\Out\naziefr_" + currentindex.ToString() + ".txt", append);
    }
}

/// <summary>
/// Formats a series as <c>name&lt;-c(v1, v2, ...)</c> with culture-independent decimal points
/// and no trailing separator.
/// </summary>
/// <param name="name">Variable name placed before <c>&lt;-c(</c>.</param>
/// <param name="values">Values to list, in order.</param>
/// <returns>The formatted assignment; <c>name&lt;-c()</c> when <paramref name="values"/> is empty.</returns>
private static string FormatRVector(string name, List<double> values)
{
    // Invariant culture always uses '.' as the decimal separator, whatever the thread culture is.
    string[] parts = values
        .ConvertAll(v => v.ToString(System.Globalization.CultureInfo.InvariantCulture))
        .ToArray();

    return name + "<-c(" + string.Join(", ", parts) + ")";
}