// Builds SVM-format test feature files for the "shifted" and "unshifted" sentence sets.
// For each set: pass 1 scans the training sentences to build the token vocabulary (dic),
// per-class token frequencies (dict1 = class "-1", dict2 = class "+1") and class totals;
// pass 2 re-reads the test sentences and emits one "+1 idx:weight ..." line per sentence,
// where weight = log( (token tf in sentence) / (token tf in training corpus) ), keeping
// only tokens that occur in BOTH classes and have a non-negative log ratio.
// Side effects: writes ./svm-s_test.txt and ./svm-u_test.txt; reads six fixed input files
// from the working directory.
public void Processing() {
    NLP nlp = new NLP();
    StreamWriter swt = new StreamWriter("./svm-s_test.txt");      // shifted test output
    StreamWriter uwt = new StreamWriter("./svm-u_test.txt");      // unshifted test output
    StreamReader sr = new StreamReader("./shifted_sent.txt");     // shifted training sentences
    StreamReader ur = new StreamReader("./unshifted_sent.txt");   // unshifted training sentences
    StreamReader sf = new StreamReader("./shifted_file.txt");     // source-file name per shifted sentence
    StreamReader uf = new StreamReader("./unshifted_file.txt");   // source-file name per unshifted sentence
    StreamReader srt = new StreamReader("./shiftedsent_test.txt");
    StreamReader urt = new StreamReader("./unshiftedsent_test.txt");
    Dictionary<string, int> dic = new Dictionary<string, int>();        // token -> feature index
    Dictionary<string, string> dicc = new Dictionary<string, string>(); // sentence -> polarity; populated but never read here — TODO confirm it is needed
    Dictionary<string, int> dict1 = new Dictionary<string, int>();      // token -> frequency in class "-1"
    Dictionary<string, int> dict2 = new Dictionary<string, int>();      // token -> frequency in class "+1"
    Dictionary<string, int> dicts = new Dictionary<string, int>();      // token -> frequency in current test sentence
    int number = 1;                 // next feature index to assign
    int count1 = 0, count2 = 0;     // total token counts per class
    string line, file;
    string pol = "-1";
    // --- Pass 1 (shifted): build vocabulary and per-class frequencies. ---
    // NOTE(review): once a file name contains "29590" the polarity flips to "+1" and never
    // flips back — assumes the file list is ordered so everything after that point is
    // positive; confirm against how shifted_file.txt is generated.
    while (((line = sr.ReadLine()) != null) && ((file = sf.ReadLine()) != null)) {
        string[] tokenss;
        if (file.Contains("29590")) { pol = "+1"; }
        if (!dicc.ContainsKey(line)) { dicc.Add(line, pol); }
        tokenss = nlp.Lemmatization(nlp.Stem(nlp.FilterOutStopWords(nlp.Tokenize(line))));
        foreach (string token in tokenss) {
            if (pol == "-1") {
                count1++;
                if (dict1.ContainsKey(token)) { dict1[token]++; } else { dict1.Add(token, 1); }
                if (dic.ContainsKey(token)) { continue; } else { dic.Add(token, number++); }
            } else {
                count2++;
                if (dict2.ContainsKey(token)) { dict2[token]++; } else { dict2.Add(token, 1); }
                if (dic.ContainsKey(token)) { continue; } else { dic.Add(token, number++); }
            }
        }
    }
    // Extend the vocabulary with tokens that appear only in the shifted test sentences.
    while (((line = srt.ReadLine()) != null)) {
        string[] tokenss;
        // NOTE(review): pol here is whatever value the training loop left behind — confirm intended.
        if (!dicc.ContainsKey(line)) { dicc.Add(line, pol); }
        tokenss = nlp.Lemmatization(nlp.Stem(nlp.FilterOutStopWords(nlp.Tokenize(line))));
        foreach (string token in tokenss) {
            if (dic.ContainsKey(token)) { continue; } else { dic.Add(token, number++); }
        }
    }
    int tcount; // tokens in the current test sentence
    srt.Close(); // FIX: close the first reader before re-opening (previously leaked)
    srt = new StreamReader("./shiftedsent_test.txt");
    // --- Pass 2 (shifted): emit one SVM feature line per test sentence. ---
    while ((line = srt.ReadLine()) != null) {
        swt.Write("+1 ");
        swt.Flush();
        tcount = 0;
        string[] tokens;
        tokens = nlp.Lemmatization(nlp.Stem(nlp.FilterOutStopWords(nlp.Tokenize(line))));
        foreach (string token in tokens) {
            tcount++;
            if (!dicts.ContainsKey(token)) { dicts.Add(token, 1); } else { dicts[token]++; }
        }
        foreach (KeyValuePair<string, int> kvp in dicts) {
            // Keep only tokens seen in BOTH classes whose sentence-tf / corpus-tf log ratio is >= 0.
            if (!dict1.ContainsKey(kvp.Key) || !dict2.ContainsKey(kvp.Key) || (Math.Log((double)(((double)(kvp.Value) / (double)(tcount)) / ((double)(dict1[kvp.Key] + dict2[kvp.Key]) / (double)(count1 + count2)))) < 0)) {
                continue;
            } else {
                swt.Write(dic[kvp.Key] + ":" + (Math.Log((double)(((double)(kvp.Value) / (double)(tcount)) / ((double)(dict1[kvp.Key] + dict2[kvp.Key]) / (double)(count1 + count2))))) + " ");
            }
            swt.Flush();
        }
        swt.WriteLine();
        swt.Flush();
        dicts.Clear();
    }
    srt.Close();
    swt.Close();
    sr.Close();
    sf.Close();
    // Reset all per-set state and repeat the whole procedure for the unshifted set.
    dic.Clear();
    dict1.Clear();
    dict2.Clear();
    pol = "-1";
    number = 1;
    count1 = 0;
    count2 = 0;
    // --- Pass 1 (unshifted). ---
    while (((line = ur.ReadLine()) != null) && ((file = uf.ReadLine()) != null)) {
        string[] tokenfs;
        if (file.Contains("29590")) { pol = "+1"; }
        if (!dicc.ContainsKey(line)) { dicc.Add(line, pol); }
        tokenfs = nlp.Lemmatization(nlp.Stem(nlp.FilterOutStopWords(nlp.Tokenize(line))));
        foreach (string token in tokenfs) {
            if (pol == "-1") {
                count1++;
                if (dict1.ContainsKey(token)) { dict1[token]++; } else { dict1.Add(token, 1); }
                if (dic.ContainsKey(token)) { continue; } else { dic.Add(token, number++); }
            } else {
                count2++;
                if (dict2.ContainsKey(token)) { dict2[token]++; } else { dict2.Add(token, 1); }
                if (dic.ContainsKey(token)) { continue; } else { dic.Add(token, number++); }
            }
        }
    }
    // Extend the vocabulary with tokens that appear only in the unshifted test sentences.
    while (((line = urt.ReadLine()) != null)) {
        string[] tokenss;
        if (!dicc.ContainsKey(line)) { dicc.Add(line, pol); }
        tokenss = nlp.Lemmatization(nlp.Stem(nlp.FilterOutStopWords(nlp.Tokenize(line))));
        foreach (string token in tokenss) {
            if (dic.ContainsKey(token)) { continue; } else { dic.Add(token, number++); }
        }
    }
    urt.Close(); // FIX: close the first reader before re-opening (previously leaked)
    urt = new StreamReader("./unshiftedsent_test.txt");
    // --- Pass 2 (unshifted). ---
    while ((line = urt.ReadLine()) != null) {
        tcount = 0;
        uwt.Write("+1 ");
        uwt.Flush();
        string[] tokens;
        tokens = nlp.Lemmatization(nlp.Stem(nlp.FilterOutStopWords(nlp.Tokenize(line))));
        foreach (string token in tokens) {
            tcount++;
            if (!dicts.ContainsKey(token)) { dicts.Add(token, 1); } else { dicts[token]++; }
        }
        foreach (KeyValuePair<string, int> kvp in dicts) {
            if (!dict2.ContainsKey(kvp.Key) || !dict1.ContainsKey(kvp.Key) || (Math.Log((double)(((double)(kvp.Value) / (double)(tcount)) / ((double)(dict1[kvp.Key] + dict2[kvp.Key]) / (double)(count1 + count2)))) < 0)) {
                continue;
            } else {
                uwt.Write(dic[kvp.Key] + ":" + (Math.Log((double)(((double)(kvp.Value) / (double)(tcount)) / ((double)(dict1[kvp.Key] + dict2[kvp.Key]) / (double)(count1 + count2))))) + " ");
            }
            uwt.Flush();
        }
        uwt.WriteLine();
        uwt.Flush();
        dicts.Clear();
    }
    urt.Close();
    uwt.Close();
    ur.Close();
    uf.Close();
}
// Builds SVM-format training ("./svm_B.txt") and test ("./svm_B_test.txt") files using a
// BIGRAM model. folder1 = class "-1" training docs, folder2 = class "+1" training docs,
// folder3 = test docs. Pass 1 builds the bigram vocabulary (dic), per-class frequencies
// (dict1/dict2, with totals count1/count2) and a global frequency table (dict) that is
// dumped to the console. Pass 2 writes one "label idx:weight ..." line per document,
// keeping only bigrams with a non-negative log( doc-tf / reference-corpus-tf ) weight.
public void Processing(string folder1, string folder2, string folder3) {
    StreamWriter sww = new StreamWriter("./svm_B.txt");       // training output (classes -1 and +1)
    StreamWriter swt = new StreamWriter("./svm_B_test.txt");  // test output
    string folderName = System.Windows.Forms.Application.StartupPath + @folder1;
    int number = 1;             // next feature index to assign
    NLP nlp = new NLP();
    int count1 = 0, count2 = 0; // total bigram counts per class
    Dictionary<string, int> dict = new Dictionary<string, int>();   // bigram -> global frequency (console dump only)
    Dictionary<string, int> dic = new Dictionary<string, int>();    // bigram -> feature index
    Dictionary<string, int> dict1 = new Dictionary<string, int>();  // bigram -> frequency in folder1 (class -1)
    Dictionary<string, int> dict2 = new Dictionary<string, int>();  // bigram -> frequency in folder2 (class +1)
    Dictionary<string, int> dicts = new Dictionary<string, int>();  // bigram -> frequency in current document
    // --- Pass 1: i == 0 -> folder1, i == 1 -> folder2, i == 2 -> folder3 (vocabulary only). ---
    for (int i = 0; i < 3; i++) {
        foreach (string fname in System.IO.Directory.GetFiles(folderName)) {
            string line;
            System.IO.StreamReader file = new System.IO.StreamReader(fname);
            while ((line = file.ReadLine()) != null) {
                string[] sents = nlp.SentDetect(line.Trim());
                string[] tokens, tokens_reviews;
                Tuple<string, string>[] bigram;
                foreach (string sent in sents) {
                    tokens = nlp.Tokenize(sent); // tokenize sentences
                    bigram = nlp.Bigrams(nlp.Lemmatization(nlp.Stem(nlp.FilterOutStopWords(tokens))));
                    // Concatenate each bigram pair into a single dictionary key.
                    tokens_reviews = new string[bigram.Length];
                    int k = 0;
                    foreach (Tuple<string, string> tpl in bigram) { tokens_reviews[k++] = tpl.Item1 + tpl.Item2; }
                    foreach (string token in tokens_reviews) {
                        if (i == 0) {
                            count1++;
                            if (dict1.ContainsKey(token)) { dict1[token]++; } else { dict1.Add(token, 1); }
                        } else if (i == 1) {
                            count2++;
                            if (dict2.ContainsKey(token)) { dict2[token]++; } else { dict2.Add(token, 1); }
                        }
                        if (dict.ContainsKey(token)) { dict[token]++; } else { dict.Add(token, 1); }
                        if (dic.ContainsKey(token)) { continue; } else { dic.Add(token, number++); }
                    }
                }
            }
            file.Close(); // FIX: reader was never closed in this pass (leaked one handle per file)
        }
        if (i == 0) { folderName = System.Windows.Forms.Application.StartupPath + @folder2; }
        else if (i == 1) { folderName = System.Windows.Forms.Application.StartupPath + @folder3; }
        // Debug dump of the cumulative global frequency table after each folder.
        foreach (KeyValuePair<string, int> item in dict) { Console.WriteLine(item.Key + " " + item.Value); }
    }
    // --- Pass 2: re-read all three folders and emit the feature lines. ---
    folderName = System.Windows.Forms.Application.StartupPath + @folder1;
    for (int i = 0; i < 3; i++) {
        foreach (string fname in System.IO.Directory.GetFiles(folderName)) {
            int tcount = 0; // bigrams in the current document
            string line;
            System.IO.StreamReader file = new System.IO.StreamReader(fname);
            if (i == 0) { sww.Write("-1 "); } else if (i == 1) { sww.Write("+1 "); } else { swt.Write("+1 "); }
            while ((line = file.ReadLine()) != null) {
                if (i < 2) { sww.Flush(); } else { swt.Flush(); }
                string[] sents = nlp.SentDetect(line.Trim());
                string[] tokens, tokens_reviews;
                Tuple<string, string>[] bigram;
                foreach (string sent in sents) {
                    tokens = nlp.Tokenize(sent); // tokenize sentences
                    bigram = nlp.Bigrams(nlp.Lemmatization(nlp.Stem(nlp.FilterOutStopWords(tokens))));
                    tokens_reviews = new string[bigram.Length];
                    int k = 0;
                    foreach (Tuple<string, string> tpl in bigram) { tokens_reviews[k++] = tpl.Item1 + tpl.Item2; }
                    foreach (string token in tokens_reviews) {
                        tcount++;
                        if (!dicts.ContainsKey(token)) { dicts.Add(token, 1); } else { dicts[token]++; }
                    }
                }
            }
            // NOTE(review): class -1 documents are weighted against the OPPOSITE class's
            // frequencies (dict2/count2), and class +1 against dict1/count1; the test set
            // uses both classes combined. Looks deliberate but confirm with the model spec.
            foreach (KeyValuePair<string, int> kvp in dicts) {
                if (i == 0) {
                    if (!dict2.ContainsKey(kvp.Key) || (Math.Log((double)(((double)(kvp.Value) / (double)(tcount)) / ((double)(dict2[kvp.Key]) / (double)(count2)))) < 0)) {
                        continue;
                    } else {
                        sww.Write(dic[kvp.Key] + ":" + (Math.Log((double)(((double)(kvp.Value) / (double)(tcount)) / ((double)(dict2[kvp.Key]) / (double)(count2))))));
                    }
                } else if (i == 1) {
                    if (!dict1.ContainsKey(kvp.Key) || (Math.Log((double)(((double)(kvp.Value) / (double)(tcount)) / ((double)(dict1[kvp.Key]) / (double)(count1)))) < 0)) {
                        continue;
                    } else {
                        sww.Write(dic[kvp.Key] + ":" + (Math.Log((double)(((double)(kvp.Value) / (double)(tcount)) / ((double)(dict1[kvp.Key]) / (double)(count1))))));
                    }
                } else {
                    if (!dict1.ContainsKey(kvp.Key) || !dict2.ContainsKey(kvp.Key) || (Math.Log((double)(((double)(kvp.Value) / (double)(tcount)) / ((double)(dict1[kvp.Key] + dict2[kvp.Key]) / (double)(count1 + count2)))) < 0)) {
                        continue;
                    } else {
                        swt.Write(dic[kvp.Key] + ":" + (Math.Log((double)(((double)(kvp.Value) / (double)(tcount)) / ((double)(dict1[kvp.Key] + dict2[kvp.Key]) / (double)(count1 + count2))))));
                    }
                }
                if (i < 2) { sww.Flush(); sww.Write(" "); sww.Flush(); } else { swt.Flush(); swt.Write(" "); swt.Flush(); }
            }
            if (i < 2) { sww.WriteLine(""); sww.Flush(); } else { swt.WriteLine(""); swt.Flush(); }
            file.Close(); // FIX: was only closed for the directory's LAST file when i == 2
            dicts.Clear();
        }
        if (i == 0) { folderName = System.Windows.Forms.Application.StartupPath + @folder2; }
        else if (i == 1) { folderName = System.Windows.Forms.Application.StartupPath + @folder3; sww.Close(); }
        if (i == 2) { swt.Close(); }
    }
}
// Builds an SVM-format test file ("./svm-negation_test.txt") using a NEGATION-AWARE
// unigram model. folder1 = class "-1" training docs, folder2 = class "+1" training docs,
// folder3 = test docs. During training, when a negation token is seen the (previous token
// + negation) gram is counted, and the negation token is glued onto the FOLLOWING token
// as an extra gram; all other tokens are counted as plain unigrams. The scoring pass over
// folder3 then emits "+1 idx:weight ..." lines with weight = log( doc-tf / corpus-tf ).
// NOTE(review): the scoring pass counts plain unigrams only (no negation grams), so the
// negation grams accumulated in dict1/dict2 can never match a test key — confirm intended.
public void Processing(string folder1, string folder2, string folder3) {
    StreamWriter swt = new StreamWriter("./svm-negation_test.txt");
    string folderName = System.Windows.Forms.Application.StartupPath + @folder1;
    int number = 1;             // next feature index to assign
    NLP nlp = new NLP();
    int count1 = 0, count2 = 0; // total gram counts per class
    Dictionary<string, int> dict = new Dictionary<string, int>();   // gram -> global frequency
    Dictionary<string, int> dic = new Dictionary<string, int>();    // gram -> feature index
    Dictionary<string, int> dict1 = new Dictionary<string, int>();  // gram -> frequency in folder1 (class -1)
    Dictionary<string, int> dict2 = new Dictionary<string, int>();  // gram -> frequency in folder2 (class +1)
    Dictionary<string, int> dicts = new Dictionary<string, int>();  // gram -> frequency in current test document
    // --- Pass 1: i == 0 -> folder1, i == 1 -> folder2, i == 2 -> folder3 (vocabulary only). ---
    for (int i = 0; i < 3; i++) {
        foreach (string fname in System.IO.Directory.GetFiles(folderName)) {
            string line, neggram = "";
            System.IO.StreamReader file = new System.IO.StreamReader(fname);
            while ((line = file.ReadLine()) != null) {
                bool neg = false; // true while a negation gram is waiting for its next token
                string[] sents = nlp.SentDetect(line.Trim());
                string[] tokens, tokens_reviews;
                foreach (string sent in sents) {
                    string pretok = ""; // previous token within this sentence
                    tokens = nlp.Tokenize(sent); // tokenize sentences
                    tokens_reviews = nlp.Lemmatization(nlp.Stem(nlp.FilterOutStopWords(tokens)));
                    foreach (string token in tokens_reviews) {
                        if (nlp.IsNegation(token)) {
                            // Count the (previous token + negation) gram, then start a fresh
                            // negation gram from the negation token itself.
                            neggram = pretok + token;
                            neg = true;
                            if (i == 0) {
                                count1++;
                                if (dict1.ContainsKey(neggram)) { dict1[neggram]++; } else { dict1.Add(neggram, 1); }
                            } else if (i == 1) {
                                count2++;
                                if (dict2.ContainsKey(neggram)) { dict2[neggram]++; } else { dict2.Add(neggram, 1); }
                            }
                            if (dict.ContainsKey(neggram)) { dict[neggram]++; } else { dict.Add(neggram, 1); }
                            // FIX: was "if (dic.ContainsKey(neggram)) { continue; }", which skipped
                            // the "neggram = token;" reset whenever the gram was already indexed,
                            // gluing later tokens onto a stale gram.
                            if (!dic.ContainsKey(neggram)) { dic.Add(neggram, number++); }
                            neggram = token;
                            continue;
                        }
                        pretok = token;
                        if (i == 0) {
                            if (neg) {
                                // Attach this token to the pending negation gram.
                                neggram = neggram + token;
                                count1++;
                                if (dict1.ContainsKey(neggram)) { dict1[neggram]++; } else { dict1.Add(neggram, 1); }
                                if (dict.ContainsKey(neggram)) { dict[neggram]++; } else { dict.Add(neggram, 1); }
                                // FIX: was add-or-continue, which skipped "neg = false;" when the
                                // gram was already indexed, leaving the negation state stuck on.
                                if (!dic.ContainsKey(neggram)) { dic.Add(neggram, number++); }
                                neg = false;
                            } else {
                                count1++;
                                if (dict1.ContainsKey(token)) { dict1[token]++; } else { dict1.Add(token, 1); }
                                if (dict.ContainsKey(token)) { dict[token]++; } else { dict.Add(token, 1); }
                                if (!dic.ContainsKey(token)) { dic.Add(token, number++); }
                            }
                        } else if (i == 1) {
                            if (neg) {
                                neggram = neggram + token;
                                count2++;
                                if (dict2.ContainsKey(neggram)) { dict2[neggram]++; } else { dict2.Add(neggram, 1); }
                                if (dict.ContainsKey(neggram)) { dict[neggram]++; } else { dict.Add(neggram, 1); }
                                if (!dic.ContainsKey(neggram)) { dic.Add(neggram, number++); } // FIX: see i == 0 branch
                                neg = false;
                            } else {
                                count2++;
                                if (dict2.ContainsKey(token)) { dict2[token]++; } else { dict2.Add(token, 1); }
                                if (dict.ContainsKey(token)) { dict[token]++; } else { dict.Add(token, 1); }
                                if (!dic.ContainsKey(token)) { dic.Add(token, number++); }
                            }
                        } else {
                            // Test folder: vocabulary/global counts only, no class counts.
                            if (neg) {
                                neggram = neggram + token;
                                if (dict.ContainsKey(neggram)) { dict[neggram]++; } else { dict.Add(neggram, 1); }
                                if (!dic.ContainsKey(neggram)) { dic.Add(neggram, number++); } // FIX: see i == 0 branch
                                neg = false;
                            } else {
                                if (dict.ContainsKey(token)) { dict[token]++; } else { dict.Add(token, 1); }
                                if (!dic.ContainsKey(token)) { dic.Add(token, number++); }
                            }
                        }
                    }
                }
            }
            file.Close(); // FIX: reader was never closed in this pass (leaked one handle per file)
        }
        if (i == 0) { folderName = System.Windows.Forms.Application.StartupPath + @folder2; }
        else if (i == 1) { folderName = System.Windows.Forms.Application.StartupPath + @folder3; }
    }
    // --- Pass 2: score folder3 (folderName still points there after the loop). ---
    foreach (string fname in System.IO.Directory.GetFiles(folderName)) {
        int tcount = 0; // tokens in the current document
        string line;
        System.IO.StreamReader file = new System.IO.StreamReader(fname);
        swt.Write("+1 ");
        while ((line = file.ReadLine()) != null) {
            swt.Flush();
            string[] sents = nlp.SentDetect(line.Trim());
            string[] tokens, tokens_reviews;
            foreach (string sent in sents) {
                tokens = nlp.Tokenize(sent); // tokenize sentences
                tokens_reviews = nlp.Lemmatization(nlp.Stem(nlp.FilterOutStopWords(tokens)));
                foreach (string token in tokens_reviews) {
                    tcount++;
                    if (!dicts.ContainsKey(token)) { dicts.Add(token, 1); } else { dicts[token]++; }
                }
            }
        }
        foreach (KeyValuePair<string, int> kvp in dicts) {
            // Keep only tokens seen in BOTH classes whose log( doc-tf / corpus-tf ) is >= 0.
            if (!dict1.ContainsKey(kvp.Key) || !dict2.ContainsKey(kvp.Key) || (Math.Log((double)(((double)(kvp.Value) / (double)(tcount)) / ((double)(dict1[kvp.Key] + dict2[kvp.Key]) / (double)(count1 + count2)))) < 0)) {
                continue;
            } else {
                swt.Write(dic[kvp.Key] + ":" + (Math.Log((double)(((double)(kvp.Value) / (double)(tcount)) / ((double)(dict1[kvp.Key] + dict2[kvp.Key]) / (double)(count1 + count2))))));
            }
            swt.Flush();
            swt.Write(" ");
            swt.Flush();
        }
        swt.WriteLine("");
        swt.Flush();
        file.Close(); // FIX: was only closed for the directory's LAST file
        dicts.Clear();
    }
    swt.Close();
}