/// <summary>
/// Processes every tab-separated pair listed in <paramref name="inputFileName"/>: the two
/// fields of each line are handed to <c>readNamedEntities</c>, the resulting entity lists are
/// passed to <c>match</c> together with the model returned by <c>readTE</c>, and the
/// accumulated <c>matches</c> dictionary is finally saved to <paramref name="outputFileName"/>.
/// </summary>
/// <param name="teModelFileName">File passed to <c>readTE</c> to load the model.</param>
/// <param name="inputFileName">UTF-8 text file with one pair per line, fields separated by a tab.</param>
/// <param name="outputFileName">Destination file for the collected matches (UTF-8, tab-separated).</param>
/// <param name="srcLang">Not used by this method.</param>
/// <param name="trgLang">Not used by this method.</param>
/// <param name="additionalAnnotation">Forwarded unchanged to <c>readNamedEntities</c>.</param>
/// <remarks>
/// Lines with fewer than two tab-separated fields are skipped; non-blank ones are reported on
/// standard error. The method blocks on <c>Console.ReadLine()</c> before returning.
/// </remarks>
private static void map(string teModelFileName, string inputFileName, string outputFileName, string srcLang, string trgLang, bool additionalAnnotation)
{
    Dictionary<string, Dictionary<string, double>> teModel = readTE(teModelFileName);
    Dictionary<string, double> matches = new Dictionary<string, double>();

    // 'using' guarantees the reader is closed even when processing a pair throws.
    using (StreamReader rdr = new StreamReader(inputFileName, Encoding.UTF8))
    {
        string line;
        while ((line = rdr.ReadLine()) != null)
        {
            string[] parts = line.Trim().Split('\t');

            if (parts.Length < 2)
            {
                // Indexing parts[1] below would throw; blank lines are ignored silently,
                // anything else is reported so the skipped input is not lost unnoticed.
                if (parts[0].Length > 0)
                {
                    Console.Error.WriteLine("Skipping malformed input line (expected two tab-separated fields): {0}", line);
                }
                continue;
            }

            Console.Write("Processing pair {0} - {1} ... ", parts[0], parts[1]);

            List<Entity> srcNES = new List<Entity>();
            List<Entity> trgNES = new List<Entity>();

            readNamedEntities(parts[0], parts[1], ref srcNES, ref trgNES, additionalAnnotation);
            match(srcNES, trgNES, teModel, ref matches);

            Console.WriteLine("done!");
        }
    }

    DataStructWriter<string, double>.saveDictionary(matches, outputFileName, false, Encoding.UTF8, '\t', null, null);

    Console.WriteLine("");
    Console.WriteLine("Job finished. Press ENTER to continue...");
    Console.ReadLine();
}
//private static void createInput(string dir1, string dir2, string oFile)
//{
//    string[] files1 = Directory.GetFiles(dir1);
//    string[] files2 = Directory.GetFiles(dir2);
//    for (int i = 0; i < files1.Length; i++)
//    {
//        files1[i] = Path.GetFileName(files1[i]);
//    }
//    for (int i = 0; i < files2.Length; i++)
//    {
//        files2[i] = Path.GetFileName(files2[i]);
//    }
//    StreamWriter wrt = new StreamWriter(oFile, false, Encoding.UTF8);
//    wrt.AutoFlush = true;
//    foreach (string file in files1)
//    {
//        if (files2.Contains(file))
//        {
//            wrt.WriteLine("{0}\t{1}", dir1 + "/" + file, dir2 + "/" + file);
//        }
//    }
//    wrt.Close();
//}

/// <summary>
/// Processes every tab-separated pair listed in <paramref name="inputFileName"/>: the two
/// fields of each line are handed to <c>readTerminology</c>, the resulting term lists are
/// passed to <c>match</c> together with the model returned by <c>readTE</c>, and the
/// accumulated <c>matches</c> dictionary is finally saved to <paramref name="outputFileName"/>.
/// Progress is written to the console as a percentage of the input lines handled so far.
/// </summary>
/// <param name="teModelFileName">File passed to <c>readTE</c> to load the model.</param>
/// <param name="inputFileName">UTF-8 text file with one pair per line, fields separated by a tab.</param>
/// <param name="outputFileName">Destination file for the collected matches (UTF-8, tab-separated).</param>
/// <param name="srcLang">Not used by this method.</param>
/// <param name="trgLang">Not used by this method.</param>
/// <param name="additionalAnnotation">Forwarded unchanged to <c>readTerminology</c>.</param>
/// <remarks>
/// Lines with fewer than two tab-separated fields are skipped (they still count towards the
/// progress figure); non-blank ones are reported on standard error.
/// </remarks>
private static void map(string teModelFileName, string inputFileName, string outputFileName, string srcLang, string trgLang, bool additionalAnnotation)
{
    Dictionary<string, Dictionary<string, double>> teModel = readTE(teModelFileName);
    Dictionary<string, double> matches = new Dictionary<string, double>();

    // Load the list once: this yields the line total for the progress display without
    // a second pass over the file, and leaves no reader open if a pair fails to process.
    string[] lines = File.ReadAllLines(inputFileName, Encoding.UTF8);

    // Kept as doubles so that count / total below is a floating-point division.
    double total = lines.Length;
    double count = 0;

    // Disabled EM experiment (see the other commented lines below):
    //List<string> text1 = new List<string>();
    //List<string> text2 = new List<string>();

    foreach (string line in lines)
    {
        count++;

        string[] parts = line.Trim().Split('\t');

        if (parts.Length < 2)
        {
            // Indexing parts[1] below would throw; blank lines are ignored silently,
            // anything else is reported so the skipped input is not lost unnoticed.
            if (parts[0].Length > 0)
            {
                Console.Error.WriteLine("Skipping malformed input line (expected two tab-separated fields): {0}", line);
            }
            continue;
        }

        Console.Write("{0} - {1} ... ", parts[0], parts[1]);

        List<string> srcTerms = new List<string>();
        List<string> trgTerms = new List<string>();

        readTerminology(parts[0], parts[1], ref srcTerms, ref trgTerms, additionalAnnotation);
        match(srcTerms, trgTerms, teModel, ref matches);

        //text1.Add(string.Join(" ", new HashSet<string>(clean(srcTerms))));
        //text2.Add(string.Join(" ", new HashSet<string>(clean(trgTerms))));

        Console.WriteLine("done!\t{0:#.##}%", count / total * 100);
    }

    //Dictionary<string, double> matchesEM = EM(text1, text2);

    DataStructWriter<string, double>.saveDictionary(matches, outputFileName, false, Encoding.UTF8, '\t', null, null);
    //DataStructWriter<string, double>.saveDictionary(matchesEM, "em.txt", false, Encoding.UTF8, '\t', null, null);

    Console.WriteLine("");
}