Пример #1
0
        /// <summary>
        /// Reads <paramref name="inputFileName"/> line by line, treating each line as a pair of
        /// tab-separated values, loads the named entities for every pair via
        /// <c>readNamedEntities</c>, accumulates matches against the model returned by
        /// <c>readTE</c>, and finally saves the accumulated matches to
        /// <paramref name="outputFileName"/> as a tab-separated UTF-8 file.
        /// </summary>
        /// <param name="teModelFileName">File passed to <c>readTE</c> to load the model.</param>
        /// <param name="inputFileName">UTF-8 text file with one tab-separated pair per line.</param>
        /// <param name="outputFileName">Destination file for the resulting match dictionary.</param>
        /// <param name="srcLang">Not used by this method; kept for interface compatibility.</param>
        /// <param name="trgLang">Not used by this method; kept for interface compatibility.</param>
        /// <param name="additionalAnnotation">Forwarded unchanged to <c>readNamedEntities</c>.</param>
        private static void map(string teModelFileName, string inputFileName, string outputFileName, string srcLang, string trgLang, bool additionalAnnotation)
        {
            Dictionary<string, Dictionary<string, double>> teModel = readTE(teModelFileName);
            Dictionary<string, double> matches = new Dictionary<string, double>();

            // The using block guarantees the input file is closed even when
            // processing one of the pairs throws.
            using (StreamReader rdr = new StreamReader(inputFileName, Encoding.UTF8))
            {
                string line;
                while ((line = rdr.ReadLine()) != null)
                {
                    string[] parts = line.Trim().Split('\t');
                    if (parts.Length < 2)
                    {
                        // Blank lines (e.g. a trailing newline) are ignored silently;
                        // any other line without two fields is reported and skipped
                        // instead of crashing on parts[1].
                        if (parts[0].Length > 0)
                        {
                            Console.Error.WriteLine("Skipping malformed line (expected two tab-separated fields): {0}", line);
                        }
                        continue;
                    }

                    Console.Write("Processing pair {0} - {1} ... ", parts[0], parts[1]);
                    List<Entity> srcNES = new List<Entity>();
                    List<Entity> trgNES = new List<Entity>();
                    readNamedEntities(parts[0], parts[1], ref srcNES, ref trgNES, additionalAnnotation);
                    match(srcNES, trgNES, teModel, ref matches);
                    Console.WriteLine("done!");
                }
            }

            DataStructWriter<string, double>.saveDictionary(matches, outputFileName, false, Encoding.UTF8, '\t', null, null);

            Console.WriteLine("");
            Console.WriteLine("Job finished. Press ENTER to continue...");
            // Blocks until the user presses ENTER so the console output stays visible.
            Console.ReadLine();
        }
Пример #2
0
        //private static void createInput(string dir1, string dir2, string oFile)
        //{
        //    string[] files1 = Directory.GetFiles(dir1);
        //    string[] files2 = Directory.GetFiles(dir2);

        //    for (int i = 0; i < files1.Length; i++)
        //    {
        //        files1[i] = Path.GetFileName(files1[i]);
        //    }
        //    for (int i = 0; i < files2.Length; i++)
        //    {
        //        files2[i] = Path.GetFileName(files2[i]);
        //    }

        //    StreamWriter wrt = new StreamWriter(oFile, false, Encoding.UTF8);
        //    wrt.AutoFlush = true;
        //    foreach (string file in files1)
        //    {
        //        if (files2.Contains(file))
        //        {
        //            wrt.WriteLine("{0}\t{1}", dir1 + "/" + file, dir2 + "/" + file);
        //        }
        //    }
        //    wrt.Close();
        //}

        /// <summary>
        /// Reads <paramref name="inputFileName"/> line by line, treating each line as a pair of
        /// tab-separated values, loads the terms for every pair via <c>readTerminology</c>,
        /// accumulates matches against the model returned by <c>readTE</c>, and saves the
        /// accumulated matches to <paramref name="outputFileName"/> as a tab-separated UTF-8 file.
        /// Progress is printed to the console as a percentage of the input lines processed.
        /// </summary>
        /// <param name="teModelFileName">File passed to <c>readTE</c> to load the model.</param>
        /// <param name="inputFileName">UTF-8 text file with one tab-separated pair per line.</param>
        /// <param name="outputFileName">Destination file for the resulting match dictionary.</param>
        /// <param name="srcLang">Not used by this method; kept for interface compatibility.</param>
        /// <param name="trgLang">Not used by this method; kept for interface compatibility.</param>
        /// <param name="additionalAnnotation">Forwarded unchanged to <c>readTerminology</c>.</param>
        private static void map(string teModelFileName, string inputFileName, string outputFileName, string srcLang, string trgLang, bool additionalAnnotation)
        {
            Dictionary<string, Dictionary<string, double>> teModel = readTE(teModelFileName);
            Dictionary<string, double> matches = new Dictionary<string, double>();

            string line;

            // First pass: count the input lines so progress can be shown as a percentage.
            int total = 0;
            using (StreamReader counter = new StreamReader(inputFileName, Encoding.UTF8))
            {
                while (counter.ReadLine() != null)
                {
                    total++;
                }
            }

            // Second pass: process every pair. The using block closes the file
            // even when processing one of the pairs throws.
            int count = 0;
            using (StreamReader rdr = new StreamReader(inputFileName, Encoding.UTF8))
            {
                while ((line = rdr.ReadLine()) != null)
                {
                    // Counted before validation so that skipped lines still advance the progress.
                    count++;
                    string[] parts = line.Trim().Split('\t');
                    if (parts.Length < 2)
                    {
                        // Blank lines (e.g. a trailing newline) are ignored silently;
                        // any other line without two fields is reported and skipped
                        // instead of crashing on parts[1].
                        if (parts[0].Length > 0)
                        {
                            Console.Error.WriteLine("Skipping malformed line (expected two tab-separated fields): {0}", line);
                        }
                        continue;
                    }

                    Console.Write("{0} - {1} ... ", parts[0], parts[1]);
                    List<string> srcTerms = new List<string>();
                    List<string> trgTerms = new List<string>();
                    readTerminology(parts[0], parts[1], ref srcTerms, ref trgTerms, additionalAnnotation);
                    match(srcTerms, trgTerms, teModel, ref matches);

                    // Same arithmetic order as before (ratio first, then * 100), done in double.
                    Console.WriteLine("done!\t{0:#.##}%", (double)count / total * 100);
                }
            }

            DataStructWriter<string, double>.saveDictionary(matches, outputFileName, false, Encoding.UTF8, '\t', null, null);

            Console.WriteLine("");
        }