Exemple #1
0
        public static double rho(RegressionSet simlexInstance, List <SimpleEntry> calculatedResults, int idx)
        {
            double avg1  = 0;
            double avg2  = 0;
            double avg12 = 0;

            foreach (Entry simLexEntry in simlexInstance.Entries)
            {
                //W kolejnej linii pobierana jest wartość wygenerowanego podobieństwa pomiędzy słowami z aktualnie przetwarzanego Entry z SimLex'a
                SimpleEntry calculatedEntry = calculatedResults.Where(x => x.w1 == simLexEntry.word1 && x.w2 == simLexEntry.word2).First();
                avg1  += simLexEntry.simLex;
                avg2  += calculatedEntry.result;
                avg12 += simLexEntry.simLex * calculatedEntry.result;
            }
            avg1  /= simlexInstance.Entries.Count;
            avg2  /= simlexInstance.Entries.Count;
            avg12 /= simlexInstance.Entries.Count;
            double cov  = avg12 - (avg1 * avg2);
            double dev1 = 0;
            double dev2 = 0;

            foreach (Entry simLexEntry in simlexInstance.Entries)
            {
                SimpleEntry calculatedEntry = calculatedResults.Where(x => x.w1 == simLexEntry.word1 && x.w2 == simLexEntry.word2).First();
                dev1 += (simLexEntry.simLex - avg1) * (simLexEntry.simLex - avg1);
                dev2 += (calculatedEntry.result - avg2) * (calculatedEntry.result - avg2);
            }
            dev1 /= (simlexInstance.Entries.Count - 1);
            dev2 /= (simlexInstance.Entries.Count - 1);

            dev1 = Math.Sqrt(dev1);
            dev2 = Math.Sqrt(dev2);

            return(cov / (dev1 * dev2));
        }
Exemple #2
0
        public List <SimpleEntry> generateEntriesForComparison(RegressionSet input)
        {
            List <SimpleEntry> entries = new List <SimpleEntry>();
            int i = 0;

            foreach (Entry entry in input.Entries)
            {
                System.Console.WriteLine(i++);
                //Console.WriteLine(i++);
                var    t      = compare(entry.word1, entry.word2);
                double result = 0.5;
                try
                {
                    t.Wait();
                    result = t.GetAwaiter().GetResult();
                }
                catch
                {
                }
                entries.Add(new SimpleEntry(entry.word1, entry.word2, result));
            }
            return(entries);
        }
        static void Main(string[] args)
        {
            Model m = new Model("D://glove_vec//paragram_filtered//paragram_filtered_vecs.txt");

            m = m.cos_retrofit("D://glove_vec//ppdb_synonyms.txt", "D://glove_vec//wordnet_antonyms.txt", 20);
            //m.saveModel("D://glove_vec//glove_mrksic//glove_retro_vecs.txt");
            Model.loadCentroids("D://glove_vec//glove_mrksic//centroids_retro10");
            RegressionSet sl = new RegressionSet("D://SimLex-999//SimLex-999.txt");

            Model.sl = sl;
            ClassificationSet esl   = new ClassificationSet("D://glove_vec//esl", ClassificationSet.SetType.ESL);
            ClassificationSet toefl = new ClassificationSet("D://glove_vec//toefl", ClassificationSet.SetType.TOEFL);

            //var dict = HelperFunctions.generateDictionary(new List<TestSet> { sl999, esl, toefl });


            Model.compStyle = Vector.comparisonStyle.Cosine;
            Classifier cEsl   = new Classifier(esl, m);
            Classifier cToefl = new Classifier(toefl, m);
            var        r0     = m.generateEntriesForComparison(sl);
            double     acc    = cEsl.accuracy();
            double     acc2   = cToefl.accuracy();
            double     rho    = HelperFunctions.rho(sl, r0, 0);

            //double rho = 0;
            //double acc = 0;
            //double acc2 = 0;
            System.Console.WriteLine("ESL: {0}\nTOEFL: {1}\nSimLex999: {2}", acc, acc2, rho);
            Console.ReadKey();

            /*
             *
             * Model.loadCentroids("D://glove_vec/centroids5");
             * Model.sl = sl;
             *
             * Model.compStyle = Vector.comparisonStyle.Cosine;
             * var r0 = m.generateEntriesForComparison(sl);
             * var result0 = HelperFunctions.rho(sl, r0, 0);
             * Console.WriteLine(result0);
             * StreamWriter sw = new StreamWriter("D://results.txt");
             * Model.compStyle = Vector.comparisonStyle.CosineHR;
             */
            /*for (int i = 0; i < 100 ; i++)
             * {
             *  //Model.Beta1 = i/10.0;
             *  if (i % 50 == 0) Console.WriteLine(i);
             *  var r = m.generateEntriesForComparison(sl);
             *  var result = HelperFunctions.rho(sl, r, 0);
             *  Model.msqe /= 999.0;
             *  sw.WriteLine("{0}\t{1:F6}\tBeta1:{2}\tBeta2:{3}\tBeta3:{4}\tMSQE: {5}",i/100.0,result,Model.Beta1,Model.Beta2,Model.Beta3, Model.msqe);
             *  Model.msqe = 0;
             *  sl.shuffle();
             *  Model.resetBatch();
             * }
             * sw.Close();*/

            /*
             * int remainder = 1029 + 420;
             * Stopwatch s = new Stopwatch();
             * //Model centroids = new Model(m.Vectors.Length, m.Vectors[0].V_size);
             * //remainder = m.Vectors.Length;
             * Model centroids = new Model(1029 + 420, m.Vectors[0].V_size);
             * s.Start();
             * foreach (Vector v in m.Vectors)
             * {
             *  if (sl.Entries.Select(x => x.word1).Contains(v.Label) ||
             *      sl.Entries.Select(x => x.word2).Contains(v.Label) ||
             *      esl.Questions.Select(x => x.Answer).Contains(v.Label) ||
             *      esl.Questions.Select(x => x.Possibilities[0]).Contains(v.Label) ||
             *      esl.Questions.Select(x => x.Possibilities[1]).Contains(v.Label) ||
             *      esl.Questions.Select(x => x.Possibilities[2]).Contains(v.Label) ||
             *      esl.Questions.Select(x => x.Possibilities[3]).Contains(v.Label) ||
             *      toefl.Questions.Select(x => x.Answer).Contains(v.Label) ||
             *      toefl.Questions.Select(x => x.Possibilities[0]).Contains(v.Label) ||
             *      toefl.Questions.Select(x => x.Possibilities[1]).Contains(v.Label) ||
             *      toefl.Questions.Select(x => x.Possibilities[2]).Contains(v.Label) ||
             *      toefl.Questions.Select(x => x.Possibilities[3]).Contains(v.Label)
             *
             *      )
             *  {
             *
             *      var t = m.findNMostSimilar(v, 10);
             *      var c = m.calculateCentroid(v.Label, t);
             *      Console.WriteLine("{0}/{1} : {2:F5}%\t {3:D2}:{4:D2}:{5:D2}", remainder, 1029+420, 100 * (1 - (remainder / (1029.0+420.0))), s.Elapsed.Hours, s.Elapsed.Minutes, s.Elapsed.Seconds);
             *      remainder--;
             *
             *      centroids.Vectors[remainder] = c;
             *  }
             * }
             *
             * centroids.saveModel("D://glove_vec//centroids_original5");
             */


            //var entries = m.generateEntriesForComparison(sl);

            //double res = HelperFunctions.rho(sl, entries, 0);
            //System.Console.WriteLine("{0}", res);

            //Model m = new Model("../../../data/nmrksic/counter-fitting.git/trunk/word_vectors/");
        }