示例#1
0
        // (0.5,0.5):
        // weight distribution = Gaussian(-0.02787, 0.2454)
        // error rate = 0.0527452589744697 = 1221/23149
        // (1,1):
        // weight distribution = Gaussian(-0.03117, 0.3967)
        // error rate = 0.0522268780508877 = 1209/23149
        // (2,2):
        // weight distribution = Gaussian(-0.03522, 0.6794)
        // error rate = 0.0530476478465593 = 1228/23149
        // (10,10):
        // weight distribution = Gaussian(-0.05455, 2.96)
        // error rate = 0.0580586634411854 = 1344/23149

#if SUPPRESS_UNREACHABLE_CODE_WARNINGS
#pragma warning restore 162
#endif

        /// <summary>
        /// Evaluates previously saved posteriors on the RCV1 test file.
        /// Reads the weight and bias posteriors from <c>weights.bin</c>, prints a Gaussian
        /// fitted to the weight posterior means, then runs <c>BpmPredict2</c> over every
        /// instance of <c>rcv1.test.vw.gz</c> and prints the fraction of mispredicted labels.
        /// </summary>
        /// <remarks>
        /// The two objects are deserialized in the order weights, then bias, from a single stream.
        /// NOTE(review): BinaryFormatter deserialization is documented as unsafe for untrusted
        /// input; the file read here is assumed to be one this test suite produced itself.
        /// </remarks>
        public static void Rcv1Test2()
        {
            GaussianArray weightPosterior;
            Gaussian biasPosterior;
            var formatter = new BinaryFormatter();

            //TODO: change path
            using (Stream input = File.OpenRead(@"c:\Users\minka\Downloads\rcv1\weights.bin"))
            {
                weightPosterior = (GaussianArray)formatter.Deserialize(input);
                biasPosterior = (Gaussian)formatter.Deserialize(input);
            }

            // Summarize the learned weights: accumulate each posterior mean into an estimator
            // and print the resulting distribution.
            {
                var meanEstimator = new GaussianEstimator();
                foreach (Gaussian weight in weightPosterior)
                {
                    meanEstimator.Add(weight.GetMean());
                }
                var fitted = meanEstimator.GetDistribution(new Gaussian());
                Console.WriteLine("weight distribution = {0}", fitted);
            }

            var predictor = new BpmPredict2();
            predictor.SetPriors(weightPosterior, biasPosterior);

            int total = 0;
            int mistakes = 0;

            //TODO: change path
            foreach (Instance example in new VwReader(@"c:\Users\minka\Downloads\rcv1\rcv1.test.vw.gz"))
            {
                bool predicted = predictor.Predict(example);
                total++;
                if (predicted == example.label)
                {
                    continue;
                }
                mistakes++;
            }

            double errorRate = (double)mistakes / total;
            Console.WriteLine("error rate = {0} = {1}/{2}", errorRate, mistakes, total);
        }
示例#2
0
        /// <summary>
        /// Runs one online predict-then-train pass over <c>rcv1.train.vw.gz</c> in <c>dataFolder</c>.
        /// For each instance the current posteriors are loaded into the predictor, the prediction
        /// is compared with the instance label, and the instance is then used for training.
        /// Every 1000 instances the running error rate is printed and appended to <c>log.txt</c>.
        /// On .NET Framework builds the final posteriors are serialized to <c>weights.bin</c>.
        /// </summary>
        /// <param name="wVariance">Forwarded to <c>BpmTrain2.SetPriors</c> (presumably the prior variance of the weights; confirm against BpmTrain2).</param>
        /// <param name="biasVariance">Forwarded to <c>BpmTrain2.SetPriors</c> (presumably the prior variance of the bias; confirm against BpmTrain2).</param>
        public static void Rcv1Test(double wVariance, double biasVariance)
        {
            int count = 0;

            if (false)
            {
                // Disabled helper: scans the training file for the largest feature index.
                // NOTE(review): it increments the same 'count' used by the training loop below,
                // so 'count' must be reset if this branch is ever re-enabled.
                int maxFeatureIndex = 0;
                foreach (Instance instance in new VwReader(Path.Combine(dataFolder, "rcv1.train.vw.gz")))
                {
                    count++;
                    if (count % 10000 == 0)
                    {
                        Console.WriteLine(count);
                    }
                    foreach (int index in instance.featureIndices)
                    {
                        if (index > maxFeatureIndex)
                        {
                            maxFeatureIndex = index;
                        }
                    }
                }
                Console.WriteLine("{0} features", maxFeatureIndex + 1);
            }
            // Presumably the feature count reported by the disabled scan above; verify if the data changes.
            int nf      = 47152;
            var train   = new BpmTrain2();
            var predict = new BpmPredict2();

            train.SetPriors(nf, wVariance, biasVariance);

            int errors = 0;

            //int errors2 = 0;
            //StreamReader reader = new StreamReader(Path.Combine(dataFolder, "preds.txt");
            // takes 92s to train
            // takes 74s just to read the data
            // takes 15s just to do 'wc' on the data
            // there are 781265 data points in train, 23149 in test

            // The using block guarantees the log file is closed even if reading, prediction
            // or training throws part-way through the pass.
            using (StreamWriter writer = new StreamWriter(Path.Combine(dataFolder, "log.txt")))
            {
                foreach (Instance instance in new VwReader(Path.Combine(dataFolder, "rcv1.train.vw.gz")))
                {
                    // Predict with the posteriors learned so far, before training on this instance.
                    predict.SetPriors(train.wPost, train.biasPost);
                    bool yPred = predict.Predict(instance);
                    if (yPred != instance.label)
                    {
                        errors++;
                    }
                    //double pred2 = double.Parse(reader.ReadLine());
                    //if ((pred2 > 0.5) != instance.label) errors2++;
                    train.Train(instance);
                    count++;
                    if (count % 1000 == 0)
                    {
                        Console.WriteLine("{0} {1} {2}", count, (double)errors / count, train.biasPost);
                        //Console.WriteLine("{0} {1} {2} {3}", count, (double)errors/count, (double)errors2/count, train.biasPost);
                        writer.WriteLine("{0} {1}", count, (double)errors / count);
                        // Flush on every report so the log is usable while the run is in progress.
                        writer.Flush();
                        //if (count == 10000) break;
                    }
                }
            }
#if NETFRAMEWORK
            // In the .NET 5.0 BinaryFormatter is obsolete
            // and would produce errors. This test code should be migrated.
            // See https://aka.ms/binaryformatter

            if (true)
            {
                BinaryFormatter serializer = new BinaryFormatter();
                using (Stream stream = File.Create(Path.Combine(dataFolder, "weights.bin")))
                {
                    // Written as weights first, then bias.
                    serializer.Serialize(stream, train.wPost);
                    serializer.Serialize(stream, train.biasPost);
                }
            }
#endif
        }
示例#3
0
#pragma warning disable 162
#endif

        /// <summary>
        /// Runs one online predict-then-train pass over the RCV1 training file.
        /// For each instance the current posteriors are loaded into the predictor, the prediction
        /// is compared with the instance label, and the instance is then used for training.
        /// Every 1000 instances the running error rate is printed and appended to <c>log.txt</c>.
        /// The final weight and bias posteriors are serialized to <c>weights.bin</c>.
        /// </summary>
        /// <param name="wVariance">Forwarded to <c>BpmTrain2.SetPriors</c> (presumably the prior variance of the weights; confirm against BpmTrain2).</param>
        /// <param name="biasVariance">Forwarded to <c>BpmTrain2.SetPriors</c> (presumably the prior variance of the bias; confirm against BpmTrain2).</param>
        public static void Rcv1Test(double wVariance, double biasVariance)
        {
            int count = 0;

            if (false)
            {
                // Disabled helper: scans the training file for the largest feature index.
                // NOTE(review): it increments the same 'count' used by the training loop below,
                // so 'count' must be reset if this branch is ever re-enabled.
                int maxFeatureIndex = 0;
                //TODO: change path
                foreach (Instance instance in new VwReader(@"c:\Users\minka\Downloads\rcv1\rcv1.train.vw.gz"))
                {
                    count++;
                    if (count % 10000 == 0)
                    {
                        Console.WriteLine(count);
                    }
                    foreach (int index in instance.featureIndices)
                    {
                        if (index > maxFeatureIndex)
                        {
                            maxFeatureIndex = index;
                        }
                    }
                }
                Console.WriteLine("{0} features", maxFeatureIndex + 1);
            }
            // Presumably the feature count reported by the disabled scan above; verify if the data changes.
            int nf      = 47152;
            var train   = new BpmTrain2();
            var predict = new BpmPredict2();

            train.SetPriors(nf, wVariance, biasVariance);

            int errors = 0;

            //int errors2 = 0;
            //TODO: change path
            //StreamReader reader = new StreamReader(@"c:\Users\minka\Downloads\rcv1\preds.txt");
            // takes 92s to train
            // takes 74s just to read the data
            // takes 15s just to do 'wc' on the data
            // there are 781265 data points in train, 23149 in test

            // The using block guarantees the log file is closed even if reading, prediction
            // or training throws part-way through the pass.
            //TODO: change path
            using (StreamWriter writer = new StreamWriter(@"c:\Users\minka\Downloads\rcv1\log.txt"))
            {
                //TODO: change path
                foreach (Instance instance in new VwReader(@"c:\Users\minka\Downloads\rcv1\rcv1.train.vw.gz"))
                {
                    // Predict with the posteriors learned so far, before training on this instance.
                    predict.SetPriors(train.wPost, train.biasPost);
                    bool yPred = predict.Predict(instance);
                    if (yPred != instance.label)
                    {
                        errors++;
                    }
                    //double pred2 = double.Parse(reader.ReadLine());
                    //if ((pred2 > 0.5) != instance.label) errors2++;
                    train.Train(instance);
                    count++;
                    if (count % 1000 == 0)
                    {
                        Console.WriteLine("{0} {1} {2}", count, (double)errors / count, train.biasPost);
                        //Console.WriteLine("{0} {1} {2} {3}", count, (double)errors/count, (double)errors2/count, train.biasPost);
                        writer.WriteLine("{0} {1}", count, (double)errors / count);
                        // Flush on every report so the log is usable while the run is in progress.
                        writer.Flush();
                        //if (count == 10000) break;
                    }
                }
            }
            if (true)
            {
                BinaryFormatter serializer = new BinaryFormatter();
                //TODO: change path
                using (Stream stream = File.Create(@"c:\Users\minka\Downloads\rcv1\weights.bin"))
                {
                    // Written as weights first, then bias; Rcv1Test2-style readers must use the same order.
                    serializer.Serialize(stream, train.wPost);
                    serializer.Serialize(stream, train.biasPost);
                }
            }
        }