Exemplo n.º 1
0
        /// <summary>
        /// Trains a <c>BpmTrain2</c> model on the first 10,000 instances read from the
        /// <c>rcv1.train.vw.gz</c> file, printing the instance count and the current
        /// bias posterior after every 1,000 instances.
        /// </summary>
        public static void Rcv1Test3()
        {
            int featureCount = 47152;
            double weightVariance = 10;
            double biasPriorVariance = 10;

            var trainer = new BpmTrain2();
            trainer.SetPriors(featureCount, weightVariance, biasPriorVariance);

            int processed = 0;

            //TODO: change path
            var reader = new VwReader(@"c:\Users\minka\Downloads\rcv1\rcv1.train.vw.gz");
            foreach (Instance instance in reader)
            {
                trainer.Train(instance);
                processed++;

                // Only report (and check the stop condition) on every 1000th instance.
                if (processed % 1000 != 0)
                {
                    continue;
                }

                Console.WriteLine("{0} {1}", processed, trainer.biasPost);

                // Stop once 10,000 instances have been consumed.
                if (processed == 10000)
                {
                    break;
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Trains a <c>BpmTrain2</c> model on the first 10,000 instances of
        /// <c>rcv1.train.vw.gz</c> (located in <c>dataFolder</c>), printing the instance
        /// count and the current bias posterior after every 1,000 instances.
        /// </summary>
        public static void Rcv1Test3()
        {
            int featureCount = 47152;
            double weightVariance = 10;
            double biasPriorVariance = 10;

            var trainer = new BpmTrain2();
            trainer.SetPriors(featureCount, weightVariance, biasPriorVariance);

            string trainingFile = Path.Combine(dataFolder, "rcv1.train.vw.gz");
            int processed = 0;

            foreach (Instance instance in new VwReader(trainingFile))
            {
                trainer.Train(instance);
                processed++;

                // Progress is reported only on multiples of 1000; everything else moves on.
                bool atReportPoint = processed % 1000 == 0;
                if (!atReportPoint)
                {
                    continue;
                }

                Console.WriteLine("{0} {1}", processed, trainer.biasPost);

                // Stop once 10,000 instances have been consumed.
                if (processed == 10000)
                {
                    break;
                }
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Runs an online predict-then-train pass over <c>rcv1.train.vw.gz</c> in <c>dataFolder</c>.
        /// Each instance is first classified with the posteriors learned so far, the prediction is
        /// compared with the instance label, and the instance is then used for training.
        /// </summary>
        /// <remarks>
        /// After every 1,000 instances the running error rate (errors / instances seen) is printed to
        /// the console together with the bias posterior, and appended to <c>log.txt</c> in
        /// <c>dataFolder</c>. On .NET Framework builds the final weight and bias posteriors are
        /// serialized to <c>weights.bin</c>.
        /// </remarks>
        /// <param name="wVariance">Weight variance forwarded to <c>BpmTrain2.SetPriors</c>.</param>
        /// <param name="biasVariance">Bias variance forwarded to <c>BpmTrain2.SetPriors</c>.</param>
        public static void Rcv1Test(double wVariance, double biasVariance)
        {
            // Disabled diagnostic (constant-false condition): scans the training file and reports
            // the largest feature index plus one.
            if (false)
            {
                // Uses its own counter so that enabling this scan cannot inflate the
                // denominator of the error rate computed by the main loop below.
                int scanned = 0;
                int maxFeatureIndex = 0;
                foreach (Instance instance in new VwReader(Path.Combine(dataFolder, "rcv1.train.vw.gz")))
                {
                    scanned++;
                    if (scanned % 10000 == 0)
                    {
                        Console.WriteLine(scanned);
                    }
                    foreach (int index in instance.featureIndices)
                    {
                        if (index > maxFeatureIndex)
                        {
                            maxFeatureIndex = index;
                        }
                    }
                }
                Console.WriteLine("{0} features", maxFeatureIndex + 1);
            }

            int nf      = 47152;
            var train   = new BpmTrain2();
            var predict = new BpmPredict2();

            train.SetPriors(nf, wVariance, biasVariance);

            int count  = 0;
            int errors = 0;

            //int errors2 = 0;
            //StreamReader reader = new StreamReader(Path.Combine(dataFolder, "preds.txt"));
            // takes 92s to train
            // takes 74s just to read the data
            // takes 15s just to do 'wc' on the data
            // there are 781265 data points in train, 23149 in test

            // The using block guarantees the log file is closed even if reading, prediction
            // or training throws part-way through the data.
            using (StreamWriter writer = new StreamWriter(Path.Combine(dataFolder, "log.txt")))
            {
                foreach (Instance instance in new VwReader(Path.Combine(dataFolder, "rcv1.train.vw.gz")))
                {
                    // Predict with the posteriors learned from all previous instances,
                    // before this instance is used for training.
                    predict.SetPriors(train.wPost, train.biasPost);
                    bool yPred = predict.Predict(instance);
                    if (yPred != instance.label)
                    {
                        errors++;
                    }
                    //double pred2 = double.Parse(reader.ReadLine());
                    //if ((pred2 > 0.5) != instance.label) errors2++;
                    train.Train(instance);
                    count++;
                    if (count % 1000 == 0)
                    {
                        Console.WriteLine("{0} {1} {2}", count, (double)errors / count, train.biasPost);
                        //Console.WriteLine("{0} {1} {2} {3}", count, (double)errors/count, (double)errors2/count, train.biasPost);
                        writer.WriteLine("{0} {1}", count, (double)errors / count);
                        // Flush each report so the log is usable while the run is still in progress.
                        writer.Flush();
                        //if (count == 10000) break;
                    }
                }
            }
#if NETFRAMEWORK
            // In the .NET 5.0 BinaryFormatter is obsolete
            // and would produce errors. This test code should be migrated.
            // See https://aka.ms/binaryformatter

            if (true)
            {
                BinaryFormatter serializer = new BinaryFormatter();
                using (Stream stream = File.Create(Path.Combine(dataFolder, "weights.bin")))
                {
                    // Weights first, then bias, written back-to-back into the same stream.
                    serializer.Serialize(stream, train.wPost);
                    serializer.Serialize(stream, train.biasPost);
                }
            }
#endif
        }
Exemplo n.º 4
0
#pragma warning disable 162
#endif

        /// <summary>
        /// Runs an online predict-then-train pass over the <c>rcv1.train.vw.gz</c> file.
        /// Each instance is first classified with the posteriors learned so far, the prediction is
        /// compared with the instance label, and the instance is then used for training.
        /// </summary>
        /// <remarks>
        /// After every 1,000 instances the running error rate (errors / instances seen) is printed to
        /// the console together with the bias posterior, and appended to <c>log.txt</c>. When the pass
        /// completes, the final weight and bias posteriors are serialized to <c>weights.bin</c>.
        /// All files live in the single hard-coded directory declared at the top of the method.
        /// </remarks>
        /// <param name="wVariance">Weight variance forwarded to <c>BpmTrain2.SetPriors</c>.</param>
        /// <param name="biasVariance">Bias variance forwarded to <c>BpmTrain2.SetPriors</c>.</param>
        public static void Rcv1Test(double wVariance, double biasVariance)
        {
            //TODO: change path
            // Single place to edit the data location; the derived constants below produce
            // exactly the same path strings that were previously repeated inline.
            const string dataDirectory = @"c:\Users\minka\Downloads\rcv1\";
            const string trainFile     = dataDirectory + "rcv1.train.vw.gz";
            const string logFile       = dataDirectory + "log.txt";
            const string weightsFile   = dataDirectory + "weights.bin";

            // Disabled diagnostic (constant-false condition): scans the training file and reports
            // the largest feature index plus one.
            if (false)
            {
                // Uses its own counter so that enabling this scan cannot inflate the
                // denominator of the error rate computed by the main loop below.
                int scanned = 0;
                int maxFeatureIndex = 0;
                foreach (Instance instance in new VwReader(trainFile))
                {
                    scanned++;
                    if (scanned % 10000 == 0)
                    {
                        Console.WriteLine(scanned);
                    }
                    foreach (int index in instance.featureIndices)
                    {
                        if (index > maxFeatureIndex)
                        {
                            maxFeatureIndex = index;
                        }
                    }
                }
                Console.WriteLine("{0} features", maxFeatureIndex + 1);
            }

            int nf      = 47152;
            var train   = new BpmTrain2();
            var predict = new BpmPredict2();

            train.SetPriors(nf, wVariance, biasVariance);

            int count  = 0;
            int errors = 0;

            //int errors2 = 0;
            //StreamReader reader = new StreamReader(dataDirectory + "preds.txt");
            // takes 92s to train
            // takes 74s just to read the data
            // takes 15s just to do 'wc' on the data
            // there are 781265 data points in train, 23149 in test

            // The using block guarantees the log file is closed even if reading, prediction
            // or training throws part-way through the data.
            using (StreamWriter writer = new StreamWriter(logFile))
            {
                foreach (Instance instance in new VwReader(trainFile))
                {
                    // Predict with the posteriors learned from all previous instances,
                    // before this instance is used for training.
                    predict.SetPriors(train.wPost, train.biasPost);
                    bool yPred = predict.Predict(instance);
                    if (yPred != instance.label)
                    {
                        errors++;
                    }
                    //double pred2 = double.Parse(reader.ReadLine());
                    //if ((pred2 > 0.5) != instance.label) errors2++;
                    train.Train(instance);
                    count++;
                    if (count % 1000 == 0)
                    {
                        Console.WriteLine("{0} {1} {2}", count, (double)errors / count, train.biasPost);
                        //Console.WriteLine("{0} {1} {2} {3}", count, (double)errors/count, (double)errors2/count, train.biasPost);
                        writer.WriteLine("{0} {1}", count, (double)errors / count);
                        // Flush each report so the log is usable while the run is still in progress.
                        writer.Flush();
                        //if (count == 10000) break;
                    }
                }
            }

            if (true)
            {
                BinaryFormatter serializer = new BinaryFormatter();
                using (Stream stream = File.Create(weightsFile))
                {
                    // Weights first, then bias, written back-to-back into the same stream.
                    serializer.Serialize(stream, train.wPost);
                    serializer.Serialize(stream, train.biasPost);
                }
            }
        }