/// <summary>
/// Trains a <c>BpmTrain2</c> on the first 10000 instances read from the RCV1 training file,
/// printing the running instance count and the trainer's <c>biasPost</c> after every
/// 1000 instances.
/// </summary>
public static void Rcv1Test3()
{
    const int reportEvery = 1000;
    const int stopAfter = 10000;

    // presumably the number of features in the RCV1 data set — confirm against the data
    int featureCount = 47152;
    var trainer = new BpmTrain2();
    double wVariance = 10;
    double biasVariance = 10;
    trainer.SetPriors(featureCount, wVariance, biasVariance);

    int seen = 0;
    //TODO: change path
    var reader = new VwReader(@"c:\Users\minka\Downloads\rcv1\rcv1.train.vw.gz");
    foreach (Instance instance in reader)
    {
        trainer.Train(instance);
        seen++;

        // Only report (and consider stopping) on multiples of the reporting interval.
        if (seen % reportEvery != 0)
        {
            continue;
        }

        Console.WriteLine("{0} {1}", seen, trainer.biasPost);
        if (seen == stopAfter)
        {
            break;
        }
    }
}
/// <summary>
/// Feeds instances from "rcv1.train.vw.gz" (located in <c>dataFolder</c>) to a
/// <c>BpmTrain2</c> one at a time, stopping after 10000 instances. Every 1000 instances
/// the count and the trainer's current <c>biasPost</c> are written to the console.
/// </summary>
public static void Rcv1Test3()
{
    // presumably the RCV1 feature count — confirm against the data
    int numFeatures = 47152;
    var model = new BpmTrain2();
    double wVariance = 10;
    double biasVariance = 10;
    model.SetPriors(numFeatures, wVariance, biasVariance);

    int processed = 0;
    string trainingFile = Path.Combine(dataFolder, "rcv1.train.vw.gz");
    foreach (Instance instance in new VwReader(trainingFile))
    {
        model.Train(instance);
        processed++;

        bool atReportPoint = (processed % 1000 == 0);
        if (atReportPoint)
        {
            Console.WriteLine("{0} {1}", processed, model.biasPost);
        }

        // 10000 is itself a report point, so the final line is printed before stopping.
        if (atReportPoint && processed == 10000)
        {
            break;
        }
    }
}
/// <summary>
/// Makes a single pass over "rcv1.train.vw.gz" in <c>dataFolder</c>. Each instance is first
/// classified with a <c>BpmPredict2</c> primed from the trainer's current <c>wPost</c> and
/// <c>biasPost</c>, any mismatch with <c>instance.label</c> is counted as an error, and the
/// instance is then passed to <c>BpmTrain2.Train</c>.
/// </summary>
/// <remarks>
/// Every 1000 instances the count, the running error rate and <c>biasPost</c> are printed to
/// the console, and the count and error rate are appended to "log.txt" in <c>dataFolder</c>.
/// On .NET Framework builds the final <c>wPost</c> and <c>biasPost</c> are serialized to
/// "weights.bin" in <c>dataFolder</c>.
/// </remarks>
/// <param name="wVariance">Forwarded to <c>BpmTrain2.SetPriors</c> as its second argument.</param>
/// <param name="biasVariance">Forwarded to <c>BpmTrain2.SetPriors</c> as its third argument.</param>
public static void Rcv1Test(double wVariance, double biasVariance)
{
    int count = 0;

    // Disabled one-off scan that reports the largest feature index found in the training file.
    // It keeps its own counter so that enabling it cannot inflate the error-rate denominator
    // used by the evaluation loop below.
    if (false)
    {
        int scanned = 0;
        int maxFeatureIndex = 0;
        foreach (Instance instance in new VwReader(Path.Combine(dataFolder, "rcv1.train.vw.gz")))
        {
            scanned++;
            if (scanned % 10000 == 0)
            {
                Console.WriteLine(scanned);
            }
            foreach (int index in instance.featureIndices)
            {
                if (index > maxFeatureIndex)
                {
                    maxFeatureIndex = index;
                }
            }
        }
        Console.WriteLine("{0} features", maxFeatureIndex + 1);
    }

    // presumably the value printed by the scan above (max feature index + 1) — confirm
    int nf = 47152;
    var train = new BpmTrain2();
    var predict = new BpmPredict2();
    train.SetPriors(nf, wVariance, biasVariance);

    int errors = 0;
    //int errors2 = 0;
    //StreamReader reader = new StreamReader(Path.Combine(dataFolder, "preds.txt");
    // takes 92s to train
    // takes 74s just to read the data
    // takes 15s just to do 'wc' on the data
    // there are 781265 data points in train, 23149 in test

    // The using statement guarantees the log file is closed even if reading, prediction or
    // training throws part-way through the pass.
    using (StreamWriter writer = new StreamWriter(Path.Combine(dataFolder, "log.txt")))
    {
        foreach (Instance instance in new VwReader(Path.Combine(dataFolder, "rcv1.train.vw.gz")))
        {
            // Predict before training so the instance is scored by a model that has not seen it.
            predict.SetPriors(train.wPost, train.biasPost);
            bool yPred = predict.Predict(instance);
            if (yPred != instance.label)
            {
                errors++;
            }
            //double pred2 = double.Parse(reader.ReadLine());
            //if ((pred2 > 0.5) != instance.label) errors2++;
            train.Train(instance);
            count++;
            if (count % 1000 == 0)
            {
                Console.WriteLine("{0} {1} {2}", count, (double)errors / count, train.biasPost);
                //Console.WriteLine("{0} {1} {2} {3}", count, (double)errors/count, (double)errors2/count, train.biasPost);
                writer.WriteLine("{0} {1}", count, (double)errors / count);
                // Flush at every report so the log is usable while the run is still in progress.
                writer.Flush();
                //if (count == 10000) break;
            }
        }
    }

#if NETFRAMEWORK
    // In the .NET 5.0 BinaryFormatter is obsolete
    // and would produce errors. This test code should be migrated.
    // See https://aka.ms/binaryformatter
    // NOTE(review): BinaryFormatter is unsafe for untrusted data; only output is written here,
    // but whatever later reads weights.bin inherits that risk.
    if (true)
    {
        BinaryFormatter serializer = new BinaryFormatter();
        using (Stream stream = File.Create(Path.Combine(dataFolder, "weights.bin")))
        {
            serializer.Serialize(stream, train.wPost);
            serializer.Serialize(stream, train.biasPost);
        }
    }
#endif
}
#pragma warning disable 162
#endif
/// <summary>
/// Makes a single pass over the RCV1 training file. Each instance is first classified with a
/// <c>BpmPredict2</c> primed from the trainer's current <c>wPost</c> and <c>biasPost</c>, any
/// mismatch with <c>instance.label</c> is counted as an error, and the instance is then passed
/// to <c>BpmTrain2.Train</c>.
/// </summary>
/// <remarks>
/// Every 1000 instances the count, the running error rate and <c>biasPost</c> are printed to
/// the console, and the count and error rate are appended to a log file. After the pass the
/// final <c>wPost</c> and <c>biasPost</c> are serialized with <c>BinaryFormatter</c>.
/// All file locations are hard-coded (see the TODO markers).
/// </remarks>
/// <param name="wVariance">Forwarded to <c>BpmTrain2.SetPriors</c> as its second argument.</param>
/// <param name="biasVariance">Forwarded to <c>BpmTrain2.SetPriors</c> as its third argument.</param>
public static void Rcv1Test(double wVariance, double biasVariance)
{
    int count = 0;

    // Disabled one-off scan that reports the largest feature index found in the training file.
    // It keeps its own counter so that enabling it cannot inflate the error-rate denominator
    // used by the evaluation loop below.
    if (false)
    {
        int scanned = 0;
        int maxFeatureIndex = 0;
        //TODO: change path
        foreach (Instance instance in new VwReader(@"c:\Users\minka\Downloads\rcv1\rcv1.train.vw.gz"))
        {
            scanned++;
            if (scanned % 10000 == 0)
            {
                Console.WriteLine(scanned);
            }
            foreach (int index in instance.featureIndices)
            {
                if (index > maxFeatureIndex)
                {
                    maxFeatureIndex = index;
                }
            }
        }
        Console.WriteLine("{0} features", maxFeatureIndex + 1);
    }

    // presumably the value printed by the scan above (max feature index + 1) — confirm
    int nf = 47152;
    var train = new BpmTrain2();
    var predict = new BpmPredict2();
    train.SetPriors(nf, wVariance, biasVariance);

    int errors = 0;
    //int errors2 = 0;
    //TODO: change path
    //StreamReader reader = new StreamReader(@"c:\Users\minka\Downloads\rcv1\preds.txt");
    // takes 92s to train
    // takes 74s just to read the data
    // takes 15s just to do 'wc' on the data
    // there are 781265 data points in train, 23149 in test

    // The using statement guarantees the log file is closed even if reading, prediction or
    // training throws part-way through the pass.
    //TODO: change path
    using (StreamWriter writer = new StreamWriter(@"c:\Users\minka\Downloads\rcv1\log.txt"))
    {
        //TODO: change path
        foreach (Instance instance in new VwReader(@"c:\Users\minka\Downloads\rcv1\rcv1.train.vw.gz"))
        {
            // Predict before training so the instance is scored by a model that has not seen it.
            predict.SetPriors(train.wPost, train.biasPost);
            bool yPred = predict.Predict(instance);
            if (yPred != instance.label)
            {
                errors++;
            }
            //double pred2 = double.Parse(reader.ReadLine());
            //if ((pred2 > 0.5) != instance.label) errors2++;
            train.Train(instance);
            count++;
            if (count % 1000 == 0)
            {
                Console.WriteLine("{0} {1} {2}", count, (double)errors / count, train.biasPost);
                //Console.WriteLine("{0} {1} {2} {3}", count, (double)errors/count, (double)errors2/count, train.biasPost);
                writer.WriteLine("{0} {1}", count, (double)errors / count);
                // Flush at every report so the log is usable while the run is still in progress.
                writer.Flush();
                //if (count == 10000) break;
            }
        }
    }

    // NOTE(review): BinaryFormatter is obsolete on modern .NET and unsafe for untrusted data;
    // only output is written here, but whatever later reads weights.bin inherits that risk.
    if (true)
    {
        BinaryFormatter serializer = new BinaryFormatter();
        //TODO: change path
        using (Stream stream = File.Create(@"c:\Users\minka\Downloads\rcv1\weights.bin"))
        {
            serializer.Serialize(stream, train.wPost);
            serializer.Serialize(stream, train.biasPost);
        }
    }
}