/// <summary>
/// Converts a classified sample into a multi-regression sample by replacing every
/// class label with a one-hot vector.
/// </summary>
/// <param name="sample">Source sample; may be null.</param>
/// <returns>
/// Null when <paramref name="sample"/> is null; otherwise a new sample in which each object is
/// paired with a vector whose length equals the number of classes and whose only non-zero
/// element (1) sits at the position of the object's class within <c>sample.Classes</c>.
/// Objects of the same class share one vector instance.
/// </returns>
public static MultiRegressionSample<TObj> ClassifiedToRegressionSample<TObj>(ClassifiedSample<TObj> sample)
{
    if (sample == null)
    {
        return null;
    }

    var classList = sample.Classes.ToList();
    var classCount = classList.Count;

    // One one-hot row per class, indexed by the class position in classList.
    var oneHot = Enumerable.Range(0, classCount)
                           .Select(position =>
                           {
                               var row = new double[classCount];
                               row[position] = 1;
                               return row;
                           })
                           .ToArray();

    var converted = new MultiRegressionSample<TObj>();
    foreach (var pair in sample)
    {
        var position = classList.IndexOf(pair.Value);
        converted.Add(pair.Key, oneHot[position]);
    }

    return converted;
}
/// <summary>
/// Clears <paramref name="sample"/> and fills it with every image in <paramref name="path"/> whose
/// file name starts with <c>IMG_PREFIX</c>, paired with the mark read from <c>labels.csv</c>
/// in the same directory.
/// </summary>
/// <param name="path">Directory containing the image files and <c>labels.csv</c>.</param>
/// <param name="sample">Sample to fill; its previous content is discarded.</param>
/// <remarks>
/// The first line of <c>labels.csv</c> is skipped as a header. Every following row is
/// "id,v1,v2,..." with integer fields; the values are divided by 255 to form the mark.
/// Reading stops at the first blank line or at end of file. The numeric id of an image is the
/// part of its extension-less file name that follows <c>IMG_PREFIX</c>.
/// </remarks>
/// <exception cref="KeyNotFoundException">An image has no row in <c>labels.csv</c>.</exception>
private void loadData(string path, MultiRegressionSample<double[][,]> sample)
{
    sample.Clear();

    // Data files are machine-written, so parse them independently of the user's locale.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var marks = new Dictionary<int, double[]>();
    var lpath = Path.Combine(path, "labels.csv");

    using (var lfile = File.Open(lpath, FileMode.Open, FileAccess.Read))
    using (var reader = new StreamReader(lfile))
    {
        reader.ReadLine(); // header

        while (true)
        {
            var line = reader.ReadLine();
            if (string.IsNullOrWhiteSpace(line))
            {
                break;
            }

            var segs = line.Split(',');
            var id = int.Parse(segs[0], invariant);
            var mark = new double[segs.Length - 1];
            for (int i = 0; i < mark.Length; i++)
            {
                mark[i] = int.Parse(segs[i + 1], invariant) / 255.0D;
            }

            marks[id] = mark;
        }
    }

    // Materialize the image list once so the progress total counts images only
    // (not labels.csv or other unrelated files).
    var dir = new DirectoryInfo(path);
    var images = dir.EnumerateFiles()
                    .Where(f => f.Name.StartsWith(IMG_PREFIX, StringComparison.Ordinal))
                    .ToList();
    var total = images.Count;
    var loaded = 0;

    foreach (var file in images)
    {
        var fname = Path.GetFileNameWithoutExtension(file.Name);
        var id = int.Parse(fname.Substring(IMG_PREFIX.Length), invariant);

        // Resolve the label before the (expensive) image load so a missing label fails fast.
        double[] mark;
        if (!marks.TryGetValue(id, out mark))
        {
            throw new KeyNotFoundException(
                string.Format("No label with id {0} in {1} for image {2}", id, lpath, file.Name));
        }

        var data = loadFile(file.FullName);
        sample.Add(data, mark);

        loaded++;
        if (loaded % 1000 == 0)
        {
            Console.Write("\rloaded: {0} of {1}        ", loaded, total);
        }
    }

    Console.WriteLine("\nLoaded files from: {0}", path);
}
/// <summary>
/// Appends to <paramref name="sample"/> the images stored in the binary image file
/// <paramref name="ipath"/>, each paired with the mark selected by the corresponding byte of the
/// label file <paramref name="lpath"/>.
/// </summary>
/// <param name="ipath">Image file: big-endian magic 2051, image count, row count, column count, then one byte per pixel.</param>
/// <param name="lpath">Label file: big-endian magic 2049, a count field (read and ignored), then one byte per label.</param>
/// <param name="sample">Sample to append to; it is not cleared first.</param>
/// <exception cref="InvalidDataException">A file does not start with the expected magic number.</exception>
/// <exception cref="EndOfStreamException">A file ends before all announced images/labels were read.</exception>
private void loadSample(string ipath, string lpath, MultiRegressionSample<double[][,]> sample)
{
    const int imageMagic = 2051;
    const int labelMagic = 2049;

    using (var ifile = File.Open(ipath, FileMode.Open, FileAccess.Read))
    using (var lfile = File.Open(lpath, FileMode.Open, FileAccess.Read))
    {
        if (ReadInt32BigEndian(ifile) != imageMagic)
        {
            throw new InvalidDataException("Incorrect MNIST image datafile");
        }

        if (ReadInt32BigEndian(lfile) != labelMagic)
        {
            throw new InvalidDataException("Incorrect MNIST label datafile");
        }

        var count = ReadInt32BigEndian(ifile);
        var rows = ReadInt32BigEndian(ifile);
        var cols = ReadInt32BigEndian(ifile);

        ReadInt32BigEndian(lfile); // label count: consumed to advance the stream, value not used

        for (int q = 0; q < count; q++)
        {
            var data = new double[1][,] { new double[rows, cols] };
            for (int i = 0; i < rows; i++)
            {
                for (int j = 0; j < cols; j++)
                {
                    // ReadByte returns -1 at end of stream; without this check a truncated
                    // file would silently produce pixels with value -1/255.
                    var shade = ifile.ReadByte();
                    if (shade < 0)
                    {
                        throw new EndOfStreamException(
                            string.Format("Unexpected end of image file {0} at image {1} of {2}", ipath, q, count));
                    }

                    // do not invert 255-* because we want to keep logical format:
                    // 0=white, 255=black - not image color format!
                    data[0][i, j] = shade / 255.0D;
                }
            }

            var label = lfile.ReadByte();
            if (label < 0)
            {
                throw new EndOfStreamException(
                    string.Format("Unexpected end of label file {0} at label {1} of {2}", lpath, q, count));
            }

            sample.Add(data, m_Marks[label]);
        }

        Console.WriteLine("Loaded: {0}", ipath);
        Console.WriteLine("Loaded: {0}", lpath);
    }
}
/// <summary>
/// Appends to <paramref name="sample"/> the image records stored in the given binary files.
/// </summary>
/// <param name="fpaths">Files to read, processed in the given order.</param>
/// <param name="sample">Sample to append to; it is not cleared first.</param>
/// <remarks>
/// Each record is one label byte followed by 3 planes of 32x32 one-byte pixels (3072 bytes),
/// stored plane by plane, row by row. Pixels are scaled to [0, 1] by dividing by 255.
/// Records whose label has no entry in <c>m_Marks</c> are skipped.
/// NOTE(review): this layout matches the CIFAR-10 binary format - confirm against the data source.
/// </remarks>
/// <exception cref="EndOfStreamException">A file ends in the middle of a record that is being loaded.</exception>
private void loadSample(string[] fpaths, MultiRegressionSample<double[][,]> sample)
{
    const int channels = 3;
    const int side = 32;
    const int pixelBytes = channels * side * side;

    // Reused for every record; its content is copied into fresh arrays before the next read.
    var buffer = new byte[pixelBytes];

    foreach (var fpath in fpaths)
    {
        using (var file = File.Open(fpath, FileMode.Open, FileAccess.Read))
        {
            while (true)
            {
                var label = file.ReadByte();
                if (label < 0)
                {
                    break; // clean end of file
                }

                double[] mark;
                if (!m_Marks.TryGetValue(label, out mark))
                {
                    // Unknown class: jump over its pixel data without decoding it.
                    file.Seek(pixelBytes, SeekOrigin.Current);
                    continue;
                }

                // Stream.Read may return fewer bytes than requested, so loop until the
                // record is complete or the stream ends.
                var filled = 0;
                while (filled < pixelBytes)
                {
                    var got = file.Read(buffer, filled, pixelBytes - filled);
                    if (got <= 0)
                    {
                        break;
                    }

                    filled += got;
                }

                if (filled < pixelBytes)
                {
                    // Previously a short record was silently stored with -1/255 pixels.
                    throw new EndOfStreamException(
                        string.Format("Truncated image record in {0}: got {1} of {2} pixel bytes", fpath, filled, pixelBytes));
                }

                var data = new double[channels][,];
                var pos = 0;
                for (int d = 0; d < channels; d++)
                {
                    data[d] = new double[side, side];
                    for (int y = 0; y < side; y++)
                    {
                        for (int x = 0; x < side; x++)
                        {
                            data[d][y, x] = buffer[pos++] / 255.0D;
                        }
                    }
                }

                sample.Add(data, mark);
            }
        }
    }
}
/// <summary>
/// Clears <paramref name="sample"/> and fills it from a CSV file in which every data row is
/// "label,p1,p2,...": an integer label followed by <c>IMG_SIZE * IMG_SIZE</c> integer pixel values
/// in row-major order.
/// </summary>
/// <param name="ipath">CSV file to read. Its first line is skipped as a header.</param>
/// <param name="sample">Sample to fill; its previous content is discarded.</param>
/// <remarks>
/// Reading stops at the first blank line or at end of file. Pixels are divided by 255.
/// Columns beyond the expected pixel count are parsed but ignored.
/// </remarks>
/// <exception cref="InvalidDataException">A row has fewer columns than label + pixels.</exception>
private void loadSample(string ipath, MultiRegressionSample<double[][,]> sample)
{
    sample.Clear();

    // The file is machine-written; parse numbers independently of the user's locale.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var pixelCount = IMG_SIZE * IMG_SIZE;

    using (var ifile = File.Open(ipath, FileMode.Open, FileAccess.Read))
    using (var reader = new StreamReader(ifile))
    {
        reader.ReadLine(); // header
        var lineNo = 1;

        while (true)
        {
            var str = reader.ReadLine();
            if (string.IsNullOrWhiteSpace(str))
            {
                break;
            }

            lineNo++;

            var raw = str.Split(',')
                         .Select(d => int.Parse(d, invariant))
                         .ToArray();

            if (raw.Length < pixelCount + 1)
            {
                throw new InvalidDataException(
                    string.Format("{0}, line {1}: expected at least {2} columns but found {3}",
                                  ipath, lineNo, pixelCount + 1, raw.Length));
            }

            var label = raw[0];
            var data = new double[1][,] { new double[IMG_SIZE, IMG_SIZE] };

            for (int p = 0; p < pixelCount; p++)
            {
                // do not invert 255-* because we want to keep logical format:
                // 0=white, 255=black - not image color format!
                var shade = raw[p + 1];
                data[0][p / IMG_SIZE, p % IMG_SIZE] = shade / 255.0D;
            }

            sample.Add(data, m_Marks[label]);
        }

        Console.WriteLine("Loaded: {0}", ipath);
    }
}
/// <summary>
/// Clears <paramref name="sample"/> and fills it, in parallel, with every file found in
/// <paramref name="path"/>. Files whose name starts with <c>CAT_PREFIX</c> get <c>m_Marks[0]</c>,
/// files starting with <c>DOG_PREFIX</c> get <c>m_Marks[1]</c>.
/// </summary>
/// <param name="path">Directory with the training images.</param>
/// <param name="sample">Sample to fill; its previous content is discarded.</param>
/// <remarks>
/// Items are added in whatever order the parallel workers finish, so the resulting order is not
/// deterministic. An <c>MLException</c> ("Unknown file") raised for a file with neither prefix
/// reaches the caller through <see cref="Parallel"/>, i.e. wrapped in an <see cref="AggregateException"/>.
/// </remarks>
private void loadTrain(string path, MultiRegressionSample<double[][,]> sample)
{
    sample.Clear();

    // List the directory once: the same snapshot drives both the work and the progress total.
    var files = new DirectoryInfo(path).GetFiles();
    var total = files.Length;
    var loaded = 0;

    // Private gate: locking on 'sample' would contend with any other code that locks the
    // same caller-owned object.
    var gate = new object();

    Parallel.ForEach(files, file =>
    {
        // Classify first so an unexpected file is rejected before the expensive load.
        double[] mark;
        if (file.Name.StartsWith(CAT_PREFIX, StringComparison.Ordinal))
        {
            mark = m_Marks[0];
        }
        else if (file.Name.StartsWith(DOG_PREFIX, StringComparison.Ordinal))
        {
            mark = m_Marks[1];
        }
        else
        {
            throw new MLException("Unknown file");
        }

        var data = loadFile(file.FullName);

        lock (gate)
        {
            sample.Add(data, mark);
            loaded++;
            if (loaded % 1000 == 0)
            {
                Console.Write("\rloaded: {0} of {1}        ", loaded, total);
            }
        }
    });

    Console.WriteLine("\nLoaded files from: {0}", path);
}
/// <summary>
/// Clears <paramref name="sample"/> and fills it with every file in <paramref name="path"/>,
/// taking the class of the n-th enumerated file from the second comma-separated column of the
/// n-th data row of the label file <paramref name="lpath"/>.
/// </summary>
/// <param name="path">Directory with the training images.</param>
/// <param name="lpath">CSV label file; its first line is skipped as a header.</param>
/// <param name="sample">Sample to fill; its previous content is discarded.</param>
/// <exception cref="InvalidDataException">
/// The label file has fewer data rows than there are files, or a row has no second column.
/// </exception>
/// <exception cref="InvalidOperationException">A class name is not present in <c>m_Classes</c>.</exception>
private void loadTrain(string path, string lpath, MultiRegressionSample<double[][,]> sample)
{
    sample.Clear();

    using (var lfile = File.Open(lpath, FileMode.Open, FileAccess.Read))
    using (var reader = new StreamReader(lfile))
    {
        reader.ReadLine(); // read label file header

        var dir = new DirectoryInfo(path);
        var row = 0;

        // NOTE(review): files and label rows are paired purely by position. This is only
        // correct if the directory enumeration order matches the row order of the label
        // file, which the file system does not promise - confirm, or match rows by the
        // first column instead.
        foreach (var file in dir.EnumerateFiles())
        {
            row++;

            var line = reader.ReadLine();
            if (line == null)
            {
                throw new InvalidDataException(
                    string.Format("Label file {0} ended at data row {1}, before file {2}", lpath, row, file.Name));
            }

            var segs = line.Split(',');
            if (segs.Length < 2)
            {
                throw new InvalidDataException(
                    string.Format("Label file {0}, data row {1}: class column is missing", lpath, row));
            }

            // Resolve the class before loading so a bad label fails without paying for the load.
            var clsName = segs[1];
            var cls = m_Classes.First(c => c.Value.Name.Equals(clsName));
            var mark = m_Marks[cls.Key];

            var data = loadFile(file.FullName);
            sample.Add(data, mark);
        }
    }

    Console.WriteLine("Loaded files from: {0}", path);
}
/// <summary>
/// Clears <paramref name="sample"/> and refills it with every file found in <paramref name="path"/>.
/// A file whose name starts with <c>CAT_PREFIX</c> is paired with <c>m_Marks[0]</c>, one starting
/// with <c>DOG_PREFIX</c> with <c>m_Marks[1]</c>; any other file aborts the load with an
/// <c>MLException</c>.
/// </summary>
/// <param name="path">Directory with the image files.</param>
/// <param name="sample">Sample to fill; its previous content is discarded.</param>
private void loadData(string path, MultiRegressionSample<double[][,]> sample)
{
    sample.Clear();

    var folder = new DirectoryInfo(path);
    var done = 0;
    var fileCount = folder.GetFiles().Length;

    foreach (var entry in folder.EnumerateFiles())
    {
        // The file is loaded before its name is checked.
        var pixels = LoadFile(entry.FullName);

        var isCat = entry.Name.StartsWith(CAT_PREFIX);
        var isDog = !isCat && entry.Name.StartsWith(DOG_PREFIX);
        if (!isCat && !isDog)
        {
            throw new MLException("Unknown file");
        }

        double[] target = m_Marks[isCat ? 0 : 1];
        sample.Add(pixels, target);

        // Progress is reported once per thousand files.
        if (++done % 1000 == 0)
        {
            Console.Write("\rloaded: {0} of {1}        ", done, fileCount);
        }
    }

    Console.WriteLine("\nLoaded files from: {0}", path);
}
/// <summary>
/// Appends to <paramref name="sample"/> the image records stored in the given binary files.
/// </summary>
/// <param name="fpaths">Files to read, processed in the given order.</param>
/// <param name="sample">Sample to append to; it is not cleared first.</param>
/// <remarks>
/// Each record is one label byte followed by 3 planes of 32x32 one-byte pixels, stored plane by
/// plane, row by row. The label selects the mark via <c>m_Marks[label]</c>; pixels are divided
/// by 255.
/// NOTE(review): this layout matches the CIFAR-10 binary format - confirm against the data source.
/// </remarks>
/// <exception cref="EndOfStreamException">A file ends in the middle of a record.</exception>
private void loadSample(string[] fpaths, MultiRegressionSample<double[][,]> sample)
{
    const int channels = 3;
    const int side = 32;

    foreach (var fpath in fpaths)
    {
        using (var file = File.Open(fpath, FileMode.Open, FileAccess.Read))
        {
            while (true)
            {
                var label = file.ReadByte();
                if (label < 0)
                {
                    break; // clean end of file
                }

                var cls = m_Marks[label];

                var data = new double[channels][,];
                for (int d = 0; d < channels; d++)
                {
                    data[d] = new double[side, side];
                    for (int y = 0; y < side; y++)
                    {
                        for (int x = 0; x < side; x++)
                        {
                            // ReadByte returns -1 at end of stream; without this check a
                            // truncated record would be stored with pixels of value -1/255.
                            var shade = file.ReadByte();
                            if (shade < 0)
                            {
                                throw new EndOfStreamException(
                                    string.Format("Truncated image record in {0}", fpath));
                            }

                            data[d][y, x] = shade / 255.0D;
                        }
                    }
                }

                sample.Add(data, cls);
            }
        }
    }
}
/// <summary>
/// Click handler that evaluates <c>m_Network</c> and <c>m_NetworkF</c> on 500 files taken from a
/// fixed directory and shows the error percentages in a message box.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
/// <remarks>
/// Reported values (integer percentages of the processed files):
/// Errors1 - misclassified by <c>m_Network</c>; Errors2 - misclassified by <c>m_NetworkF</c>;
/// ErrorsC - misclassified by both; ErrorR - misclassified by the weighted blend of both outputs.
/// A file is expected to be class 0 when its name starts with "cat." and class 1 otherwise.
/// </remarks>
private void onTestButtonClick(object sender, RoutedEventArgs e)
{
    var path = @"F:\Work\science\Machine learning\data\cat-dog\train\kaggle";

    var errors1 = 0; // m_Network wrong
    var errors2 = 0; // m_NetworkF wrong
    var errorsC = 0; // both networks wrong
    var errorsR = 0; // blended result wrong

    // Share of m_Network's output in the blend, per output index; m_NetworkF gets the rest.
    var alp1 = 0.95D;
    var alp2 = 0.05D;

    var dir = new DirectoryInfo(path);
    var sample = new MultiRegressionSample<double[][,]>();
    var cat = new double[] { 1.0D, 0.0D };
    var dog = new double[] { 0.0D, 1.0D };

    int cnt = 0;
    foreach (var file in dir.EnumerateFiles().Shuffle(0).Skip(10000).Take(500))
    {
        var fname = Path.GetFileNameWithoutExtension(file.Name);
        var expected = fname.StartsWith("cat.", StringComparison.Ordinal) ? 0 : 1;

        var data1 = getNetData(file.FullName);

        double[][,] data2;
        using (var image = (Bitmap)System.Drawing.Image.FromFile(file.FullName))
        {
            data2 = getNetFData(image);
        }

        sample.Add(data2, expected == 0 ? cat : dog);

        var result1 = m_Network.Calculate(data1).Select(d => d[0, 0]).ToArray();
        var actual1 = ML.Core.Mathematics.MathUtils.ArgMax(result1);
        var wrong1 = expected != actual1;
        if (wrong1)
        {
            errors1++;
        }

        var result2 = m_NetworkF.Calculate(data2).Select(d => d[0, 0]).ToArray();
        var actual2 = ML.Core.Mathematics.MathUtils.ArgMax(result2);
        var wrong2 = expected != actual2;
        if (wrong2)
        {
            errors2++;
        }

        var resultR = new double[result1.Length];
        resultR[0] = alp1 * result1[0] + (1 - alp1) * result2[0];
        resultR[1] = alp2 * result1[1] + (1 - alp2) * result2[1];
        var actualR = ML.Core.Mathematics.MathUtils.ArgMax(resultR);
        if (expected != actualR)
        {
            errorsR++;
        }

        if (wrong1 && wrong2)
        {
            errorsC++;
        }

        cnt++;
    }

    // Integer percentages over the processed files; all zero when no file was processed.
    var pct1 = cnt > 0 ? errors1 * 100 / cnt : 0;
    var pct2 = cnt > 0 ? errors2 * 100 / cnt : 0;
    var pctC = cnt > 0 ? errorsC * 100 / cnt : 0;
    var pctR = cnt > 0 ? errorsR * 100 / cnt : 0;

    // The result of GetErrors is not displayed. The call is kept because whether it has
    // side effects cannot be determined from this method - TODO confirm and remove if pure.
    var alg = new BackpropAlgorithm(m_NetworkF);
    alg.GetErrors(sample, 0, true);

    var message = "Errors1: {0}%, Errors2: {1}%, ErrorsC: {2}%, ErrorR: {3}%";
    MessageBox.Show(string.Format(message, pct1, pct2, pctC, pctR));
}