Esempio n. 1
0
        /// <summary>
        /// Builds a multi-regression sample from a classified sample by replacing the
        /// class of every entry with a one-hot vector.
        /// </summary>
        /// <param name="sample">Classified sample to convert; may be null.</param>
        /// <returns>
        /// null when <paramref name="sample"/> is null; otherwise a new sample holding the
        /// same keys, each mapped to the one-hot vector of its class. The vector length
        /// equals the number of items in <c>sample.Classes</c>, and entries of the same
        /// class share one vector instance.
        /// </returns>
        public static MultiRegressionSample <TObj> ClassifiedToRegressionSample <TObj>(ClassifiedSample <TObj> sample)
        {
            if (sample == null)
            {
                return(null);
            }

            var classList = sample.Classes.ToList();
            var dimension = classList.Count;

            // Row k is the one-hot vector of the k-th class: all zeros except a 1 at position k.
            var oneHot = new double[dimension][];
            for (int k = 0; k < dimension; k++)
            {
                var row = new double[dimension];
                row[k]    = 1;
                oneHot[k] = row;
            }

            var converted = new MultiRegressionSample <TObj>();

            foreach (var pair in sample)
            {
                // The position of the class within the class list selects its one-hot row.
                var position = classList.IndexOf(pair.Value);
                converted.Add(pair.Key, oneHot[position]);
            }

            return(converted);
        }
Esempio n. 2
0
        /// <summary>
        /// Replaces the contents of <paramref name="sample"/> with the images found in
        /// <paramref name="path"/>, pairing every image with the target vector listed for
        /// its numeric id in the "labels.csv" file located in the same directory.
        /// </summary>
        /// <param name="path">Directory that contains the image files and "labels.csv".</param>
        /// <param name="sample">Sample that is cleared and then refilled.</param>
        /// <exception cref="KeyNotFoundException">An image id has no row in "labels.csv".</exception>
        /// <exception cref="FormatException">An id or a label value is not a valid integer.</exception>
        private void loadData(string path, MultiRegressionSample <double[][, ]> sample)
        {
            sample.Clear();

            var dir    = new DirectoryInfo(path);
            var loaded = 0;
            var total  = dir.GetFiles().Length;

            // The label file and the file names are machine-readable data, so numbers are
            // parsed independently of the current UI culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var marks = new Dictionary <int, double[]>();
            var lpath = Path.Combine(path, "labels.csv");

            using (var lfile = File.Open(lpath, FileMode.Open, FileAccess.Read))
            using (var reader = new StreamReader(lfile))
            {
                reader.ReadLine(); // skip the header row

                // Reading stops at the end of the file or at the first blank line.
                string line;
                while (!string.IsNullOrWhiteSpace(line = reader.ReadLine()))
                {
                    // Row layout: id, followed by integer values that are scaled by 1/255.
                    var segs = line.Split(',');
                    var id   = int.Parse(segs[0], invariant);
                    var mark = new double[segs.Length - 1];
                    for (int i = 0; i < mark.Length; i++)
                    {
                        mark[i] = int.Parse(segs[i + 1], invariant) / 255.0D;
                    }

                    marks[id] = mark;
                }
            }

            // Only files whose name starts with IMG_PREFIX are images; the rest of the
            // name (without extension) is the numeric id used to look up the label row.
            foreach (var file in dir.EnumerateFiles().Where(f => f.Name.StartsWith(IMG_PREFIX, StringComparison.Ordinal)))
            {
                var fname = Path.GetFileNameWithoutExtension(file.Name);
                var id    = int.Parse(fname.Substring(IMG_PREFIX.Length), invariant);
                var data  = loadFile(file.FullName);

                sample.Add(data, marks[id]);
                loaded++;
                if (loaded % 1000 == 0)
                {
                    Console.Write("\rloaded: {0} of {1}        ", loaded, total);
                }
            }

            Console.WriteLine("\nLoaded files from: {0}", path);
        }
Esempio n. 3
0
        /// <summary>
        /// Appends to <paramref name="sample"/> the images stored in the binary image file
        /// <paramref name="ipath"/>, each paired with the mark selected by the matching
        /// label byte from <paramref name="lpath"/>.
        /// </summary>
        /// <param name="ipath">Path of the image data file (must start with magic number 2051).</param>
        /// <param name="lpath">Path of the label data file (must start with magic number 2049).</param>
        /// <param name="sample">Sample that receives the loaded items; it is not cleared first.</param>
        /// <exception cref="InvalidDataException">A file does not start with the expected magic number.</exception>
        /// <exception cref="EndOfStreamException">A file ends before all announced items were read.</exception>
        private void loadSample(string ipath, string lpath, MultiRegressionSample <double[][, ]> sample)
        {
            const int imageMagic = 2051;
            const int labelMagic = 2049;

            using (var ifile = File.Open(ipath, FileMode.Open, FileAccess.Read))
            using (var lfile = File.Open(lpath, FileMode.Open, FileAccess.Read))
            {
                if (ReadInt32BigEndian(ifile) != imageMagic)
                {
                    throw new InvalidDataException("Incorrect MNIST image datafile");
                }
                if (ReadInt32BigEndian(lfile) != labelMagic)
                {
                    throw new InvalidDataException("Incorrect MNIST label datafile");
                }

                var count = ReadInt32BigEndian(ifile);
                var rows  = ReadInt32BigEndian(ifile);
                var cols  = ReadInt32BigEndian(ifile);

                // Skip the second header field of the label file
                // (presumably the label count - TODO confirm against the file format).
                ReadInt32BigEndian(lfile);

                for (int q = 0; q < count; q++)
                {
                    var data = new double[1][, ] {
                        new double[rows, cols]
                    };
                    for (int i = 0; i < rows; i++)
                    {
                        for (int j = 0; j < cols; j++)
                        {
                            // ReadByte returns -1 at end of stream; without this check a
                            // truncated file would silently yield negative pixel values.
                            var shade = ifile.ReadByte();
                            if (shade < 0)
                            {
                                throw new EndOfStreamException("Unexpected end of MNIST image datafile");
                            }

                            // do not invert 255-* because we want to keep logical format: 0=white, 255=black - not image color format!
                            data[0][i, j] = shade / 255.0D;
                        }
                    }

                    var label = lfile.ReadByte();
                    if (label < 0)
                    {
                        throw new EndOfStreamException("Unexpected end of MNIST label datafile");
                    }

                    sample.Add(data, m_Marks[label]);
                }

                Console.WriteLine("Loaded: {0}", ipath);
                Console.WriteLine("Loaded: {0}", lpath);
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Appends to <paramref name="sample"/> the records read from every file in
        /// <paramref name="fpaths"/>. A record is one label byte followed by three
        /// 32x32 planes of bytes; every byte is scaled by 1/255. Records whose label has
        /// no entry in m_Marks are skipped.
        /// </summary>
        /// <param name="fpaths">Paths of the binary files to read, processed in order.</param>
        /// <param name="sample">Sample that receives the loaded records; it is not cleared first.</param>
        private void loadSample(string[] fpaths, MultiRegressionSample <double[][, ]> sample)
        {
            const int planes = 3;
            const int side   = 32;

            foreach (var fpath in fpaths)
            {
                using (var stream = File.Open(fpath, FileMode.Open, FileAccess.Read))
                {
                    // ReadByte yields a negative value once the end of the file is reached.
                    int label;
                    while ((label = stream.ReadByte()) >= 0)
                    {
                        double[] mark;
                        if (!m_Marks.TryGetValue(label, out mark))
                        {
                            // Unknown label: jump over the pixel payload of this record.
                            stream.Seek(planes * side * side, SeekOrigin.Current);
                            continue;
                        }

                        var image = new double[planes][, ];
                        for (int p = 0; p < planes; p++)
                        {
                            image[p] = new double[side, side];
                        }

                        // Payload order: plane by plane, row by row, column by column.
                        for (int p = 0; p < planes; p++)
                        {
                            var plane = image[p];
                            for (int row = 0; row < side; row++)
                            {
                                for (int col = 0; col < side; col++)
                                {
                                    plane[row, col] = stream.ReadByte() / 255.0D;
                                }
                            }
                        }

                        sample.Add(image, mark);
                    }
                }
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Replaces the contents of <paramref name="sample"/> with the images stored in the
        /// CSV file <paramref name="ipath"/>. After a header line, every row holds an integer
        /// label followed by IMG_SIZE*IMG_SIZE integer shades in row-major order; shades
        /// are scaled by 1/255.
        /// </summary>
        /// <param name="ipath">Path of the CSV file.</param>
        /// <param name="sample">Sample that is cleared and then refilled.</param>
        /// <exception cref="InvalidDataException">A row has fewer values than one label plus a full image.</exception>
        /// <exception cref="FormatException">A value is not a valid integer.</exception>
        private void loadSample(string ipath, MultiRegressionSample <double[][, ]> sample)
        {
            sample.Clear();

            var expected = 1 + IMG_SIZE * IMG_SIZE; // label column + one column per pixel

            using (var ifile = File.Open(ipath, FileMode.Open, FileAccess.Read))
            using (var reader = new StreamReader(ifile))
            {
                reader.ReadLine(); // skip the header row

                // Reading stops at the end of the file or at the first blank line.
                string str;
                while (!string.IsNullOrWhiteSpace(str = reader.ReadLine()))
                {
                    // Machine-readable data: parse independently of the current UI culture.
                    var raw = str.Split(',')
                              .Select(d => int.Parse(d, System.Globalization.CultureInfo.InvariantCulture))
                              .ToArray();

                    if (raw.Length < expected)
                    {
                        throw new InvalidDataException(
                                  string.Format("Row in '{0}' has {1} values, expected at least {2}", ipath, raw.Length, expected));
                    }

                    var label = raw[0];
                    var data  = new double[1][, ] {
                        new double[IMG_SIZE, IMG_SIZE]
                    };

                    var next = 1; // index of the next shade within the row
                    for (int y = 0; y < IMG_SIZE; y++)
                    {
                        for (int x = 0; x < IMG_SIZE; x++)
                        {
                            // do not invert 255-* because we want to keep logical format: 0=white, 255=black - not image color format!
                            data[0][y, x] = raw[next++] / 255.0D;
                        }
                    }

                    sample.Add(data, m_Marks[label]);
                }

                Console.WriteLine("Loaded: {0}", ipath);
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Replaces the contents of <paramref name="sample"/> with all files of the directory
        /// <paramref name="path"/>, loading them in parallel. Files whose name starts with
        /// CAT_PREFIX get m_Marks[0], files starting with DOG_PREFIX get m_Marks[1].
        /// Because of the parallel load, the order of items in the sample is not defined.
        /// </summary>
        /// <param name="path">Directory that contains the training files.</param>
        /// <param name="sample">Sample that is cleared and then refilled.</param>
        /// <exception cref="MLException">
        /// A file name has neither prefix (raised inside Parallel.ForEach).
        /// </exception>
        private void loadTrain(string path, MultiRegressionSample <double[][, ]> sample)
        {
            sample.Clear();

            var dir    = new DirectoryInfo(path);
            var loaded = 0;
            var total  = dir.GetFiles().Length;

            // Private gate: guards both the sample and the progress counter without
            // locking on an object that is visible to the caller.
            var gate = new object();

            Parallel.ForEach(dir.EnumerateFiles(), file =>
            {
                // Classify by name first so that an unknown file fails before any
                // image decoding work is spent on it.
                double[] mark;
                if (file.Name.StartsWith(CAT_PREFIX, StringComparison.Ordinal))
                {
                    mark = m_Marks[0];
                }
                else if (file.Name.StartsWith(DOG_PREFIX, StringComparison.Ordinal))
                {
                    mark = m_Marks[1];
                }
                else
                {
                    throw new MLException("Unknown file");
                }

                var data = loadFile(file.FullName);

                lock (gate)
                {
                    sample.Add(data, mark);
                    loaded++;
                    if (loaded % 1000 == 0)
                    {
                        Console.Write("\rloaded: {0} of {1}        ", loaded, total);
                    }
                }
            });

            Console.WriteLine("\nLoaded files from: {0}", path);
        }
Esempio n. 7
0
        /// <summary>
        /// Replaces the contents of <paramref name="sample"/> with all files of the directory
        /// <paramref name="path"/>. The class of each file is taken from the second column of
        /// the next row of the label file <paramref name="lpath"/> (after its header row).
        /// </summary>
        /// <remarks>
        /// Files and label rows are paired purely by position: the n-th enumerated file gets
        /// the n-th label row. NOTE(review): this relies on the directory enumeration order
        /// matching the row order of the label file - confirm for the data set in use.
        /// </remarks>
        /// <param name="path">Directory that contains the training files.</param>
        /// <param name="lpath">Path of the comma-separated label file.</param>
        /// <param name="sample">Sample that is cleared and then refilled.</param>
        /// <exception cref="InvalidDataException">
        /// The label file has fewer rows than there are files, or a row has no second column.
        /// </exception>
        /// <exception cref="InvalidOperationException">A class name is not present in m_Classes.</exception>
        private void loadTrain(string path, string lpath, MultiRegressionSample <double[][, ]> sample)
        {
            sample.Clear();

            using (var lfile = File.Open(lpath, FileMode.Open, FileAccess.Read))
            using (var reader = new StreamReader(lfile))
            {
                reader.ReadLine(); // read label file header

                var dir = new DirectoryInfo(path);
                foreach (var file in dir.EnumerateFiles())
                {
                    // ReadLine returns null at end of file; report that explicitly instead
                    // of failing with a NullReferenceException.
                    var line = reader.ReadLine();
                    if (line == null)
                    {
                        throw new InvalidDataException(
                                  string.Format("Label file '{0}' has no row for file '{1}'", lpath, file.Name));
                    }

                    var segs = line.Split(',');
                    if (segs.Length < 2)
                    {
                        throw new InvalidDataException(
                                  string.Format("Label file '{0}' has a malformed row for file '{1}': '{2}'", lpath, file.Name, line));
                    }

                    var clsName = segs[1];
                    var data    = loadFile(file.FullName);
                    var cls     = m_Classes.First(c => c.Value.Name.Equals(clsName));
                    var mark    = m_Marks[cls.Key];
                    sample.Add(data, mark);
                }
            }

            Console.WriteLine("Loaded files from: {0}", path);
        }
Esempio n. 8
0
        /// <summary>
        /// Replaces the contents of <paramref name="sample"/> with all files of the directory
        /// <paramref name="path"/>, loaded one after another. Files whose name starts with
        /// CAT_PREFIX get m_Marks[0], files starting with DOG_PREFIX get m_Marks[1].
        /// Progress is written to the console after every 1000 files.
        /// </summary>
        /// <param name="path">Directory that contains the files to load.</param>
        /// <param name="sample">Sample that is cleared and then refilled.</param>
        /// <exception cref="MLException">A file name has neither prefix.</exception>
        private void loadData(string path, MultiRegressionSample <double[][, ]> sample)
        {
            sample.Clear();

            var folder    = new DirectoryInfo(path);
            var done      = 0;
            var fileCount = folder.GetFiles().Length;

            foreach (var entry in folder.EnumerateFiles())
            {
                var pixels = LoadFile(entry.FullName);

                // The file name prefix decides the class; anything else is rejected.
                var isCat = entry.Name.StartsWith(CAT_PREFIX);
                if (!isCat && !entry.Name.StartsWith(DOG_PREFIX))
                {
                    throw new MLException("Unknown file");
                }

                sample.Add(pixels, isCat ? m_Marks[0] : m_Marks[1]);

                if (++done % 1000 == 0)
                {
                    Console.Write("\rloaded: {0} of {1}        ", done, fileCount);
                }
            }

            Console.WriteLine("\nLoaded files from: {0}", path);
        }
Esempio n. 9
0
        /// <summary>
        /// Appends to <paramref name="sample"/> the records read from every file in
        /// <paramref name="fpaths"/>. A record is one label byte followed by three
        /// 32x32 planes of bytes; every byte is scaled by 1/255 and the label selects
        /// the mark from m_Marks.
        /// </summary>
        /// <param name="fpaths">Paths of the binary files to read, processed in order.</param>
        /// <param name="sample">Sample that receives the loaded records; it is not cleared first.</param>
        private void loadSample(string[] fpaths, MultiRegressionSample <double[][, ]> sample)
        {
            const int side      = 32;
            const int planeSize = side * side;
            const int planes    = 3;

            foreach (var fpath in fpaths)
            {
                using (var stream = File.Open(fpath, FileMode.Open, FileAccess.Read))
                {
                    // ReadByte yields a negative value once the end of the file is reached.
                    for (var label = stream.ReadByte(); label >= 0; label = stream.ReadByte())
                    {
                        var mark = m_Marks[label];

                        var image = new double[planes][, ];
                        for (int p = 0; p < planes; p++)
                        {
                            image[p] = new double[side, side];
                        }

                        // Walk the payload as one flat run of bytes: the plane index changes
                        // slowest, then the row, then the column.
                        for (int k = 0; k < planes * planeSize; k++)
                        {
                            var offset = k % planeSize;
                            image[k / planeSize][offset / side, offset % side] = stream.ReadByte() / 255.0D;
                        }

                        sample.Add(image, mark);
                    }
                }
            }
        }
Esempio n. 10
0
        /// <summary>
        /// Click handler that evaluates two networks (m_Network and m_NetworkF) on a fixed
        /// subset of 500 files from a hard-coded directory and shows, in a message box, the
        /// integer error percentages of the first network, the second network, the cases
        /// where both are wrong, and a weighted blend of both outputs.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event arguments (not used).</param>
        private void onTestButtonClick(object sender, RoutedEventArgs e)
        {
            var path = @"F:\Work\science\Machine learning\data\cat-dog\train\kaggle";

            // Blend weights: alp1 is the weight of network 1 for output 0,
            // alp2 is the weight of network 1 for output 1.
            var alp1 = 0.95D;
            var alp2 = 0.05D;

            var errors1    = 0; // network 1 wrong
            var errors2    = 0; // network 2 wrong
            var errorsBoth = 0; // both networks wrong on the same file
            var errorsR    = 0; // blended result wrong
            var cnt        = 0; // files evaluated

            var dir    = new DirectoryInfo(path);
            var sample = new MultiRegressionSample <double[][, ]>();
            var cat    = new double[] { 1.0D, 0.0D };
            var dog    = new double[] { 0.0D, 1.0D };

            foreach (var file in dir.EnumerateFiles().Shuffle(0).Skip(10000).Take(500))
            {
                // Expected class index: 0 when the file name starts with "cat.", otherwise 1.
                var fname    = Path.GetFileNameWithoutExtension(file.Name);
                var expected = fname.StartsWith("cat.") ? 0 : 1;

                var data1 = getNetData(file.FullName);
                double[][,] data2;
                using (var image = (Bitmap)System.Drawing.Image.FromFile(file.FullName))
                {
                    data2 = getNetFData(image);
                }

                sample.Add(data2, expected == 0 ? cat : dog);

                var result1 = m_Network.Calculate(data1).Select(d => d[0, 0]).ToArray();
                var actual1 = ML.Core.Mathematics.MathUtils.ArgMax(result1);
                var wrong1  = expected != actual1;
                if (wrong1)
                {
                    errors1++;
                }

                var result2 = m_NetworkF.Calculate(data2).Select(d => d[0, 0]).ToArray();
                var actual2 = ML.Core.Mathematics.MathUtils.ArgMax(result2);
                var wrong2  = expected != actual2;
                if (wrong2)
                {
                    errors2++;
                }

                // Blend the two outputs with separate weights per output component.
                var resultR = new double[result1.Length];
                resultR[0] = alp1 * result1[0] + (1 - alp1) * result2[0];
                resultR[1] = alp2 * result1[1] + (1 - alp2) * result2[1];
                var actualR = ML.Core.Mathematics.MathUtils.ArgMax(resultR);
                if (expected != actualR)
                {
                    errorsR++;
                }

                if (wrong1 && wrong2)
                {
                    errorsBoth++;
                }

                cnt++;
            }

            // Integer percentages; they stay 0 when no file was evaluated.
            var pct1 = 0;
            var pct2 = 0;
            var pctC = 0;
            var pctR = 0;
            if (cnt > 0)
            {
                pct1 = errors1 * 100 / cnt;
                pct2 = errors2 * 100 / cnt;
                pctC = errorsBoth * 100 / cnt;
                pctR = errorsR * 100 / cnt;
            }

            // The result of GetErrors is not used here; the call is kept because it may
            // have side effects - TODO confirm and drop it if it has none.
            var alg = new BackpropAlgorithm(m_NetworkF);
            alg.GetErrors(sample, 0, true);

            var message = "Errors1: {0}%, Errors2: {1}%, ErrorsC: {2}%, ErrorR: {3}%";

            MessageBox.Show(string.Format(message, pct1, pct2, pctC, pctR));
        }