Example #1
0
        /// <summary>
        /// Reads a labelled text corpus from <paramref name="fpath"/> and writes it to
        /// <paramref name="opath"/> as a comma-separated table of feature vectors.
        /// </summary>
        /// <remarks>
        /// Every non-blank source line is split on SEPARATOR: the first segment is looked up in
        /// m_Classes and the second segment is taken as the document text. The output starts with a
        /// header row (one column per vocabulary entry followed by "_class,_value,_training") and then
        /// holds one row per document whose feature vector is not reported as empty.
        /// </remarks>
        /// <param name="fpath">Path of the source file to read.</param>
        /// <param name="opath">Path of the output file. It is opened with FileMode.CreateNew.</param>
        /// <exception cref="InvalidDataException">A non-blank source line has fewer than two segments.</exception>
        private void doExport(string fpath, string opath)
        {
            var sample = new ClassifiedSample <string>();

            using (var srcFile = File.Open(fpath, FileMode.Open, FileAccess.Read))
            using (var srcReader = new StreamReader(srcFile))
            {
                // Consume the whole file: one labelled document per line.
                string line;
                var    lineNum = 0;
                while ((line = srcReader.ReadLine()) != null)
                {
                    lineNum++;
                    if (string.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    var segs = line.Split(SEPARATOR, StringSplitOptions.RemoveEmptyEntries);
                    if (segs.Length < 2)
                    {
                        throw new InvalidDataException(
                                  string.Format("Line {0} of '{1}' does not contain both a class and a document", lineNum, fpath));
                    }

                    var cls = m_Classes[segs[0]];
                    var doc = segs[1];

                    // NOTE(review): if ClassifiedSample rejects duplicate keys, a repeated document
                    // will throw here - confirm the desired handling of duplicates.
                    sample.Add(doc, cls);
                }
            }

            var vocabulary = Alg.ExtractVocabulary(sample);
            var dim        = vocabulary.Count;
            var builder    = new StringBuilder();

            using (var outFile = File.Open(opath, FileMode.CreateNew, FileAccess.Write))
            using (var outWriter = new StreamWriter(outFile))
            {
                // Header row: vocabulary columns first, then the three service columns.
                for (int i = 0; i < dim; i++)
                {
                    builder.AppendFormat("{0},", vocabulary[i]);
                }
                builder.Append("_class,_value,_training");

                outWriter.WriteLine(builder.ToString());

                foreach (var pData in sample)
                {
                    var  doc = pData.Key;
                    var  cls = pData.Value;
                    bool isEmpty;
                    var  data = Alg.ExtractFeatureVector(doc, out isEmpty);
                    if (isEmpty)
                    {
                        // Documents without a usable feature vector are left out of the export.
                        continue;
                    }

                    builder.Clear();
                    for (int i = 0; i < dim; i++)
                    {
                        builder.AppendFormat("{0},", data[i]);
                    }
                    // The last column ("_training") is always written as 1.
                    builder.AppendFormat("{0},{1},{2}", cls.Name, cls.Value, 1);

                    outWriter.WriteLine(builder.ToString());
                }
            }
        }
Example #2
0
        /// <summary>
        /// Recursively builds a decision (sub)tree for <paramref name="sample"/>.
        /// </summary>
        /// <remarks>
        /// A sample whose objects all share one class becomes a leaf of that class. Otherwise the
        /// pattern returned by <paramref name="informativity"/> splits the sample in two. If either
        /// half is empty the split is useless and a leaf of the most frequent class is returned;
        /// otherwise an inner node is created and both halves are processed recursively.
        /// </remarks>
        /// <param name="patterns">Candidate predicates handed to the informativity index.</param>
        /// <param name="sample">Classified objects to build the (sub)tree for; must not be empty.</param>
        /// <param name="informativity">Index used to pick the splitting pattern.</param>
        /// <returns>Root node of the built (sub)tree.</returns>
        /// <exception cref="MLException">The sample is empty.</exception>
        private DecisionNode <TObj> trainID3Core(IEnumerable <Predicate <TObj> > patterns, ClassifiedSample <TObj> sample, IInformativityIndex <TObj> informativity)
        {
            if (!sample.Any())
            {
                throw new MLException("Empty sample");
            }

            // Homogeneous sample: nothing left to split.
            var firstClass = sample.First().Value;
            var isPure     = sample.All(item => item.Value.Equals(firstClass));
            if (isPure)
            {
                return new LeafNode <TObj>(firstClass);
            }

            // Split the sample by the selected pattern.
            var splitter = informativity.Max(patterns, sample);
            var rejected = new ClassifiedSample <TObj>();
            var accepted = new ClassifiedSample <TObj>();

            foreach (var item in sample)
            {
                var target = splitter(item.Key) ? accepted : rejected;
                target.Add(item.Key, item.Value);
            }

            // The pattern failed to separate anything: fall back to the most frequent class.
            var separates = rejected.Any() && accepted.Any();
            if (!separates)
            {
                var majorClass = sample.GroupBy(item => item.Value)
                                 .OrderByDescending(g => g.Count())
                                 .First()
                                 .Key;
                return new LeafNode <TObj>(majorClass);
            }

            var result      = new InnerNode <TObj>(splitter);
            var negativeSub = trainID3Core(patterns, rejected, informativity);
            var positiveSub = trainID3Core(patterns, accepted, informativity);

            result.SetNegativeNode(negativeSub);
            result.SetPositiveNode(positiveSub);

            return result;
        }
Example #3
0
        /// <summary>
        /// Loads an MNIST image file together with its label file and appends every image to
        /// <paramref name="sample"/>.
        /// </summary>
        /// <remarks>
        /// The image file must start with the value 2051 and the label file with 2049 (both read via
        /// ReadInt32BigEndian). The image header then supplies the image count, row count and column
        /// count. Each pixel byte is scaled to [0, 1] by dividing by 255, and each label byte is
        /// mapped to a class through m_Classes.
        /// </remarks>
        /// <param name="ipath">Path of the image data file.</param>
        /// <param name="lpath">Path of the label data file.</param>
        /// <param name="sample">Sample that receives the loaded images; it is not cleared first.</param>
        /// <exception cref="InvalidDataException">Either file does not start with the expected header value.</exception>
        /// <exception cref="EndOfStreamException">Either file ends before all announced images were read.</exception>
        private void loadSample(string ipath, string lpath, ClassifiedSample <double[][, ]> sample)
        {
            using (var ifile = File.Open(ipath, FileMode.Open, FileAccess.Read))
            using (var lfile = File.Open(lpath, FileMode.Open, FileAccess.Read))
            {
                var header = ReadInt32BigEndian(ifile);
                if (header != 2051)
                {
                    throw new InvalidDataException("Incorrect MNIST image datafile");
                }
                header = ReadInt32BigEndian(lfile);
                if (header != 2049)
                {
                    throw new InvalidDataException("Incorrect MNIST label datafile");
                }

                var count = ReadInt32BigEndian(ifile);
                var rows  = ReadInt32BigEndian(ifile);
                var cols  = ReadInt32BigEndian(ifile);

                // Skip the second header field of the label file; the image file's count drives the loop.
                ReadInt32BigEndian(lfile);

                for (int q = 0; q < count; q++)
                {
                    var data = new double[1][, ] {
                        new double[rows, cols]
                    };
                    for (int i = 0; i < rows; i++)
                    {
                        for (int j = 0; j < cols; j++)
                        {
                            var shade = ifile.ReadByte(); // do not invert 255-* because we want to keep logical format: 0=white, 255=black - not image color format!
                            if (shade < 0)
                            {
                                // ReadByte signals end of stream with -1; without this check a
                                // truncated file would silently yield pixels of -1/255.
                                throw new EndOfStreamException(string.Format("Unexpected end of MNIST image datafile: {0}", ipath));
                            }
                            data[0][i, j] = shade / 255.0D;
                        }
                    }

                    var label = lfile.ReadByte();
                    if (label < 0)
                    {
                        throw new EndOfStreamException(string.Format("Unexpected end of MNIST label datafile: {0}", lpath));
                    }
                    sample.Add(data, m_Classes[label]);
                }

                Console.WriteLine("Loaded: {0}", ipath);
                Console.WriteLine("Loaded: {0}", lpath);
            }
        }
Example #4
0
        /// <summary>
        /// Appends to <paramref name="sample"/> every record of the given binary files whose label
        /// is present in m_Classes.
        /// </summary>
        /// <remarks>
        /// Each record is read as one label byte followed by 3 * 32 * 32 value bytes (three 32x32
        /// planes, stored plane by plane and row by row). Every value byte is scaled to [0, 1] by
        /// dividing by 255. Records whose label is missing from m_Classes are skipped.
        /// </remarks>
        /// <param name="fpaths">Paths of the files to read, processed in the given order.</param>
        /// <param name="sample">Sample that receives the loaded records; it is not cleared first.</param>
        /// <exception cref="EndOfStreamException">A file ends in the middle of a record that is being loaded.</exception>
        private void loadSample(string[] fpaths, ClassifiedSample <double[][, ]> sample)
        {
            const int channels = 3;
            const int side     = 32;

            // Reused for every record: the raw bytes that follow a label byte.
            var record = new byte[channels * side * side];

            foreach (var fpath in fpaths)
            {
                using (var file = File.Open(fpath, FileMode.Open, FileAccess.Read))
                {
                    while (true)
                    {
                        var label = file.ReadByte();
                        if (label < 0)
                        {
                            break; // clean end of file
                        }

                        Class cls;
                        if (!m_Classes.TryGetValue(label, out cls))
                        {
                            // Unknown label: jump over the record body without decoding it.
                            file.Seek(record.Length, SeekOrigin.Current);
                            continue;
                        }

                        // Stream.Read may return fewer bytes than requested, so loop until the
                        // record is complete. A premature end of stream is reported instead of
                        // silently producing values of -1/255.
                        var filled = 0;
                        while (filled < record.Length)
                        {
                            var got = file.Read(record, filled, record.Length - filled);
                            if (got <= 0)
                            {
                                throw new EndOfStreamException(string.Format("Truncated record in file: {0}", fpath));
                            }
                            filled += got;
                        }

                        var data   = new double[channels][, ];
                        var offset = 0;
                        for (int d = 0; d < channels; d++)
                        {
                            data[d] = new double[side, side];
                            for (int y = 0; y < side; y++)
                            {
                                for (int x = 0; x < side; x++)
                                {
                                    data[d][y, x] = record[offset++] / 255.0D;
                                }
                            }
                        }

                        sample.Add(data, cls);
                    }
                }
            }
        }
Example #5
0
        /// <summary>
        /// Replaces the content of <paramref name="sample"/> with the files found in directory
        /// <paramref name="path"/>, classified by the lines of the label file <paramref name="lpath"/>.
        /// </summary>
        /// <remarks>
        /// The first line of the label file is skipped. After that one label line is consumed per
        /// enumerated file; the class name is the second comma-separated field of that line and is
        /// matched against the Name of the classes in m_Classes.
        /// NOTE(review): files and label lines are paired purely by enumeration order - confirm the
        /// directory enumeration order matches the label file.
        /// </remarks>
        /// <param name="path">Directory holding the data files.</param>
        /// <param name="lpath">Path of the comma-separated label file.</param>
        /// <param name="sample">Sample to clear and refill.</param>
        private void loadTrain(string path, string lpath, ClassifiedSample <double[][, ]> sample)
        {
            sample.Clear();

            using (var labelStream = File.Open(lpath, FileMode.Open, FileAccess.Read))
            using (var labels = new StreamReader(labelStream))
            {
                // The first line is a header and carries no label.
                labels.ReadLine();

                foreach (var entry in new DirectoryInfo(path).EnumerateFiles())
                {
                    var image   = loadFile(entry.FullName);
                    var columns = labels.ReadLine().Split(',');
                    var wanted  = columns[1];
                    var match   = m_Classes.First(pair => pair.Value.Name.Equals(wanted));

                    sample.Add(image, match.Value);
                }
            }

            Console.WriteLine("Loaded files from: {0}", path);
        }
Example #6
0
        /// <summary>
        /// Replaces the content of <paramref name="sample"/> with the images stored in the
        /// comma-separated file <paramref name="ipath"/>.
        /// </summary>
        /// <remarks>
        /// The first line is skipped. Every following line holds integers: the first one is the label
        /// (looked up in m_Classes), the next IMG_SIZE * IMG_SIZE ones are the pixels in row-major
        /// order. Pixels are scaled to [0, 1] by dividing by 255. Reading stops at the first empty or
        /// whitespace-only line, or at the end of the file.
        /// </remarks>
        /// <param name="ipath">Path of the file to load.</param>
        /// <param name="sample">Sample to clear and refill.</param>
        private void loadSample(string ipath, ClassifiedSample <double[][, ]> sample)
        {
            sample.Clear();

            using (var stream = File.Open(ipath, FileMode.Open, FileAccess.Read))
            using (var lines = new StreamReader(stream))
            {
                // Header line is read and discarded.
                lines.ReadLine();

                for (var row = lines.ReadLine(); !string.IsNullOrWhiteSpace(row); row = lines.ReadLine())
                {
                    var cells  = row.Split(',');
                    var values = new int[cells.Length];
                    for (int k = 0; k < cells.Length; k++)
                    {
                        values[k] = int.Parse(cells[k]);
                    }

                    var image = new double[IMG_SIZE, IMG_SIZE];
                    var next  = 1; // values[0] is the label, pixels start right after it
                    for (int y = 0; y < IMG_SIZE; y++)
                    {
                        for (int x = 0; x < IMG_SIZE; x++)
                        {
                            // do not invert 255-* because we want to keep logical format: 0=white, 255=black - not image color format!
                            image[y, x] = values[next++] / 255.0D;
                        }
                    }

                    sample.Add(new double[1][, ] { image }, m_Classes[values[0]]);
                }

                Console.WriteLine("Loaded: {0}", ipath);
            }
        }
Example #7
0
        /// <summary>
        /// Appends every record of the given binary files to <paramref name="sample"/>.
        /// </summary>
        /// <remarks>
        /// Each record is read as one label byte followed by 3 * 32 * 32 value bytes (three 32x32
        /// planes, stored plane by plane and row by row). The label is mapped to a class through the
        /// m_Classes indexer and every value byte is scaled to [0, 1] by dividing by 255.
        /// </remarks>
        /// <param name="fpaths">Paths of the files to read, processed in the given order.</param>
        /// <param name="sample">Sample that receives the loaded records; it is not cleared first.</param>
        /// <exception cref="EndOfStreamException">A file ends in the middle of a record.</exception>
        private void loadSample(string[] fpaths, ClassifiedSample <double[][, ]> sample)
        {
            const int channels = 3;
            const int side     = 32;

            // Reused for every record: the raw bytes that follow a label byte.
            var record = new byte[channels * side * side];

            foreach (var fpath in fpaths)
            {
                using (var file = File.Open(fpath, FileMode.Open, FileAccess.Read))
                {
                    while (true)
                    {
                        var label = file.ReadByte();
                        if (label < 0)
                        {
                            break; // clean end of file
                        }

                        var cls = m_Classes[label];

                        // Stream.Read may return fewer bytes than requested, so loop until the
                        // record is complete. A premature end of stream is reported instead of
                        // silently producing values of -1/255.
                        var filled = 0;
                        while (filled < record.Length)
                        {
                            var got = file.Read(record, filled, record.Length - filled);
                            if (got <= 0)
                            {
                                throw new EndOfStreamException(string.Format("Truncated record in file: {0}", fpath));
                            }
                            filled += got;
                        }

                        var data   = new double[channels][, ];
                        var offset = 0;
                        for (int d = 0; d < channels; d++)
                        {
                            data[d] = new double[side, side];
                            for (int y = 0; y < side; y++)
                            {
                                for (int x = 0; x < side; x++)
                                {
                                    data[d][y, x] = record[offset++] / 255.0D;
                                }
                            }
                        }

                        sample.Add(data, cls);
                    }
                }
            }
        }
Example #8
0
        /// <summary>
        /// Reads the comma-separated data rows from <paramref name="reader"/> and fills Data,
        /// Classes, TrainingSample and Errors.
        /// </summary>
        /// <remarks>
        /// Lines starting with "//" are skipped. Reading stops at the first empty or whitespace-only
        /// line, or at the end of the stream. A row is recorded in Errors, and contributes nothing
        /// else, when a required column is missing, a feature value is not a number, or the training
        /// flag is not an integer. Classes not seen before are created on the fly; their value is
        /// taken from the <paramref name="clsValIdx"/> column when that column exists and parses as a
        /// number, and is null otherwise.
        /// </remarks>
        /// <param name="reader">Reader positioned at the first data row.</param>
        /// <param name="featureIndxs">Column indexes of the feature values, in point order.</param>
        /// <param name="trainingIndx">Column index of the training flag, or a negative value when there is none.</param>
        /// <param name="classesIndx">Column index of the class name.</param>
        /// <param name="clsValIdx">Column index of the class value, or a negative value when there is none.</param>
        private void readBody(StreamReader reader, int[] featureIndxs, int trainingIndx, int classesIndx, int clsValIdx)
        {
            var dim     = featureIndxs.Length;
            var lineNum = 0;

            while (true)
            {
                lineNum++;
                var line = reader.ReadLine();
                if (line != null && line.StartsWith("//"))
                {
                    continue;
                }
                if (string.IsNullOrWhiteSpace(line))
                {
                    break;
                }
                var data = line.Split(',');

                // Validate the whole row before touching any collection, so that a bad row
                // never leaves a half-registered point or class behind.
                var point   = new double[dim];
                var success = classesIndx >= 0 && classesIndx < data.Length;
                for (var i = 0; success && i < dim; i++)
                {
                    double result;
                    var    ftIdx = featureIndxs[i];
                    if (ftIdx >= 0 && ftIdx < data.Length && double.TryParse(data[ftIdx], out result))
                    {
                        point[i] = result;
                    }
                    else
                    {
                        success = false;
                    }
                }

                var isTraining = false;
                if (success && trainingIndx >= 0)
                {
                    int flag;
                    if (trainingIndx < data.Length && int.TryParse(data[trainingIndx], out flag))
                    {
                        isTraining = flag != 0;
                    }
                    else
                    {
                        success = false;
                    }
                }

                if (!success)
                {
                    Errors.Add(new DataError {
                        LineNum = lineNum, Line = line
                    });
                    continue;
                }

                Class cls;
                var   clsName = data[classesIndx];
                if (!Classes.TryGetValue(clsName, out cls))
                {
                    double val;
                    var    value = (clsValIdx >= 0 && clsValIdx < data.Length && double.TryParse(data[clsValIdx], out val)) ? val : (double?)null;
                    cls = new Class(clsName, value);
                    Classes[clsName] = cls;
                }

                Data.Add(point, cls);

                if (isTraining)
                {
                    TrainingSample.Add(point, cls);
                }
            }
        }