コード例 #1
0
        /// <summary>
        /// Computes, for each numeric column, the population standard deviation (sum of squared
        /// deviations divided by the number of observed values) over every record read from the
        /// given files.
        /// </summary>
        /// <param name="means">Per-column means to measure deviations against; indexed by column.</param>
        /// <param name="srcPaths">Files to read through <c>OneHotRecord.EnumerateBinLines</c>.</param>
        /// <returns>
        /// One value per numeric column. NaN entries in the data are skipped; a column with no
        /// non-NaN values produces NaN because the division is 0 / 0.
        /// </returns>
        public static float[] GetStdDevs(float[] means, params string[] srcPaths)
        {
            var columnCount    = RawRecord.NUMERIC_COUNT;
            var sumSquaredDiff = new double[columnCount];
            var observed       = new int[columnCount];

            foreach (var path in srcPaths)
            {
                foreach (var record in OneHotRecord.EnumerateBinLines(path))
                {
                    for (var col = 0; col < columnCount; col++)
                    {
                        // Widen to double so the accumulation does not lose precision.
                        double value = record.NumericData[col];
                        if (double.IsNaN(value))
                        {
                            // NaN entries are excluded from both the sum and the count.
                            continue;
                        }

                        double delta = value - means[col];
                        sumSquaredDiff[col] += delta * delta;
                        observed[col]++;
                    }
                }
            }

            var stdDevs = new float[columnCount];
            for (var col = 0; col < columnCount; col++)
            {
                stdDevs[col] = (float)Math.Sqrt(sumSquaredDiff[col] / observed[col]);
            }
            return stdDevs;
        }
コード例 #2
0
        /// <summary>
        /// Standardizes the numeric columns of the train and test files and writes each result to
        /// its destination as a Deflate-compressed binary file.
        /// </summary>
        /// <remarks>
        /// Means and standard deviations are computed over both source files together. Each
        /// non-NaN value becomes <c>(value - mean) / stddev</c>, clamped to the range [-3, 3].
        /// NaN values are written as 0. Columns whose standard deviation is zero or NaN are also
        /// written as 0, since dividing by such a deviation would produce NaN or infinity.
        /// </remarks>
        /// <param name="srcTrainPath">Training file to read.</param>
        /// <param name="srcTestPath">Test file to read.</param>
        /// <param name="dstTrainPath">Output path for the standardized training data; overwritten if it exists.</param>
        /// <param name="dstTestPath">Output path for the standardized test data; overwritten if it exists.</param>
        public static void ScaleNumericValues(string srcTrainPath, string srcTestPath, string dstTrainPath, string dstTestPath)
        {
            Console.WriteLine("Computing means");
            var means = OneHotRecord.GetMeans(srcTrainPath, srcTestPath);

            Console.WriteLine("Computing stddevs");
            var stdevs = OneHotRecord.GetStdDevs(means, srcTrainPath, srcTestPath);

            // Keep each source/destination pair as separate strings; joining them with a
            // delimiter and splitting again would break on paths containing that delimiter.
            var pathPairs = new[]
            {
                new[] { srcTrainPath, dstTrainPath },
                new[] { srcTestPath, dstTestPath }
            };

            foreach (var pathPair in pathPairs)
            {
                var srcPath = pathPair[0];
                var dstPath = pathPair[1];

                // FileMode.Create truncates an existing file, so no separate delete is needed.
                // The using blocks flush and close all three layers even if a record fails to
                // read or write part-way through.
                using (var fileStream = new FileStream(dstPath, FileMode.Create, FileAccess.Write, FileShare.None))
                using (var compressedStream = new DeflateStream(fileStream, CompressionMode.Compress))
                using (var writer = new BinaryWriter(compressedStream))
                {
                    Console.WriteLine("Standardizing" + Path.GetFileName(srcPath));
                    foreach (var rec in OneHotRecord.EnumerateBinLines(srcPath))
                    {
                        for (var i = 0; i < RawRecord.NUMERIC_COUNT; i++)
                        {
                            var val   = rec.NumericData[i];
                            var stdev = stdevs[i];

                            // !(stdev > 0f) is true for both zero and NaN deviations.
                            if (float.IsNaN(val) || !(stdev > 0f))
                            {
                                rec.NumericData[i] = 0f;
                            }
                            else
                            {
                                var newVal = (val - means[i]) / stdev;
                                if (newVal > 3f)
                                {
                                    newVal = 3f;
                                }
                                if (newVal < -3f)
                                {
                                    newVal = -3f;
                                }
                                rec.NumericData[i] = newVal;
                            }
                        }
                        rec.WriteBinary(writer);
                    }
                }
            }
        }
コード例 #3
0
        /// <summary>
        /// Computes the mean of each numeric column over every record read from the given files,
        /// ignoring NaN entries.
        /// </summary>
        /// <remarks>
        /// Also prints the number of records with a non-zero label and the total record count to
        /// the console.
        /// </remarks>
        /// <param name="srcPaths">Files to read through <c>OneHotRecord.EnumerateBinLines</c>.</param>
        /// <returns>
        /// One mean per numeric column. A column with no non-NaN values yields NaN because the
        /// division is 0 / 0.
        /// </returns>
        public static float[] GetMeans(params string[] srcPaths)
        {
            var means       = new float[RawRecord.NUMERIC_COUNT];
            var totals      = new double[RawRecord.NUMERIC_COUNT];
            var counts      = new int[RawRecord.NUMERIC_COUNT];
            var label1count = 0;
            var recordCount = 0;

            foreach (var srcPath in srcPaths)
            {
                foreach (var src in OneHotRecord.EnumerateBinLines(srcPath))
                {
                    if (src.Label != 0)
                    {
                        label1count++;
                    }
                    recordCount++;
                    for (var i = 0; i < RawRecord.NUMERIC_COUNT; i++)
                    {
                        var val = src.NumericData[i];
                        if (!float.IsNaN(val))
                        {
                            totals[i] += val;
                            counts[i] += 1;
                        }
                    }
                }
            }

            Console.WriteLine("Labels : " + label1count + "//" + recordCount);
            for (var i = 0; i < RawRecord.NUMERIC_COUNT; i++)
            {
                // Divide in double precision and narrow once at the end. Casting the sum and the
                // count to float first would discard the precision the double accumulator keeps.
                means[i] = (float)(totals[i] / counts[i]);
            }
            return means;
        }