Beispiel #1
0
 /// <summary>
 /// Serializes a <see cref="DataSourceSet"/> into the file at <paramref name="path"/>.
 /// </summary>
 /// <param name="dss">The data source set to serialize.</param>
 /// <param name="path">Destination file; it is created, or truncated if it already exists.</param>
 /// <param name="compress">Forwarded unchanged to the stream-based overload.</param>
 public static void Serialize(DataSourceSet dss, string path, bool compress = false)
 {
     // FileShare.None keeps the file exclusively locked for the duration of the write.
     using (FileStream output = new FileStream(path, FileMode.Create, FileAccess.Write, FileShare.None))
     {
         Serialize(dss, output, compress);
     }
 }
Beispiel #2
0
        /// <summary>
        /// Rebuilds a <see cref="DataSourceSet"/> from its dictionary-of-tuples form.
        /// </summary>
        /// <param name="obj">
        /// Maps each name to a tuple whose two items are handed, in order, to
        /// <c>DataSourceFactory.Create</c>.
        /// </param>
        /// <returns>A new set holding one data source per dictionary entry.</returns>
        private static DataSourceSet ConvertFromSerializableObject(Dictionary<string, Tuple<float[], int[]>> obj)
        {
            var result = new DataSourceSet();

            foreach (KeyValuePair<string, Tuple<float[], int[]>> pair in obj)
            {
                float[] values     = pair.Value.Item1;
                int[]   dimensions = pair.Value.Item2;

                result.Add(pair.Key, DataSourceFactory.Create(values, dimensions));
            }

            return result;
        }
Beispiel #3
0
 /// <summary>
 /// Writes a single <see cref="DataSourceSet"/> through <c>WriteObject</c>.
 /// </summary>
 /// <remarks>
 /// In the "load" parameter set the set is loaded from <c>Path</c> (after it has been passed
 /// through <c>IO.GetAbsolutePath</c>); in every other parameter set it is constructed from
 /// <c>DataSources</c>.
 /// </remarks>
 protected override void EndProcessing()
 {
     if (ParameterSetName != "load")
     {
         WriteObject(new DataSourceSet(DataSources));
         return;
     }

     // Path itself is overwritten with the resolved value before loading.
     Path = IO.GetAbsolutePath(this, Path);
     WriteObject(DataSourceSet.Load(Path, !NoDecompress));
 }
Beispiel #4
0
        /// <summary>
        /// Packs a <see cref="DataSourceSet"/> into <paramref name="stream"/> using the
        /// MessagePack serializer for <c>Dictionary&lt;string, Tuple&lt;float[], int[]&gt;&gt;</c>.
        /// </summary>
        /// <param name="dss">The data source set to serialize.</param>
        /// <param name="stream">Destination stream; it is not closed by this method.</param>
        /// <param name="compress">
        /// When <c>true</c>, the payload is written through a <see cref="DeflateStream"/> at
        /// <see cref="CompressionLevel.Optimal"/>; otherwise it is written directly.
        /// </param>
        public static void Serialize(DataSourceSet dss, Stream stream, bool compress = false)
        {
            var packer  = MessagePackSerializer.Get<Dictionary<string, Tuple<float[], int[]>>>();
            var payload = ConvertToSerializableObject(dss);

            if (!compress)
            {
                packer.Pack(stream, payload);
                return;
            }

            // leaveOpen: true -- disposing the deflate wrapper flushes it but leaves the
            // caller's stream open.
            using (var deflate = new DeflateStream(stream, CompressionLevel.Optimal, true))
            {
                packer.Pack(deflate, payload);
            }
        }
Beispiel #5
0
        /// <summary>
        /// Lazily parses CTF-style text from <paramref name="reader"/> and yields one
        /// <see cref="CTFSample"/> per run of consecutive lines that share a sequence ID.
        /// </summary>
        /// <remarks>
        /// Each line is split on '|'. The trimmed first column is the sequence ID; every later
        /// column is either a comment (first character '#') or a feature written as
        /// <c>name v1 v2 ...</c>. A feature that does not span every line of its sequence is
        /// replaced by <c>Subset(start, count, -2)</c> covering the range from its first to its
        /// last occurrence.
        /// </remarks>
        /// <param name="reader">Text source; it is consumed line by line while the result is enumerated.</param>
        /// <returns>A deferred sequence of samples in input order; empty input yields nothing.</returns>
        /// <exception cref="InvalidDataException">
        /// A column is empty or consists of a name with no values.
        /// </exception>
        public static IEnumerable <CTFSample> GetSampleReader(TextReader reader)
        {
            int lineCount         = 0;
            int sequenceCount     = 0;
            int seqStartLineCount = 1;

            string seqId = null;

            var splitLines    = new List <string[]>();
            var startIndexMap = new Dictionary <string, int>();
            var endIndexMap   = new Dictionary <string, int>();

            while (true)
            {
                // End of input is detected by ReadLine() returning null instead of by Peek():
                // Peek() can report -1 on readers that cannot look ahead, and relying on it
                // caused the final sequence to be lost when the last line started a new ID.
                var line = reader.ReadLine();

                string[] splitLine = null;
                string   n         = null;

                if (line != null)
                {
                    ++lineCount;

                    splitLine = line.Split(new char[] { '|' });
                    n         = splitLine[0].Trim();

                    if (seqId == null || n == seqId)
                    {
                        // Still inside the current sequence: keep buffering.
                        splitLines.Add(splitLine);
                        seqId = n;
                        continue;
                    }
                }
                else if (splitLines.Count == 0)
                {
                    // Empty input: nothing was buffered, so there is nothing to emit.
                    yield break;
                }

                // Either a line with a new sequence ID was read or the input ended:
                // convert the buffered lines into one sample.
                ++sequenceCount;

                var seqDim = splitLines.Count;

                var dss = new DataSourceSet();

                // A fresh list per sample, so samples yielded earlier keep their own comments.
                var comments = new List <string>();
                startIndexMap.Clear();
                endIndexMap.Clear();

                for (var i = 0; i < splitLines.Count; ++i)
                {
                    var columns = splitLines[i];
                    for (var j = 1; j < columns.Length; ++j)
                    {
                        var feature = columns[j].Trim();

                        // The length check sends an empty column to the "Invalid feature"
                        // error below instead of failing on feature[0].
                        if (feature.Length > 0 && feature[0] == '#')
                        {
                            // Drop the '#' and any whitespace that follows it.
                            comments.Add(feature.Substring(1).TrimStart());
                            continue;
                        }

                        var items = feature.Split();
                        if (items.Length < 2)
                        {
                            // seqStartLineCount is the line number of splitLines[0], so this
                            // is the line number of the offending line itself.
                            throw new InvalidDataException(string.Format("line {0}: Invalid feature", seqStartLineCount + i));
                        }

                        var featureDim = items.Length - 1;
                        var name       = items[0];

                        DataSourceBase <float, float[]> ds;

                        float[] data;
                        if (dss.Features.ContainsKey(name))
                        {
                            ds   = (DataSourceBase <float, float[]>)dss.Features[name];
                            data = ds.TypedData;
                        }
                        else
                        {
                            // First occurrence: allocate room for every line of the sequence.
                            data = new float[featureDim * seqDim];
                            ds   = DataSourceFactory.Create(data, new int[] { featureDim, seqDim, 1 });
                            dss.Add(name, ds);
                            startIndexMap[name] = i;
                        }

                        var baseIndex = ds.Shape.GetSequentialIndex(new int[] { 0, i, 0 });
                        for (var k = 0; k < featureDim; ++k)
                        {
                            data[baseIndex + k] = Converter.ToFloat(items[k + 1]);
                        }
                        endIndexMap[name] = i;
                    }
                }

                // Keys are snapshotted because entries are replaced while iterating.
                foreach (var name in dss.Features.Keys.ToArray())
                {
                    var start = startIndexMap[name];
                    var end   = endIndexMap[name];
                    if (start == 0 && end == splitLines.Count - 1)
                    {
                        continue;
                    }

                    var ds = dss[name];
                    dss.Features[name] = ds.Subset(start, end - start + 1, -2);
                }

                yield return(new CTFSample()
                {
                    LineCount = seqStartLineCount,
                    SequenceCount = sequenceCount,
                    SequenceId = seqId,
                    DataSet = dss,
                    Comments = comments
                });

                if (line == null)
                {
                    // The input ended and the final sequence has just been emitted.
                    yield break;
                }

                // The line that ended the previous sequence is the first line of the next one.
                splitLines.Clear();
                splitLines.Add(splitLine);

                seqStartLineCount = lineCount;
                seqId             = n;
            }
        }
Beispiel #6
0
 /// <summary>
 /// Adds <paramref name="dataSourceSet"/> to the internal <c>_dataQueue</c>.
 /// </summary>
 /// <param name="dataSourceSet">The minibatch to enqueue.</param>
 public void AddMinibatch(DataSourceSet dataSourceSet) => _dataQueue.Add(dataSourceSet);
Beispiel #7
0
        /// <summary>
        /// Flattens a <see cref="DataSourceSet"/> into a dictionary that the serializer can pack.
        /// </summary>
        /// <param name="dss">The set whose <c>Features</c> are converted.</param>
        /// <returns>
        /// One entry per feature: the key is the feature name and the value pairs an array copy
        /// of the feature's <c>Data</c> with its <c>Shape.Dimensions</c>.
        /// </returns>
        private static Dictionary<string, Tuple<float[], int[]>> ConvertToSerializableObject(DataSourceSet dss)
        {
            var result = new Dictionary<string, Tuple<float[], int[]>>();

            foreach (var feature in dss.Features)
            {
                var source = feature.Value;
                var packed = new Tuple<float[], int[]>(source.Data.ToArray(), source.Shape.Dimensions);

                result[feature.Key] = packed;
            }

            return result;
        }
 /// <summary>
 /// Writes <paramref name="dataSourceSet"/> to the file at <paramref name="path"/> by
 /// delegating to the <see cref="TextWriter"/>-based overload.
 /// </summary>
 /// <param name="path">Destination file; existing content is overwritten.</param>
 /// <param name="dataSourceSet">The data to write.</param>
 /// <param name="hasSequenceAxis">Forwarded unchanged to the writer-based overload.</param>
 public static void Write(string path, DataSourceSet dataSourceSet, bool hasSequenceAxis)
 {
     // UTF8Encoding(false): UTF-8 without a byte-order mark.
     var encoding = new UTF8Encoding(false);

     using (var output = new StreamWriter(path, false, encoding))
     {
         Write(output, dataSourceSet, hasSequenceAxis);
     }
 }
        /// <summary>
        /// Writes every data source in <paramref name="dataSourceSet"/> to
        /// <paramref name="writer"/> through a <c>CTFBuilder</c>.
        /// </summary>
        /// <remarks>
        /// The last shape axis is used as the sample count and, when
        /// <paramref name="withSequenceAxis"/> is set, the second-to-last axis as the sequence
        /// length. For each sample one line is emitted per sequence step; data sources whose
        /// sequence is shorter than the current step are skipped on that line.
        /// </remarks>
        /// <param name="writer">Destination passed to the <c>CTFBuilder</c>.</param>
        /// <param name="dataSourceSet">The data to write; all sources must agree on the sample count.</param>
        /// <param name="withSequenceAxis">
        /// <c>true</c> requires every shape to have rank 3 or more; <c>false</c> requires rank 2 or more.
        /// </param>
        /// <exception cref="ArgumentException">
        /// A shape has too few axes, or the sample counts of the data sources differ.
        /// </exception>
        public static void Write(TextWriter writer, DataSourceSet dataSourceSet, bool withSequenceAxis)
        {
            var builder = new CTFBuilder(writer, 0, false);

            // Validation pass: check ranks and sample counts, and find the longest sequence.
            var sampleCount  = dataSourceSet.Features.First().Value.Shape[-1];
            var maxSeqLength = 1;

            var requiredRank = withSequenceAxis ? 3 : 2;

            foreach (var entry in dataSourceSet)
            {
                var source = entry.Value;

                if (source.Shape.Rank < requiredRank)
                {
                    throw new ArgumentException(withSequenceAxis
                        ? "DataSource shape should have sequence and batch axes as the last two"
                        : "DataSource shape should have a batch axis");
                }

                if (source.Shape[-1] != sampleCount)
                {
                    throw new ArgumentException("Sample counts of data sources should be equal");
                }

                if (withSequenceAxis)
                {
                    int length = source.Shape[-2];
                    maxSeqLength = Math.Max(maxSeqLength, length);
                }
            }

            // Number of trailing axes (sequence + batch, or batch only) that follow the
            // per-step data; used to pick the argument for Shape.GetSize below.
            var trailingAxes = withSequenceAxis ? 3 : 2;

            // Emission pass: sample by sample, step by step.
            for (var sample = 0; sample < sampleCount; ++sample)
            {
                for (var step = 0; step < maxSeqLength; ++step)
                {
                    foreach (var entry in dataSourceSet)
                    {
                        var source = entry.Value;

                        int length = 1;
                        if (withSequenceAxis)
                        {
                            length = source.Shape[-2];
                            if (step >= length)
                            {
                                // This source has no data for the current step.
                                continue;
                            }
                        }

                        // NOTE(review): GetSize(axis) presumably returns the element count
                        // up to and including that axis -- confirm against Shape.
                        int width  = source.Shape.GetSize(source.Shape.Rank - trailingAxes);
                        int offset = sample * width * length + step * width;

                        builder.AddDenseSample(entry.Key, new ListSlice<float>(source.Data, offset, width));
                    }

                    builder.NextLine();
                }

                builder.NextSequence();
            }

            builder.Finish();
        }
Beispiel #10
0
 /// <summary>
 /// Invokes <paramref name="func"/> with the features of <paramref name="dataSet"/> by
 /// forwarding to the overload that takes a feature dictionary.
 /// </summary>
 /// <param name="func">The function to invoke.</param>
 /// <param name="dataSet">The set whose <c>Features</c> are passed as arguments.</param>
 /// <param name="device">Forwarded unchanged; defaults to <c>null</c>.</param>
 /// <param name="errorWhenArgumentUnused">Forwarded unchanged; defaults to <c>true</c>.</param>
 /// <returns>The values returned by the dictionary-based overload.</returns>
 public static Value[] Invoke(this Function func, DataSourceSet dataSet, DeviceDescriptor device = null, bool errorWhenArgumentUnused = true)
 {
     var features = (IDictionary<string, IDataSource<float>>)dataSet.Features;

     return Invoke(func, features, device, errorWhenArgumentUnused);
 }