/// <summary>
/// Serializes a <see cref="DataSourceSet"/> to a file, optionally compressing the payload.
/// </summary>
/// <param name="dss">The data source set to serialize.</param>
/// <param name="path">Destination file path; any existing file is overwritten.</param>
/// <param name="compress">When true, the payload is deflate-compressed.</param>
public static void Serialize(DataSourceSet dss, string path, bool compress = false)
{
    // Create/truncate the target with exclusive write access, then delegate to the stream overload.
    using (var fileStream = new FileStream(path, FileMode.Create, FileAccess.Write, FileShare.None))
    {
        Serialize(dss, fileStream, compress);
    }
}
/// <summary>
/// Rebuilds a <see cref="DataSourceSet"/> from its serializable form: a map of
/// feature name to (flat data array, shape dimensions).
/// </summary>
/// <param name="obj">Deserialized name → (data, dimensions) pairs.</param>
/// <returns>A new <see cref="DataSourceSet"/> containing one data source per entry.</returns>
private static DataSourceSet ConvertFromSerializableObject(Dictionary <string, Tuple <float[], int[]> > obj)
{
    var result = new DataSourceSet();
    foreach (var pair in obj)
    {
        var data = pair.Value.Item1;
        var dimensions = pair.Value.Item2;
        result.Add(pair.Key, DataSourceFactory.Create(data, dimensions));
    }
    return result;
}
/// <summary>
/// Cmdlet pipeline-end handler: either loads a <see cref="DataSourceSet"/> from
/// <c>Path</c> (parameter set "load", honoring <c>NoDecompress</c>) or builds one
/// from the supplied <c>DataSources</c>, then writes it to the pipeline.
/// </summary>
protected override void EndProcessing()
{
    DataSourceSet dss;
    if (ParameterSetName == "load")
    {
        // Resolve relative paths against the cmdlet's current location before loading.
        Path = IO.GetAbsolutePath(this, Path);
        dss = DataSourceSet.Load(Path, !NoDecompress);
    }
    else
    {
        dss = new DataSourceSet(DataSources);
    }
    WriteObject(dss);
}
/// <summary>
/// Serializes a <see cref="DataSourceSet"/> to a stream as a MessagePack-encoded
/// dictionary of name → (data, dimensions), optionally deflate-compressed.
/// </summary>
/// <param name="dss">The data source set to serialize.</param>
/// <param name="stream">Destination stream; left open for the caller.</param>
/// <param name="compress">When true, the MessagePack payload is wrapped in a <see cref="DeflateStream"/>.</param>
public static void Serialize(DataSourceSet dss, Stream stream, bool compress = false)
{
    var serializer = MessagePackSerializer.Get <Dictionary <string, Tuple <float[], int[]> > >();
    var payload = ConvertToSerializableObject(dss);
    if (!compress)
    {
        serializer.Pack(stream, payload);
        return;
    }
    // leaveOpen: true — disposing the deflate wrapper flushes it but keeps the caller's stream usable.
    using (var deflate = new DeflateStream(stream, CompressionLevel.Optimal, true))
    {
        serializer.Pack(deflate, payload);
    }
}
/// <summary>
/// Lazily parses CTF (CNTK Text Format) input, grouping consecutive lines that share the
/// same sequence id (the text before the first '|') into one <see cref="CTFSample"/> each.
/// </summary>
/// <param name="reader">Source of CTF lines; read through to end-of-stream.</param>
/// <returns>One <see cref="CTFSample"/> per sequence, yielded in file order.</returns>
/// <exception cref="InvalidDataException">A feature field has a name but no values.</exception>
public static IEnumerable <CTFSample> GetSampleReader(TextReader reader)
{
    int lineCount = 0;           // 1-based number of the most recently read line
    int sequenceCount = 0;       // 1-based index of the sequence being emitted
    int seqStartLineCount = 1;   // line number where the current sequence began
    string line;
    string seqId = null;         // id of the sequence currently being accumulated
    var splitLines = new List <string[]>();              // '|'-split lines of the current sequence
    var comments = new List <string>();                  // '#' comment fields of the current sequence
    var startIndexMap = new Dictionary <string, int>();  // per feature: first row index it appeared on
    var endIndexMap = new Dictionary <string, int>();    // per feature: last row index it appeared on
    while ((line = reader.ReadLine()) != null)
    {
        ++lineCount;
        var splitLine = line.Split(new char[] { '|' });
        var n = splitLine[0].Trim();
        // Keep accumulating while the sequence id is unchanged; fall through to flush the
        // buffered sequence when the id changes or the input is exhausted.
        if (seqId == null || n == seqId)
        {
            splitLines.Add(splitLine);
            seqId = n;
            if (reader.Peek() != -1)
            {
                continue;
            }
        }
        ++sequenceCount;
        var seqDim = splitLines.Count;
        var dss = new DataSourceSet();
        comments.Clear();
        startIndexMap.Clear();
        endIndexMap.Clear();
        for (var i = 0; i < splitLines.Count; ++i)
        {
            var columns = splitLines[i];
            for (var j = 1; j < columns.Length; ++j)
            {
                var feature = columns[j].Trim();
                // '#' marks a comment field: strip the marker plus following whitespace.
                // NOTE(review): an empty field (e.g. "||" or a trailing '|') would throw
                // IndexOutOfRangeException on feature[0] — confirm inputs never contain one.
                if (feature[0] == '#')
                {
                    int skip;
                    for (skip = 1; skip < feature.Length && Char.IsWhiteSpace(feature[skip]); ++skip)
                    {
                        ;
                    }
                    comments.Add(feature.Substring(skip));
                    continue;
                }
                var items = feature.Split();
                if (items.Length < 2)
                {
                    throw new InvalidDataException(string.Format("line {0}: Invalid feature", lineCount));
                }
                var featureDim = items.Length - 1;
                var name = items[0];
                DataSourceBase <float, float[]> ds;
                float[] data;
                if (dss.Features.ContainsKey(name))
                {
                    ds = (DataSourceBase <float, float[]>)dss.Features[name];
                    data = ds.TypedData;
                }
                else
                {
                    // First occurrence: allocate a buffer shaped (featureDim, seqDim, 1)
                    // sized for the whole sequence, then fill rows in as they appear.
                    data = new float[featureDim * seqDim];
                    ds = DataSourceFactory.Create(data, new int[] { featureDim, seqDim, 1 });
                    dss.Add(name, ds);
                    startIndexMap[name] = i;
                }
                var baseIndex = ds.Shape.GetSequentialIndex(new int[] { 0, i, 0 });
                for (var k = 0; k < featureDim; ++k)
                {
                    data[baseIndex + k] = Converter.ToFloat(items[k + 1]);
                }
                endIndexMap[name] = i;
            }
        }
        // Features that did not span every row are trimmed along axis -2 to the rows
        // they actually covered. Keys are snapshotted (ToArray) because the loop
        // replaces entries in dss.Features while iterating.
        foreach (var name in dss.Features.Keys.ToArray())
        {
            var start = startIndexMap[name];
            var end = endIndexMap[name];
            if (start == 0 && end == splitLines.Count - 1)
            {
                continue;
            }
            var ds = dss[name];
            dss.Features[name] = ds.Subset(start, end - start + 1, -2);
        }
        // NOTE(review): the same 'comments' list instance is handed to every yielded sample
        // and is cleared when the next sequence is parsed — consumers must copy it before
        // advancing the iterator. Consider yielding a snapshot instead.
        // NOTE(review): if the final input line introduces a new sequence id, that buffered
        // one-line sequence appears never to be flushed (the while loop exits before the
        // next flush) — verify against callers/expected inputs.
        yield return(new CTFSample() { LineCount = seqStartLineCount, SequenceCount = sequenceCount, SequenceId = seqId, DataSet = dss, Comments = comments });
        // Reset buffers; the line that ended this sequence starts the next one.
        splitLines.Clear();
        splitLines.Add(splitLine);
        seqStartLineCount = lineCount;
        seqId = n;
    }
}
/// <summary>Enqueues a minibatch onto the internal data queue.</summary>
/// <param name="dataSourceSet">The minibatch to enqueue.</param>
public void AddMinibatch(DataSourceSet dataSourceSet) => _dataQueue.Add(dataSourceSet);
/// <summary>
/// Converts a <see cref="DataSourceSet"/> to its serializable form: a map of
/// feature name to (copied flat data array, shape dimensions).
/// </summary>
/// <param name="dss">The data source set to convert.</param>
/// <returns>Name → (data, dimensions) pairs suitable for MessagePack serialization.</returns>
private static Dictionary <string, Tuple <float[], int[]> > ConvertToSerializableObject(DataSourceSet dss)
{
    var result = new Dictionary <string, Tuple <float[], int[]> >();
    foreach (var entry in dss.Features)
    {
        var source = entry.Value;
        // ToArray snapshots the data so the serialized copy is independent of the source buffer.
        result.Add(entry.Key, new Tuple <float[], int[]>(source.Data.ToArray(), source.Shape.Dimensions));
    }
    return result;
}
/// <summary>
/// Writes a <see cref="DataSourceSet"/> in CTF format to a file.
/// </summary>
/// <param name="path">Destination file path; any existing file is overwritten.</param>
/// <param name="dataSourceSet">Named data sources to serialize.</param>
/// <param name="hasSequenceAxis">True when shapes carry a sequence axis before the batch axis.</param>
public static void Write(string path, DataSourceSet dataSourceSet, bool hasSequenceAxis)
{
    // UTF-8 without BOM, overwrite mode.
    var encoding = new UTF8Encoding(false);
    using (var writer = new StreamWriter(path, false, encoding))
    {
        Write(writer, dataSourceSet, hasSequenceAxis);
    }
}
/// <summary>
/// Writes <paramref name="dataSourceSet"/> to <paramref name="writer"/> in CTF format.
/// All data sources must agree on the batch-axis sample count; when
/// <paramref name="withSequenceAxis"/> is true the axis before the batch axis is the
/// sequence axis, and sources with shorter sequences stop contributing lines early.
/// </summary>
/// <param name="writer">Destination writer; not closed by this method.</param>
/// <param name="dataSourceSet">Named data sources to serialize.</param>
/// <param name="withSequenceAxis">True when shapes carry sequence and batch axes as the last two.</param>
/// <exception cref="ArgumentException">A shape lacks the required axes or sample counts differ.</exception>
public static void Write(TextWriter writer, DataSourceSet dataSourceSet, bool withSequenceAxis)
{
    var builder = new CTFBuilder(writer, 0, false);
    // Argument check
    // NOTE(review): an empty dataSourceSet throws InvalidOperationException from First()
    // rather than ArgumentException — confirm that is intended.
    var sampleCount = dataSourceSet.Features.First().Value.Shape[-1];
    var maxSeqLength = 1;
    foreach (var entry in dataSourceSet)
    {
        var name = entry.Key;
        var ds = entry.Value;
        if (withSequenceAxis && ds.Shape.Rank < 3)
        {
            throw new ArgumentException("DataSource shape should have sequence and batch axes as the last two");
        }
        if (!withSequenceAxis && ds.Shape.Rank < 2)
        {
            throw new ArgumentException("DataSource shape should have a batch axis");
        }
        var count = ds.Shape[-1];
        if (count != sampleCount)
        {
            throw new ArgumentException("Sample counts of data sources should be equal");
        }
        if (withSequenceAxis)
        {
            // Track the longest sequence so the emit loop covers every source's steps.
            var seqLength = ds.Shape[-2];
            if (seqLength > maxSeqLength)
            {
                maxSeqLength = seqLength;
            }
        }
    }
    // Emit one CTF line per (sample, sequence step); a source whose sequence is shorter
    // than maxSeqLength is skipped for the surplus steps.
    for (var sampleIndex = 0; sampleIndex < sampleCount; ++sampleIndex)
    {
        for (var seq = 0; seq < maxSeqLength; ++seq)
        {
            foreach (var entry in dataSourceSet)
            {
                var name = entry.Key;
                var ds = entry.Value;
                int seqLength;
                int dim;
                if (withSequenceAxis)
                {
                    seqLength = ds.Shape[-2];
                    if (seq >= seqLength)
                    {
                        continue;
                    }
                    // dim spans the feature axes below the sequence axis — assumes
                    // Shape.GetSize(i) is the flattened size up to that axis; TODO confirm.
                    dim = ds.Shape.GetSize(ds.Shape.Rank - 3);
                }
                else
                {
                    seqLength = 1;
                    dim = ds.Shape.GetSize(ds.Shape.Rank - 2);
                }
                // Offset of this (sample, step) slice within the flat data buffer.
                int index = sampleIndex * dim * seqLength + seq * dim;
                builder.AddDenseSample(name, new ListSlice <float>(ds.Data, index, dim));
            }
            builder.NextLine();
        }
        builder.NextSequence();
    }
    builder.Finish();
}
/// <summary>
/// Evaluates <paramref name="func"/> using the features of <paramref name="dataSet"/>
/// as its arguments.
/// </summary>
/// <param name="func">The function to evaluate.</param>
/// <param name="dataSet">Feature set supplying the function's arguments by name.</param>
/// <param name="device">Target device, or null for the default.</param>
/// <param name="errorWhenArgumentUnused">When true, unused supplied arguments raise an error.</param>
/// <returns>The function's output values.</returns>
public static Value[] Invoke(this Function func, DataSourceSet dataSet, DeviceDescriptor device = null, bool errorWhenArgumentUnused = true)
{
    var arguments = (IDictionary <string, IDataSource <float> >)dataSet.Features;
    return Invoke(func, arguments, device, errorWhenArgumentUnused);
}