/// <summary>
/// Checks that a descriptor with <c>Enumerable = true</c> is reported as
/// <c>CompressionTypeHelper.EnumerableTypeId</c>.
/// </summary>
public void GetCompressionTypeDecodesEnumerableDataDescriptor()
{
    var descriptor = new DataDescriptor { Enumerable = true };

    var actual = CompressionTypeHelper.GetCompressionType(descriptor);

    // Assert.Equal takes the expected value first; with the arguments reversed a
    // failing run would label the two values the wrong way round.
    Assert.Equal(CompressionTypeHelper.EnumerableTypeId, actual);
}
/// <summary>
/// Checks that a descriptor whose <c>Type</c> is <c>Person</c> is reported as
/// <c>CompressionTypeHelper.DefaultTypeId</c>.
/// </summary>
public void GetCompressionTypeDecodesDefaultDataDescriptor()
{
    var descriptor = new DataDescriptor { Type = typeof(Person) };

    var actual = CompressionTypeHelper.GetCompressionType(descriptor);

    // Assert.Equal takes the expected value first; with the arguments reversed a
    // failing run would label the two values the wrong way round.
    Assert.Equal(CompressionTypeHelper.DefaultTypeId, actual);
}
/// <summary>
/// Checks that a descriptor whose <c>Type</c> is <c>Double</c> is reported as
/// <c>CompressionTypeHelper.NumericTypeId</c>.
/// </summary>
public void GetCompressionTypeDecodesNumericDataDescriptor()
{
    var descriptor = new DataDescriptor { Type = typeof(Double) };

    var actual = CompressionTypeHelper.GetCompressionType(descriptor);

    // Assert.Equal takes the expected value first; with the arguments reversed a
    // failing run would label the two values the wrong way round.
    Assert.Equal(CompressionTypeHelper.NumericTypeId, actual);
}
/// <summary>
/// Selects the compression type identifier that corresponds to a data descriptor.
/// </summary>
/// <param name="dataDescriptor">Descriptor whose <c>Enumerable</c> flag and <c>Type</c> are inspected.</param>
/// <returns>
/// <c>EnumerableTypeId</c> when the descriptor is flagged enumerable; otherwise
/// <c>NumericTypeId</c> when <c>Type.IsNumeric()</c> is true; otherwise <c>DefaultTypeId</c>.
/// </returns>
public static int GetCompressionType(DataDescriptor dataDescriptor)
{
    // The enumerable flag wins: Type is not consulted at all for enumerable descriptors.
    if (dataDescriptor.Enumerable)
    {
        return EnumerableTypeId;
    }

    bool isNumeric = dataDescriptor.Type.IsNumeric();
    if (!isNumeric)
    {
        return DefaultTypeId;
    }

    return NumericTypeId;
}
/// <summary>
/// Attempts to parse a single log entry beginning at the current position of
/// <paramref name="logStream"/>.
/// </summary>
/// <param name="logStream">Stream positioned at the first sector of a candidate log entry.</param>
/// <param name="entry">Receives the parsed entry on success; <c>null</c> when the method returns <c>false</c>.</param>
/// <returns>
/// <c>true</c> when the signature, header, checksum and every descriptor pass validation;
/// otherwise <c>false</c>.
/// </returns>
/// <remarks>
/// The stream position is not rewound on failure; it is left wherever the reads performed
/// so far have moved it.
/// </remarks>
public static bool TryRead(Stream logStream, out LogEntry entry)
{
    // Every failure path reports a null entry, so assign it once up front.
    entry = null;

    long position = logStream.Position;

    // The first sector carries the signature and the entry header.
    byte[] sectorBuffer = new byte[LogSectorSize];
    Utilities.ReadFully(logStream, sectorBuffer, 0, sectorBuffer.Length);

    uint sig = Utilities.ToUInt32LittleEndian(sectorBuffer, 0);
    if (sig != LogEntryHeader.LogEntrySignature)
    {
        return false;
    }

    LogEntryHeader header = new LogEntryHeader();
    header.ReadFrom(sectorBuffer, 0);

    // An entry shorter than the sector already read cannot be valid, and would make the
    // Array.Copy below throw rather than letting this method report failure.
    // NOTE(review): header.IsValid is defined elsewhere and may already cover this - confirm.
    if (!header.IsValid
        || header.EntryLength < LogSectorSize
        || header.EntryLength > logStream.Length)
    {
        return false;
    }

    // Assemble the whole entry: the sector already read followed by the remainder.
    byte[] logEntryBuffer = new byte[header.EntryLength];
    Array.Copy(sectorBuffer, logEntryBuffer, LogSectorSize);
    Utilities.ReadFully(logStream, logEntryBuffer, LogSectorSize, logEntryBuffer.Length - LogSectorSize);

    // Zero is written at offset 4 before the CRC is computed (presumably the stored
    // checksum field, so that it does not contribute to its own checksum).
    Utilities.WriteBytesLittleEndian((int)0, logEntryBuffer, 4);
    if (header.Checksum != Crc32LittleEndian.Compute(Crc32Algorithm.Castagnoli, logEntryBuffer, 0, (int)header.EntryLength))
    {
        return false;
    }

    // Descriptors are 32 bytes each starting at offset 64; data sectors begin at the next
    // sector boundary after the descriptor table.
    int dataPos = Utilities.RoundUp(((int)header.DescriptorCount * 32) + 64, LogSectorSize);

    List<Descriptor> descriptors = new List<Descriptor>();
    for (int i = 0; i < header.DescriptorCount; ++i)
    {
        int offset = (i * 32) + 64;
        Descriptor descriptor;

        uint descriptorSig = Utilities.ToUInt32LittleEndian(logEntryBuffer, offset);
        switch (descriptorSig)
        {
            case Descriptor.ZeroDescriptorSignature:
                descriptor = new ZeroDescriptor();
                break;
            case Descriptor.DataDescriptorSignature:
                // Each data descriptor is paired with the next unclaimed data sector.
                descriptor = new DataDescriptor(logEntryBuffer, dataPos);
                dataPos += LogSectorSize;
                break;
            default:
                return false;
        }

        descriptor.ReadFrom(logEntryBuffer, offset);
        if (!descriptor.IsValid(header.SequenceNumber))
        {
            return false;
        }

        descriptors.Add(descriptor);
    }

    entry = new LogEntry(position, header, descriptors);
    return true;
}
/// <summary>
/// Load a collection of data from a tab-delimited stream with one data point
/// per line. The data may optionally be associated with class labels
/// (first element on line) and/or target values (last element on line).
/// </summary>
/// <param name="stream">
/// Stream to read from. It is wrapped in a reader that is disposed before this
/// method returns, which also closes the stream.
/// </param>
/// <param name="dataDimension">Dimension of the data (excluding class labels and target values).</param>
/// <param name="descriptor">
/// Flags stating whether each line starts with a class label
/// (<c>HasClassLabels</c>) and/or ends with a target value (<c>HasTargetValues</c>).
/// </param>
/// <returns>The loaded data points together with any labels and target values.</returns>
/// <exception cref="Exception">A line does not contain the expected number of elements.</exception>
/// <remarks>
/// Numeric elements are parsed with the invariant culture, so files are read the
/// same way regardless of the machine's regional settings.
/// </remarks>
public static DataPointCollection Load(System.IO.Stream stream, int dataDimension, DataDescriptor descriptor)
{
    bool bHasTargetValues = (descriptor & DataDescriptor.HasTargetValues) == DataDescriptor.HasTargetValues;
    bool bHasClassLabels = (descriptor & DataDescriptor.HasClassLabels) == DataDescriptor.HasClassLabels;

    DataPointCollection result = new DataPointCollection();
    result.data_ = new List<float[]>();
    result.labels_ = bHasClassLabels ? new List<int>() : null;
    result.targets_ = bHasTargetValues ? new List<float>() : null;
    result.dimension_ = dataDimension;

    char[] separators = new char[] { '\t' };

    int elementsPerLine = (bHasClassLabels ? 1 : 0) + dataDimension + (bHasTargetValues ? 1 : 0);

    // Data files are machine-readable, so parsing must not depend on the current culture.
    System.Globalization.CultureInfo numberFormat = System.Globalization.CultureInfo.InvariantCulture;

    using (System.IO.StreamReader r = new System.IO.StreamReader(stream))
    {
        string line;
        while ((line = r.ReadLine()) != null)
        {
            string[] elements = line.Split(separators);

            if (elements.Length != elementsPerLine)
            {
                throw new Exception("Encountered line with unexpected number of elements.");
            }

            int index = 0;

            if (bHasClassLabels)
            {
                string label = elements[index++];
                if (String.IsNullOrEmpty(label))
                {
                    // An empty first element marks a data point whose class is unknown.
                    result.labels_.Add(UnknownClassLabel);
                }
                else
                {
                    // Labels receive consecutive indices in order of first appearance.
                    // NOTE(review): labelIndices_ is not created here; presumably it is
                    // initialised by DataPointCollection itself - confirm.
                    if (!result.labelIndices_.ContainsKey(label))
                    {
                        result.labelIndices_.Add(label, result.labelIndices_.Count);
                    }

                    result.labels_.Add(result.labelIndices_[label]);
                }
            }

            float[] datum = new float[dataDimension];
            for (int i = 0; i < dataDimension; i++)
            {
                datum[i] = Convert.ToSingle(elements[index++], numberFormat);
            }

            result.data_.Add(datum);

            if (bHasTargetValues)
            {
                result.targets_.Add(Convert.ToSingle(elements[index++], numberFormat));
            }
        }
    }

    return result;
}
/// <summary>
/// Reads the payload described by <paramref name="dataDescriptor"/> from
/// <paramref name="reader"/> and stores its textual form in <paramref name="element"/>.
/// </summary>
/// <param name="reader">Reader positioned at the start of the payload.</param>
/// <param name="dictionary">Name dictionary; only forwarded to <c>readElement</c> for nested elements.</param>
/// <param name="element">Node that receives the decoded content.</param>
/// <param name="xDoc">Document used to create the row child nodes and forwarded to <c>readElement</c>.</param>
/// <param name="offset">Start offset of the payload; the payload length is <c>dataDescriptor.end - offset</c>.</param>
/// <param name="dataDescriptor">Supplies the payload type code and its end offset.</param>
/// <returns>The end offset taken from <paramref name="dataDescriptor"/>.</returns>
/// <exception cref="System.ArgumentException">The type code is not one of 0x0 through 0x5.</exception>
public int readData(BinaryReader reader, List<string> dictionary, XmlNode element, XmlDocument xDoc, int offset, DataDescriptor dataDescriptor)
{
    int lengthInBytes = dataDescriptor.end - offset;

    switch (dataDescriptor.type)
    {
        case 0x0: // Element
            readElement(reader, element, xDoc, dictionary);
            break;

        case 0x1: // String
            element.InnerText = readString(reader, lengthInBytes);
            break;

        case 0x2: // Integer number
            element.InnerText = "\t" + readNumber(reader, lengthInBytes) + "\t";
            break;

        case 0x3: // Floats
        {
            string floatText = readFloats(reader, lengthInBytes);
            string[] parts = floatText.Split(' ');

            if (parts.Length == 12)
            {
                // Exactly twelve values are emitted as four child rows of three values each.
                string[] rowNames = { "row0", "row1", "row2", "row3" };
                for (int row = 0; row < rowNames.Length; row++)
                {
                    int first = row * 3;
                    XmlNode rowNode = xDoc.CreateElement(rowNames[row]);
                    rowNode.InnerText = "\t" + parts[first] + " " + parts[first + 1] + " " + parts[first + 2] + "\t";
                    element.AppendChild(rowNode);
                }
            }
            else
            {
                element.InnerText = "\t" + floatText + "\t";
            }

            break;
        }

        case 0x4: // Boolean
            if (readBoolean(reader, lengthInBytes))
            {
                element.InnerText = "\ttrue\t";
            }
            else
            {
                element.InnerText = "\tfalse\t";
            }

            break;

        case 0x5: // Base64
            element.InnerText = "\t" + readBase64(reader, lengthInBytes) + "\t";
            break;

        default:
            // The unread payload is dumped as hex into the message to aid diagnosis.
            throw new System.ArgumentException("Unknown type of \"" + element.Name + ": " + dataDescriptor.ToString() + " " + readAndToHex(reader, lengthInBytes));
    }

    return dataDescriptor.end;
}
/// <summary>
/// Creates an element descriptor from a name index and a data descriptor.
/// </summary>
/// <param name="nameIndex">Value stored in <c>nameIndex</c>.</param>
/// <param name="dataDescriptor">Value stored in <c>dataDescriptor</c>.</param>
public ElementDescriptor(int nameIndex, DataDescriptor dataDescriptor)
{
    this.nameIndex = nameIndex;
    this.dataDescriptor = dataDescriptor;
}
/// <summary>
/// Opens a training data file and loads it into a <c>DataPointCollection</c>,
/// terminating the process with exit code -1 if the file cannot be opened, cannot
/// be read, or yields no data.
/// </summary>
/// <param name="path">Path tried first; also appended to the fallback location.</param>
/// <param name="alternativePath">
/// Directory, combined with the executing assembly's directory, under which
/// <paramref name="path"/> is looked up when the first attempt fails.
/// </param>
/// <param name="dimension">Data dimension forwarded to <c>DataPointCollection.Load</c>.</param>
/// <param name="dataDescriptor">Descriptor forwarded to <c>DataPointCollection.Load</c>.</param>
/// <returns>The loaded training data.</returns>
static DataPointCollection LoadTrainingData(
    string path,
    string alternativePath,
    int dimension,
    DataDescriptor dataDescriptor)
{
    FileStream input = null;

    try
    {
        input = new FileStream(path, FileMode.Open, FileAccess.Read);
    }
    catch (Exception)
    {
        // Any failure on the given path triggers one retry beneath the executing assembly's directory.
        string assemblyDirectory = Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location);
        string fallbackPath = Path.Combine(Path.Combine(assemblyDirectory + "/", alternativePath), path);

        try
        {
            input = new FileStream(fallbackPath, FileMode.Open, FileAccess.Read);
        }
        catch (Exception)
        {
            Console.WriteLine("Failed to open training data file at \"{0}\" or \"{1}\".", path, fallbackPath);
            Environment.Exit(-1);
        }
    }

    DataPointCollection loaded = null;

    try
    {
        loaded = DataPointCollection.Load(input, dimension, dataDescriptor);
    }
    catch (Exception e)
    {
        Console.WriteLine("Failed to read training data. " + e.Message);
        Environment.Exit(-1);
    }

    if (loaded.Count() < 1)
    {
        Console.WriteLine("Insufficient training data.");
        Environment.Exit(-1);
    }

    return loaded;
}