// Verifies that a descriptor flagged as Enumerable is mapped to
// CompressionTypeHelper.EnumerableTypeId.
public void GetCompressionTypeDecodesEnumerableDataDescriptor()
        {
            var descriptor = new DataDescriptor
            {
                Enumerable = true
            };

            // xUnit's Assert.Equal takes (expected, actual); keeping that order makes
            // a failure message label the two values correctly.
            Assert.Equal(CompressionTypeHelper.EnumerableTypeId, CompressionTypeHelper.GetCompressionType(descriptor));
        }
        // Verifies that a descriptor whose Type is Person (expected to be
        // non-numeric and not flagged Enumerable) is mapped to
        // CompressionTypeHelper.DefaultTypeId.
        public void GetCompressionTypeDecodesDefaultDataDescriptor()
        {
            var descriptor = new DataDescriptor
            {
                Type = typeof(Person)
            };

            // xUnit's Assert.Equal takes (expected, actual); keeping that order makes
            // a failure message label the two values correctly.
            Assert.Equal(CompressionTypeHelper.DefaultTypeId, CompressionTypeHelper.GetCompressionType(descriptor));
        }
        // Verifies that a descriptor whose Type is Double is mapped to
        // CompressionTypeHelper.NumericTypeId.
        public void GetCompressionTypeDecodesNumericDataDescriptor()
        {
            var descriptor = new DataDescriptor
            {
                Type = typeof(Double)
            };

            // xUnit's Assert.Equal takes (expected, actual); keeping that order makes
            // a failure message label the two values correctly.
            Assert.Equal(CompressionTypeHelper.NumericTypeId, CompressionTypeHelper.GetCompressionType(descriptor));
        }
        /// <summary>
        /// Maps a data descriptor to one of the compression type identifiers.
        /// </summary>
        /// <param name="dataDescriptor">Descriptor to classify.</param>
        /// <returns>
        /// <c>EnumerableTypeId</c> when the descriptor is flagged as enumerable;
        /// otherwise <c>NumericTypeId</c> when its type reports itself as numeric,
        /// and <c>DefaultTypeId</c> in every other case.
        /// </returns>
        public static int GetCompressionType(DataDescriptor dataDescriptor)
        {
            // The enumerable flag takes precedence; the type is only inspected
            // for descriptors that are not enumerable.
            if (!dataDescriptor.Enumerable)
            {
                return dataDescriptor.Type.IsNumeric() ? NumericTypeId : DefaultTypeId;
            }

            return EnumerableTypeId;
        }
示例#5
0
        /// <summary>
        /// Attempts to read and validate one log entry starting at the current
        /// position of <paramref name="logStream"/>.
        /// </summary>
        /// <param name="logStream">Stream positioned at the start of a candidate log entry.</param>
        /// <param name="entry">The parsed entry on success; <c>null</c> on failure.</param>
        /// <returns>
        /// <c>true</c> when the signature, header, checksum and every descriptor are
        /// valid; otherwise <c>false</c>.
        /// </returns>
        /// <remarks>
        /// The stream position is not restored on failure: the first sector (and,
        /// once the header is accepted, the rest of the entry) has already been consumed.
        /// </remarks>
        public static bool TryRead(Stream logStream, out LogEntry entry)
        {
            entry = null;

            long position = logStream.Position;

            // The first sector carries the signature and the entry header.
            byte[] sectorBuffer = new byte[LogSectorSize];
            Utilities.ReadFully(logStream, sectorBuffer, 0, sectorBuffer.Length);

            uint sig = Utilities.ToUInt32LittleEndian(sectorBuffer, 0);
            if (sig != LogEntryHeader.LogEntrySignature)
            {
                return false;
            }

            LogEntryHeader header = new LogEntryHeader();
            header.ReadFrom(sectorBuffer, 0);

            // An entry shorter than the sector that was already read cannot hold that
            // sector; rejecting it here stops the copy / read below from throwing on
            // a corrupt length field.
            if (!header.IsValid
                || header.EntryLength < LogSectorSize
                || header.EntryLength > logStream.Length)
            {
                return false;
            }

            byte[] logEntryBuffer = new byte[header.EntryLength];
            Array.Copy(sectorBuffer, logEntryBuffer, LogSectorSize);

            Utilities.ReadFully(logStream, logEntryBuffer, LogSectorSize, logEntryBuffer.Length - LogSectorSize);

            // Four bytes at offset 4 are zeroed before the CRC is computed
            // (presumably the stored checksum field itself - confirm against the header layout).
            Utilities.WriteBytesLittleEndian((int)0, logEntryBuffer, 4);
            if (header.Checksum != Crc32LittleEndian.Compute(Crc32Algorithm.Castagnoli, logEntryBuffer, 0, (int)header.EntryLength))
            {
                return false;
            }

            // Descriptors are 32 bytes each and start at offset 64. Reject a table that
            // would run past the buffer instead of reading out of range; long arithmetic
            // avoids overflow on a very large descriptor count.
            long descriptorTableEnd = 64L + ((long)header.DescriptorCount * 32);
            if (descriptorTableEnd > logEntryBuffer.Length)
            {
                return false;
            }

            // Data sectors start at the first sector boundary after the descriptor table.
            int dataPos = Utilities.RoundUp(((int)header.DescriptorCount * 32) + 64, LogSectorSize);

            List<Descriptor> descriptors = new List<Descriptor>();
            for (int i = 0; i < header.DescriptorCount; ++i)
            {
                int offset = (i * 32) + 64;
                Descriptor descriptor;

                uint descriptorSig = Utilities.ToUInt32LittleEndian(logEntryBuffer, offset);
                switch (descriptorSig)
                {
                    case Descriptor.ZeroDescriptorSignature:
                        descriptor = new ZeroDescriptor();
                        break;
                    case Descriptor.DataDescriptorSignature:
                        // Each data descriptor is given the next data sector in order.
                        descriptor = new DataDescriptor(logEntryBuffer, dataPos);
                        dataPos += LogSectorSize;
                        break;
                    default:
                        return false;
                }

                descriptor.ReadFrom(logEntryBuffer, offset);
                if (!descriptor.IsValid(header.SequenceNumber))
                {
                    return false;
                }

                descriptors.Add(descriptor);
            }

            entry = new LogEntry(position, header, descriptors);
            return true;
        }
        /// <summary>
        /// Load a collection of data from a tab-delimited stream with one data point
        /// per line. The data may optionally be associated with class labels
        /// (first element on line) and/or target values (last element on line).
        /// </summary>
        /// <param name="stream">
        /// Stream to read from. It is wrapped in a <see cref="System.IO.StreamReader"/>
        /// that is disposed before this method returns, which also closes the stream.
        /// </param>
        /// <param name="dataDimension">Dimension of the data (excluding class labels and target values).</param>
        /// <param name="descriptor">
        /// Flags stating whether each line carries a class label
        /// (<c>HasClassLabels</c>) and/or a target value (<c>HasTargetValues</c>).
        /// </param>
        /// <returns>The loaded data points with their labels and targets, where present.</returns>
        /// <exception cref="System.FormatException">
        /// A line does not contain the expected number of elements, or a value
        /// cannot be parsed as a number.
        /// </exception>
        public static DataPointCollection Load(System.IO.Stream stream, int dataDimension, DataDescriptor descriptor)
        {
            bool bHasTargetValues = (descriptor & DataDescriptor.HasTargetValues) == DataDescriptor.HasTargetValues;
            bool bHasClassLabels = (descriptor & DataDescriptor.HasClassLabels) == DataDescriptor.HasClassLabels;

            DataPointCollection result = new DataPointCollection();
            result.data_ = new List<float[]>();
            result.labels_ = bHasClassLabels ? new List<int>() : null;
            result.targets_ = bHasTargetValues ? new List<float>() : null;
            result.dimension_ = dataDimension;

            char[] separators = new char[] { '\t' };

            // Data files are machine-readable, so numbers are parsed with the invariant
            // culture; the current culture would misread "0.5" on comma-decimal locales.
            System.Globalization.CultureInfo numberFormat = System.Globalization.CultureInfo.InvariantCulture;

            int elementsPerLine = (bHasClassLabels ? 1 : 0) + dataDimension + (bHasTargetValues ? 1 : 0);

            using (System.IO.StreamReader r = new System.IO.StreamReader(stream))
            {
                string line;
                while ((line = r.ReadLine()) != null)
                {
                    string[] elements = line.Split(separators);

                    if (elements.Length != elementsPerLine)
                    {
                        throw new FormatException("Encountered line with unexpected number of elements.");
                    }

                    int index = 0;

                    if (bHasClassLabels)
                    {
                        string label = elements[index++];
                        if (!String.IsNullOrEmpty(label))
                        {
                            // Each distinct label string gets the next free integer index
                            // the first time it is seen.
                            int labelIndex;
                            if (!result.labelIndices_.TryGetValue(label, out labelIndex))
                            {
                                labelIndex = result.labelIndices_.Count;
                                result.labelIndices_.Add(label, labelIndex);
                            }

                            result.labels_.Add(labelIndex);
                        }
                        else
                        {
                            // An empty label column marks a data point of unknown class.
                            result.labels_.Add(UnknownClassLabel);
                        }
                    }

                    float[] datum = new float[dataDimension];
                    for (int i = 0; i < dataDimension; i++)
                    {
                        datum[i] = Convert.ToSingle(elements[index++], numberFormat);
                    }

                    result.data_.Add(datum);

                    if (bHasTargetValues)
                    {
                        result.targets_.Add(Convert.ToSingle(elements[index++], numberFormat));
                    }
                }
            }

            return result;
        }
示例#7
0
        /// <summary>
        /// Reads one value from <paramref name="reader"/> and writes it into
        /// <paramref name="element"/>, choosing the decoding by the descriptor's type code.
        /// </summary>
        /// <param name="reader">Binary source positioned at the value.</param>
        /// <param name="dictionary">Passed through to <c>readElement</c> for nested elements.</param>
        /// <param name="element">XML node that receives the decoded value.</param>
        /// <param name="xDoc">Document used to create child nodes.</param>
        /// <param name="offset">Start offset of the value; the length is <c>dataDescriptor.end - offset</c>.</param>
        /// <param name="dataDescriptor">Type code and end offset of the value.</param>
        /// <returns>The end offset taken from <paramref name="dataDescriptor"/>.</returns>
        /// <exception cref="System.ArgumentException">The type code is not one of 0x0 to 0x5.</exception>
        public int readData(BinaryReader reader, List<string> dictionary, XmlNode element, XmlDocument xDoc, int offset, DataDescriptor dataDescriptor)
        {
            int payloadLength = dataDescriptor.end - offset;

            switch (dataDescriptor.type)
            {
                case 0x0:
                    // Nested element
                    readElement(reader, element, xDoc, dictionary);
                    break;

                case 0x1:
                    // String (written without the surrounding tabs)
                    element.InnerText = readString(reader, payloadLength);
                    break;

                case 0x2:
                    // Integer number
                    element.InnerText = "\t" + readNumber(reader, payloadLength) + "\t";
                    break;

                case 0x3:
                {
                    // Floats: exactly twelve values are laid out as four rows of three,
                    // any other count is written as a single text value.
                    string floatText = readFloats(reader, payloadLength);
                    string[] values = floatText.Split(' ');

                    if (values.Length != 12)
                    {
                        element.InnerText = "\t" + floatText + "\t";
                        break;
                    }

                    string[] rowNames = new string[] { "row0", "row1", "row2", "row3" };
                    for (int row = 0; row < rowNames.Length; row++)
                    {
                        int first = row * 3;
                        XmlNode rowNode = xDoc.CreateElement(rowNames[row]);
                        rowNode.InnerText = "\t" + values[first] + " " + values[first + 1] + " " + values[first + 2] + "\t";
                        element.AppendChild(rowNode);
                    }

                    break;
                }

                case 0x4:
                    // Boolean
                    element.InnerText = readBoolean(reader, payloadLength) ? "\ttrue\t" : "\tfalse\t";
                    break;

                case 0x5:
                    // Base64
                    element.InnerText = "\t" + readBase64(reader, payloadLength) + "\t";
                    break;

                default:
                    throw new System.ArgumentException("Unknown type of \"" + element.Name + ": " + dataDescriptor.ToString() + " " + readAndToHex(reader, payloadLength));
            }

            return dataDescriptor.end;
        }
示例#8
0
 /// <summary>
 /// Creates an element descriptor from a name index and a data descriptor.
 /// </summary>
 /// <param name="nameIndex">Value stored in <c>nameIndex</c> (presumably an index into a name dictionary - confirm against callers).</param>
 /// <param name="dataDescriptor">Value stored in <c>dataDescriptor</c>.</param>
 public ElementDescriptor(int nameIndex, DataDescriptor dataDescriptor)
 {
     this.nameIndex = nameIndex;
     this.dataDescriptor = dataDescriptor;
 }
示例#9
0
        /// <summary>
        /// Opens a training data file and loads it into a <c>DataPointCollection</c>.
        /// The file is first looked up at <paramref name="path"/>; if that fails it is
        /// looked up under <paramref name="alternativePath"/> relative to the directory
        /// of the executing assembly. On any failure a message is written to the
        /// console and the process exits with code -1.
        /// </summary>
        /// <param name="path">Path of the data file.</param>
        /// <param name="alternativePath">Fallback directory, relative to the executing assembly's directory.</param>
        /// <param name="dimension">Data dimension passed to <c>DataPointCollection.Load</c>.</param>
        /// <param name="dataDescriptor">Descriptor passed to <c>DataPointCollection.Load</c>.</param>
        /// <returns>The loaded collection, which holds at least one data point.</returns>
        static DataPointCollection LoadTrainingData(
            string path,
            string alternativePath,
            int dimension,
            DataDescriptor dataDescriptor)
        {
            FileStream input = null;
            try
            {
                input = new FileStream(path, FileMode.Open, FileAccess.Read);
            }
            catch (Exception)
            {
                // Second attempt: <assembly directory>/<alternativePath>/<path>.
                string assemblyDirectory = Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location);
                string fallbackDirectory = Path.Combine(assemblyDirectory + "/", alternativePath);
                string fallbackFile = Path.Combine(fallbackDirectory, path);

                try
                {
                    input = new FileStream(fallbackFile, FileMode.Open, FileAccess.Read);
                }
                catch (Exception)
                {
                    Console.WriteLine("Failed to open training data file at \"{0}\" or \"{1}\".", path, fallbackFile);
                    Environment.Exit(-1);
                }
            }

            DataPointCollection loaded = null;
            try
            {
                loaded = DataPointCollection.Load(input, dimension, dataDescriptor);
            }
            catch (Exception loadError)
            {
                Console.WriteLine("Failed to read training data. " + loadError.Message);
                Environment.Exit(-1);
            }

            // An empty data set is treated as a fatal error.
            if (loaded.Count() < 1)
            {
                Console.WriteLine("Insufficient training data.");
                Environment.Exit(-1);
            }

            return loaded;
        }