Beispiel #1
0
        /// <summary>
        /// Reads one variable record from the current position of <paramref name="reader"/>.
        /// </summary>
        /// <param name="reader">Reader positioned at the first field (the type) of a variable record.</param>
        /// <returns>
        /// A record populated with type, label flag, missing value count, print/write formats,
        /// name, optional label and the missing values read from the stream.
        /// </returns>
        public static VariableRecord ParseNextRecord(BinaryReader reader)
        {
            var record = new VariableRecord();

            // The fields are stored back to back; the order of the reads below is significant.
            record.Type              = reader.ReadInt32();
            record.HasVariableLabel  = (reader.ReadInt32() == 1);
            record.MissingValueCount = reader.ReadInt32();
            record.PrintFormat       = new OutputFormat(reader.ReadInt32());
            record.WriteFormat       = new OutputFormat(reader.ReadInt32());
            byte[] bytes = reader.ReadBytes(8);
            record.Name = Encoding.Default.GetString(bytes);
            if (record.HasVariableLabel)
            {
                record.LabelLength = reader.ReadInt32();

                // Round up to the nearest multiple of 4 bytes (32 bits).
                // The former (((n - 1) / 4) + 1) * 4 evaluated to 4 for n == 0, because integer
                // division truncates toward zero, so a record flagged as labelled but carrying an
                // empty label had 4 bytes too many read. (n + 3) / 4 * 4 gives 0 in that case and
                // the same result as before for every positive length.
                int labelBytes = ((record.LabelLength + 3) / 4) * 4;
                bytes = reader.ReadBytes(labelBytes);
                record.Label = Encoding.Default.GetString(bytes);
            }

            // The count is used by its absolute value only; the sign is kept on the record itself.
            int missingValueCount = Math.Abs(record.MissingValueCount);
            var missingValues = new List<double>(missingValueCount);

            for (int i = 0; i < missingValueCount; i++)
            {
                missingValues.Add(reader.ReadDouble());
            }
            record.MissingValues = new Collection<double>(missingValues);

            return record;
        }
        /// <summary>
        /// Reads one variable record from the current position of <paramref name="reader"/>.
        /// </summary>
        /// <param name="reader">Reader positioned at the first field (the type) of a variable record.</param>
        /// <returns>The record populated from the stream.</returns>
        public static VariableRecord ParseNextRecord(BinaryReader reader)
        {
            // Initializers run top to bottom, so the stream is consumed in field order.
            var parsed = new VariableRecord
            {
                Type              = reader.ReadInt32(),
                HasVariableLabel  = reader.ReadInt32() == 1,
                MissingValueCount = reader.ReadInt32(),
                PrintFormat       = new OutputFormat(reader.ReadInt32()),
                WriteFormat       = new OutputFormat(reader.ReadInt32()),
                Name              = Common.ByteArrayToString(reader.ReadBytes(8))
            };

            if (parsed.HasVariableLabel)
            {
                parsed.LabelLength = reader.ReadInt32();

                // The label is padded to a multiple of 4 bytes (32 bits). Common.RoundUp is used
                // instead of (((n - 1) / 4) + 1) * 4, which gives a wrong result when the label
                // flag is set but the label length is 0.
                int paddedLength = Common.RoundUp(parsed.LabelLength, 4);
                byte[] labelData = reader.ReadBytes(paddedLength);
                parsed.Label = Common.ByteArrayToString(labelData);
            }

            // Only the magnitude of the missing value count decides how many doubles follow.
            int valuesToRead = Math.Abs(parsed.MissingValueCount);
            var values = new List<double>(valuesToRead);
            while (values.Count < valuesToRead)
            {
                values.Add(reader.ReadDouble());
            }

            parsed.MissingValues = new Collection<double>(values);
            return parsed;
        }
        /// <summary>
        /// Reads one variable record from the current position of <paramref name="reader"/>.
        /// </summary>
        /// <param name="reader">Reader positioned at the first field (the type) of a variable record.</param>
        /// <returns>
        /// A record populated with type, label flag, missing value count, print/write formats,
        /// name, optional label and the missing values read from the stream.
        /// </returns>
        public static VariableRecord ParseNextRecord(BinaryReader reader)
        {
            var record = new VariableRecord();
            record.Type = reader.ReadInt32();
            record.HasVariableLabel = (reader.ReadInt32() == 1);
            record.MissingValueCount = reader.ReadInt32();
            record.PrintFormat = new OutputFormat(reader.ReadInt32());
            record.WriteFormat = new OutputFormat(reader.ReadInt32());
            byte[] bytes = reader.ReadBytes(8);
            record.Name = Encoding.Default.GetString(bytes);
            if (record.HasVariableLabel)
            {
                record.LabelLength = reader.ReadInt32();

                // Round up to the nearest multiple of 4 bytes (32 bits).
                // (n + 3) / 4 * 4 is used rather than (((n - 1) / 4) + 1) * 4: the latter yields 4
                // for n == 0 (integer division truncates toward zero), which read 4 bytes too many
                // when the label flag is set but the label is empty.
                int labelBytes = ((record.LabelLength + 3) / 4) * 4;
                bytes = reader.ReadBytes(labelBytes);
                record.Label = Encoding.Default.GetString(bytes);
            }

            // The count is used by its absolute value only; the sign is kept on the record itself.
            int missingValueCount = Math.Abs(record.MissingValueCount);
            var missingValues = new List<double>(missingValueCount);
            for (int i = 0; i < missingValueCount; i++)
            {
                missingValues.Add(reader.ReadDouble());
            }
            record.MissingValues = new Collection<double>(missingValues);

            return record;
        }
        /// <summary>
        /// Creates an additional record named after <paramref name="variable"/> whose type is
        /// <paramref name="segmentLength"/>, then runs it through <c>CheckShortName</c>.
        /// </summary>
        /// <param name="variable">Variable supplying the name and the display info.</param>
        /// <param name="encoding">Encoding assigned to the new record.</param>
        /// <param name="segmentLength">Value stored in the record's <c>Type</c>.</param>
        /// <param name="previousVariableNames">Raw names already in use; passed to <c>CheckShortName</c>.</param>
        /// <param name="longNameCounter">Counter passed by reference to <c>CheckShortName</c>.</param>
        /// <returns>The new record.</returns>
        private static VariableRecord GetVlsExtraVariable(Variable variable, Encoding encoding, int segmentLength, SortedSet<byte[]> previousVariableNames, ref int longNameCounter)
        {
            var segmentRecord = new VariableRecord();

            // Assignment order is kept: the encoding is set before the name.
            segmentRecord.Encoding = encoding;
            segmentRecord.Name = variable.Name;
            // TODO set other values that tell the length
            segmentRecord.Type = segmentLength;
            segmentRecord.DisplayInfo = GetVariableDisplayInfo(variable);

            CheckShortName(segmentRecord, previousVariableNames, ref longNameCounter);
            return segmentRecord;
        }
Beispiel #5
0
        /// <summary>
        /// Creates an additional record that copies label and display info from
        /// <paramref name="variable"/>, uses an <c>A</c> format of width
        /// <paramref name="segmentLength"/> for both print and write, and takes its raw name from
        /// <c>GenerateContinuationSegmentShortName</c>.
        /// </summary>
        /// <param name="variable">Record whose label and display info are copied.</param>
        /// <param name="encoding">Encoding assigned to the new record.</param>
        /// <param name="segmentLength">Value stored in <c>Type</c> and used as the format width.</param>
        /// <param name="previousVariableNames">Passed to the short name generator.</param>
        /// <param name="segmentsNamesList">Passed to the short name generator.</param>
        /// <param name="originalName">Raw name passed to the short name generator.</param>
        /// <returns>The new record.</returns>
        private static VariableRecord GetVlsExtraVariable(VariableRecord variable, Encoding encoding, int segmentLength, SortedSet<byte[]> previousVariableNames, SortedList<byte[], int> segmentsNamesList, byte[] originalName)
        {
            // One format instance is shared by the print and the write format.
            var textFormat = new OutputFormat(FormatType.A, segmentLength);

            var segmentRecord = new VariableRecord();
            segmentRecord.Encoding = encoding;
            segmentRecord._nameRaw = GenerateContinuationSegmentShortName(originalName, previousVariableNames, segmentsNamesList);
            segmentRecord.Label = variable.Label;
            segmentRecord.Type = segmentLength;
            segmentRecord.PrintFormat = textFormat;
            segmentRecord.WriteFormat = textFormat;
            segmentRecord.DisplayInfo = variable.DisplayInfo;

            return segmentRecord;
        }
Beispiel #6
0
        /// <summary>
        /// Makes sure the raw (encoded) name of <paramref name="variable"/> does not repeat a name
        /// registered for an earlier variable, renaming the variable when it does, and then
        /// registers the resulting raw name.
        /// </summary>
        /// <param name="variable">The record whose name is checked and possibly replaced.</param>
        /// <param name="previousVariableNames">Raw names already in use; the final raw name is added to it.</param>
        /// <param name="longNameCounter">Counter of replaced names; incremented when a replacement is needed.</param>
        private static void CheckShortName(VariableRecord variable, SortedSet<byte[]> previousVariableNames, ref int longNameCounter)
        {
            // Compare using the raw encoded name byte array.
            if (previousVariableNames.Contains(variable._nameRaw))
            {
                // Replacement naming scheme, as produced by
                // "IBM SPSS STATISTICS 64-bit MS Windows 22.0.0.0".
                longNameCounter++;
                var replacementIndex = longNameCounter;

                // Walk through the appendable characters until the generated name is free, in case
                // a variable called VXX_A already exists.
                // NOTE(review): relies on assigning Name refreshing _nameRaw - confirm in VariableRecord.
                for (var suffixIndex = 0; ; suffixIndex++)
                {
                    variable.Name = $"V{replacementIndex}_{AppendableChars[suffixIndex]}";
                    if (!previousVariableNames.Contains(variable._nameRaw))
                    {
                        break;
                    }
                }
            }

            // Register the raw encoded name so later variables cannot collide with it.
            previousVariableNames.Add(variable._nameRaw);
        }
 /// <summary>
 /// Converts the raw bytes of a value into an object, based on the type of the variable.
 /// </summary>
 /// <param name="value">Raw bytes of the value.</param>
 /// <param name="variable">Record whose <c>Type</c> selects the conversion.</param>
 /// <returns>
 /// For type 0: the double read from the start of <paramref name="value"/>, or
 /// <see cref="DBNull.Value"/> when it equals <c>SysmisValue</c>. Otherwise the bytes decoded as ASCII.
 /// </returns>
 public object ValueToObject(byte[] value, VariableRecord variable)
 {
     // Any non-zero type is decoded as text.
     if (variable.Type != 0)
     {
         return Encoding.ASCII.GetString(value);
     }

     var number = BitConverter.ToDouble(value, 0);
     return number == this.SysmisValue ? (object)DBNull.Value : number;
 }
Beispiel #8
0
        /// <summary>
        /// Creates all variable records needed for this variable
        /// </summary>
        /// <param name="variable">The variable metadata to create the new variable</param>
        /// <param name="headerEncoding">The encoding to use on the header</param>
        /// <param name="previousVariableNames">
        ///     A list of the variable names that were already
        ///     created, to avoid the short name collision
        /// </param>
        /// <param name="longNameCounter">
        ///     The counter of variables with name replaced, to create
        ///     a proper long name that won't collide
        /// </param>
        /// <param name="longStringVariables">
        ///     Receives an entry (header name, text width) when the variable is a
        ///     text variable wider than 255
        /// </param>
        /// <param name="segmentsNamesList">
        ///     Passed through to GetVlsExtraVariable when creating the records of
        ///     the additional segments
        /// </param>
        /// <returns>
        ///         Only one var for numbers or text of length 8 or less, or the
        ///         main variable definition, followed by string continuation "dummy"
        ///         variables. There should be one for each 8 chars after the first 8.
        ///  </returns>
        /// <exception cref="SpssFileFormatException">
        ///     When GetLongStringSegmentsCount does not return a value greater than zero
        /// </exception>
        internal static VariableRecord[] GetNeededVariables(Variable variable, Encoding headerEncoding, SortedSet <byte[]> previousVariableNames, ref int longNameCounter, IDictionary <string, int> longStringVariables, SortedList <byte[], int> segmentsNamesList)
        {
            var header = new VariableRecord(variable, headerEncoding)
            {
                DisplayInfo = GetVariableDisplayInfo(variable)
            };

            // Keep the raw name as it was before CheckShortName possibly renames the header;
            // it is handed to GetVlsExtraVariable further down.
            var originalName = header._nameRaw;

            CheckShortName(header, previousVariableNames, ref longNameCounter);

            // Set output format for text variables to be equal to textwidth
            // Also detect if a variable is extra wide and needs to be cut up over multiple variables (long string variables)
            if (variable.Type == DataType.Text)
            {
                if (variable.TextWidth <= 255)
                {
                    header.WriteFormat = new OutputFormat(FormatType.A, variable.TextWidth);
                    header.PrintFormat = new OutputFormat(FormatType.A, variable.TextWidth);
                }
                else
                {
                    // Wider than 255: register the full width under the header name and cap the
                    // header's own formats at 255.
                    longStringVariables.Add(header.Name, variable.TextWidth);
                    header.WriteFormat = new OutputFormat(FormatType.A, 255);
                    header.PrintFormat = new OutputFormat(FormatType.A, 255);
                }
            }

            // If it's numeric or a string of length 8 or less, no dummy vars are needed
            if (variable.Type == DataType.Numeric || variable.TextWidth <= 8)
            {
                return(new [] { header });
            }


            var segments = GetLongStringSegmentsCount(variable.TextWidth);

            if (!(segments > 0))
            {
                throw new SpssFileFormatException("String variables can no have less than one segment");
            }

            // Create all the variable continuation records that for each extra 8 bytes of string data
            // The actual count of needed VariableRecords
            var varCount = GetLongStringContinuationRecordsCount(variable.TextWidth);
            var result   = new VariableRecord[varCount];

            // A single continuation record instance is reused for every dummy slot of the result.
            var dummyVar = GetStringContinuationRecord();

            // Number of result slots taken by a segment of length 255.
            var fullSegmentBlocks = GetStringContinuationRecordsCount(255);

            // Every segment but the last has length 255; the last one gets the length reported by
            // GetFinalSegmentLenght.
            var segmentLength = segments > 1 ? 255 : GetFinalSegmentLenght(variable.TextWidth, segments);
            var segmentBlocks = GetStringContinuationRecordsCount(segmentLength);

            // The header is the record of the first segment.
            header.Type = segmentLength;
            result[0]   = header;

            var currentSegment = 0;
            var i = 1;

            // Each pass fills the remaining slots of the current segment with dummyVar and then,
            // if another segment follows, stores that segment's own record in the next slot.
            // i runs continuously across all passes.
            while (true)
            {
                // Index in result where the current segment starts.
                var segmentBaseIndex = fullSegmentBlocks * currentSegment;
                for (; i < segmentBaseIndex + segmentBlocks; i++)
                {
                    result[i] = dummyVar;
                }

                currentSegment++;
                var segmentsLeft = segments - currentSegment;

                if (segmentsLeft <= 0)
                {
                    break;
                }

                // Length and block count of the segment that starts now.
                segmentLength = segmentsLeft > 1 ? 255 : GetFinalSegmentLenght(variable.TextWidth, segments);
                segmentBlocks = GetStringContinuationRecordsCount(segmentLength);

                result[i++] = GetVlsExtraVariable(header, headerEncoding, segmentLength, previousVariableNames, segmentsNamesList, originalName);
            }

            return(result);
        }
        /// <summary>
        /// Creates all variable records needed for this variable
        /// </summary>
        /// <param name="variable">The variable metadata to create the new variable</param>
        /// <param name="headerEncoding">The encoding to use on the header</param>
        /// <param name="previousVariableNames">
        ///     A list of the variable names that were already
        ///     created, to avoid the short name collision
        /// </param>
        /// <param name="longNameCounter">
        ///     The counter of variables with name replaced, to create
        ///     a proper long name that won't collide
        /// </param>
        /// <param name="longStringVariables">
        ///     Receives an entry (head variable name, text width) when the
        ///     variable needs dummy vars and its text width is greater than 255
        /// </param>
        /// <returns>
        ///         Only one var for numbers or text of length 8 or less, or the
        ///         main variable definition, followed by string continuation "dummy"
        ///         variables. There should be one for each 8 chars after the first 8.
        ///  </returns>
        /// <exception cref="SpssFileFormatException">
        ///     When GetLongStringSegmentsCount does not return a value greater than zero
        /// </exception>
        internal static VariableRecord[] GetNeededVaraibles(Variable variable, Encoding headerEncoding,
                                                            SortedSet <byte[]> previousVariableNames, ref int longNameCounter, IDictionary <string, int> longStringVariables)
        {
            var headVariable = new VariableRecord(variable, headerEncoding);

            headVariable.DisplayInfo = GetVariableDisplayInfo(variable);
            CheckShortName(headVariable, previousVariableNames, ref longNameCounter);

            // If it's numeric or a string of length 8 or less, no dummy vars are needed
            if (variable.Type == DataType.Numeric || variable.TextWidth <= 8)
            {
                return(new [] { headVariable });
            }

            // Wider than 255: register the full width under the head variable's name.
            if (variable.TextWidth > 255)
            {
                longStringVariables.Add(headVariable.Name, variable.TextWidth);
            }

            var segments = GetLongStringSegmentsCount(variable.TextWidth);

            if (!(segments > 0))
            {
                throw new SpssFileFormatException("String variables can no have less than one segment");
            }

            // Create all the variable continuation records that for each extra 8 bytes of string data
            // The actual count of needed VariableRecords
            var varCount = GetLongStringContinuationRecordsCount(variable.TextWidth);
            var result   = new VariableRecord[varCount];

            // A single continuation record instance is reused for every dummy slot of the result.
            var dummyVar = GetStringContinuationRecord();

            // Number of result slots taken by a segment of length 255.
            var fullSegmentBlocks = GetStringContinuationRecordsCount(255);

            // Every segment but the last has length 255; the last one gets the length reported by
            // GetFinalSegmentLenght.
            var segmentLength = segments > 1 ? 255 : GetFinalSegmentLenght(variable.TextWidth, segments);
            var segmentBlocks = GetStringContinuationRecordsCount(segmentLength);

            // The head variable is the record of the first segment.
            headVariable.Type = segmentLength;
            result[0]         = headVariable;

            var currentSegment = 0;
            var i = 1;

            // Each pass fills the remaining slots of the current segment with dummyVar and then,
            // if another segment follows, stores that segment's own record in the next slot.
            // i runs continuously across all passes.
            while (true)
            {
                // Index in result where the current segment starts.
                var segmentBaseIndex = fullSegmentBlocks * currentSegment;
                for (; i < segmentBaseIndex + segmentBlocks; i++)
                {
                    result[i] = dummyVar;
                }

                currentSegment++;
                var segmentsLeft = segments - currentSegment;
                if (segmentsLeft > 0)
                {
                    // Length and block count of the segment that starts now.
                    segmentLength = segmentsLeft > 1 ? 255 : GetFinalSegmentLenght(variable.TextWidth, segments);
                    segmentBlocks = GetStringContinuationRecordsCount(segmentLength);

                    result[i++] = GetVlsExtraVariable(variable, headerEncoding, segmentLength, previousVariableNames, ref longNameCounter);
                }
                else
                {
                    // No segment left: the result is complete.
                    break;
                }
            }

            return(result);
        }