/// <summary>
/// Reads one variable record from the reader's current position.
/// </summary>
/// <param name="reader">Reader positioned on the first field (the type) of the record.</param>
/// <returns>
/// The record filled with type, label flag, missing value count, print/write
/// formats, name, optional label and missing values, in the order they are read.
/// </returns>
public static VariableRecord ParseNextRecord(BinaryReader reader)
{
    var record = new VariableRecord();
    record.Type = reader.ReadInt32();
    record.HasVariableLabel = (reader.ReadInt32() == 1);
    record.MissingValueCount = reader.ReadInt32();
    record.PrintFormat = new OutputFormat(reader.ReadInt32());
    record.WriteFormat = new OutputFormat(reader.ReadInt32());

    // The name occupies a fixed 8 bytes.
    record.Name = Encoding.Default.GetString(reader.ReadBytes(8));

    if (record.HasVariableLabel)
    {
        record.LabelLength = reader.ReadInt32();

        // The label is padded up to a multiple of 4 bytes (32 bits).
        // The previous formula, ((len - 1) / 4 + 1) * 4, evaluates to 4 when the
        // label flag is set but the length is 0 (integer division truncates -1 / 4
        // to 0), which consumed 4 bytes belonging to the next field.
        // (len + 3) / 4 * 4 gives the same result for every positive length and 0 for 0.
        int labelBytes = ((record.LabelLength + 3) / 4) * 4;
        record.Label = Encoding.Default.GetString(reader.ReadBytes(labelBytes));
    }

    // The count is used by magnitude only; its sign is kept on the record.
    int missingValueCount = Math.Abs(record.MissingValueCount);
    var missingValues = new List<double>(missingValueCount);
    for (int i = 0; i < missingValueCount; i++)
    {
        missingValues.Add(reader.ReadDouble());
    }
    record.MissingValues = new Collection<double>(missingValues);

    return record;
}
/// <summary>
/// Parses a single variable record starting at the reader's current position.
/// </summary>
/// <param name="reader">Reader positioned on the first field (the type) of the record.</param>
/// <returns>The parsed record, including its label (when flagged) and missing values.</returns>
public static VariableRecord ParseNextRecord(BinaryReader reader)
{
    // Initializer members are evaluated top to bottom, so the fields are
    // consumed from the stream in exactly this order.
    var parsed = new VariableRecord
    {
        Type = reader.ReadInt32(),
        HasVariableLabel = (reader.ReadInt32() == 1),
        MissingValueCount = reader.ReadInt32(),
        PrintFormat = new OutputFormat(reader.ReadInt32()),
        WriteFormat = new OutputFormat(reader.ReadInt32()),
        Name = Common.ByteArrayToString(reader.ReadBytes(8))
    };

    if (parsed.HasVariableLabel)
    {
        parsed.LabelLength = reader.ReadInt32();

        // The label is padded up to a multiple of 4 bytes (32 bits).
        // Common.RoundUp is used instead of ((len - 1) / 4 + 1) * 4 because that
        // formula gives a wrong result when the label flag is set but LabelLength is 0.
        int paddedLength = Common.RoundUp(parsed.LabelLength, 4);
        byte[] labelData = reader.ReadBytes(paddedLength);
        parsed.Label = Common.ByteArrayToString(labelData);
    }

    // Only the magnitude of the count decides how many doubles follow.
    int valuesToRead = Math.Abs(parsed.MissingValueCount);
    var values = new List<double>(valuesToRead);
    while (values.Count < valuesToRead)
    {
        values.Add(reader.ReadDouble());
    }
    parsed.MissingValues = new Collection<double>(values);

    return parsed;
}
/// <summary>
/// Reads one variable record from the reader's current position.
/// </summary>
/// <param name="reader">Reader positioned on the first field (the type) of the record.</param>
/// <returns>
/// The record filled with type, label flag, missing value count, print/write
/// formats, name, optional label and missing values, in the order they are read.
/// </returns>
public static VariableRecord ParseNextRecord(BinaryReader reader)
{
    var record = new VariableRecord();
    record.Type = reader.ReadInt32();
    record.HasVariableLabel = (reader.ReadInt32() == 1);
    record.MissingValueCount = reader.ReadInt32();
    record.PrintFormat = new OutputFormat(reader.ReadInt32());
    record.WriteFormat = new OutputFormat(reader.ReadInt32());

    // The name occupies a fixed 8 bytes.
    byte[] nameBytes = reader.ReadBytes(8);
    record.Name = Encoding.Default.GetString(nameBytes);

    if (record.HasVariableLabel)
    {
        record.LabelLength = reader.ReadInt32();

        // Round the label length up to a multiple of 4 bytes (32 bits).
        // ((len - 1) / 4 + 1) * 4 was used before, but for a length of 0 it yields 4
        // (-1 / 4 truncates to 0), so an empty label swallowed 4 bytes of the
        // following data. (len + 3) / 4 * 4 matches it for all positive lengths
        // and correctly yields 0 for an empty label.
        int labelBytes = ((record.LabelLength + 3) / 4) * 4;
        byte[] labelData = reader.ReadBytes(labelBytes);
        record.Label = Encoding.Default.GetString(labelData);
    }

    // The count is used by magnitude only; its sign is kept on the record.
    int missingValueCount = Math.Abs(record.MissingValueCount);
    var missingValues = new List<double>(missingValueCount);
    for (int i = 0; i < missingValueCount; i++)
    {
        missingValues.Add(reader.ReadDouble());
    }
    record.MissingValues = new Collection<double>(missingValues);

    return record;
}
/// <summary>
/// Creates an additional variable record for <paramref name="variable"/> that
/// carries the given segment length, and makes sure its short name is unique.
/// </summary>
/// <param name="variable">Variable whose name and display info are copied.</param>
/// <param name="encoding">Encoding assigned to the new record.</param>
/// <param name="segmentLength">Length stored in the record's Type.</param>
/// <param name="previousVariableNames">Raw names already in use; updated by CheckShortName.</param>
/// <param name="longNameCounter">Counter of replaced names, passed on to CheckShortName.</param>
/// <returns>The new record.</returns>
private static VariableRecord GetVlsExtraVariable(Variable variable, Encoding encoding, int segmentLength, SortedSet<byte[]> previousVariableNames, ref int longNameCounter)
{
    // Assignment order is kept: the encoding is set before the name.
    var extra = new VariableRecord();
    extra.Encoding = encoding;
    extra.Name = variable.Name;
    // TODO set other values that tell the length
    extra.Type = segmentLength;
    extra.DisplayInfo = GetVariableDisplayInfo(variable);

    CheckShortName(extra, previousVariableNames, ref longNameCounter);

    return extra;
}
/// <summary>
/// Creates an additional variable record derived from an existing record,
/// using a generated short name and an A-type format built from the segment length.
/// </summary>
/// <param name="variable">Record whose label and display info are copied.</param>
/// <param name="encoding">Encoding assigned to the new record.</param>
/// <param name="segmentLength">Length stored in Type and passed to the A-type print/write format.</param>
/// <param name="previousVariableNames">Raw names already in use; forwarded to the short name generator.</param>
/// <param name="segmentsNamesList">Forwarded to the short name generator.</param>
/// <param name="originalName">Raw name the generated short name is based on.</param>
/// <returns>The new record.</returns>
private static VariableRecord GetVlsExtraVariable(VariableRecord variable, Encoding encoding, int segmentLength, SortedSet<byte[]> previousVariableNames, SortedList<byte[], int> segmentsNamesList, byte[] originalName)
{
    // One format instance is shared by the print and write formats.
    var segmentFormat = new OutputFormat(FormatType.A, segmentLength);

    var segmentRecord = new VariableRecord();
    segmentRecord.Encoding = encoding;
    segmentRecord._nameRaw = GenerateContinuationSegmentShortName(originalName, previousVariableNames, segmentsNamesList);
    segmentRecord.Label = variable.Label;
    segmentRecord.Type = segmentLength;
    segmentRecord.PrintFormat = segmentFormat;
    segmentRecord.WriteFormat = segmentFormat;
    segmentRecord.DisplayInfo = variable.DisplayInfo;

    return segmentRecord;
}
/// <summary>
/// Ensures the raw (encoded) name of <paramref name="variable"/> does not repeat the raw
/// name of any variable created before it, replacing the name when it does, and then
/// registers the final raw name.
/// </summary>
/// <param name="variable">Record whose name is checked and possibly replaced.</param>
/// <param name="previousVariableNames">Raw names already taken; the final raw name is added to it.</param>
/// <param name="longNameCounter">Counter of replaced names; incremented when a replacement is needed.</param>
private static void CheckShortName(VariableRecord variable, SortedSet<byte[]> previousVariableNames, ref int longNameCounter)
{
    // Compare using the raw encoded name byte array.
    if (previousVariableNames.Contains(variable._nameRaw))
    {
        // Short name generation as produced by
        // "IBM SPSS STATISTICS 64-bit MS Windows 22.0.0.0": V<counter>_<char>.
        longNameCounter++;
        var replacementNumber = longNameCounter;

        // Walk through the appendable characters in case a name such as VXX_A is already taken.
        // NOTE(review): relies on assigning Name refreshing _nameRaw - confirm in VariableRecord.
        var suffixPosition = 0;
        var stillTaken = true;
        while (stillTaken)
        {
            var suffix = AppendableChars[suffixPosition];
            suffixPosition++;
            variable.Name = $"V{replacementNumber}_{suffix}";
            stillTaken = previousVariableNames.Contains(variable._nameRaw);
        }
    }

    // Register the raw encoded name so later variables cannot collide with it.
    previousVariableNames.Add(variable._nameRaw);
}
/// <summary>
/// Converts the raw bytes of a value into an object according to the variable's type.
/// </summary>
/// <param name="value">Raw bytes of the value.</param>
/// <param name="variable">Record whose Type selects the conversion (0 is read as a double).</param>
/// <returns>
/// For Type 0: <see cref="DBNull.Value"/> when the double equals SysmisValue, otherwise
/// the double itself. For any other Type: the bytes decoded as an ASCII string.
/// </returns>
public object ValueToObject(byte[] value, VariableRecord variable)
{
    // Any non-zero type is decoded as text.
    if (variable.Type != 0)
    {
        return Encoding.ASCII.GetString(value);
    }

    var number = BitConverter.ToDouble(value, 0);

    // Exact comparison: the system-missing marker is a specific sentinel value.
    if (number == this.SysmisValue)
    {
        return DBNull.Value;
    }

    return number;
}
/// <summary>
/// Builds every variable record that has to be written for one variable.
/// </summary>
/// <param name="variable">The variable metadata the records are created from.</param>
/// <param name="headerEncoding">The encoding to use on the header.</param>
/// <param name="previousVariableNames">
/// The raw names of the variables already created, used to avoid short name collisions.
/// </param>
/// <param name="longNameCounter">
/// The counter of variables whose name was replaced, used to create
/// a replacement name that will not collide.
/// </param>
/// <param name="longStringVariables">
/// Receives the header name and text width of text variables wider than 255.
/// </param>
/// <param name="segmentsNamesList">Forwarded to GetVlsExtraVariable for each extra segment.</param>
/// <returns>
/// A single record for numeric variables or text of length 8 or less; otherwise the
/// main variable definition followed by string continuation "dummy" records
/// (and one extra variable record at the start of each further segment).
/// </returns>
/// <exception cref="SpssFileFormatException">The computed segment count is not positive.</exception>
internal static VariableRecord[] GetNeededVariables(Variable variable, Encoding headerEncoding, SortedSet<byte[]> previousVariableNames, ref int longNameCounter, IDictionary<string, int> longStringVariables, SortedList<byte[], int> segmentsNamesList)
{
    var header = new VariableRecord(variable, headerEncoding);
    header.DisplayInfo = GetVariableDisplayInfo(variable);

    // Captured before CheckShortName runs, since that call may assign a replacement name.
    var originalName = header._nameRaw;
    CheckShortName(header, previousVariableNames, ref longNameCounter);

    // Text variables get an A-type format as wide as the text, capped at 255.
    // Anything wider is also registered as a long string variable.
    if (variable.Type == DataType.Text)
    {
        var exceedsSingleSegment = variable.TextWidth > 255;
        if (exceedsSingleSegment)
        {
            longStringVariables.Add(header.Name, variable.TextWidth);
        }

        var formatWidth = exceedsSingleSegment ? 255 : variable.TextWidth;
        header.WriteFormat = new OutputFormat(FormatType.A, formatWidth);
        header.PrintFormat = new OutputFormat(FormatType.A, formatWidth);
    }

    // Numeric variables and strings of length 8 or less need no dummy records.
    if (variable.Type == DataType.Numeric || variable.TextWidth <= 8)
    {
        return new[] { header };
    }

    var segments = GetLongStringSegmentsCount(variable.TextWidth);
    if (!(segments > 0))
    {
        throw new SpssFileFormatException("String variables can no have less than one segment");
    }

    // Total number of VariableRecords needed, continuation records included.
    var totalRecords = GetLongStringContinuationRecordsCount(variable.TextWidth);
    var records = new VariableRecord[totalRecords];

    // The same continuation record instance is reused for every dummy slot.
    var continuation = GetStringContinuationRecord();
    var blocksPerFullSegment = GetStringContinuationRecordsCount(255);

    var segmentLength = segments > 1 ? 255 : GetFinalSegmentLenght(variable.TextWidth, segments);
    var segmentBlocks = GetStringContinuationRecordsCount(segmentLength);

    header.Type = segmentLength;
    records[0] = header;

    var next = 1;
    for (var segmentIndex = 0; segmentIndex < segments; segmentIndex++)
    {
        // Every segment after the first starts with its own extra variable record.
        if (segmentIndex > 0)
        {
            var segmentsLeft = segments - segmentIndex;
            segmentLength = segmentsLeft > 1 ? 255 : GetFinalSegmentLenght(variable.TextWidth, segments);
            segmentBlocks = GetStringContinuationRecordsCount(segmentLength);
            records[next++] = GetVlsExtraVariable(header, headerEncoding, segmentLength, previousVariableNames, segmentsNamesList, originalName);
        }

        // Fill the rest of the segment's blocks with continuation records.
        var segmentEnd = blocksPerFullSegment * segmentIndex + segmentBlocks;
        while (next < segmentEnd)
        {
            records[next] = continuation;
            next++;
        }
    }

    return records;
}
/// <summary>
/// Builds every variable record that has to be written for one variable.
/// </summary>
/// <param name="variable">The variable metadata the records are created from.</param>
/// <param name="headerEncoding">The encoding to use on the header.</param>
/// <param name="previousVariableNames">
/// The raw names of the variables already created, used to avoid short name collisions.
/// </param>
/// <param name="longNameCounter">
/// The counter of variables whose name was replaced, used to create
/// a replacement name that will not collide.
/// </param>
/// <param name="longStringVariables">
/// Receives the head variable name and text width of variables wider than 255.
/// </param>
/// <returns>
/// A single record for numeric variables or text of length 8 or less; otherwise the
/// main variable definition followed by string continuation "dummy" records
/// (and one extra variable record at the start of each further segment).
/// </returns>
/// <exception cref="SpssFileFormatException">The computed segment count is not positive.</exception>
internal static VariableRecord[] GetNeededVaraibles(Variable variable, Encoding headerEncoding, SortedSet<byte[]> previousVariableNames, ref int longNameCounter, IDictionary<string, int> longStringVariables)
{
    var head = new VariableRecord(variable, headerEncoding)
    {
        DisplayInfo = GetVariableDisplayInfo(variable)
    };
    CheckShortName(head, previousVariableNames, ref longNameCounter);

    // Numeric variables and strings of length 8 or less need no dummy records.
    var isShortOrNumeric = variable.Type == DataType.Numeric || variable.TextWidth <= 8;
    if (isShortOrNumeric)
    {
        return new[] { head };
    }

    // Widths above 255 are registered as long string variables.
    if (variable.TextWidth > 255)
    {
        longStringVariables.Add(head.Name, variable.TextWidth);
    }

    var segments = GetLongStringSegmentsCount(variable.TextWidth);
    if (!(segments > 0))
    {
        throw new SpssFileFormatException("String variables can no have less than one segment");
    }

    // Total number of VariableRecords needed, continuation records included.
    var totalRecords = GetLongStringContinuationRecordsCount(variable.TextWidth);
    var records = new VariableRecord[totalRecords];

    // The same continuation record instance is reused for every dummy slot.
    var continuation = GetStringContinuationRecord();
    var blocksPerFullSegment = GetStringContinuationRecordsCount(255);

    var segmentLength = segments > 1 ? 255 : GetFinalSegmentLenght(variable.TextWidth, segments);
    var segmentBlocks = GetStringContinuationRecordsCount(segmentLength);

    head.Type = segmentLength;
    records[0] = head;

    var next = 1;
    for (var segmentIndex = 0; segmentIndex < segments; segmentIndex++)
    {
        // Every segment after the first starts with its own extra variable record.
        if (segmentIndex > 0)
        {
            var segmentsLeft = segments - segmentIndex;
            segmentLength = segmentsLeft > 1 ? 255 : GetFinalSegmentLenght(variable.TextWidth, segments);
            segmentBlocks = GetStringContinuationRecordsCount(segmentLength);
            records[next++] = GetVlsExtraVariable(variable, headerEncoding, segmentLength, previousVariableNames, ref longNameCounter);
        }

        // Fill the rest of the segment's blocks with continuation records.
        var segmentEnd = blocksPerFullSegment * segmentIndex + segmentBlocks;
        while (next < segmentEnd)
        {
            records[next] = continuation;
            next++;
        }
    }

    return records;
}