private static VariableRecord GetVlsExtraVariable(
    VariableRecord variable,
    Encoding encoding,
    int segmentLength,
    SortedSet<byte[]> previousVariableNames,
    SortedList<byte[], int> segmentsNamesList,
    byte[] originalName)
{
    // A single string format (type A, width = segmentLength) is shared by the
    // print and write formats of the segment record.
    var segmentFormat = new OutputFormat(FormatType.A, segmentLength);

    // The segment record copies the label and display info from the given record,
    // but gets its own generated short name and uses the segment length as its type.
    return new VariableRecord
    {
        Encoding = encoding,
        _nameRaw = GenerateContinuationSegmentShortName(originalName, previousVariableNames, segmentsNamesList),
        Label = variable.Label,
        Type = segmentLength,
        PrintFormat = segmentFormat,
        WriteFormat = segmentFormat,
        DisplayInfo = variable.DisplayInfo
    };
}
/// <summary>
/// Makes sure the raw encoded name of <paramref name="variable"/> does not repeat the
/// name of any variable registered before it, replacing the name when it does, and
/// then registers the resulting raw name.
/// </summary>
/// <param name="variable">The record whose name is checked and, on collision, replaced</param>
/// <param name="previousVariableNames">
/// Raw encoded names already in use; the final raw name of
/// <paramref name="variable"/> is added to this set
/// </param>
/// <param name="longNameCounter">
/// Counter of replaced names; incremented once when a replacement name is needed
/// and used as the number in the generated name
/// </param>
private static void CheckShortName(VariableRecord variable, SortedSet<byte[]> previousVariableNames, ref int longNameCounter)
{
    // The comparison is done on the raw encoded name byte array
    var nameAlreadyTaken = previousVariableNames.Contains(variable._nameRaw);

    if (nameAlreadyTaken)
    {
        // Replacement names follow the pattern V<counter>_<char>,
        // as produced by "IBM SPSS STATISTICS 64-bit MS Windows 22.0.0.0"
        longNameCounter++;
        var replacementNumber = longNameCounter;

        // Walk through the appendable chars until a free name is found, in case
        // a variable called VXX_A (or similar) already exists.
        // NOTE(review): presumably assigning Name refreshes _nameRaw - confirm in VariableRecord
        var suffixPosition = 0;
        while (true)
        {
            variable.Name = $"V{replacementNumber}_{AppendableChars[suffixPosition]}";
            suffixPosition++;

            if (!previousVariableNames.Contains(variable._nameRaw))
            {
                break;
            }
        }
    }

    // Register the raw encoded name so following variables can not collide with it
    previousVariableNames.Add(variable._nameRaw);
}
/// <summary>
/// Builds every variable record required to represent <paramref name="variable"/>.
/// </summary>
/// <param name="variable">The variable metadata the records are created from</param>
/// <param name="headerEncoding">The encoding passed to the created records</param>
/// <param name="previousVariableNames">
/// Raw encoded names of the variables created so far, used to avoid
/// short name collisions
/// </param>
/// <param name="longNameCounter">
/// The counter of variables whose name was replaced, used to build
/// replacement names that won't collide
/// </param>
/// <param name="longStringVariables">
/// Receives the header name and full text width of every text variable
/// wider than 255
/// </param>
/// <param name="segmentsNamesList">
/// Handed to the creation of the extra segment variables of very long strings
/// </param>
/// <returns>
/// A single record for numeric variables and for text of length 8 or less.
/// Otherwise the main variable record first, followed by string continuation
/// "dummy" records and, for every segment after the first one, an extra
/// segment variable followed by its own continuation records.
/// </returns>
/// <exception cref="SpssFileFormatException">
/// When the calculated segment count is not greater than zero
/// </exception>
internal static VariableRecord[] GetNeededVariables(
    Variable variable,
    Encoding headerEncoding,
    SortedSet<byte[]> previousVariableNames,
    ref int longNameCounter,
    IDictionary<string, int> longStringVariables,
    SortedList<byte[], int> segmentsNamesList)
{
    var header = new VariableRecord(variable, headerEncoding)
    {
        DisplayInfo = GetVariableDisplayInfo(variable)
    };

    // Keep the raw name as it was before the collision check below gets a chance to replace it
    var originalName = header._nameRaw;
    CheckShortName(header, previousVariableNames, ref longNameCounter);

    // Text variables get a string output format as wide as the text, capped at 255.
    // Anything wider is a long string variable, registered so it can be cut up
    // over multiple variables.
    if (variable.Type == DataType.Text)
    {
        if (variable.TextWidth > 255)
        {
            longStringVariables.Add(header.Name, variable.TextWidth);
            header.WriteFormat = new OutputFormat(FormatType.A, 255);
            header.PrintFormat = new OutputFormat(FormatType.A, 255);
        }
        else
        {
            header.WriteFormat = new OutputFormat(FormatType.A, variable.TextWidth);
            header.PrintFormat = new OutputFormat(FormatType.A, variable.TextWidth);
        }
    }

    // Numeric variables and strings of length 8 or less need no dummy variables
    if (variable.Type == DataType.Numeric || variable.TextWidth <= 8)
    {
        return new[] { header };
    }

    var segments = GetLongStringSegmentsCount(variable.TextWidth);
    if (!(segments > 0))
    {
        throw new SpssFileFormatException("String variables can no have less than one segment");
    }

    // Total amount of VariableRecords needed: the header, the continuation
    // records for each extra 8 bytes of string data and the extra segment variables
    var totalRecords = GetLongStringContinuationRecordsCount(variable.TextWidth);
    var records = new VariableRecord[totalRecords];

    // The same continuation record instance is placed in every continuation slot
    var continuationRecord = GetStringContinuationRecord();
    var blocksPerFullSegment = GetStringContinuationRecordsCount(255);

    // The first segment is a full one (255) unless it is also the only one
    var segmentLength = segments > 1 ? 255 : GetFinalSegmentLenght(variable.TextWidth, segments);
    var segmentBlocks = GetStringContinuationRecordsCount(segmentLength);

    header.Type = segmentLength;
    records[0] = header;

    var next = 1;
    for (var processedSegments = 1; ; processedSegments++)
    {
        // Fill the rest of the current segment with continuation records. The segment
        // starts at a multiple of the full segment block count and its first slot
        // is already taken by the header or by the extra segment variable.
        var fillLimit = blocksPerFullSegment * (processedSegments - 1) + segmentBlocks;
        while (next < fillLimit)
        {
            records[next] = continuationRecord;
            next++;
        }

        var segmentsLeft = segments - processedSegments;
        if (segmentsLeft <= 0)
        {
            break;
        }

        // Open the next segment: full length unless it is the last one
        segmentLength = segmentsLeft > 1 ? 255 : GetFinalSegmentLenght(variable.TextWidth, segments);
        segmentBlocks = GetStringContinuationRecordsCount(segmentLength);
        records[next] = GetVlsExtraVariable(header, headerEncoding, segmentLength, previousVariableNames, segmentsNamesList, originalName);
        next++;
    }

    return records;
}