/// <summary>
/// Creates a writer on <paramref name="st"/> and immediately writes everything that
/// precedes the observations: the release-117 header, the (preliminary) map, the
/// variable types, names, sortlist, formats, value-label names, variable labels,
/// an empty characteristics section and the opening <c>&lt;data&gt;</c> tag.
/// The stream position of each section is recorded in <c>Map</c> as it is written.
/// </summary>
/// <param name="st">Target stream; must support seeking.</param>
/// <param name="vars">Variable definitions, written in list order.</param>
/// <param name="DataName">Dataset label; <c>null</c> is treated as empty and anything
/// longer than 80 characters is truncated to its first 80 characters.</param>
/// <param name="SmallMemoryFootprint">When <c>true</c>, a temporary <c>.GSO</c> file
/// and a writer on it are created up front.</param>
/// <exception cref="ArgumentNullException"><paramref name="st"/> or <paramref name="vars"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="st"/> does not support seeking.</exception>
/// <exception cref="Exception">The temporary GSO file could not be created.</exception>
public StataFileWriter(Stream st, IList<StataVariable> vars, string DataName, bool SmallMemoryFootprint)
{
    if (st == null)
    {
        throw new ArgumentNullException("st");
    }
    if (vars == null)
    {
        throw new ArgumentNullException("vars");
    }
    // Validate the stream before touching the file system so that a rejected
    // stream does not leave a temporary file behind.
    if (!st.CanSeek)
    {
        throw new ArgumentException("Base Stream has to support Seek");
    }

    this.smallMemoryFootprint = SmallMemoryFootprint;
    if (smallMemoryFootprint)
    {
        try
        {
            // GetTempFileName() creates a zero-byte placeholder on disk. We only want
            // its unique name with a different extension, so remove the placeholder
            // instead of leaking it.
            string placeholder = System.IO.Path.GetTempFileName();
            tmpGSOfn = System.IO.Path.ChangeExtension(placeholder, ".GSO");
            if (!string.Equals(placeholder, tmpGSOfn, StringComparison.OrdinalIgnoreCase))
            {
                File.Delete(placeholder);
            }
            tmpGSOst = File.Create(tmpGSOfn);
            tmpGSObw = new StataBinaryWriter(tmpGSOst);
        }
        catch (Exception ex)
        {
            // Keep the original exception as InnerException so the root cause is not lost.
            throw new Exception("Error creating temporary GSO-File for smallmemoryfootprint, try without enable smallmemoryfootprint: original Error" + ex.Message, ex);
        }
    }

    this.st = st;
    this.vars = vars;

    Map = new StataMap();
    bw = new StataBinaryWriter(st);

    // ---- header ----
    Map.i01_stata_data = (ulong)st.Position;
    bw.Write("<stata_dta>");
    bw.Write("<header>");
    bw.Write("<release>117</release>");
    bw.Write("<byteorder>" + (BitConverter.IsLittleEndian ? "LSF" : "MSF") + "</byteorder>");
    bw.Write("<K>");
    bw.Write((UInt16)vars.Count);
    bw.Write("</K>");
    bw.Write("<N>");
    bw.Write((UInt32)0); // placeholder: the correct number of observations has to be written here later
    bw.Write("</N>");

    if (DataName == null)
    {
        DataName = "";
    }
    else if (DataName.Length > 80)
    {
        // Keep the FIRST 80 characters (start index 0, not 1).
        DataName = DataName.Substring(0, 80);
    }
    bw.Write("<label>");
    bw.Write((byte)DataName.Length); // one-byte length prefix
    bw.Write(DataName);
    bw.Write("</label>");

    // (char)17 is the length prefix: "dd MMM yyyy HH:mm" always formats to 17 characters.
    bw.Write("<timestamp>" + (char)17 + DateTime.Now.ToString("dd MMM yyyy HH:mm", System.Globalization.CultureInfo.InvariantCulture) + "</timestamp>");
    bw.Write("</header>");

    // ---- map ----
    // Only the first two offsets are known at this point.
    // NOTE(review): presumably the map is rewritten once all offsets are known - confirm.
    Map.i02_map = (ulong)st.Position;
    bw.Write("<map>");
    foreach (var item in Map.GetData())
    {
        bw.Write(item);
    }
    bw.Write("</map>");

    // ---- variable types ----
    Map.i03_vartypes = (ulong)st.Position;
    bw.Write("<variable_types>");
    foreach (var item in vars)
    {
        if (item.VarType == StataVariable.StataVarType.FixedString)
        {
            // For fixed strings the type code is the string length itself.
            // TODO: check fixed string length
            bw.Write((UInt16)item.FixedStringLength);
        }
        else
        {
            bw.Write((UInt16)item.VarType);
        }
    }
    bw.Write("</variable_types>");

    // ---- variable names (32-byte zero-padded fields) ----
    Map.i04_varnames = (ulong)st.Position;
    bw.Write("<varnames>");
    foreach (var item in vars)
    {
        bw.Write(GetZeroedFixedString(item.Name, 32));
    }
    bw.Write("</varnames>");

    // ---- sortlist: one zero entry per variable plus one trailing zero ----
    Map.i05_sortlist = (ulong)st.Position;
    bw.Write("<sortlist>");
    foreach (var item in vars)
    {
        // TODO: add Sortlist Feature
        bw.Write((UInt16)0);
    }
    bw.Write((UInt16)0);
    bw.Write("</sortlist>");

    // ---- display formats (48-byte zero-padded fields) ----
    Map.i06_formats = (ulong)st.Position;
    bw.Write("<formats>");
    foreach (var item in vars)
    {
        bw.Write(GetZeroedFixedString(item.DisplayFormat, 48));
    }
    bw.Write("</formats>");

    // ---- value label names (32-byte zero-padded fields) ----
    Map.i07_value_label_names = (ulong)st.Position;
    bw.Write("<value_label_names>");
    foreach (var item in vars)
    {
        bw.Write(GetZeroedFixedString(item.ValueLabelName, 32));
    }
    bw.Write("</value_label_names>");

    // ---- variable labels (80-byte zero-padded fields) ----
    Map.i08_variable_labels = (ulong)st.Position;
    bw.Write("<variable_labels>");
    foreach (var item in vars)
    {
        bw.Write(GetZeroedFixedString(item.Description, 80));
    }
    bw.Write("</variable_labels>");

    // ---- characteristics: always written empty ----
    Map.i09_characteristics = (ulong)st.Position;
    bw.Write("<characteristics></characteristics>");

    // ---- data: only the opening tag; observations are written afterwards ----
    Map.i10_data = (ulong)st.Position;
    bw.Write("<data>");
}
/// <summary>
/// Opens a release-117 Stata file on <paramref name="st"/> and reads all metadata:
/// header, map, variable definitions, strL entries (GSOs) and value labels.
/// Sections are located through the offsets stored in the file's map; the
/// observations themselves are not read here, only the <c>&lt;data&gt;</c> tag is verified.
/// </summary>
/// <param name="st">Source stream; must support seeking.</param>
/// <param name="SmallMemoryFootprint">When <c>true</c>, GSO entries are read via
/// <c>ReadGSOCache</c> into <c>GSOcache</c> instead of via <c>ReadGSO</c> into <c>GSO</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="st"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="st"/> does not support seeking.</exception>
/// <exception cref="Exception">An unknown variable type code is found, or the stream
/// position after the closing tag differs from the end-of-file offset in the map.</exception>
public StataFileReader(Stream st, bool SmallMemoryFootprint)
{
    if (st == null)
    {
        throw new ArgumentNullException("st");
    }
    if (!st.CanSeek)
    {
        throw new ArgumentException("Base Stream has to support Seek");
    }

    this.smallMemoryFootprint = SmallMemoryFootprint;
    this.st = st;

    br = new StataBinaryReader(st);
    br.ReadExcpectedString("<stata_dta>");

    #region Header
    br.ReadExcpectedString("<header>");
    br.ReadExcpectedString("<release>117</release>");
    // Only files whose byte order matches the current machine are accepted.
    br.ReadExcpectedString("<byteorder>" + (BitConverter.IsLittleEndian ? "LSF" : "MSF") + "</byteorder>");
    br.ReadExcpectedString("<K>");
    var varCount = br.ReadUInt16();
    br.ReadExcpectedString("</K>");
    br.ReadExcpectedString("<N>");
    dataCount = br.ReadUInt32();
    br.ReadExcpectedString("</N>");
    br.ReadExcpectedString("<label>");
    dataName = br.ReadStringByteLen();
    br.ReadExcpectedString("</label>");

    // The timestamp is expected with a length prefix of exactly 17.
    br.ReadExcpectedString("<timestamp>" + (char)17);
    var tt = br.ReadString(17);
    if (tt[0] == ' ')
    {
        // A space-padded day (" 1 Jan ...") is turned into "01 Jan ..." so that "dd" parses.
        tt = "0" + tt.Substring(1);
    }
    creationDate = DateTime.ParseExact(tt, "dd MMM yyyy HH:mm", System.Globalization.CultureInfo.InvariantCulture);
    br.ReadExcpectedString("</timestamp>");
    br.ReadExcpectedString("</header>");
    #endregion

    #region Map
    Map = new StataMap();
    br.ReadExcpectedString("<map>");
    Map.i01_stata_data = br.ReadUInt64();
    Map.i02_map = br.ReadUInt64();
    Map.i03_vartypes = br.ReadUInt64();
    Map.i04_varnames = br.ReadUInt64();
    Map.i05_sortlist = br.ReadUInt64();
    Map.i06_formats = br.ReadUInt64();
    Map.i07_value_label_names = br.ReadUInt64();
    Map.i08_variable_labels = br.ReadUInt64();
    Map.i09_characteristics = br.ReadUInt64();
    Map.i10_data = br.ReadUInt64();
    Map.i11_strls = br.ReadUInt64();
    Map.i12_value_labels = br.ReadUInt64();
    Map.i13_Stata_dataEnd = br.ReadUInt64();
    Map.i14_end_of_file = br.ReadUInt64();
    br.ReadExcpectedString("</map>");
    #endregion

    #region Variables
    variables = new List<StataVariable>();
    for (int i = 0; i < varCount; i++)
    {
        variables.Add(new StataVariable());
    }

    st.Position = (long)Map.i03_vartypes;
    br.ReadExcpectedString("<variable_types>");
    foreach (var item in variables)
    {
        var tmpType = br.ReadUInt16();
        if (tmpType <= 2045)
        {
            // Codes up to 2045 are fixed-length strings; the code is the length in bytes.
            item.FixedStringLength = tmpType;
            item.VarType = StataVariable.StataVarType.FixedString;
            DataLineLength += tmpType;
        }
        else
        {
            item.VarType = (StataVariable.StataVarType)((int)tmpType);
            // Accumulate the number of bytes this variable adds to DataLineLength.
            switch (item.VarType)
            {
                case StataVariable.StataVarType.Byte:
                    DataLineLength += 1;
                    break;
                case StataVariable.StataVarType.Int:
                    DataLineLength += 2;
                    break;
                case StataVariable.StataVarType.Float:
                case StataVariable.StataVarType.Long:
                    DataLineLength += 4;
                    break;
                case StataVariable.StataVarType.String:
                case StataVariable.StataVarType.Double:
                    DataLineLength += 8;
                    break;
                default:
                    throw new Exception("Unknown Datatype: " + tmpType.ToString());
            }
        }
    }
    br.ReadExcpectedString("</variable_types>");

    st.Position = (long)Map.i04_varnames;
    br.ReadExcpectedString("<varnames>");
    foreach (var item in variables)
    {
        item.Name = br.ReadStringZeroed(32);
    }
    br.ReadExcpectedString("</varnames>");

    if (Map.i05_sortlist > 0)
    {
        st.Position = (long)Map.i05_sortlist;
        br.ReadExcpectedString("<sortlist>");
        // TODO: add Sortlist Feature - the entries and the closing tag are not read yet.
    }

    if (Map.i06_formats > 0)
    {
        st.Position = (long)Map.i06_formats;
        br.ReadExcpectedString("<formats>");
        foreach (var item in variables)
        {
            item.DisplayFormat = br.ReadStringZeroed(48);
        }
        // The closing tag belongs inside the guard, like every other optional section;
        // otherwise it is checked at an unrelated stream position when the section is skipped.
        br.ReadExcpectedString("</formats>");
    }

    if (Map.i07_value_label_names > 0)
    {
        st.Position = (long)Map.i07_value_label_names;
        br.ReadExcpectedString("<value_label_names>");
        foreach (var item in variables)
        {
            item.ValueLabelName = br.ReadStringZeroed(32);
        }
        br.ReadExcpectedString("</value_label_names>");
    }

    if (Map.i08_variable_labels > 0)
    {
        st.Position = (long)Map.i08_variable_labels;
        br.ReadExcpectedString("<variable_labels>");
        foreach (var item in variables)
        {
            item.Description = br.ReadStringZeroed(80);
        }
        br.ReadExcpectedString("</variable_labels>");
    }

    if (Map.i09_characteristics > 0)
    {
        st.Position = (long)Map.i09_characteristics;
        br.ReadExcpectedString("<characteristics>");
        // Characteristics are deliberately not read; only the opening tag is verified.
    }
    #endregion

    // Only verify that the data section starts where the map says it does.
    st.Position = (long)Map.i10_data;
    br.ReadExcpectedString("<data>");

    #region GSOs
    if (Map.i11_strls > 0)
    {
        st.Position = (long)Map.i11_strls;
        br.ReadExcpectedString("<strls>");
        // Peek three characters: every entry starts with "GSO".
        while (br.ReadString(3) == "GSO")
        {
            // Rewind the peek so the entry reader sees the complete entry.
            st.Seek(-3, SeekOrigin.Current);
            if (smallMemoryFootprint)
            {
                var GSOCacheItem = br.ReadGSOCache();
                if (!GSOcache.ContainsKey(GSOCacheItem.Item1))
                {
                    GSOcache.Add(GSOCacheItem.Item1, new Dictionary<uint, Tuple<long, long>>());
                }
                GSOcache[GSOCacheItem.Item1].Add(GSOCacheItem.Item2, new Tuple<long, long>(GSOCacheItem.Item3, GSOCacheItem.Item4));
            }
            else
            {
                var GSOItem = br.ReadGSO();
                if (!GSO.ContainsKey(GSOItem.Item1))
                {
                    GSO.Add(GSOItem.Item1, new Dictionary<uint, string>());
                }
                GSO[GSOItem.Item1].Add(GSOItem.Item2, GSOItem.Item3);
            }
        }
        // The failed peek consumed the first three characters of the closing tag.
        st.Seek(-3, SeekOrigin.Current);
        br.ReadExcpectedString("</strls>");
    }
    #endregion

    #region ValueLabels
    if (Map.i12_value_labels > 0)
    {
        st.Position = (long)Map.i12_value_labels;
        br.ReadExcpectedString("<value_labels>");
        while (br.ReadString(5) == "<lbl>")
        {
            br.ReadUInt32(); // first UInt32 of the table; consumed but not used
            var vlName = br.ReadStringZeroed(32);
            br.ReadString(3); // three bytes after the name are skipped
            var numEntries = br.ReadUInt32();
            var lenTXT = br.ReadUInt32();

            // Column 0: text offset of the entry, column 1: value of the entry.
            var tmpVLentries = new Int32[numEntries, 2];
            for (int i = 0; i < numEntries; i++)
            {
                tmpVLentries[i, 0] = br.ReadInt32(); // TXT Offset for Element
            }
            for (int i = 0; i < numEntries; i++)
            {
                tmpVLentries[i, 1] = br.ReadInt32(); // Value for Element
            }

            var vlDic = new Dictionary<Int32, string>();
            var ll = st.Position; // start of the text area; offsets are relative to it
            for (int i = 0; i < numEntries; i++)
            {
                st.Position = ll + tmpVLentries[i, 0];
                vlDic.Add(tmpVLentries[i, 1], br.ReadStringZeroed());
            }
            // Jump to the end of the text area regardless of where the last read stopped.
            st.Position = ll + lenTXT;
            br.ReadExcpectedString("</lbl>");
            valueLabels.Add(vlName, vlDic);
        }
        br.ReadExcpectedString("ue_labels>"); // the first 5 characters "</val" are already read
    }
    #endregion

    st.Position = (long)Map.i13_Stata_dataEnd;
    br.ReadExcpectedString("</stata_dta>");
    if (Map.i14_end_of_file != (ulong)st.Position)
    {
        throw new Exception("Stat File has a wrong length. Expected: " + Map.i14_end_of_file.ToString() + " found: " + st.Position.ToString());
    }
}