/// <summary>
/// Loads an XML string into an <see cref="XmlDocument"/> and hands the document node to
/// <c>piaac_R1_extract_IB_XML_process_child</c>, starting with an empty path and nesting level 0.
/// </summary>
/// <param name="_logcounter">Event counter value to start from.</param>
/// <param name="_ret">Container forwarded to the child processor.</param>
/// <param name="_lookup">Lookup dictionary forwarded to the child processor.</param>
/// <param name="XML">XML text to parse; must be well-formed, otherwise <c>LoadXml</c> throws.</param>
/// <param name="PersonIdentifier">Person identifier forwarded to the child processor.</param>
/// <param name="_rawSequenceID">Raw sequence id forwarded to the child processor.</param>
/// <param name="_rawBooklet">Raw booklet forwarded to the child processor.</param>
/// <param name="_rawEventType">Raw event type forwarded to the child processor.</param>
/// <param name="_rawEventName">Raw event name forwarded to the child processor.</param>
/// <param name="_relativeTime">Relative time forwarded to the child processor.</param>
/// <param name="ParsedCommandLineArguments">Parsed command line arguments forwarded to the child processor.</param>
/// <returns>The counter value returned by the child processor.</returns>
public static int piaac_R1_extract_IB_XML(int _logcounter, logXContainer _ret, Dictionary<string, string> _lookup, string XML, string PersonIdentifier, string _rawSequenceID, string _rawBooklet, string _rawEventType, string _rawEventName, long _relativeTime, CommandLineArguments ParsedCommandLineArguments)
{
    var parsedDocument = new XmlDocument();
    parsedDocument.LoadXml(XML);

    // The document node itself is the traversal root: no path prefix yet, nesting level 0.
    int nextLogCounter = piaac_R1_extract_IB_XML_process_child(
        _logcounter,
        _ret,
        _lookup,
        parsedDocument,
        "",
        0,
        PersonIdentifier,
        _rawSequenceID,
        _rawBooklet,
        _rawEventType,
        _rawEventName,
        _relativeTime,
        ParsedCommandLineArguments);

    return nextLogCounter;
}
// TODO: Fix bug with EventID and ParentEventID
/// <summary>
/// Parses one log XML document and adds an event to <paramref name="_ret"/> for every
/// <c>logEntry</c> that has exactly one element child carrying an <c>xsi:type</c> attribute.
/// Nested elements of that child are added through <c>AddEventData</c>.
/// </summary>
/// <param name="XML">Complete XML document as text; must be well-formed.</param>
/// <param name="_ret">Container that receives the extracted events.</param>
/// <remarks>
/// The person identifier and item name are taken from the <c>user</c> and <c>name</c> attributes of
/// the <c>CBAItemLogEntry</c> entries (the last such entry wins); when an attribute is missing the
/// previous value ("Unknown" / empty string initially) is kept.
/// </remarks>
private static void ReadLogDataEEFromXMLString(string XML, logXContainer _ret)
{
    XmlDocument doc = new XmlDocument();
    doc.LoadXml(XML);

    string PersonIdentifier = "Unknown";
    string Element = "";

    XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);
    nsmgr.AddNamespace("xsi", "http://www.w3.org/2001/XMLSchema-instance");
    nsmgr.AddNamespace("cbaloggingmodel", "http://www.softcon.de/cba/cbaloggingmodel");

    foreach (XmlNode row in doc.SelectNodes("//logEntry[@xsi:type='cbaloggingmodel:CBAItemLogEntry']", nsmgr))
    {
        // Keep the current fallback instead of failing when an attribute is absent.
        XmlAttribute userAttribute = row.Attributes["user"];
        if (userAttribute != null)
        {
            PersonIdentifier = userAttribute.Value;
        }

        XmlAttribute nameAttribute = row.Attributes["name"];
        if (nameAttribute != null)
        {
            Element = nameAttribute.Value.Replace("de.softcon.cba.runtime.", "");
        }
    }

    int _logcounter = 0;
    foreach (XmlNode row in doc.SelectNodes("//logEntry"))
    {
        if (row.ChildNodes.Count != 1)
        {
            continue;
        }

        XmlNode payload = row.ChildNodes[0];

        // XmlNode.Attributes is null for anything but elements (text, comments, ...), and such
        // nodes cannot be re-parsed as a document either, so they are skipped.
        if (payload.NodeType != XmlNodeType.Element)
        {
            continue;
        }

        XmlAttribute typeAttribute = payload.Attributes["xsi:type"];
        if (typeAttribute == null)
        {
            continue;
        }

        // Parse only after the entry is known to produce an event.
        XDocument _xmlElement = XDocument.Parse(payload.OuterXml);

        logxGenericLogElement _parament = new logxGenericLogElement()
        {
            EventName = typeAttribute.Value.Replace("cbaloggingmodel:", ""),
            PersonIdentifier = PersonIdentifier,
            TimeStamp = DateTime.Parse(row.Attributes["timeStamp"].Value),
            Item = Element,
            EventID = _logcounter
        };
        _logcounter++;

        AddEventData(_xmlElement.Root, _parament, _ret);
        _ret.AddEvent(_parament);
    }
}
/// <summary>
/// Collects the <c>*_log.dta</c> files below the configured input folders (paths containing
/// "tbatools" are ignored), converts them with <c>CreateGenericLogContainer</c> and writes every
/// export whose output path is configured (Stata, XLSX, zipped CSV, codebook).
/// </summary>
/// <param name="Watch">Not used by this method.</param>
/// <param name="ParsedCommandLineArguments">Parsed command line arguments controlling input, limits and outputs.</param>
/// <remarks>Any exception is caught and reported on the console; nothing is rethrown.</remarks>
public static void ProcessLogFilesOnly(Stopwatch Watch, CommandLineArguments ParsedCommandLineArguments)
{
    try
    {
        /*
         * bool _personIdentifierIsNumber = false;
         * if (ParsedCommandLineArguments.Flags.Contains("NUMERICPERSONIDENTIFIER"))
         * _personIdentifierIsNumber = true;
         */

        // Optional parameters with their defaults.
        var parameters = ParsedCommandLineArguments.ParameterDictionary;
        string personIdentifierName = parameters.ContainsKey("personidentifier") ? parameters["personidentifier"] : "lfd";
        string language = parameters.ContainsKey("language") ? parameters["language"] : "ENG";

        var verbose = ParsedCommandLineArguments.Verbose;
        var maxCases = ParsedCommandLineArguments.MaxNumberOfCases;

        var logFiles = new List<string>();

        // -1 disables the limit; otherwise collecting stops once the list has grown past it.
        Func<bool> limitExceeded = () => maxCases != -1 && logFiles.Count > maxCases;

        foreach (string inputFolder in ParsedCommandLineArguments.Transform_InputFolders)
        {
            if (!Directory.Exists(inputFolder))
            {
                if (verbose)
                {
                    Console.WriteLine("Warning: Directory not exists: '" + inputFolder + "'.");
                }
                continue;
            }

            foreach (string candidate in Directory.GetFiles(inputFolder, "*_log.dta", SearchOption.AllDirectories))
            {
                if (!candidate.Contains("tbatools"))
                {
                    logFiles.Add(candidate);
                }

                if (limitExceeded())
                {
                    break;
                }
            }

            if (limitExceeded())
            {
                break;
            }
        }

        logXContainer container = CreateGenericLogContainer(logFiles, personIdentifierName, true, ParsedCommandLineArguments.ExcludedElements);
        container.LoadCodebookDictionary(ParsedCommandLineArguments.Transform_Dictionary);

        // TODO: Check!
        //_ret.UpdateRelativeTimes();
        container.CreateLookup();

        if (ParsedCommandLineArguments.Transform_OutputStata.Trim().Length > 0)
        {
            if (verbose)
            {
                Console.WriteLine("Create ZIP archive with Stata file(s).");
            }
            container.ExportStata(ParsedCommandLineArguments.Transform_OutputStata, language);
        }

        if (ParsedCommandLineArguments.Transform_OutputXLSX.Trim().Length > 0)
        {
            if (verbose)
            {
                Console.WriteLine("Create XLSX file.");
            }
            container.ExportXLSX(ParsedCommandLineArguments);
        }

        if (ParsedCommandLineArguments.Transform_OutputZCSV.Trim().Length > 0)
        {
            if (verbose)
            {
                Console.WriteLine("Create ZIP archive with CSV file(s).");
            }
            container.ExportCSV(ParsedCommandLineArguments);
        }

        if (ParsedCommandLineArguments.Transform_Codebook.Trim().Length > 0)
        {
            if (verbose)
            {
                Console.WriteLine("Create Codebook File.");
            }
            container.CreateCodebook(ParsedCommandLineArguments.Transform_Codebook, language);
        }

        // The error count is always reported; the individual messages only in verbose mode.
        if (container.ExportErrors.Count > 0)
        {
            Console.WriteLine(container.ExportErrors.Count + " error(s) creating output files.");
            if (verbose)
            {
                for (int errorIndex = 0; errorIndex < container.ExportErrors.Count; errorIndex++)
                {
                    Console.WriteLine(container.ExportErrors[errorIndex]);
                }
            }
        }
    }
    catch (Exception failure)
    {
        Console.WriteLine("Error transforming log data. Details: " + Environment.NewLine + failure.Message.ToString());
    }
}
/// <summary>
/// Reads the given Stata log files and converts every row whose element is not excluded into a
/// generic log event inside a new <c>logXContainer</c>.
/// </summary>
/// <param name="StataFiles">Paths of the Stata log files to read, processed in list order.</param>
/// <param name="PersonIdentifierName">Name of the Stata variable holding the person identifier.</param>
/// <param name="PersonIdentifierIsNumber">Stored on the container's <c>PersonIdentifierIsNumber</c> property.</param>
/// <param name="ExcludedElements">Element names whose rows are skipped.</param>
/// <returns>The container holding all converted events.</returns>
/// <remarks>
/// Each file is read twice: first to find the smallest valid absolute time per person, then to
/// create the events. Any exception is written to the console and rethrown unchanged so the
/// caller sees the original type and message. The reader is closed even when reading fails.
/// </remarks>
public static logXContainer CreateGenericLogContainer(List<string> StataFiles, string PersonIdentifierName, bool PersonIdentifierIsNumber, string[] ExcludedElements)
{
    logXContainer _ret = new logXContainer()
    {
        PersonIdentifierName = PersonIdentifierName,
        PersonIdentifierIsNumber = PersonIdentifierIsNumber
    };

    try
    {
        int _progressCounter = 0;
        foreach (string StataLogInFileName in StataFiles)
        {
            Console.WriteLine(StataLogInFileName + " - " + _progressCounter + " / " + StataFiles.Count);

            StataFileReader _stataLogFileReader = new StataFileReader(StataLogInFileName, true);
            try
            {
                // cache stataVariables and stataValueLabels for performance reasons
                var _stataVariables = _stataLogFileReader.Variables;
                var _stataValueLabels = _stataLogFileReader.ValueLabels;

                int _personIdentifierIndex = GetVariableIndex(_ret.PersonIdentifierName, _stataVariables);
                int _absoluteTimeIndex = GetVariableIndex("AbsoluteTime", _stataVariables);
                int _lineCounterIndex = GetVariableIndex("LineCounter", _stataVariables);
                int _logTypeIndex = GetVariableIndex("LogType", _stataVariables);
                int _logCounterIndex = GetVariableIndex("LogCounter", _stataVariables);
                int _relativeTimeIndex = GetVariableIndex("RelativeTime", _stataVariables);
                int _elementIndex = GetVariableIndex("Element", _stataVariables);
                int _varNameIndex = GetVariableIndex("VariableName", _stataVariables);
                int _varValueIndex = GetVariableIndex("VariableValue", _stataVariables);
                int _varValueLabelIndex = GetVariableIndex("ValueLabel", _stataVariables);
                int _xmlIndex = GetVariableIndex("XML", _stataVariables);

                // find first RealTime for each TestTaker
                Dictionary<string, DateTime> _firstLoginForEachPerson = new Dictionary<string, DateTime>();
                foreach (var _line in _stataLogFileReader)
                {
                    string _personIdentifier = GetStringValue(_personIdentifierIndex, _line, _stataVariables, _stataValueLabels);
                    DateTime _absolutetime = GetDateTimeValue(_absoluteTimeIndex, _line, _stataVariables, _stataValueLabels);

                    if (!_firstLoginForEachPerson.ContainsKey(_personIdentifier))
                    {
                        _firstLoginForEachPerson.Add(_personIdentifier, DateTime.MaxValue);
                    }

                    // Year 1 marks a row without an absolute time; such rows are ignored here.
                    if (_absolutetime.Year != 1 && _firstLoginForEachPerson[_personIdentifier] > _absolutetime)
                    {
                        _firstLoginForEachPerson[_personIdentifier] = _absolutetime;
                    }
                }

                // extract log data to generic log element
                foreach (var _line in _stataLogFileReader)
                {
                    string _personIdentifier = GetStringValue(_personIdentifierIndex, _line, _stataVariables, _stataValueLabels);
                    // The line counter is read but not used for the generated event.
                    int _linecounter = GetIntValue(_lineCounterIndex, _line, _stataVariables);
                    string _logType = GetStringValue(_logTypeIndex, _line, _stataVariables, _stataValueLabels);
                    int _logcounter = GetIntValue(_logCounterIndex, _line, _stataVariables);
                    long _relativetime = GetLogValue(_relativeTimeIndex, _line, _stataVariables);
                    DateTime _absolutetime = GetDateTimeValue(_absoluteTimeIndex, _line, _stataVariables, _stataValueLabels);
                    if (_absolutetime.Year == 1)
                    {
                        // NOTE(review): the relative time is subtracted from the person's first
                        // known absolute time — confirm that this sign is intended.
                        _absolutetime = _firstLoginForEachPerson[_personIdentifier].AddMilliseconds(-1 * _relativetime);
                    }

                    string _element = GetStringValue(_elementIndex, _line, _stataVariables, _stataValueLabels);
                    string _varname = GetStringValue(_varNameIndex, _line, _stataVariables, _stataValueLabels);
                    string _value = GetStringValue(_varValueIndex, _line, _stataVariables, _stataValueLabels);
                    string _valueLabel = GetStringValue(_varValueLabelIndex, _line, _stataVariables, _stataValueLabels);
                    string _xml = GetStringValue(_xmlIndex, _line, _stataVariables, _stataValueLabels);

                    if (ExcludedElements.Contains(_element))
                    {
                        continue;
                    }

                    _ret.AddEvent(CreateLogElementFromStataLine(_logType, _logcounter, _element, _personIdentifier, _absolutetime, _relativetime, _varname, _value, _valueLabel, _xml));
                }
            }
            finally
            {
                // Release the file even when reading or converting a row fails.
                _stataLogFileReader.Close();
            }

            _progressCounter++;
        }
    }
    catch (Exception _ex)
    {
        Console.WriteLine(_ex.ToString());
        // Rethrow the original exception so its type, message and stack trace survive.
        throw;
    }

    return _ret;
}

/// <summary>
/// Builds the generic log element for one Stata row. Known TBATools log types are mapped to the
/// matching typed event (name and serialized payload); every other log type keeps its own name
/// and uses the row's XML column as payload.
/// </summary>
private static logxGenericLogElement CreateLogElementFromStataLine(string logType, int logCounter, string element, string personIdentifier, DateTime absoluteTime, long relativeTime, string variableName, string value, string valueLabel, string xml)
{
    string eventName;
    string eventDataXml;

    switch (logType)
    {
        case "TTLogVariableChanged":
            eventName = typeof(TBAToolsVariableChanged).Name;
            eventDataXml = TBAToolsLogReader.XmlSerializeToString(new TBAToolsVariableChanged() { Sender = element, Variable = variableName, Value = value, ValueLabel = valueLabel });
            break;

        case "TTLogRealTime":
            eventName = typeof(TBAToolsRealTime).Name;
            eventDataXml = TBAToolsLogReader.XmlSerializeToString(new TBAToolsRealTime() { RealTime = absoluteTime });
            break;

        case "TTLogLoading":
            eventName = typeof(TBAToolsLoading).Name;
            eventDataXml = TBAToolsLogReader.XmlSerializeToString(new TBAToolsLoading() { Sender = element });
            break;

        case "TTLogLoaded":
            eventName = typeof(TBAToolsLoaded).Name;
            eventDataXml = TBAToolsLogReader.XmlSerializeToString(new TBAToolsLoaded() { Sender = element });
            break;

        case "TTLogUnloaded":
            eventName = typeof(TBAToolsUnloaded).Name;
            eventDataXml = TBAToolsLogReader.XmlSerializeToString(new TBAToolsUnloaded() { Sender = element });
            break;

        case "TTLogUnloading":
            eventName = typeof(TBAToolsUnloading).Name;
            eventDataXml = TBAToolsLogReader.XmlSerializeToString(new TBAToolsUnloading() { Sender = element });
            break;

        case "TTLogIBStopTask":
            eventName = typeof(TBAToolsIBStopTask).Name;
            eventDataXml = TBAToolsLogReader.XmlSerializeToString(new TBAToolsIBStopTask() { Sender = element });
            break;

        default:
            eventName = logType;
            eventDataXml = xml;
            break;
    }

    return new logxGenericLogElement()
    {
        EventID = logCounter,
        Item = element,
        EventName = eventName,
        EventDataXML = eventDataXml,
        PersonIdentifier = personIdentifier,
        TimeStamp = absoluteTime,
        RelativeTime = relativeTime
    };
}
/// <summary>
/// Stores the un-namespaced attributes of <paramref name="xmlelement"/> as the event data XML of
/// <paramref name="parent"/> and recursively adds one event per child element to
/// <paramref name="_ret"/>.
/// </summary>
/// <param name="xmlelement">Source element whose attributes and child elements are read.</param>
/// <param name="parent">Event that receives the attribute payload; its name prefixes the child event names.</param>
/// <param name="_ret">Container receiving the child events.</param>
/// <remarks>
/// Child events are numbered from 0 per parent, and the descendants of a child are added to the
/// container before the child itself.
/// </remarks>
private static void AddEventData(XElement xmlelement, logxGenericLogElement parent, logXContainer _ret)
{
    var payloadDocument = new XDocument(new XElement(parent.EventName));
    XElement payloadRoot = payloadDocument.Root;

    foreach (XAttribute sourceAttribute in xmlelement.Attributes())
    {
        // Attributes bound to a namespace are not copied.
        if (sourceAttribute.Name.Namespace.NamespaceName != "")
        {
            continue;
        }

        payloadRoot.Add(new XAttribute(sourceAttribute.Name.ToString(), sourceAttribute.Value));
    }

    parent.EventDataXML = payloadDocument.ToString();

    int childIndex = 0;
    foreach (XElement childElement in xmlelement.Elements())
    {
        var childEvent = new logxGenericLogElement();
        childEvent.Item = parent.Item;
        childEvent.EventName = parent.EventName + "." + childElement.Name.LocalName;
        childEvent.PersonIdentifier = parent.PersonIdentifier;
        childEvent.TimeStamp = parent.TimeStamp;
        childEvent.EventID = childIndex;

        AddEventData(childElement, childEvent, _ret);
        _ret.AddEvent(childEvent);

        childIndex++;
    }
}
/// <summary>
/// Reads log XML from the configured inputs (single .xml/.zip files, or folders searched
/// recursively for both), feeds every non-empty document to <c>ReadLogDataEEFromXMLString</c>
/// and writes every export whose output path is configured.
/// </summary>
/// <param name="Watch">Not used by this method.</param>
/// <param name="ParsedCommandLineArguments">Parsed command line arguments controlling input, limits and outputs.</param>
/// <remarks>
/// ZIP archives are processed before plain XML files. A failure while reading a single document
/// is reported on the console and ends the whole run without exporting. Any other exception is
/// caught and reported on the console.
/// </remarks>
public static void ProcessLogFilesOnly(Stopwatch Watch, CommandLineArguments ParsedCommandLineArguments)
{
    try
    {
        bool _personIdentifierIsNumber = false;
        if (ParsedCommandLineArguments.Flags.Contains("NUMERICPERSONIDENTIFIER"))
        {
            _personIdentifierIsNumber = true;
        }

        string _personIdentifier = "PersonIdentifier";
        if (ParsedCommandLineArguments.ParameterDictionary.ContainsKey("personidentifier"))
        {
            _personIdentifier = ParsedCommandLineArguments.ParameterDictionary["personidentifier"];
        }

        string _language = "ENG";
        if (ParsedCommandLineArguments.ParameterDictionary.ContainsKey("language"))
        {
            _language = ParsedCommandLineArguments.ParameterDictionary["language"];
        }

        List<string> _listOfXMLFiles = new List<string>();
        List<string> _listOfZIPArchivesWithXMLFiles = new List<string>();

        foreach (string inFolder in ParsedCommandLineArguments.Transform_InputFolders)
        {
            if (File.Exists(inFolder))
            {
                // Extension checks are ordinal so they do not depend on the current culture.
                if (inFolder.EndsWith(".zip", StringComparison.OrdinalIgnoreCase))
                {
                    _listOfZIPArchivesWithXMLFiles.Add(inFolder);
                }
                else if (inFolder.EndsWith(".xml", StringComparison.OrdinalIgnoreCase))
                {
                    _listOfXMLFiles.Add(inFolder);
                }
            }
            else
            {
                if (!Directory.Exists(inFolder))
                {
                    if (ParsedCommandLineArguments.Verbose)
                    {
                        Console.WriteLine("Warning: Directory not exists: '" + inFolder + "'.");
                    }
                    continue;
                }

                _listOfXMLFiles.AddRange(Directory.GetFiles(inFolder, "*.xml", SearchOption.AllDirectories));
                _listOfZIPArchivesWithXMLFiles.AddRange(Directory.GetFiles(inFolder, "*.zip", SearchOption.AllDirectories));
            }
        }

        logXContainer _ret = new logXContainer()
        {
            PersonIdentifierIsNumber = _personIdentifierIsNumber,
            PersonIdentifierName = _personIdentifier
        };
        _ret.LoadCodebookDictionary(ParsedCommandLineArguments.Transform_Dictionary);

        // An existing concordance table is read here; a missing one is created after the export.
        if (ParsedCommandLineArguments.Transform_ConcordanceTable.Trim() != "")
        {
            if (File.Exists(ParsedCommandLineArguments.Transform_ConcordanceTable))
            {
                if (ParsedCommandLineArguments.Verbose)
                {
                    Console.WriteLine("Read Concordance Table.");
                }
                _ret.ReadConcordanceTable(ParsedCommandLineArguments.Transform_ConcordanceTable);
            }
        }

        foreach (string zfilename in _listOfZIPArchivesWithXMLFiles)
        {
            using (ZipFile zip = ZipFile.Read(zfilename))
            {
                foreach (var entry in zip)
                {
                    if (ParsedCommandLineArguments.MaxNumberOfCases > 0 && _ret.GetNumberOfPersons >= ParsedCommandLineArguments.MaxNumberOfCases)
                    {
                        break;
                    }

                    // TODO: Check FitsMask
                    if (1 == 1 || CommandLineArguments.FitsMask(entry.FileName, ParsedCommandLineArguments.Mask))
                    {
                        if (ParsedCommandLineArguments.Verbose)
                        {
                            Console.Write("Info: Read File '" + entry.FileName + "' ");
                        }

                        using (MemoryStream zipStream = new MemoryStream())
                        {
                            entry.ExtractWithPassword(zipStream, "");
                            zipStream.Position = 0;
                            try
                            {
                                using (StreamReader sr = new StreamReader(zipStream))
                                {
                                    string _fileContentAsString = sr.ReadToEnd();
                                    if (_fileContentAsString.Trim().Length > 0)
                                    {
                                        ReadLogDataEEFromXMLString(_fileContentAsString, _ret);
                                    }
                                }
                            }
                            catch (Exception _ex)
                            {
                                Console.WriteLine("Error processing file '" + entry.FileName + "': " + _ex.Message);
                                return;
                            }
                        }

                        Console.WriteLine("ok.");
                    }
                }
            }

            if (ParsedCommandLineArguments.MaxNumberOfCases > 0 && _ret.GetNumberOfPersons >= ParsedCommandLineArguments.MaxNumberOfCases)
            {
                if (ParsedCommandLineArguments.Verbose)
                {
                    Console.WriteLine("Info: Max number of cases reached.");
                }
                break;
            }
        }

        foreach (string xfilename in _listOfXMLFiles)
        {
            if (ParsedCommandLineArguments.MaxNumberOfCases > 0 && _ret.GetNumberOfPersons >= ParsedCommandLineArguments.MaxNumberOfCases)
            {
                if (ParsedCommandLineArguments.Verbose)
                {
                    Console.WriteLine("Info: Max number of cases reached.");
                }
                break;
            }

            // TODO: Check FitsMask
            if (1 == 1 || CommandLineArguments.FitsMask(Path.GetFileName(xfilename), ParsedCommandLineArguments.Mask))
            {
                if (ParsedCommandLineArguments.Verbose)
                {
                    Console.WriteLine("Info: Read File '" + Path.GetFileName(xfilename) + "' ");
                }

                try
                {
                    // Dispose the reader so the file handle is released even when parsing fails.
                    using (StreamReader sr = new StreamReader(xfilename))
                    {
                        string _fileContentAsString = sr.ReadToEnd();
                        if (_fileContentAsString.Trim().Length > 0)
                        {
                            ReadLogDataEEFromXMLString(_fileContentAsString, _ret);
                        }
                    }
                }
                catch (Exception _ex)
                {
                    Console.WriteLine("Error processing file '" + xfilename + "': " + _ex.Message);
                    return;
                }

                Console.WriteLine("ok.");
            }
        }

        _ret.UpdateRelativeTimes();
        _ret.CreateLookup();

        if (ParsedCommandLineArguments.Transform_OutputStata.Trim() != "")
        {
            if (ParsedCommandLineArguments.Verbose)
            {
                Console.WriteLine("Create ZIP archive with Stata file(s).");
            }
            _ret.ExportStata(ParsedCommandLineArguments.Transform_OutputStata, _language);
        }

        if (ParsedCommandLineArguments.Transform_OutputXLSX.Trim() != "")
        {
            if (ParsedCommandLineArguments.Verbose)
            {
                Console.WriteLine("Create XLSX file.");
            }
            _ret.ExportXLSX(ParsedCommandLineArguments);
        }

        if (ParsedCommandLineArguments.Transform_OutputZCSV.Trim() != "")
        {
            if (ParsedCommandLineArguments.Verbose)
            {
                Console.WriteLine("Create ZIP archive with CSV file(s).");
            }
            _ret.ExportCSV(ParsedCommandLineArguments);
        }

        if (ParsedCommandLineArguments.Transform_Codebook.Trim() != "")
        {
            if (ParsedCommandLineArguments.Verbose)
            {
                Console.WriteLine("Create Codebook File.");
            }
            _ret.CreateCodebook(ParsedCommandLineArguments.Transform_Codebook, _language);
        }

        if (ParsedCommandLineArguments.Transform_ConcordanceTable.Trim() != "")
        {
            if (!File.Exists(ParsedCommandLineArguments.Transform_ConcordanceTable))
            {
                if (ParsedCommandLineArguments.Verbose)
                {
                    Console.WriteLine("Create Concordance Table.");
                }
                _ret.CreateConcordanceTable(ParsedCommandLineArguments.Transform_ConcordanceTable);
            }
        }

        if (_ret.ExportErrors.Count > 0)
        {
            Console.WriteLine(_ret.ExportErrors.Count + " error(s) creating output files.");
            if (ParsedCommandLineArguments.Verbose)
            {
                for (int i = 0; i < _ret.ExportErrors.Count; i++)
                {
                    Console.WriteLine(_ret.ExportErrors[i]);
                }
            }
        }
    }
    catch (Exception _ex)
    {
        Console.WriteLine("Error transforming log data. Details: " + Environment.NewLine + _ex.Message.ToString());
    }
}
/// <summary>
/// Deserializes one log XML document and adds an event to <paramref name="_ret"/> for every user
/// event of every item group that passes the element filter.
/// </summary>
/// <param name="Element">Item codes to include; an empty array includes all items.</param>
/// <param name="dt1970">Base date to which the epoch values (milliseconds) are added.</param>
/// <param name="_ret">Container receiving the events; also supplies the starting event id via <c>GetMaxID</c>.</param>
/// <param name="logSerializer">Serializer for the <c>log</c> type.</param>
/// <param name="_PersonIdentifier">Expected person identifier; must equal the <c>User</c> of the log.</param>
/// <param name="_xml">XML text of the log.</param>
/// <exception cref="Exception">
/// The log's user differs from <paramref name="_PersonIdentifier"/>, or a user event contains an
/// item other than <c>context</c>, <c>value</c> or <c>id</c>.
/// </exception>
/// <remarks>
/// Item groups are processed in ascending epoch order. Event ids are incremented for every user
/// event, including those removed by the element filter. Item groups or events without entries
/// (null arrays after deserialization) are treated as empty.
/// </remarks>
private static void processPISA_BQ_single_XML(string[] Element, DateTime dt1970, logXContainer _ret, XmlSerializer logSerializer, string _PersonIdentifier, string _xml)
{
    log _log;
    using (var _tr = new StringReader(_xml))
    {
        _log = (log)logSerializer.Deserialize(_tr);
    }

    if (_log.itemGroup == null)
    {
        return;
    }

    // update epoch information: groups without an own epoch inherit it from their first event
    foreach (var i in _log.itemGroup)
    {
        if (!i.epochSpecified && i.userEvents != null && i.userEvents.Length > 0)
        {
            i.epoch = i.userEvents[0].epoch;
        }
    }

    List<logItemGroup> _sortedItemGroupList = _log.itemGroup.OrderBy(o => o.epoch).ToList();

    if (_log.User != _PersonIdentifier)
    {
        throw new Exception("Person identifier miss-match.");
    }

    // DateTime.MaxValue marks "no previous event yet".
    DateTime _PreviousEvent = DateTime.MaxValue;
    int _EventID = _ret.GetMaxID(_PersonIdentifier);

    foreach (var p in _sortedItemGroupList)
    {
        string _Element = p.code;
        bool _isIncluded = Element.Length == 0 || Element.Contains(_Element);

        DateTime _ElementStart = dt1970.AddMilliseconds(p.epoch);
        if (_PreviousEvent == DateTime.MaxValue)
        {
            _PreviousEvent = _ElementStart;
        }

        if (p.userEvents == null)
        {
            continue;
        }

        foreach (var _event in p.userEvents)
        {
            string _LogEventName = _event.type;
            DateTime _AbsoluteTime = dt1970.AddMilliseconds(_event.epoch);

            Dictionary<string, string> _EventValues = new Dictionary<string, string>();
            if (_event.ItemsElementName != null)
            {
                for (int i = 0; i < _event.ItemsElementName.Length; i++)
                {
                    switch (_event.ItemsElementName[i].ToString())
                    {
                        case "context":
                            _EventValues.Add("Context", _event.Items[i]);
                            break;

                        case "value":
                            _EventValues.Add("Value", _event.Items[i]);
                            break;

                        case "id":
                            _EventValues.Add("Id", _event.Items[i]);
                            break;

                        default:
                            throw new Exception("Element name not expected.");
                    }
                }
            }

            _EventValues.Add("RelativeTimeFrame", (_AbsoluteTime - _ElementStart).TotalMilliseconds.ToString());
            _EventValues.Add("RelativeTimePrevious", (_AbsoluteTime - _PreviousEvent).TotalMilliseconds.ToString());

            if (_isIncluded)
            {
                var doc = new XDocument(new XElement(_LogEventName));
                var root = doc.Root;
                foreach (KeyValuePair<string, string> _pair in _EventValues)
                {
                    root.Add(new XAttribute(_pair.Key, _pair.Value));
                }

                logxGenericLogElement _parament = new logxGenericLogElement()
                {
                    PersonIdentifier = _PersonIdentifier,
                    Item = _Element,
                    EventID = _EventID,
                    EventName = _LogEventName,
                    TimeStamp = _AbsoluteTime,
                    EventDataXML = doc.ToString()
                };
                _ret.AddEvent(_parament);
            }

            // The id advances for filtered events too, so ids stay stable across filters.
            _EventID += 1;
            _PreviousEvent = _AbsoluteTime;
        }
    }

    // check for suspicious times

    /* TODO
     * _currentElement = "";
     * List<string> framesWithSuspiciousData = new List<string>();
     * foreach (var v in _inMemoryTempDataEvents)
     * {
     * if (_currentElement != v.EventName)
     * _currentElement = v.EventName;
     *
     * if (v.TimeDifferencePrevious.TotalMinutes > 30)
     * v.AddEventValue("Flag", "TimeToLong");
     *
     * if (v.TimeDifferencePrevious.TotalMilliseconds < 0)
     * v.AddEventValue("Flag", "TimeNegative");
     * }
     */
}
/// <summary>
/// Reads <c>*-log.xml</c> documents from the configured inputs — plain XML files, ZIP archives
/// containing them directly, or <c>Session2.zip</c> entries that contain <c>_Data.zip</c> archives
/// with the log files — passes each one to <c>processPISA_BQ_single_XML</c> and writes every export
/// whose output path is configured.
/// </summary>
/// <param name="Watch">Not used by this method.</param>
/// <param name="ParsedCommandLineArguments">Parsed command line arguments controlling input, limits and outputs.</param>
/// <remarks>
/// The person identifier is derived from the file or entry name. Errors while reading a ZIP
/// archive are collected in <c>ExportErrors</c> and the next archive is processed. Any other
/// exception is caught and reported on the console.
/// </remarks>
public static void ProcessLogFilesOnly(Stopwatch Watch, CommandLineArguments ParsedCommandLineArguments)
{
    try
    {
        bool _personIdentifierIsNumber = false;
        if (ParsedCommandLineArguments.Flags.Contains("NUMERICPERSONIDENTIFIER"))
        {
            _personIdentifierIsNumber = true;
        }

        string _personIdentifier = "PersonIdentifier";
        if (ParsedCommandLineArguments.ParameterDictionary.ContainsKey("personidentifier"))
        {
            _personIdentifier = ParsedCommandLineArguments.ParameterDictionary["personidentifier"];
        }

        string _language = "ENG";
        if (ParsedCommandLineArguments.ParameterDictionary.ContainsKey("language"))
        {
            _language = ParsedCommandLineArguments.ParameterDictionary["language"];
        }

        DateTime dt1970 = new DateTime(1970, 1, 1, 0, 0, 0, 0);
        XmlSerializer logSerializer = new XmlSerializer(typeof(log));

        #region Search Source Files

        List<string> _listOfXMLFiles = new List<string>();
        List<string> _listOfZIPArchivesWithXMLFiles = new List<string>();

        foreach (string inFolder in ParsedCommandLineArguments.Transform_InputFolders)
        {
            // Input is file
            if (File.Exists(inFolder))
            {
                // Extension checks are ordinal so they do not depend on the current culture.
                if (inFolder.EndsWith(".zip", StringComparison.OrdinalIgnoreCase))
                {
                    // Single ZIP file
                    _listOfZIPArchivesWithXMLFiles.Add(inFolder);
                }
                else if (inFolder.EndsWith(".xml", StringComparison.OrdinalIgnoreCase))
                {
                    // Single XML file
                    _listOfXMLFiles.Add(inFolder);
                }
            }
            else
            {
                if (!Directory.Exists(inFolder))
                {
                    if (ParsedCommandLineArguments.Verbose)
                    {
                        Console.WriteLine("Warning: Directory not exists: '" + inFolder + "'.");
                    }
                    continue;
                }

                _listOfXMLFiles.AddRange(Directory.GetFiles(inFolder, "*.xml", SearchOption.AllDirectories));
                _listOfZIPArchivesWithXMLFiles.AddRange(Directory.GetFiles(inFolder, "*.zip", SearchOption.AllDirectories));
            }
        }

        #endregion

        logXContainer _ret = new logXContainer()
        {
            PersonIdentifierIsNumber = _personIdentifierIsNumber,
            PersonIdentifierName = _personIdentifier
        };
        _ret.LoadCodebookDictionary(ParsedCommandLineArguments.Transform_Dictionary);

        foreach (string xfilename in _listOfXMLFiles)
        {
            if (xfilename.EndsWith("-log.xml", StringComparison.Ordinal))
            {
                // Read and release the file before processing so the handle never leaks.
                string _xml;
                using (StreamReader _sr = new StreamReader(xfilename))
                {
                    _xml = CleanInvalidXmlChars(_sr.ReadToEnd());
                }

                string _PersonIdentifier = Path.GetFileName(xfilename).Replace("-log.xml", "");
                processPISA_BQ_single_XML(ParsedCommandLineArguments.Elements, dt1970, _ret, logSerializer, _PersonIdentifier, _xml);
            }
        }

        foreach (string zfilename in _listOfZIPArchivesWithXMLFiles)
        {
            try
            {
                using (var outerInputZipFile = ZipFile.Read(zfilename))
                {
                    foreach (var outerInputZipEntry in outerInputZipFile.Entries)
                    {
                        if (outerInputZipEntry.FileName.EndsWith("-log.xml", StringComparison.Ordinal) && outerInputZipEntry.UncompressedSize != 0)
                        {
                            #region Single XML file

                            string _PersonIdentifier = Path.GetFileName(outerInputZipEntry.FileName).Replace("-log.xml", "");
                            if (ParsedCommandLineArguments.Verbose)
                            {
                                Console.WriteLine(_PersonIdentifier + " -- " + _ret.GetNumberOfPersons);
                            }

                            using (MemoryStream innerZIPEntryMemoryStream = new MemoryStream())
                            {
                                outerInputZipEntry.Password = ParsedCommandLineArguments.ZIPPassword;
                                outerInputZipEntry.Extract(innerZIPEntryMemoryStream);
                                innerZIPEntryMemoryStream.Position = 0;

                                using (var _sr = new StreamReader(innerZIPEntryMemoryStream))
                                {
                                    var _xml = CleanInvalidXmlChars(_sr.ReadToEnd());
                                    processPISA_BQ_single_XML(ParsedCommandLineArguments.Elements, dt1970, _ret, logSerializer, _PersonIdentifier, _xml);
                                }
                            }

                            #endregion
                        }
                        else if (outerInputZipEntry.FileName.EndsWith("Session2.zip", StringComparison.Ordinal) && outerInputZipEntry.UncompressedSize != 0)
                        {
                            #region ZIP archive with XML files

                            string _PersonIdentifier = Path.GetFileName(outerInputZipEntry.FileName).Replace("-Session2.zip", "");

                            // Stops reading further entries of this archive once the case limit is reached.
                            if (ParsedCommandLineArguments.MaxNumberOfCases > 0 && _ret.GetNumberOfPersons >= ParsedCommandLineArguments.MaxNumberOfCases)
                            {
                                break;
                            }

                            if (ParsedCommandLineArguments.Verbose)
                            {
                                Console.WriteLine(_PersonIdentifier + " -- " + _ret.GetNumberOfPersons);
                            }

                            using (MemoryStream outerZIPEntryMemoryStream = new MemoryStream())
                            {
                                outerInputZipEntry.Password = ParsedCommandLineArguments.ZIPPassword;
                                outerInputZipEntry.Extract(outerZIPEntryMemoryStream);
                                outerZIPEntryMemoryStream.Position = 0;

                                using (var innerZIP = ZipFile.Read(outerZIPEntryMemoryStream))
                                {
                                    foreach (var innerZIPEntry in innerZIP.Entries)
                                    {
                                        if (!innerZIPEntry.FileName.EndsWith("_Data.zip", StringComparison.Ordinal) || innerZIPEntry.UncompressedSize == 0)
                                        {
                                            continue;
                                        }

                                        if (ParsedCommandLineArguments.Verbose)
                                        {
                                            Console.WriteLine(innerZIPEntry.FileName);
                                        }

                                        using (MemoryStream innerZIPEntryMemoryStream = new MemoryStream())
                                        {
                                            innerZIPEntry.Password = ParsedCommandLineArguments.ZIPPassword;
                                            innerZIPEntry.Extract(innerZIPEntryMemoryStream);
                                            innerZIPEntryMemoryStream.Position = 0;

                                            using (var inner2Zip = ZipFile.Read(innerZIPEntryMemoryStream))
                                            {
                                                foreach (var inner2ZIPEntry in inner2Zip.Entries)
                                                {
                                                    if (!inner2ZIPEntry.FileName.EndsWith("-log.xml", StringComparison.Ordinal) || inner2ZIPEntry.UncompressedSize == 0)
                                                    {
                                                        continue;
                                                    }

                                                    using (MemoryStream inner2ZIPEntryMemoryStream = new MemoryStream())
                                                    {
                                                        //inner2ZIPEntry.Password = ParsedCommandLineArguments.ZIPPassword;
                                                        inner2ZIPEntry.Extract(inner2ZIPEntryMemoryStream);
                                                        inner2ZIPEntryMemoryStream.Position = 0;

                                                        using (var _sr = new StreamReader(inner2ZIPEntryMemoryStream))
                                                        {
                                                            var _xml = CleanInvalidXmlChars(_sr.ReadToEnd());
                                                            processPISA_BQ_single_XML(ParsedCommandLineArguments.Elements, dt1970, _ret, logSerializer, _PersonIdentifier, _xml);
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                            }

                            #endregion
                        }
                    }
                }
            }
            catch (Exception _ex)
            {
                _ret.ExportErrors.Add("Error reading file '" + zfilename + "': " + _ex.Message);
            }
        }

        if (ParsedCommandLineArguments.MaxNumberOfCases > 0 && _ret.GetNumberOfPersons >= ParsedCommandLineArguments.MaxNumberOfCases)
        {
            if (ParsedCommandLineArguments.Verbose)
            {
                Console.WriteLine("Info: Max number of cases reached.");
            }
        }

        #region Export Universal Log Format

        _ret.UpdateRelativeTimes();
        _ret.CreateLookup();

        if (ParsedCommandLineArguments.Transform_OutputStata.Trim() != "")
        {
            if (ParsedCommandLineArguments.Verbose)
            {
                Console.WriteLine("Create ZIP archive with Stata file(s).");
            }
            _ret.ExportStata(ParsedCommandLineArguments.Transform_OutputStata, _language);
        }

        if (ParsedCommandLineArguments.Transform_OutputXLSX.Trim() != "")
        {
            if (ParsedCommandLineArguments.Verbose)
            {
                Console.WriteLine("Create XLSX file.");
            }
            _ret.ExportXLSX(ParsedCommandLineArguments);
        }

        if (ParsedCommandLineArguments.Transform_OutputZCSV.Trim() != "")
        {
            if (ParsedCommandLineArguments.Verbose)
            {
                Console.WriteLine("Create ZIP archive with CSV file(s).");
            }
            _ret.ExportCSV(ParsedCommandLineArguments);
        }

        if (ParsedCommandLineArguments.Transform_Codebook.Trim() != "")
        {
            if (ParsedCommandLineArguments.Verbose)
            {
                Console.WriteLine("Create Codebook File.");
            }
            _ret.CreateCodebook(ParsedCommandLineArguments.Transform_Codebook, _language);
        }

        if (ParsedCommandLineArguments.Transform_ConcordanceTable.Trim() != "")
        {
            if (!File.Exists(ParsedCommandLineArguments.Transform_ConcordanceTable))
            {
                if (ParsedCommandLineArguments.Verbose)
                {
                    Console.WriteLine("Create Concordance Table.");
                }
                _ret.CreateConcordanceTable(ParsedCommandLineArguments.Transform_ConcordanceTable);
            }
        }

        if (_ret.ExportErrors.Count > 0)
        {
            Console.WriteLine(_ret.ExportErrors.Count + " error(s) creating output files.");
            if (ParsedCommandLineArguments.Verbose)
            {
                for (int i = 0; i < _ret.ExportErrors.Count; i++)
                {
                    Console.WriteLine(_ret.ExportErrors[i]);
                }
            }
        }

        #endregion
    }
    catch (Exception _ex)
    {
        Console.WriteLine("Error transforming log data. Details: " + Environment.NewLine + _ex.Message.ToString());
    }
}
/// <summary>
/// Adds one event per element child of <paramref name="node"/> to <paramref name="_ret"/> and then
/// descends into each of those children. The event name is the dot-separated path of element
/// local names leading to the child.
/// </summary>
/// <param name="_logcounter">Next event id to assign.</param>
/// <param name="_ret">Container receiving the events.</param>
/// <param name="_lookup">Maps "booklet_sequenceId" to an item name; used unless the flag USE_BOOKLLET_SEQUENCE_ID is set.</param>
/// <param name="node">Node whose children are processed.</param>
/// <param name="Path">Dot-separated path of <paramref name="node"/>; empty for the document node.</param>
/// <param name="Level">Nesting level written to the event data unless hidden by flag.</param>
/// <param name="PersonIdentifier">Person identifier stored on every event.</param>
/// <param name="_rawSequenceID">Raw sequence id, part of the item id.</param>
/// <param name="_rawBooklet">Raw booklet, part of the item id and optionally written to the event data.</param>
/// <param name="_rawEventType">Raw event type, optionally written to the event data.</param>
/// <param name="_rawEventName">Raw event name, optionally written to the event data.</param>
/// <param name="_relativeTime">Relative time stored on every event.</param>
/// <param name="ParsedCommandLineArguments">Parsed command line arguments; only <c>Flags</c> is read.</param>
/// <returns>The next unused event id after this node's whole subtree has been processed.</returns>
/// <remarks>
/// All element children of a node are emitted first, then their subtrees in document order.
/// Every event receives a distinct id because the counter returned by each recursive call is
/// carried forward. Non-element children (text, comments, declarations) produce no event.
/// </remarks>
private static int piaac_R1_extract_IB_XML_process_child(int _logcounter, logXContainer _ret, Dictionary<string, string> _lookup, XmlNode node, string Path, int Level, string PersonIdentifier, string _rawSequenceID, string _rawBooklet, string _rawEventType, string _rawEventName, long _relativeTime, CommandLineArguments ParsedCommandLineArguments)
{
    if (!node.HasChildNodes)
    {
        return _logcounter;
    }

    // The item id and the flags are the same for every child, so resolve them once.
    string _itemID = _rawBooklet + "_" + _rawSequenceID;
    if (!ParsedCommandLineArguments.Flags.Contains("USE_BOOKLLET_SEQUENCE_ID"))
    {
        string _mappedItemID;
        if (_lookup.TryGetValue(_itemID, out _mappedItemID))
        {
            _itemID = _mappedItemID;
        }
    }

    bool _includeXmlAttributes = ParsedCommandLineArguments.Flags.Contains("INCLUDE_XML_ATTRIBUTES");
    bool _showBooklet = !ParsedCommandLineArguments.Flags.Contains("HIDE_BOOKLET_INFORMATION");
    bool _showEvent = !ParsedCommandLineArguments.Flags.Contains("HIDE_EVENT_INFORMATION");
    bool _showLevel = !ParsedCommandLineArguments.Flags.Contains("HIDE_XML_NESTING_LEVEL");
    bool _hasPath = Path.Trim() != "";

    // Remember each child's own path so the recursion below uses it (not a sibling's).
    var _children = new List<KeyValuePair<XmlNode, string>>();

    foreach (XmlNode n in node.ChildNodes)
    {
        // Only elements have attributes and a usable name; other node types are skipped.
        if (n.NodeType != XmlNodeType.Element)
        {
            continue;
        }

        string _name = _hasPath ? Path + "." + n.LocalName : n.LocalName;

        logxGenericLogElement _parament = new logxGenericLogElement()
        {
            PersonIdentifier = PersonIdentifier,
            EventName = _name,
            Item = _itemID,
            RelativeTime = _relativeTime,
            EventID = _logcounter
        };
        _logcounter++;

        var doc = new XDocument(new XElement(_parament.EventName));
        var root = doc.Root;

        foreach (XmlAttribute at in n.Attributes)
        {
            bool _isIgnoredByDefault = at.LocalName == "cbaloggingmodel"
                || at.LocalName == "cbascoringresultmm"
                || at.LocalName == "snapshot"
                || at.LocalName == "xmi"
                || at.LocalName == "xsi";

            // ignore these attributes by default
            if (!_isIgnoredByDefault || _includeXmlAttributes)
            {
                root.Add(new XAttribute(at.LocalName, at.Value));
            }
        }

        if (_showBooklet)
        {
            root.Add(new XAttribute("PIAACBookelt", _rawBooklet));
        }

        if (_showEvent)
        {
            root.Add(new XAttribute("PIAACEventType", _rawEventType));
            root.Add(new XAttribute("PIAACEventName", _rawEventName));
        }

        if (_showLevel)
        {
            root.Add(new XAttribute("XMLNextingLevel", Level.ToString()));
        }

        _parament.EventDataXML = doc.ToString();
        _ret.AddEvent(_parament);

        _children.Add(new KeyValuePair<XmlNode, string>(n, _name));
    }

    foreach (KeyValuePair<XmlNode, string> _child in _children)
    {
        // Carry the counter forward so descendants do not reuse ids.
        _logcounter = piaac_R1_extract_IB_XML_process_child(_logcounter, _ret, _lookup, _child.Key, _child.Value, Level + 1, PersonIdentifier, _rawSequenceID, _rawBooklet, _rawEventType, _rawEventName, _relativeTime, ParsedCommandLineArguments);
    }

    return _logcounter;
}
/// <summary>
/// Converts one tab-separated line of an LDA export into log events and adds them to
/// <paramref name="_ret"/>.
/// </summary>
/// <remarks>
/// Columns read from the line: 0 and 1 (joined with "_" to form the person identifier), 2 booklet,
/// 3 sequence id, 4 event name, 5 event type, 6 relative time, 7 event description.
/// Line 0 and lines without any tab are skipped.
/// The event description decides how the line is handled:
/// descriptions starting with "&lt;cbaloggingmodel" or "&lt;cbascoringresultmm" are expanded as XML,
/// descriptions starting with "http://localhost:8080" are ignored, and everything else is split into
/// key/value pairs separated by "|*$" (or "|$*").
/// </remarks>
/// <param name="_logcounter">Next event id to assign.</param>
/// <param name="_lineCounter">Zero-based index of <paramref name="line"/> within its file.</param>
/// <param name="line">Raw line of the export file.</param>
/// <param name="_ret">Container that receives the created events.</param>
/// <param name="_PersonIdentifier">Overwritten from the line's first two columns before it is used.</param>
/// <param name="ParsedCommandLineArguments">Supplies flags and the maximum number of cases.</param>
/// <param name="sort">Not used by this method.</param>
/// <param name="_lookup">Maps "booklet_sequenceId" keys to item names.</param>
/// <param name="Element">Item ids to include; an empty array includes every item.</param>
/// <returns>The next unused event id.</returns>
/// <exception cref="FormatException">The line has between 2 and 7 columns, or the time column is not a number.</exception>
/// <exception cref="Exception">A scoring-result description carries an unexpected event type.</exception>
private static int ReadLogDataPIAACFromLDAExportString(int _logcounter, int _lineCounter, string line, logXContainer _ret, string _PersonIdentifier, CommandLineArguments ParsedCommandLineArguments, EventDataListExtension.ESortType sort, Dictionary <string, string> _lookup, string[] Element)
{
    const int _requiredColumns = 8;

    string[] _cols = line.Split('\t');
    if (_cols.Length <= 1 || _lineCounter <= 0)
    {
        return(_logcounter);
    }

    // Columns up to index 7 are read below; report truncated lines with a meaningful message
    // instead of failing with an index error.
    if (_cols.Length < _requiredColumns)
    {
        throw new FormatException("Line " + _lineCounter + " contains only " + _cols.Length + " tab-separated column(s); at least " + _requiredColumns + " are required.");
    }

    _PersonIdentifier = _cols[0] + "_" + _cols[1];
    string _rawBooklet = _cols[2];
    string _rawSequenceID = _cols[3];
    string _rawEventName = _cols[4];
    string _rawEventType = _cols[5];
    string _relativeTimeString = _cols[6];
    string _rawEventDescription = _cols[7];

    long _relativeTime = long.Parse(_relativeTimeString, CultureInfo.InvariantCulture);

    // (The flag name is spelled exactly as the command line expects it.)
    string _itemID = _rawBooklet + "_" + _rawSequenceID;
    if (!ParsedCommandLineArguments.Flags.Contains("USE_BOOKLLET_SEQUENCE_ID"))
    {
        string _mappedItemID;
        if (_lookup.TryGetValue(_itemID, out _mappedItemID))
        {
            _itemID = _mappedItemID;
        }
    }

    // An empty filter list selects every item.
    bool _isIncluded = Element.Length == 0 || Element.Contains(_itemID);

    if (ParsedCommandLineArguments.MaxNumberOfCases > 0 && _ret.GetNumberOfPersons >= ParsedCommandLineArguments.MaxNumberOfCases)
    {
        return(_logcounter);
    }

    if (!_isIncluded)
    {
        return(_logcounter);
    }

    if (_rawEventDescription.StartsWith("<cbaloggingmodel", StringComparison.InvariantCulture))
    {
        // IB xml events
        return(piaac_R1_extract_IB_XML(_logcounter, _ret, _lookup, _rawEventDescription, _PersonIdentifier, _rawSequenceID, _rawBooklet, _rawEventType, _rawEventName, _relativeTime, ParsedCommandLineArguments));
    }

    if (_rawEventDescription.StartsWith("<cbascoringresultmm", StringComparison.InvariantCulture))
    {
        // IB item score
        // TODO: Both score types are currently always extracted. The flags INCLUDE_ALL_ITEMSCORES and
        // INCLUDE_NO_ITEMSCORES are not evaluated yet ("itemScoreResult" was meant to be ignored by default).
        if (_rawEventType != "ItemScore" && _rawEventType != "itemScoreResult")
        {
            // Report the value that was actually checked (the event type).
            throw new Exception("The value '" + _rawEventType + "' was not expected in line " + _lineCounter + ".");
        }

        return(piaac_R1_extract_IB_XML(_logcounter, _ret, _lookup, _rawEventDescription, _PersonIdentifier, _rawSequenceID, _rawBooklet, _rawEventType, _rawEventName, _relativeTime, ParsedCommandLineArguments));
    }

    if (_rawEventDescription.StartsWith("http://localhost:8080", StringComparison.InvariantCulture))
    {
        // ignore snapshots
        return(_logcounter);
    }

    // Pre-process event values
    logxGenericLogElement _parament = new logxGenericLogElement()
    {
        PersonIdentifier = _PersonIdentifier,
        EventName = _rawEventType,
        Item = _itemID,
        RelativeTime = _relativeTime,
        EventID = _logcounter
    };
    _logcounter++;

    var doc = new XDocument(new XElement(_parament.EventName));
    var root = doc.Root;

    // Both "|$*" and "|*$" occur as pair separators; normalise to one of them before splitting.
    _rawEventDescription = _rawEventDescription.Replace("|$*", "|*$");
    foreach (string _kvp in _rawEventDescription.Split("|*$"))
    {
        // Split at the first '=' only, so values may themselves contain '='.
        string[] _kv = _kvp.Split(new[] { '=' }, 2);
        if (_kv.Length == 2)
        {
            root.Add(new XAttribute(_kv[0], _kv[1]));
        }
        else
        {
            // No '=' present: store the text under the generic name "value".
            root.Add(new XAttribute("value", _kv[0]));
        }
    }

    // The attribute names below are part of the generated data and are kept verbatim.
    if (!ParsedCommandLineArguments.Flags.Contains("HIDE_BOOKLET_INFORMATION"))
    {
        root.Add(new XAttribute("PIAACBookelt", _rawBooklet));
    }

    if (!ParsedCommandLineArguments.Flags.Contains("HIDE_EVENT_INFORMATION"))
    {
        root.Add(new XAttribute("PIAACSequenceID", _rawSequenceID));
        root.Add(new XAttribute("PIAACEventType", _rawEventType));
        root.Add(new XAttribute("PIAACEventName", _rawEventName));
    }

    if (!ParsedCommandLineArguments.Flags.Contains("HIDE_TIME_INFORMATION"))
    {
        root.Add(new XAttribute("RelativeTimeString", _relativeTimeString));
    }

    _parament.EventDataXML = doc.ToString();
    _ret.AddEvent(_parament);

    return(_logcounter);
}
/// <summary>
/// Collects the *.txt files and *.zip archives named by the input folders, reads every line of them
/// into a <c>logXContainer</c>, and writes the outputs requested on the command line
/// (Stata, XLSX, zipped CSV, codebook, concordance table).
/// </summary>
/// <remarks>
/// Reading stops as soon as the configured maximum number of cases is reached. If reading a file
/// fails, the error is printed and the method returns without creating any output. Any other
/// exception is caught and printed as well; nothing is rethrown to the caller.
/// Readers are disposed after use, and file extensions are compared with
/// <see cref="StringComparison.OrdinalIgnoreCase"/> so the match does not depend on the current culture.
/// </remarks>
/// <param name="Watch">Not used by this method.</param>
/// <param name="ParsedCommandLineArguments">
/// Parsed command line. <c>RelativeTime</c> is switched on by this method when it was off.
/// </param>
public static void ProcessLogFilesOnly(Stopwatch Watch, CommandLineArguments ParsedCommandLineArguments)
{
    try
    {
        bool _personIdentifierIsNumber = ParsedCommandLineArguments.Flags.Contains("NUMERICPERSONIDENTIFIER");

        string _personIdentifier = "PersonIdentifier";
        if (ParsedCommandLineArguments.ParameterDictionary.ContainsKey("personidentifier"))
        {
            _personIdentifier = ParsedCommandLineArguments.ParameterDictionary["personidentifier"];
        }

        string _language = "ENG";
        if (ParsedCommandLineArguments.ParameterDictionary.ContainsKey("language"))
        {
            _language = ParsedCommandLineArguments.ParameterDictionary["language"];
        }

        if (!ParsedCommandLineArguments.RelativeTime)
        {
            ParsedCommandLineArguments.RelativeTime = true;
            Console.Write("Note: Changed to relative times. ");
        }

        EventDataListExtension.ESortType sort = EventDataListExtension.ESortType.ElementAndTime;
        if (ParsedCommandLineArguments.Flags.Contains("DONT_ORDER_EVENTS"))
        {
            sort = EventDataListExtension.ESortType.None;
        }

        Dictionary <string, string> _lookup = LogDataTransformer_PIAACR1_Module_V01.GetPIAACR1LookupDictionary();

        #region Search Source Files

        List <string> _listOfTXTFiles = new List <string>();
        List <string> _listOfZIPArchivesWithTXTFiles = new List <string>();

        foreach (string inFolder in ParsedCommandLineArguments.Transform_InputFolders)
        {
            if (File.Exists(inFolder))
            {
                // An input entry may name a single file instead of a folder.
                if (inFolder.EndsWith(".zip", StringComparison.OrdinalIgnoreCase))
                {
                    _listOfZIPArchivesWithTXTFiles.Add(inFolder);
                }
                else if (inFolder.EndsWith(".txt", StringComparison.OrdinalIgnoreCase))
                {
                    _listOfTXTFiles.Add(inFolder);
                }

                continue;
            }

            if (!Directory.Exists(inFolder))
            {
                if (ParsedCommandLineArguments.Verbose)
                {
                    Console.WriteLine("Warning: Directory not exists: '" + inFolder + "'.");
                }

                continue;
            }

            _listOfTXTFiles.AddRange(Directory.GetFiles(inFolder, "*.txt", SearchOption.AllDirectories));
            _listOfZIPArchivesWithTXTFiles.AddRange(Directory.GetFiles(inFolder, "*.zip", SearchOption.AllDirectories));
        }

        #endregion

        #region Process Source Files

        logXContainer _ret = new logXContainer()
        {
            PersonIdentifierIsNumber = _personIdentifierIsNumber,
            PersonIdentifierName = _personIdentifier
        };
        _ret.LoadCodebookDictionary(ParsedCommandLineArguments.Transform_Dictionary);

        int _logcounter = 0;

        foreach (string zfilename in _listOfZIPArchivesWithTXTFiles)
        {
            using (ZipFile zip = ZipFile.Read(zfilename))
            {
                foreach (var entry in zip)
                {
                    if (piaac_R1_max_cases_reached(_ret, ParsedCommandLineArguments))
                    {
                        break;
                    }

                    // TODO: Check FitsMask - every entry is processed for now;
                    // CommandLineArguments.FitsMask(entry.FileName, ParsedCommandLineArguments.Mask) is not applied yet.
                    if (ParsedCommandLineArguments.Verbose)
                    {
                        Console.Write("Info: Read File '" + entry.FileName + "' ");
                    }

                    using (MemoryStream zipStream = new MemoryStream())
                    {
                        entry.ExtractWithPassword(zipStream, "");
                        zipStream.Position = 0;
                        try
                        {
                            using (StreamReader sr = new StreamReader(zipStream))
                            {
                                _logcounter = piaac_R1_read_export_lines(sr, _logcounter, _ret, _personIdentifier, ParsedCommandLineArguments, sort, _lookup);
                            }
                        }
                        catch (Exception _ex)
                        {
                            Console.WriteLine("Error processing file '" + entry.FileName + "': " + _ex.Message);
                            return;
                        }
                    }

                    Console.WriteLine("ok.");
                }
            }

            if (piaac_R1_max_cases_reached(_ret, ParsedCommandLineArguments))
            {
                break;
            }
        }

        foreach (string txtFile in _listOfTXTFiles)
        {
            if (piaac_R1_max_cases_reached(_ret, ParsedCommandLineArguments))
            {
                break;
            }

            // TODO: Check FitsMask - every file is processed for now;
            // CommandLineArguments.FitsMask(Path.GetFileName(txtFile), ParsedCommandLineArguments.Mask) is not applied yet.
            if (ParsedCommandLineArguments.Verbose)
            {
                Console.WriteLine("Info: Read File '" + Path.GetFileName(txtFile) + "' ");
            }

            try
            {
                // Dispose the reader so the file handle is released once the file has been read.
                using (StreamReader sr = new StreamReader(txtFile))
                {
                    _logcounter = piaac_R1_read_export_lines(sr, _logcounter, _ret, _personIdentifier, ParsedCommandLineArguments, sort, _lookup);
                }
            }
            catch (Exception _ex)
            {
                Console.WriteLine("Error processing file '" + txtFile + "': " + _ex.Message);
                return;
            }

            Console.WriteLine("ok.");
        }

        #endregion

        #region Export Universal Log Format

        // TODO: Check!
        //_ret.UpdateRelativeTimes();
        _ret.CreateLookup();

        if (ParsedCommandLineArguments.Transform_OutputStata.Trim() != "")
        {
            if (ParsedCommandLineArguments.Verbose)
            {
                Console.WriteLine("Create ZIP archive with Stata file(s).");
            }

            _ret.ExportStata(ParsedCommandLineArguments.Transform_OutputStata, _language);
        }

        if (ParsedCommandLineArguments.Transform_OutputXLSX.Trim() != "")
        {
            if (ParsedCommandLineArguments.Verbose)
            {
                Console.WriteLine("Create XLSX file.");
            }

            _ret.ExportXLSX(ParsedCommandLineArguments);
        }

        if (ParsedCommandLineArguments.Transform_OutputZCSV.Trim() != "")
        {
            if (ParsedCommandLineArguments.Verbose)
            {
                Console.WriteLine("Create ZIP archive with CSV file(s).");
            }

            _ret.ExportCSV(ParsedCommandLineArguments);
        }

        if (ParsedCommandLineArguments.Transform_Codebook.Trim() != "")
        {
            if (ParsedCommandLineArguments.Verbose)
            {
                Console.WriteLine("Create Codebook File.");
            }

            _ret.CreateCodebook(ParsedCommandLineArguments.Transform_Codebook, _language);
        }

        // An existing concordance table is never overwritten.
        if (ParsedCommandLineArguments.Transform_ConcordanceTable.Trim() != "" && !File.Exists(ParsedCommandLineArguments.Transform_ConcordanceTable))
        {
            if (ParsedCommandLineArguments.Verbose)
            {
                Console.WriteLine("Create Concordance Table.");
            }

            _ret.CreateConcordanceTable(ParsedCommandLineArguments.Transform_ConcordanceTable);
        }

        if (_ret.ExportErrors.Count > 0)
        {
            Console.WriteLine(_ret.ExportErrors.Count + " error(s) creating output files.");
            if (ParsedCommandLineArguments.Verbose)
            {
                for (int i = 0; i < _ret.ExportErrors.Count; i++)
                {
                    Console.WriteLine(_ret.ExportErrors[i]);
                }
            }
        }

        #endregion
    }
    catch (Exception _ex)
    {
        Console.WriteLine("Error transforming log data. Details: " + Environment.NewLine + _ex.Message.ToString());
    }
}

// Returns true when a case limit is configured and has been reached; announces it in verbose mode.
private static bool piaac_R1_max_cases_reached(logXContainer _ret, CommandLineArguments ParsedCommandLineArguments)
{
    if (ParsedCommandLineArguments.MaxNumberOfCases > 0 && _ret.GetNumberOfPersons >= ParsedCommandLineArguments.MaxNumberOfCases)
    {
        if (ParsedCommandLineArguments.Verbose)
        {
            Console.WriteLine("Info: Max number of cases reached.");
        }

        return(true);
    }

    return(false);
}

// Feeds every line of the reader to ReadLogDataPIAACFromLDAExportString (with an empty element filter)
// until the reader is exhausted or the case limit is reached; returns the updated event counter.
private static int piaac_R1_read_export_lines(TextReader reader, int _logcounter, logXContainer _ret, string _personIdentifier, CommandLineArguments ParsedCommandLineArguments, EventDataListExtension.ESortType sort, Dictionary <string, string> _lookup)
{
    string[] _noElementFilter = Array.Empty <string>();
    int _lineCounter = 0;
    string line;

    while ((line = reader.ReadLine()) != null)
    {
        _logcounter = ReadLogDataPIAACFromLDAExportString(_logcounter, _lineCounter, line, _ret, _personIdentifier, ParsedCommandLineArguments, sort, _lookup, _noElementFilter);
        _lineCounter++;

        if (piaac_R1_max_cases_reached(_ret, ParsedCommandLineArguments))
        {
            break;
        }
    }

    return(_logcounter);
}