// Checks the number of fields CsvParseHelper.GetAllFields returns for three sample lines:
// one with quoted fields that contain commas, a plain two-column header, and one that
// ends in a lone quote character.
public void GetFields_StateUnderTest_ExpectedBehavior()
{
    // Arrange: two of the quoted fields contain a comma each.
    string line = ",AT,MLD,\"Mollersdorf, Baden\",\"Mollersdorf, Baden\",3,RL,--3-----,1301,,4801N 01618E,";

    // Act
    var result = CsvParseHelper.GetAllFields(line);

    // Assert: AreEqual reports the actual length on failure, unlike IsTrue(a == b).
    Assert.AreEqual(12, result.Length);

    // Arrange: simple header without quoting.
    line = "FunctionCode,FunctionDescription";

    // Act
    result = CsvParseHelper.GetAllFields(line);

    // Assert
    Assert.AreEqual(2, result.Length);

    // Arrange: the line ends with an unbalanced quote character.
    line = ",MA,MDT,Midelt,Midelt,KHN,AA,--3-----,1607,,3240N 00444W,\"";

    // Act
    result = CsvParseHelper.GetAllFields(line);

    // Assert
    Assert.AreEqual(12, result.Length);
}
/// <summary>
/// Lazily reads the remaining lines from the stream reader and yields one row per line.
/// </summary>
/// <returns>
/// A deferred sequence; each element is produced by <c>BuildRow</c> from the fields that
/// <c>CsvParseHelper.GetAllFields</c> extracts from one line.
/// </returns>
public IEnumerable<DataCellCollection> GetDataRowEntries()
{
    // Iterator block: nothing is read from the stream until the caller enumerates.
    while (!this._streamReader.EndOfStream)
    {
        var rawLine = this._streamReader.ReadLine();
        var fields = CsvParseHelper.GetAllFields(rawLine);

        yield return this.BuildRow(fields);
    }
}
/// <summary>
/// Logs the properties of the source file, opens it for reading and loads its first line
/// as the header row into <c>Columns</c>.
/// </summary>
/// <returns>
/// <c>true</c> once the header columns have been added and handed to the data cleaner;
/// <c>false</c> when the file contains no lines, so there is no header to load.
/// </returns>
public bool Init()
{
    this._logger.Log($"Preparing to Load Headers from {this._sourceFileName}", EventLevel.LogAlways);

    FileInfo fileInfo = new FileInfo(this._sourceFileName);
    this._logger.ShowTable(
        $"File Properties {fileInfo.FullName}",
        new string[] { "Name", "Value" },
        new List<string[]>
        {
            new string[] { nameof(fileInfo.Attributes), fileInfo.Attributes.ToString() },
            // Integer division: the size is reported in whole kilobytes.
            new string[] { nameof(fileInfo.Length), (fileInfo.Length / 1024) + " KB" },
            new string[] { nameof(fileInfo.CreationTime), fileInfo.CreationTime.ToString() },
            new string[] { nameof(fileInfo.LastAccessTime), fileInfo.LastAccessTime.ToString() },
            new string[] { nameof(fileInfo.Extension), fileInfo.Extension },
        });

    // TODO : update to use IdiskIohandler
    this._streamReader = new StreamReader(this._sourceFileName);

    // ReadLine returns null when the stream is already at its end, i.e. the file is empty.
    // Stop here instead of passing null on to the parser.
    string headerLine = this._streamReader.ReadLine();
    if (headerLine == null)
    {
        this._logger.Log($"No header row found in {this._sourceFileName}", EventLevel.Error);
        return false;
    }

    string[] headerRow = CsvParseHelper.GetAllFields(headerLine);
    this.Columns.AddRange(headerRow.Select(item => new Column() { Name = item }).ToArray());
    this._dataCleaner.CleanHeader(this.Columns);

    this._logger.Log($"Loaded file Headers from {this._sourceFileName}", EventLevel.LogAlways);
    return true;
}
/// <summary>
/// Copies the data source into a temporary file row by row, sending every row whose field
/// count differs from the header's through <c>TreatMisAlignment</c>. It then copies the
/// original to a <c>.bak</c> file, copies the temporary file over the data source and
/// re-runs the analysis.
/// </summary>
/// <param name="dataSourceFileName">File to clean; it is the target of the final copy.</param>
/// <param name="csvTypeDef">
/// Path of a JSON type-definition file. When it exists it is deserialized, but the result
/// is not used by this method.
/// </param>
/// <param name="attempt">Attempt counter forwarded unchanged to <c>RunDataAnalysis</c>.</param>
/// <returns>The value returned by <c>RunDataAnalysis(attempt)</c>.</returns>
public bool PerformAutoClean(string dataSourceFileName, string csvTypeDef, int attempt)
{
    // The "N" format is the 32-digit GUID without hyphens.
    string tempId = Guid.NewGuid().ToString("N") + ".csv";
    this._logger.LogInformation($"Attempting AutoClean : operationId{tempId}");

    if (this._diskIOHandler.FileExists(csvTypeDef))
    {
        // type based cleaning ; not implemented
        // NOTE(review): the parsed definition is never read afterwards.
        string typeDefJson = this._diskIOHandler.FileReadAllText(csvTypeDef);
        Dictionary<string, List<KeyValuePair<string, string>>> typeDefinitions =
            JsonConvert.DeserializeObject<Dictionary<string, List<KeyValuePair<string, string>>>>(typeDefJson);
    }

    int rowIndex = 0;
    using (StreamWriter writer = new StreamWriter(this._diskIOHandler.FileWriteTextStream(tempId)))
    using (StreamReader reader = new StreamReader(this._diskIOHandler.FileReadTextStream(dataSourceFileName)))
    {
        // The first line is the header; it is written back verbatim.
        string currentLine = reader.ReadLine();
        string[] headerFields = CsvParseHelper.GetAllFields(currentLine);
        writer.WriteLine(currentLine);

        while (!reader.EndOfStream)
        {
            currentLine = reader.ReadLine();
            string[] fields = CsvParseHelper.GetAllFields(currentLine);

            if (fields.Length == headerFields.Length)
            {
                // NOTE(review): re-joining with "," presumably relies on GetAllFields keeping the
                // quotes around fields that contain commas - confirm.
                writer.WriteLine(string.Join(",", fields));
            }
            else
            {
                // Field count differs from the header: let TreatMisAlignment produce the replacement line(s).
                string[] repairedLines = this.TreatMisAlignment(tempId, rowIndex, headerFields, currentLine, fields);
                foreach (string repairedLine in repairedLines)
                {
                    writer.WriteLine(repairedLine);
                }
            }

            rowIndex++;
        }
    }

    // Keep a copy of the original before the cleaned file is copied over it.
    string backupFileName = $"{dataSourceFileName}{tempId}.bak";
    this._diskIOHandler.FileCopy(dataSourceFileName, backupFileName);
    this._diskIOHandler.FileCopy(tempId, dataSourceFileName, true);

    return this.RunDataAnalysis(attempt);
}
/// <summary>
/// Scans the configured data source twice, once line by line with <c>CsvParseHelper</c> and
/// once through an <c>IDataSource</c>, logging every row whose field count differs from the
/// header's. When the check fails it hands over to <c>PerformAutoClean</c> with the next
/// attempt number.
/// </summary>
/// <param name="attempt">Attempt counter; any value above 2 aborts immediately.</param>
/// <returns>
/// <c>true</c> when both scans counted the same number of rows and the second scan logged no
/// mismatch; <c>false</c> when the attempt limit is exceeded; otherwise the value returned by
/// <c>PerformAutoClean</c>.
/// </returns>
public bool RunDataAnalysis(int attempt = 0)
{
    if (attempt > 2)
    {
        this._logger.Log("Attempt Limit has reached , Aborting analiysys", EventLevel.Error);
        return false;
    }

    this._logger.LogInformation("Runing Analysis");

    int textRowCount = 0;
    int csvRowCount = 0;
    int mismatchCount = 0;

    // Pass 1: read the file as plain text and parse each line individually.
    this._logger.LogInformation("Scanning using text parser started");
    using (StreamReader reader = new StreamReader(this._diskIOHandler.FileReadTextStream(this._runtimeSettings.DataSourceFileName)))
    {
        int textHeaderCount = CsvParseHelper.GetAllFields(reader.ReadLine()).Length;
        while (!reader.EndOfStream)
        {
            string rawLine = reader.ReadLine();
            int fieldCount = CsvParseHelper.GetAllFields(rawLine).Length;
            if (fieldCount != textHeaderCount)
            {
                this._logger.Log($"Error Data Alignment mismatch cellcount {fieldCount} != headerCount {textHeaderCount} att position {textRowCount} , line :{rawLine}", EventLevel.Error);
                mismatchCount++;
            }

            textRowCount++;
        }
    }

    this._logger.LogInformation($"Scanning using text parser completed with {mismatchCount} errors ");

    // NOTE(review): the counter is reset here, so only mismatches found by the second pass
    // influence the final pass/fail decision below.
    mismatchCount = 0;

    // Pass 2: read the same file through the data source abstraction.
    this._logger.LogInformation("Scanning using csv parser started");
    using (IDataSource dataSource = this._dataSourceFactory.GetDataSource(this._runtimeSettings.DataSourceFileName))
    {
        int csvHeaderCount = dataSource.GetHeaders().Length;
        foreach (DataCellCollection entry in dataSource.GetDataRowEntries())
        {
            if (entry.Cells.Count != csvHeaderCount)
            {
                this._logger.Log($"Error Data Alignment mismatch via csv parser cellcount {entry.Cells.Count} != headerCount {csvHeaderCount} att position {csvRowCount}", EventLevel.Error);
                mismatchCount++;
            }

            csvRowCount++;
        }
    }

    this._logger.LogInformation($"Scanning using csv parser completed with {mismatchCount} errors");
    this._runtimeSettings.TotalRows = textRowCount;

    bool analysisPassed = textRowCount == csvRowCount && mismatchCount == 0;
    if (!analysisPassed)
    {
        this._logger.Log($"Found mismatch in number of textLines {textRowCount} & Csv Lines {csvRowCount} - Please clean & make sure all the data is properly parsable", EventLevel.Error);

        // The type-definition file name is the data source's base name plus "-csvdef.json".
        string dataSourceFileName = this._runtimeSettings.DataSourceFileName;
        string csvTypeDef = Path.GetFileNameWithoutExtension(dataSourceFileName) + "-csvdef.json";
        return this.PerformAutoClean(dataSourceFileName, csvTypeDef, attempt + 1);
    }

    this._logger.LogInformation("Text to record size mateched & alignment test passed ");
    return true;
}