示例#1
0
        /// <summary>
        /// Checks the number of fields <c>CsvParseHelper.GetAllFields</c> returns for three
        /// sample lines: a data row with quoted fields that contain commas, a plain
        /// two-column header, and a data row that ends with a lone quote character.
        /// </summary>
        public void GetFields_StateUnderTest_ExpectedBehavior()
        {
            // Arrange: quoted fields with embedded commas must each count as one field.
            string line = ",AT,MLD,\"Mollersdorf, Baden\",\"Mollersdorf, Baden\",3,RL,--3-----,1301,,4801N 01618E,";

            // Act
            var result = CsvParseHelper.GetAllFields(line);

            // Assert: AreEqual reports the actual count on failure, unlike IsTrue(a == b).
            Assert.AreEqual(12, result.Length);

            // Arrange: simple header line without quoting.
            line = "FunctionCode,FunctionDescription";

            // Act
            result = CsvParseHelper.GetAllFields(line);

            // Assert
            Assert.AreEqual(2, result.Length);

            // Arrange: the last field consists of a single, unterminated quote.
            line = ",MA,MDT,Midelt,Midelt,KHN,AA,--3-----,1607,,3240N 00444W,\"";

            // Act
            result = CsvParseHelper.GetAllFields(line);

            // Assert
            Assert.AreEqual(12, result.Length);
        }
示例#2
0
 /// <summary>
 /// Lazily yields one row per remaining line of the underlying stream reader.
 /// Each line is split with <c>CsvParseHelper.GetAllFields</c> and converted by <c>BuildRow</c>.
 /// </summary>
 /// <returns>A deferred sequence of rows; lines are read only as the sequence is enumerated.</returns>
 public IEnumerable <DataCellCollection> GetDataRowEntries()
 {
     while (!this._streamReader.EndOfStream)
     {
         string rawLine = this._streamReader.ReadLine();
         string[] fields = CsvParseHelper.GetAllFields(rawLine);

         yield return this.BuildRow(fields);
     }
 }
示例#3
0
        /// <summary>
        /// Logs the source file's properties, opens the file for reading, and loads its
        /// first line as the header row into <c>Columns</c>, which is then passed to the
        /// data cleaner.
        /// </summary>
        /// <returns><c>true</c> whenever the method runs to completion.</returns>
        public bool Init()
        {
            this._logger.Log($"Preparing to Load Headers from {this._sourceFileName}", EventLevel.LogAlways);

            // Collect the file metadata shown in the log table.
            FileInfo sourceFile = new FileInfo(this._sourceFileName);
            string tableTitle = $"File Properties {sourceFile.FullName}";
            string[] tableHeader = new string[] { "Name", "Value" };
            List <string[]> tableRows = new List <string[]>
            {
                new[] { nameof(sourceFile.Attributes), sourceFile.Attributes.ToString() },
                new[] { nameof(sourceFile.Length), (sourceFile.Length / 1024) + " KB" },
                new[] { nameof(sourceFile.CreationTime), sourceFile.CreationTime.ToString() },
                new[] { nameof(sourceFile.LastAccessTime), sourceFile.LastAccessTime.ToString() },
                new[] { nameof(sourceFile.Extension), sourceFile.Extension.ToString() },
            };
            this._logger.ShowTable(tableTitle, tableHeader, tableRows);

            //TODO : update to use IdiskIohandler
            // The reader is kept in a field, so it stays open after this method returns.
            this._streamReader = new StreamReader(this._sourceFileName);

            // The first line of the file is treated as the header row.
            string[] headerRow = CsvParseHelper.GetAllFields(this._streamReader.ReadLine());
            var headerColumns = headerRow.Select(name => new Column() { Name = name }).ToArray();
            this.Columns.AddRange(headerColumns);

            this._dataCleaner.CleanHeader(this.Columns);
            this._logger.Log($"Loaded file Headers from {this._sourceFileName}", EventLevel.LogAlways);

            return true;
        }
示例#4
0
        /// <summary>
        /// Copies the data source into a temporary file, passing every row whose field
        /// count differs from the header's through <c>TreatMisAlignment</c>. It then backs
        /// up the original file, overwrites it with the cleaned copy and re-runs the analysis.
        /// </summary>
        /// <param name="dataSourceFileName">File to clean; it is overwritten with the cleaned content.</param>
        /// <param name="csvTypeDef">Path of an optional JSON type definition; it is deserialized when present but not used yet.</param>
        /// <param name="attempt">Attempt counter forwarded to <c>RunDataAnalysis</c>.</param>
        /// <returns>The result of <c>RunDataAnalysis(attempt)</c> on the cleaned file.</returns>
        public bool PerformAutoClean(string dataSourceFileName, string csvTypeDef, int attempt)
        {
            // Format "N" yields the same 32 hex digits as ToString().Replace("-", "").
            string tempId = Guid.NewGuid().ToString("N") + ".csv";

            this._logger.LogInformation($"Attempting AutoClean : operationId{tempId}");

            if (this._diskIOHandler.FileExists(csvTypeDef))
            {
                // type based cleaning ; not implemented
                // NOTE(review): the deserialized definition is currently unused.
                Dictionary <string, List <KeyValuePair <string, string> > > dictionary = JsonConvert.DeserializeObject <Dictionary <string, List <KeyValuePair <string, string> > > >(this._diskIOHandler.FileReadAllText(csvTypeDef));
            }

            // Zero-based position of the current data row (the header is not counted).
            int index = 0;

            using (StreamWriter streamWriter = new StreamWriter(this._diskIOHandler.FileWriteTextStream(tempId)))
            {
                using (StreamReader streamReader = new StreamReader(this._diskIOHandler.FileReadTextStream(dataSourceFileName)))
                {
                    // The header row is copied verbatim and defines the expected field count.
                    string   line      = streamReader.ReadLine();
                    string[] headerRow = CsvParseHelper.GetAllFields(line);
                    streamWriter.WriteLine(line);

                    while (!streamReader.EndOfStream)
                    {
                        line = streamReader.ReadLine();
                        string[] data = CsvParseHelper.GetAllFields(line);

                        // Treat data misalignment.
                        if (data.Length != headerRow.Length)
                        {
                            string[] lines = this.TreatMisAlignment(tempId, index, headerRow, line, data);
                            foreach (string alignedLine in lines)
                            {
                                streamWriter.WriteLine(alignedLine);
                            }
                        }
                        else
                        {
                            // The row is already aligned, so keep it exactly as read.
                            // Re-joining the parsed fields with "," cannot restore quoting the
                            // parser may have removed, which would split fields that contain
                            // commas and misalign a row that was correct.
                            streamWriter.WriteLine(line);
                        }

                        index++;
                    }
                }
            }

            // Keep a backup of the original before overwriting it with the cleaned copy.
            this._diskIOHandler.FileCopy(dataSourceFileName, $"{dataSourceFileName}{tempId}.bak");
            this._diskIOHandler.FileCopy(tempId, dataSourceFileName, true);
            return this.RunDataAnalysis(attempt);
        }
示例#5
0
        /// <summary>
        /// Scans the configured data source twice: line by line with
        /// <c>CsvParseHelper.GetAllFields</c>, then through the <c>IDataSource</c> from the
        /// factory. Rows whose cell count differs from the header's are logged as errors.
        /// </summary>
        /// <param name="attempt">Number of analysis attempts made so far; values above 2 abort immediately.</param>
        /// <returns>
        /// <c>true</c> when both scans saw the same number of rows and the second (CSV
        /// parser) scan found no misaligned row; <c>false</c> when the attempt limit is
        /// exceeded; otherwise the result of <c>PerformAutoClean</c> with <c>attempt + 1</c>.
        /// </returns>
        public bool RunDataAnalysis(int attempt = 0)
        {
            if (attempt > 2)
            {
                this._logger.Log("Attempt Limit has reached , Aborting analiysys", EventLevel.Error);
                return false;
            }

            this._logger.LogInformation("Runing Analysis");
            this._logger.LogInformation("Scanning using text parser started");

            // Pass 1: raw text lines split by the helper.
            int textLines  = 0;
            int textErrors = 0;

            using (StreamReader reader = new StreamReader(this._diskIOHandler.FileReadTextStream(this._runtimeSettings.DataSourceFileName)))
            {
                int expectedCells = CsvParseHelper.GetAllFields(reader.ReadLine()).Length;

                while (!reader.EndOfStream)
                {
                    string rawLine     = reader.ReadLine();
                    int    actualCells = CsvParseHelper.GetAllFields(rawLine).Length;

                    if (actualCells != expectedCells)
                    {
                        this._logger.Log($"Error Data Alignment mismatch cellcount {actualCells} != headerCount {expectedCells} att position {textLines} , line :{rawLine}", EventLevel.Error);
                        textErrors++;
                    }

                    textLines++;
                }
            }

            this._logger.LogInformation($"Scanning using text parser completed with {textErrors} errors ");
            this._logger.LogInformation("Scanning using csv parser started");

            // Pass 2: rows as produced by the data source.
            int csvLines  = 0;
            int csvErrors = 0;

            using (IDataSource dataSource = this._dataSourceFactory.GetDataSource(this._runtimeSettings.DataSourceFileName))
            {
                int expectedColumns = dataSource.GetHeaders().Length;

                foreach (DataCellCollection row in dataSource.GetDataRowEntries())
                {
                    if (row.Cells.Count != expectedColumns)
                    {
                        this._logger.Log($"Error Data Alignment mismatch via csv parser cellcount {row.Cells.Count} != headerCount {expectedColumns} att position {csvLines}", EventLevel.Error);
                        csvErrors++;
                    }

                    csvLines++;
                }
            }

            this._logger.LogInformation($"Scanning using csv parser completed with {csvErrors} errors");
            this._runtimeSettings.TotalRows = textLines;

            // Only the CSV-parser error count gates the result; the text-scan count is just logged.
            bool analysisPassed = textLines == csvLines && csvErrors == 0;
            if (analysisPassed)
            {
                this._logger.LogInformation("Text to record size mateched & alignment test passed ");
                return true;
            }

            this._logger.Log($"Found mismatch in number of textLines {textLines} & Csv Lines {csvLines} - Please clean & make sure all the data is properly parsable", EventLevel.Error);

            string dataSourceFileName = this._runtimeSettings.DataSourceFileName;
            string typeDefinitionFile = Path.GetFileNameWithoutExtension(dataSourceFileName) + "-csvdef.json";
            return this.PerformAutoClean(dataSourceFileName, typeDefinitionFile, attempt + 1);
        }