/// <summary>
/// Worker loop: takes raw lines from <paramref name="inputCollection"/>, splits them using the
/// configured Delimiter (and Qualifier, when set) and offers the resulting rows to
/// <paramref name="outputCollection"/> until HasWork turns false.
/// </summary>
/// <param name="inputCollection">Source of unsplit lines; polled with TryTake.</param>
/// <param name="outputCollection">Destination for split rows; TryAdd is retried until it succeeds.</param>
/// <param name="pauseEvent">Work proceeds only while this event is signaled.</param>
/// <param name="progress">Receives the processed-row count every 1000 rows and once at the end.</param>
/// <exception cref="InvalidOperationException">Thrown when Delimiter has not been configured.</exception>
private void DoWorkAndReport(IProducerConsumerCollection<string> inputCollection, IProducerConsumerCollection<object[]> outputCollection, ManualResetEvent pauseEvent, IProgress<int> progress)
{
    if (Delimiter == null)
    {
        var outputMessage = "Delimiter is not set for this Stringsplitter";
        LogService.Instance.Error(outputMessage);
        throw new InvalidOperationException(outputMessage);
    }

    // Hoisted out of the loop: the split strategy cannot change mid-run.
    string[] splitDelimiters = new string[] { Delimiter };
    bool useQualifier = Qualifier != null;
    string inputLine;
    int processedCount = 0;

    while (HasWork)
    {
        pauseEvent.WaitOne();
        if (inputCollection.TryTake(out inputLine))
        {
            // Qualifier-aware splitting needs the custom splitter; the plain case uses string.Split.
            string[] outputRow = useQualifier
                ? StringAndText.SplitRow(inputLine, Delimiter, Qualifier, false)
                : inputLine.Split(splitDelimiters, StringSplitOptions.None);

            // Retry the add, honoring pause requests while the downstream collection is full.
            while (!outputCollection.TryAdd(outputRow))
            {
                pauseEvent.WaitOne();
            }
            processedCount++;

            // FIX: report only after actual work; the original also reported on idle
            // iterations (0 % 1000 == 0), spamming progress with zeros.
            if (processedCount % 1000 == 0)
            {
                progress.Report(processedCount);
            }
        }
        else
        {
            // FIX: the original non-qualifier branch had no backoff and busy-waited at
            // 100% CPU when the input was empty; both paths now back off identically.
            Thread.Sleep(10);
        }
    }
    // Final report so consumers see the exact total.
    progress.Report(processedCount);
}
/// <summary>
/// Inspects the first line of <paramref name="file"/> to count its columns and adds one
/// ConcurrentStack per column to ColumnCollection.
/// </summary>
/// <param name="file">Path of the delimited source file to inspect.</param>
private void Init(string file)
{
    // FIX: the original leaked the StreamReader (never disposed); dispose deterministically.
    using (StreamReader reader = new StreamReader(file))
    {
        var firstLine = reader.ReadLine();
        // NOTE(review): assumes the file has at least one line; a null firstLine would be
        // passed to SplitRow — confirm upstream guarantees a non-empty file.
        int columnCount = StringAndText.SplitRow(firstLine, Context.Delimiter.ToString(), "\"", false).Length;
        for (int i = 0; i < columnCount; i++)
        {
            ColumnCollection.Add(new ConcurrentStack<string>());
        }
    }
}
/// <summary>
/// Verifies SplitRow both without a qualifier and with a qualifier that shields
/// embedded delimiters from splitting.
/// </summary>
public void QualifierSplitRowTest()
{
    // No qualifier: a plain three-column split.
    string input = @"foo|bar|zoo";
    string[] output = StringAndText.SplitRow(input, "|", null, false);
    // FIX: Assert.AreEqual reports expected/actual on failure, unlike the original
    // Assert.IsTrue(output.Count() == 3); also .Length instead of LINQ Count() on an array.
    Assert.AreEqual(3, output.Length);

    // Qualified field: '|' characters inside the quotes must not split the row.
    input = "foo|\"b|a|r\"|zoo";
    output = StringAndText.SplitRow(input, "|", "\"", false);
    Assert.AreEqual(3, output.Length);
    Assert.AreEqual(expected: "b|a|r", actual: output[1]);
}
/// <summary>
/// Samples the first configured number of data lines from the source file, distributes the
/// values over the per-column stacks, runs type suggestion and returns one suggested type
/// name per column.
/// </summary>
/// <returns>Array of suggested type names, one entry per column that produced a suggestion.</returns>
public string[] SuggestDataType()
{
    Init(Context.SourceFilePath); //initialize variables

    // Put the first N lines into the per-column stacks.
    using (StreamReader reader = new StreamReader(Context.SourceFilePath))
    {
        var delimiterAsString = Context.Delimiter.ToString();
        if (Context.FirstLineContainsHeaders) { reader.ReadLine(); } //skip header line
        if (Context.SourceFileIsSourcedFromDial) { reader.ReadLine(); } //skip extra line for DIAL data

        string line;
        for (int x = 0; x < ConfigVariables.Instance.Type_Suggestion_Sample_Lines_To_Scan; x++)
        {
            // FIX: break at EOF; the original kept looping and re-reading null until the
            // sample count was exhausted.
            if ((line = reader.ReadLine()) == null) { break; }
            string[] splitLine = StringAndText.SplitRow(line, delimiterAsString, "\"", false);
            // NOTE(review): a ragged row with more fields than ColumnCollection has stacks
            // will throw here (same as the original) — confirm that is the intended behavior.
            for (int i = 0; i < splitLine.Length; i++)
            {
                ColumnCollection.ElementAt(i).Push(splitLine[i]);
            }
        }
    }

    // Suggest data types; per the original comment, DoSuggestType pushes the suggested
    // type name on top of each column's stack.
    DoSuggestType(Context.StringPadding);
    List<string> types = new List<string>();
    foreach (ConcurrentStack<string> column in ColumnCollection)
    {
        string typeName;
        if (column.TryPop(out typeName))
        {
            types.Add(typeName);
        }
    }
    return types.ToArray();
}
/// <summary>
/// Verifies SplitRow with single- and multi-character delimiters; with a two-character
/// delimiter a lone delimiter character must remain part of the data.
/// </summary>
public void SplitRowTest()
{
    // Single-char delimiter.
    string input = @"foo|bar|zoo";
    string[] output = StringAndText.SplitRow(input, "|", @"\", true);
    // FIX: Assert.AreEqual reports expected/actual on failure, unlike the original
    // Assert.IsTrue(output.Count() == 3); also .Length instead of LINQ Count() on an array.
    Assert.AreEqual(3, output.Length);

    // Double-char delimiter: "||" splits, the leftover single '|' stays in the field.
    input = @"foo|||bar||zoo";
    output = StringAndText.SplitRow(input, "||", @"\", true);
    Assert.AreEqual(3, output.Length);
    Assert.AreEqual(expected: "|bar", actual: output[1]);
}
/// <summary>
/// Worker entry point (thread-pool callback signature): drains lines from the extractor,
/// splits each into a record, maps it (optionally through a column selection) into a row
/// and posts it to a SimpleSqlTableLoader, raising a progress event every numberOfLines rows.
/// </summary>
/// <param name="x">Must be the ConcurrentFlatFileExtractor to read from.</param>
/// <exception cref="ArgumentException">Thrown when <paramref name="x"/> is not a ConcurrentFlatFileExtractor.</exception>
/// <exception cref="InvalidOperationException">Thrown on a column-count mismatch when the context is not configured to skip errors.</exception>
private void ProcessRecords(object x)
{
    ConcurrentFlatFileExtractor reader = x as ConcurrentFlatFileExtractor;
    // FIX: the original would NRE on the first TryExtractLine if x had the wrong type.
    if (reader == null)
    {
        throw new ArgumentException("Expected a ConcurrentFlatFileExtractor", nameof(x));
    }
    SimpleSqlTableLoader writer = new SimpleSqlTableLoader(m_Context);
    string line;
    int rowsProcessed = 0;
    int numColumns = m_Context.ColumnNames.Count();

    // If a selection is made on the source columns we compute the ordinal rankings we need here.
    int[] ordinalRankings = null;
    // If these counts differ, a selection was made.
    if (numColumns != m_Context.ColumnNamesSelection.Count())
    {
        ordinalRankings = new int[m_Context.ColumnNamesSelection.Count()];
        int indexRankings = 0;
        // For every name in the total list, check if it is present in the selection and if so
        // record its ordinal ranking. The rankings come out sorted low-to-high by construction,
        // which also suits the SimpleSqlTableLoader when it is in ordinal mode.
        for (int i = 0; i < numColumns; i++)
        {
            if (m_Context.ColumnNamesSelection.Any(selectedName => selectedName.Equals(m_Context.ColumnNames[i], StringComparison.InvariantCultureIgnoreCase)))
            {
                ordinalRankings[indexRankings++] = i;
            }
        }
    }

    while (reader.TryExtractLine(out line))
    {
        string[] record = StringAndText.SplitRow(line, m_Context.Delimiter, m_Context.Qualifier, true);
        // Guard against ragged rows before indexing into the record.
        if (record.Length != numColumns)
        {
            var errorMsg = $"A row was skipped over because it had too many or too few columns, expected: {numColumns}, actual: {record.Length}";
            if (m_Context.IsSkippingError)
            {
                LogService.Instance.Warn(errorMsg);
            }
            else
            {
                // FIX: throw a specific exception type instead of bare Exception;
                // callers catching Exception still catch this.
                var ex = new InvalidOperationException(errorMsg);
                LogService.Instance.Error(ex);
                throw ex;
            }
        }
        else
        {
            var newRow = writer.GetEmptyRow();
            if (ordinalRankings == null)
            {
                // No selection: copy all columns positionally.
                for (int i = 0; i < numColumns; i++)
                {
                    newRow[i] = record[i];
                }
            }
            else
            {
                // Selection: copy only the chosen source columns, in ranking order.
                for (int i = 0; i < ordinalRankings.Length; i++)
                {
                    newRow[i] = record[ordinalRankings[i]];
                }
            }
            writer.PostRecord(newRow);
            if (++rowsProcessed % numberOfLines == 0)
            {
                OnRecordsProcessed(Thread.CurrentThread.Name);
            }
        }
    }

    // Flush the final partial batch and raise the closing progress event.
    writer.WriteRecords();
    OnRecordsProcessed(Thread.CurrentThread.Name, rowsProcessed % numberOfLines);
}
/// <summary>
/// Streams the source file through a memory-mapped view in fixed-size chunks, cuts the bytes
/// into lines on '\n' (trimming a preceding '\r'), decodes each line to a string, splits it on
/// the context delimiter and offers the resulting rows to <paramref name="output"/>.
/// </summary>
/// <param name="context">Supplies the source file path and delimiter.</param>
/// <param name="output">Receives one string[] per extracted line.</param>
/// <param name="pauseEvent">NOTE(review): never waited on inside this method — confirm pausing is actually honored here.</param>
/// <exception cref="InvalidCastException">Thrown when the context delimiter is not a single character.</exception>
private void DoPausableWork(PipelineContext context, IProducerConsumerCollection <string[]> output, ManualResetEvent pauseEvent)
{
    string filepath = context.SourceFilePath;
    long capacity = m_Buffer;
    char delim; // NOTE(review): delim is assigned below but never read afterwards — only s_delim is used.
    if (context.Delimiter.Length == 1)
    {
        delim = context.Delimiter.ToCharArray()[0];
    }
    else
    {
        // Only single-character delimiters are supported by this extractor.
        // NOTE(review): InvalidCastException is an unusual type for argument validation — verify callers before changing it.
        throw new InvalidCastException("MmfExtractor only supports single char delimiters");
    }
    string s_delim = context.Delimiter.ToString();
    int positionInByteArray = 0; // index into currentChunk where the next unconsumed line starts
    // Clamp the requested view capacity so it isn't larger than the file itself.
    using (FileStream fs = new FileStream(filepath, FileMode.Open, FileAccess.Read))
    {
        if (fs.Length < capacity)
        {
            capacity = fs.Length;
        }
    }
    using (MemoryMappedFile mmf = MemoryMappedFile.CreateFromFile(filepath, FileMode.Open, "D2SMMF", capacity, MemoryMappedFileAccess.Read))
    {
        using (MemoryMappedViewStream view = mmf.CreateViewStream(0, capacity, MemoryMappedFileAccess.Read))
        {
            byte[] currentChunk = new byte[byteArraySize];
            // Figure out how many bytes we can read into the array (i.e. will we reach the end of the stream before the array is full or not).
            int bytesToRead = 0;
            // While the end of the stream isn't reached...
            while (view.Position < view.Length)
            {
                // Check how many bytes to read (at most the size of the array).
                if ((view.Length - view.Position) < byteArraySize)
                {
                    bytesToRead = (int)(view.Length - view.Position);
                }
                else
                {
                    bytesToRead = byteArraySize;
                }
                // Then read them, appending after the m_LatestOffset carried-over bytes from the previous chunk.
                // NOTE(review): the return value of Read is ignored — a partial read would leave stale bytes to be parsed; verify.
                view.Read(currentChunk, m_LatestOffset, bytesToRead - m_LatestOffset);
                // Then loop over the array until a line break is encountered, extract the string and split it up.
                for (int i = 0; i < byteArraySize; i++)
                {
                    if (currentChunk[i] == 10) //10 is the '\n' byte
                    {
                        // The previous character may be '\r'; we don't want it in the result string, so take one byte less in that case.
                        // NOTE(review): currentChunk[i - 1] underflows when i == 0 (a chunk starting with '\n') — possible IndexOutOfRangeException; verify.
                        byte[] aboutToBeAString;
                        if (currentChunk[i - 1] == 13)
                        {
                            aboutToBeAString = currentChunk.Skip(positionInByteArray).Take(i - positionInByteArray - 1).ToArray();
                        }
                        else
                        {
                            aboutToBeAString = currentChunk.Skip(positionInByteArray).Take(i - positionInByteArray).ToArray();
                        }
                        positionInByteArray = i + 1;
                        m_LatestOffset = byteArraySize - (i + 1); // the amount of bytes left in the array that haven't been read and converted to strings
                        // Convert the byte array to Unicode if it isn't already, then make a string out of it.
                        string currentLine;
                        if (!m_encoding.EncodingName.Equals("Unicode"))
                        {
                            currentLine = Encoding.Unicode.GetString(
                                Encoding.Convert(m_encoding, Encoding.Unicode, aboutToBeAString));
                        }
                        else
                        {
                            currentLine = m_encoding.GetString(aboutToBeAString);
                        }
                        string[] currentRow = StringAndText.SplitRow(currentLine, s_delim, "\\", false);
                        // NOTE(review): TryAdd result is ignored — a row is silently dropped if the collection refuses the add; verify.
                        output.TryAdd(currentRow);
                    }
                    // Stop once zero bytes are reached (not real null terminators, but the unwritten tail of the array).
                    // NOTE(review): currentChunk is never zeroed, so bytes past the last read may be stale rather than 0 — verify this sentinel holds.
                    else if (currentChunk[i] == 0)
                    {
                        break;
                    }
                }
                // Place the unconsumed remainder back at the start of the array.
                // NOTE(review): positionInByteArray is NOT reset to 0 after this shift, yet the next iteration's
                // Skip(positionInByteArray) still uses the stale offset — on the second and later chunks this looks
                // like it extracts from the wrong position; verify against a multi-chunk input.
                int index = 0;
                for (int i = positionInByteArray; i < byteArraySize; i++)
                {
                    currentChunk[index] = currentChunk[i];
                    index++;
                }
            }
            // When we break out of this loop we will have some remainder left, as the last line often is not
            // terminated by a line break. We handle that remainder here.
            byte[] aboutToBeLastString = currentChunk.TakeWhile(b => b != 0).ToArray(); //take bytes until a 0 byte is encountered
            string lastLine;
            if (!m_encoding.EncodingName.Equals("Unicode"))
            {
                lastLine = Encoding.Unicode.GetString(
                    Encoding.Convert(m_encoding, Encoding.Unicode, aboutToBeLastString));
            }
            else
            {
                lastLine = m_encoding.GetString(aboutToBeLastString);
            }
            string[] lastRow = StringAndText.SplitRow(lastLine, s_delim, "\\", false);
            output.TryAdd(lastRow);
        }
    }
}