Ejemplo n.º 1
0
        /// <summary>
        /// Takes lines from <paramref name="inputCollection"/>, splits each one into fields and adds the
        /// resulting array to <paramref name="outputCollection"/> for as long as <c>HasWork</c> is true.
        /// When <c>Qualifier</c> is set the line is split with <c>StringAndText.SplitRow</c>; otherwise a
        /// plain <see cref="string.Split(string[], StringSplitOptions)"/> on <c>Delimiter</c> is used.
        /// </summary>
        /// <param name="inputCollection">Source of raw lines.</param>
        /// <param name="outputCollection">Destination for the split rows.</param>
        /// <param name="pauseEvent">Waited on before every take and between failed adds, so resetting it pauses the worker.</param>
        /// <param name="progress">Receives the running row count every 1000 rows and once more when the loop ends.</param>
        /// <exception cref="InvalidOperationException">Thrown when <c>Delimiter</c> is null.</exception>
        private void DoWorkAndReport(IProducerConsumerCollection <string> inputCollection, IProducerConsumerCollection <object[]> outputCollection, ManualResetEvent pauseEvent, IProgress <int> progress)
        {
            if (Delimiter == null)
            {
                var outputMessage = "Delimiter is not set for this Stringsplitter";
                LogService.Instance.Error(outputMessage);
                throw new InvalidOperationException(outputMessage);
            }

            const int progressInterval = 1000;
            string[]  delimiters       = new string[] { Delimiter };
            // Decided once up front, exactly like the original branch selection.
            bool      useQualifier     = Qualifier != null;
            int       processedCount   = 0;

            while (HasWork)
            {
                pauseEvent.WaitOne();

                string inputString;
                if (!inputCollection.TryTake(out inputString))
                {
                    // Nothing to do right now: back off briefly instead of spinning on an empty collection.
                    // (Previously only the qualifier path did this; the plain-split path burned a core.)
                    Thread.Sleep(10);
                    continue;
                }

                string[] fields = useQualifier
                    ? StringAndText.SplitRow(inputString, Delimiter, Qualifier, false)
                    : inputString.Split(delimiters, StringSplitOptions.None);

                // Keep retrying until the consumer side accepts the row, honouring pause requests in between.
                while (!outputCollection.TryAdd(fields))
                {
                    pauseEvent.WaitOne();
                }

                // Report only when the count actually advances to a multiple of the interval; checking this
                // outside the take branch re-reported the same value on every idle iteration.
                if (++processedCount % progressInterval == 0)
                {
                    progress.Report(processedCount);
                }
            }

            // Final report so the consumer always sees the exact total.
            progress.Report(processedCount);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads the first line of <paramref name="file"/>, splits it on <c>Context.Delimiter</c>
        /// (with <c>"</c> as qualifier) and adds one empty <see cref="ConcurrentStack{T}"/> to
        /// <c>ColumnCollection</c> per resulting field.
        /// </summary>
        /// <param name="file">Path of the file whose first line determines the column count.</param>
        private void Init(string file)
        {
            string firstLine;

            // Dispose the reader as soon as the first line is read; the original never closed it,
            // which kept the file handle open until finalization.
            using (StreamReader reader = new StreamReader(file))
            {
                firstLine = reader.ReadLine();
            }

            int columnCount = StringAndText.SplitRow(firstLine, Context.Delimiter.ToString(), "\"", false).Length;

            for (int i = 0; i < columnCount; i++)
            {
                ColumnCollection.Add(new ConcurrentStack <string>());
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Checks <c>StringAndText.SplitRow</c> with and without a qualifier: both rows must yield three
        /// fields, and delimiters inside the qualified field must be kept as part of that field.
        /// </summary>
        public void QualifierSplitRowTest()
        {
            // Row without any qualifier, split with a null qualifier argument.
            const string plainRow    = @"foo|bar|zoo";
            string[]     plainFields = StringAndText.SplitRow(plainRow, "|", null, false);

            Assert.IsTrue(plainFields.Length == 3);

            // Row whose middle field is wrapped in double quotes and contains the delimiter.
            const string qualifiedRow    = "foo|\"b|a|r\"|zoo";
            string[]     qualifiedFields = StringAndText.SplitRow(qualifiedRow, "|", "\"", false);

            Assert.IsTrue(qualifiedFields.Length == 3);
            Assert.AreEqual("b|a|r", qualifiedFields[1]);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Samples the first lines of <c>Context.SourceFilePath</c>, pushes every field onto the stack of its
        /// column in <c>ColumnCollection</c>, runs <c>DoSuggestType</c> and then pops one value per column.
        /// </summary>
        /// <returns>
        /// The values popped from the column stacks after <c>DoSuggestType</c> has run, in
        /// <c>ColumnCollection</c> enumeration order. Columns whose stack is empty contribute nothing.
        /// </returns>
        public string[] SuggestDataType()
        {
            Init(Context.SourceFilePath); //initialize variables

            // Snapshot the column stacks once: indexed access is O(1) and the count is known up front,
            // instead of re-enumerating ColumnCollection with ElementAt for every single cell.
            var columns = ColumnCollection.ToList();

            //put the first x lines into variables, order by column
            using (StreamReader reader = new StreamReader(Context.SourceFilePath))
            {
                var delimiterAsString = Context.Delimiter.ToString();
                if (Context.FirstLineContainsHeaders)
                {
                    reader.ReadLine(); //skip header line
                }
                if (Context.SourceFileIsSourcedFromDial)
                {
                    reader.ReadLine(); //skip extra line for DIAL data
                }

                for (int x = 0; x < ConfigVariables.Instance.Type_Suggestion_Sample_Lines_To_Scan; x++)
                {
                    string line = reader.ReadLine();
                    if (line == null)
                    {
                        // End of file: further ReadLine calls would only keep returning null.
                        break;
                    }

                    string[] splitLine = StringAndText.SplitRow(line, delimiterAsString, "\"", false);

                    // A sample row may contain more fields than there are column stacks; the surplus fields
                    // are ignored rather than letting an out-of-range lookup abort the whole suggestion.
                    for (int i = 0; i < splitLine.Length && i < columns.Count; i++)
                    {
                        columns[i].Push(splitLine[i]);
                    }
                }
            }

            //suggest datatypes and push these on the stacks
            DoSuggestType(Context.StringPadding);

            List <string> types = new List <string>();
            foreach (ConcurrentStack <string> column in ColumnCollection)
            {
                string suggestedType;
                if (column.TryPop(out suggestedType))
                {
                    types.Add(suggestedType);
                }
            }
            return(types.ToArray());
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Checks <c>StringAndText.SplitRow</c> with a one-character and a two-character delimiter, using a
        /// backslash as qualifier and <c>true</c> as the last argument.
        /// </summary>
        public void SplitRowTest()
        {
            // One-character delimiter: three fields expected.
            const string singleDelimiterRow    = @"foo|bar|zoo";
            string[]     singleDelimiterFields = StringAndText.SplitRow(singleDelimiterRow, "|", @"\", true);

            Assert.IsTrue(singleDelimiterFields.Length == 3);

            // Two-character delimiter: the third pipe of "|||" must stay attached to the second field.
            const string doubleDelimiterRow    = @"foo|||bar||zoo";
            string[]     doubleDelimiterFields = StringAndText.SplitRow(doubleDelimiterRow, "||", @"\", true);

            Assert.IsTrue(doubleDelimiterFields.Length == 3);
            Assert.AreEqual("|bar", doubleDelimiterFields[1]);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Worker routine: pulls lines from the extractor passed in <paramref name="x"/>, splits each into a
        /// record, copies all columns (or only the selected ones) into a new row and posts it to a
        /// <c>SimpleSqlTableLoader</c>. Raises <c>OnRecordsProcessed</c> every <c>numberOfLines</c> rows and
        /// once more with the remainder after the final flush.
        /// </summary>
        /// <param name="x">Must be a <c>ConcurrentFlatFileExtractor</c>; typed as object by the method signature.</param>
        /// <exception cref="ArgumentException">Thrown when <paramref name="x"/> is not a <c>ConcurrentFlatFileExtractor</c>.</exception>
        /// <exception cref="Exception">
        /// Thrown when a row has an unexpected column count and <c>m_Context.IsSkippingError</c> is false.
        /// </exception>
        private void ProcessRecords(object x)
        {
            ConcurrentFlatFileExtractor reader = x as ConcurrentFlatFileExtractor;
            if (reader == null)
            {
                // Fail fast with a meaningful message; without this check a wrong argument only surfaced
                // as a NullReferenceException at the first TryExtractLine call.
                ArgumentException argumentError = new ArgumentException("ProcessRecords requires a ConcurrentFlatFileExtractor argument", nameof(x));
                LogService.Instance.Error(argumentError);
                throw argumentError;
            }

            SimpleSqlTableLoader writer = new SimpleSqlTableLoader(m_Context);
            string line;
            int    rowsProcessed = 0;
            int    numColumns    = m_Context.ColumnNames.Count();

            //if a selection is made on the source columns we will compute the ordinal rankings we require here
            int[] ordinalRankings = null;
            int   selectionCount  = m_Context.ColumnNamesSelection.Count();
            //if these are not equal a selection is made.
            if (numColumns != selectionCount)
            {
                ordinalRankings = new int[selectionCount];
                int indexRankings = 0;
                //for every name in the total list, check if it is present in the selection and if so write its ordinal ranking to the array.
                //the rankings will be sorted low to high by design which also suits the simplesqlWriter in case it is in ordinal mode.
                // NOTE(review): a selected name that is absent from ColumnNames leaves a trailing 0 in the array
                // (i.e. column 0 is written again) - confirm the selection is always a subset.
                for (int i = 0; i < numColumns; i++)
                {
                    var columnName = m_Context.ColumnNames[i];
                    if (m_Context.ColumnNamesSelection.Any(
                            selectedName => selectedName.Equals(columnName, StringComparison.InvariantCultureIgnoreCase)))
                    {
                        ordinalRankings[indexRankings++] = i;
                    }
                }
            }

            while (reader.TryExtractLine(out line))
            {
                string[] record = StringAndText.SplitRow(line, m_Context.Delimiter, m_Context.Qualifier, true);
                //assume the ordinal rankings are identical (if all the pieces use the context.columnsnames property that will be the case)
                //check the column count though
                if (record.Length != numColumns)
                {
                    var errorMsg = $"A row was skipped over because it had too many or too few columns, expected: {numColumns}, actual: {record.Length}";
                    if (m_Context.IsSkippingError)
                    {
                        LogService.Instance.Warn(errorMsg);
                        continue;
                    }

                    Exception ex = new Exception(errorMsg);
                    LogService.Instance.Error(ex);
                    throw ex;
                }

                var newRow = writer.GetEmptyRow();
                if (ordinalRankings == null)
                {
                    //write all columns
                    for (int i = 0; i < numColumns; i++)
                    {
                        newRow[i] = record[i];
                    }
                }
                else
                {
                    //write only selected columns (the indices we want are in the ordinalrankings array)
                    for (int i = 0; i < ordinalRankings.Length; i++)
                    {
                        newRow[i] = record[ordinalRankings[i]];
                    }
                }

                writer.PostRecord(newRow);
                if (++rowsProcessed % numberOfLines == 0)
                {
                    OnRecordsProcessed(Thread.CurrentThread.Name);
                }
            }

            //flush final records and trigger last event
            writer.WriteRecords();
            OnRecordsProcessed(Thread.CurrentThread.Name, rowsProcessed % numberOfLines);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Reads the file at <c>context.SourceFilePath</c> through a memory-mapped view, cuts the bytes into
        /// lines at every <c>\n</c> (dropping a preceding <c>\r</c>), decodes each line with <c>m_encoding</c>,
        /// splits it with <c>StringAndText.SplitRow</c> and adds the resulting row to <paramref name="output"/>.
        /// Whatever is left in the buffer after the loop is emitted as a final row.
        /// </summary>
        /// <param name="context">Supplies the source file path and the (single-character) delimiter.</param>
        /// <param name="output">Collection receiving one <c>string[]</c> per line.</param>
        /// <param name="pauseEvent">
        /// NOTE(review): never referenced in this method body, so despite the method name nothing here
        /// actually pauses - confirm whether a WaitOne call is missing.
        /// </param>
        /// <exception cref="InvalidCastException">Thrown when <c>context.Delimiter</c> is not exactly one character long.</exception>
        private void DoPausableWork(PipelineContext context, IProducerConsumerCollection <string[]> output, ManualResetEvent pauseEvent)
        {
            string filepath = context.SourceFilePath;
            long   capacity = m_Buffer;
            char   delim;

            // Only single-character delimiters are accepted.
            // NOTE(review): delim is assigned here but never read afterwards; only s_delim is used for splitting.
            if (context.Delimiter.Length == 1)
            {
                delim = context.Delimiter.ToCharArray()[0];
            }
            else
            {
                throw new InvalidCastException("MmfExtractor only supports single char delimiters");
            }
            string s_delim             = context.Delimiter.ToString();
            int    positionInByteArray = 0;

            //check if capacity isnt too high: clamp it to the file length
            using (FileStream fs = new FileStream(filepath, FileMode.Open, FileAccess.Read))
            {
                if (fs.Length < capacity)
                {
                    capacity = fs.Length;
                }
            }
            // NOTE(review): the view below covers bytes [0, capacity) only, so for a file longer than m_Buffer the
            // tail would not be read here. The CreateFromFile documentation also lists a capacity smaller than the
            // file size as an error - confirm how files larger than m_Buffer are meant to be handled.
            using (MemoryMappedFile mmf = MemoryMappedFile.CreateFromFile(filepath, FileMode.Open, "D2SMMF", capacity, MemoryMappedFileAccess.Read))
            {
                using (MemoryMappedViewStream view = mmf.CreateViewStream(0, capacity, MemoryMappedFileAccess.Read))
                {
                    byte[] currentChunk = new byte[byteArraySize];
                    //figure out how many bytes we can read into the array (i.e. will we reach the end of the stream before the array is full or not)
                    int bytesToRead = 0;
                    //while the end of stream isnt reached...
                    while (view.Position < view.Length)
                    {
                        //check how many bytes we read (max of the size of array)
                        if ((view.Length - view.Position) < byteArraySize)
                        {
                            bytesToRead = (int)(view.Length - view.Position);
                        }
                        else
                        {
                            bytesToRead = byteArraySize;
                        }
                        //then read them, starting behind the m_LatestOffset bytes carried over from the previous chunk
                        // NOTE(review): the number of bytes actually read (the return value of Read) is ignored, and
                        // bytesToRead - m_LatestOffset is not checked for being negative - confirm.
                        view.Read(currentChunk, m_LatestOffset, bytesToRead - m_LatestOffset);
                        //then loop over the array until a line break is encountered, extract the string and split it up.
                        // The scan always runs over the whole array (byteArraySize), not just the bytes read above.
                        for (int i = 0; i < byteArraySize; i++)
                        {
                            if (currentChunk[i] == 10) //10 is the \n char
                            {
                                // it might be that the previous character is a \r char (13), we dont want this in the result string so if this is the case we will
                                //read one less byte
                                // NOTE(review): when i == 0 the lookup currentChunk[i - 1] indexes -1 and would throw
                                // IndexOutOfRangeException (a chunk that starts with \n).
                                byte[] aboutToBeAString;
                                if (currentChunk[i - 1] == 13)
                                {
                                    aboutToBeAString = currentChunk.Skip(positionInByteArray).Take(i - positionInByteArray - 1).ToArray();
                                }
                                else
                                {
                                    aboutToBeAString = currentChunk.Skip(positionInByteArray).Take(i - positionInByteArray).ToArray();
                                }
                                positionInByteArray = i + 1;
                                m_LatestOffset      = byteArraySize - (i + 1); //the amount of bytes left in the array that havent been read and converted to strings
                                //convert the bytearray to unicode if it isn't already and then make a string out of it
                                string currentLine;
                                if (!m_encoding.EncodingName.Equals("Unicode"))
                                {
                                    currentLine = Encoding.Unicode.GetString(
                                        Encoding.Convert(m_encoding, Encoding.Unicode, aboutToBeAString));
                                }
                                else
                                {
                                    currentLine = m_encoding.GetString(aboutToBeAString);
                                }
                                // The qualifier passed to SplitRow is a single backslash.
                                // NOTE(review): the result of TryAdd is ignored, so a rejected row would be dropped silently.
                                string[] currentRow = StringAndText.SplitRow(currentLine, s_delim, "\\", false);
                                output.TryAdd(currentRow);
                            }
                            //dont bother once null terminators are reached (well they arent null terminators but w/e):
                            //a zero byte is treated as the end of the valid data in the buffer
                            else if (currentChunk[i] == 0)
                            {
                                break;
                            }
                        }
                        // place remainder back at start of array
                        // NOTE(review): positionInByteArray is not reset to 0 after this move, and the bytes behind the
                        // moved remainder are not cleared - confirm the next iteration's Skip/Take offsets are still right.
                        int index = 0;
                        for (int i = positionInByteArray; i < byteArraySize; i++)
                        {
                            currentChunk[index] = currentChunk[i];
                            index++;
                        }
                    }
                    //when we break out of this loop we will have some remainder left as the last line often is not terminated by a line break. we handle that remainder here.
                    byte[] aboutToBeLastString = currentChunk.TakeWhile(b => b != 0).ToArray(); //take bytes until a zero byte is encountered
                    string lastLine;
                    if (!m_encoding.EncodingName.Equals("Unicode"))
                    {
                        lastLine = Encoding.Unicode.GetString(
                            Encoding.Convert(m_encoding, Encoding.Unicode, aboutToBeLastString));
                    }
                    else
                    {
                        lastLine = m_encoding.GetString(aboutToBeLastString);
                    }
                    // The final row is added unconditionally; as above, the TryAdd result is not checked.
                    string[] lastRow = StringAndText.SplitRow(lastLine, s_delim, "\\", false);
                    output.TryAdd(lastRow);
                }
            }
        }