Exemple #1
0
 /// <summary>
 /// Aborts the current parse: sets the aborted flag, calls abort on the inner
 /// parser, marks the current results as aborted, invokes the configured
 /// <c>complete</c> callback (when one is set) with those results, and finally
 /// clears the stored input.
 /// </summary>
 public void abort()
 {
     _aborted = true;
     _parser.abort();
     _results.meta.aborted = true;

     bool hasCompleteCallback = Papa.isFunction(_config.complete);
     if (hasCompleteCallback)
     {
         _config.complete(_results);
     }

     // Nothing is left to resume from once aborted
     _input = "";
 }
Exemple #2
0
        /// <summary>
        /// Takes the next chunk off the front of <c>remaining</c> and passes it to
        /// <c>parseChunk</c>. When the configured <c>chunkSize</c> is not positive,
        /// the whole remaining input is consumed as a single chunk.
        /// </summary>
        /// <returns>
        /// <c>null</c> when the streamer is already finished; otherwise whatever
        /// <c>parseChunk</c> returns for the chunk.
        /// </returns>
        protected override Result _nextChunk()
        {
            if (base._finished)
            {
                return null;
            }

            int    chunkLength = base._config.chunkSize;
            string current;

            if (chunkLength > 0)
            {
                current   = Papa.Substr(remaining, 0, chunkLength);
                remaining = Papa.Substr(remaining, chunkLength);
            }
            else
            {
                current   = remaining;
                remaining = "";
            }

            // Must be set before parseChunk runs, which reads the flag
            base._finished = String.IsNullOrEmpty(remaining);
            return base.parseChunk(current);
        }
        /// <summary>
        /// Installs a private copy of <paramref name="config"/> on this streamer and
        /// creates the <c>ParserHandle</c> that will parse with it.
        /// </summary>
        /// <param name="config">Configuration supplied by the caller; it is copied via <c>Papa.copy</c>.</param>
        private void replaceConfig(Config config)
        {
            // Copy the config so we can edit it
            Config ownConfig = Papa.copy(config);

            bool isStreaming = config.step != null || config.chunk != null;
            if (!isStreaming)
            {
                // disable Range header if not streaming; bad values break IIS - see issue #196
                ownConfig.chunkSize = 0;
            }

            this._handle          = new ParserHandle(ownConfig);
            this._handle.streamer = this;

            // persist the copy to the caller
            this._config = ownConfig;
        }
 /// <summary>
 /// Reports <paramref name="error"/> through the configured <c>error</c> callback,
 /// when one is set.
 /// </summary>
 /// <param name="error">The error to hand to the callback.</param>
 protected void _sendError(Error error)
 {
     if (Papa.isFunction(this._config.error))
     {
         this._config.error(error);
         return;
     }

     if (Papa.IS_WORKER && this._config.error != null)
     {
         // Worker messaging is not implemented here. The disabled code posted
         // { workerId: Papa.WORKER_ID, error: error, finished: false }
         // through global.postMessage.
     }
 }
Exemple #5
0
        /// <summary>
        /// Guesses the line terminator used by <paramref name="input"/>. Only the
        /// leading part of the input (limited to 1024 * 1024 via <c>Papa.Substr</c>)
        /// is inspected.
        /// </summary>
        /// <param name="input">Text whose line endings should be guessed.</param>
        /// <returns>
        /// "\n" when the inspected text contains no '\r'; otherwise "\r\n" when at least
        /// (segment count / 2) of the '\r'-separated segments start with '\n', else "\r".
        /// </returns>
        private string guessLineEndings(string input)
        {
            input = Papa.Substr(input, 0, 1024 * 1024); // max length 1 MB

            string[] segments = input.Split('\r');

            if (segments.Length == 1)
            {
                return "\n";
            }

            int numWithN = 0;

            for (int i = 0; i < segments.Length; i++)
            {
                // A segment is empty when the text starts or ends with '\r' or contains
                // "\r\r"; indexing [0] on it would throw IndexOutOfRangeException.
                if (segments[i].Length > 0 && segments[i][0] == '\n')
                {
                    numWithN++;
                }
            }

            // Integer division: with an odd segment count the threshold rounds down,
            // so a tie is resolved in favour of "\r\n".
            return numWithN >= segments.Length / 2 ? "\r\n" : "\r";
        }
        /// <summary>
        /// Parses <paramref name="input"/> into rows of string fields using the parser's
        /// delimiter, newline, quote character and comment prefix. Uses a split-based fast
        /// path when fast mode is on (or when it is not explicitly off and the input contains
        /// no quote character); otherwise scans the input honouring quoted fields.
        /// When a step callback is set it is invoked after each row and the accumulated
        /// data and errors are reset afterwards.
        /// </summary>
        /// <param name="input">Text to parse. Null or empty input yields an empty result.</param>
        /// <param name="baseIndex">Offset added to the cursor reported in <c>meta.cursor</c>.</param>
        /// <param name="ignoreLastRow">
        /// When true, the final row (the one not terminated by a newline) is not emitted.
        /// </param>
        /// <returns>
        /// A <c>Result</c> holding the parsed rows, the errors, and meta information
        /// (delimiter, linebreak, aborted, truncated, cursor).
        /// </returns>
        public Result parse(string input, int baseIndex = 0, bool ignoreLastRow = false)
        {
            // We don't need to compute some of these every time parse() is called,
            // but having them in a more local scope seems to perform better.
            // input and comments are both allowed to be null/empty further down,
            // so their lengths must not be read unguarded here.
            int inputLen        = input == null ? 0 : input.Length,
                delimLen        = delim.Length,
                newlineLen      = newline.Length,
                commentsLen     = comments == null ? 0 : comments.Length;
            bool stepIsFunction = step != null;

            string[] delimSplit = new string[] { delim },
            newlineSplit = new string[] { newline };

            // A doubled quote inside a quoted field stands for one literal quote.
            string quote        = quoteChar.ToString(),
                   escapedQuote = quote + quote;

            // Returns the text between start and end (via Papa.Substring) with doubled
            // quotes collapsed. A literal replace is used so that a quote character that
            // happens to be a regex metacharacter is handled correctly.
            Func<int, int, string> unescapeQuoted = (start, end) =>
            {
                return Papa.Substring(input, start, end).Replace(escapedQuote, quote);
            };

            // Returns an object with the results, errors, and meta.
            Func<bool, Result> returnable = (stopped) =>
            {
                return new Result()
                {
                    data = data,
                    dataWithHeader = new List<Dictionary<string, string>>(),
                    errors = errors,
                    meta = new Meta()
                    {
                        delimiter = delim,
                        linebreak = newline,
                        aborted = aborted,
                        truncated = stopped,
                        cursor = lastCursor + baseIndex
                    }
                };
            };

            // Executes the user's step function and resets data & errors.
            Action doStep = () =>
            {
                step(returnable(false), null);
                data   = new List<List<string>>();
                errors = new List<Error>();
            };

            // Stores a finished row and remembers the cursor position reached with it.
            Action<List<string>> pushRow = (newRow) =>
            {
                data.Add(newRow);
                lastCursor = cursor;
            };

            // Appends the remaining input from cursor to the end into
            // row, saves the row, calls step, and returns the results.
            Func<string, Result> finish = (newValue) =>
            {
                if (ignoreLastRow)
                {
                    return returnable(false);
                }
                if (newValue == null)
                {
                    newValue = Papa.Substr(input, cursor);
                }
                row.Add(newValue);
                cursor = inputLen;              // important in case parsing is paused
                pushRow(row);
                if (stepIsFunction)
                {
                    doStep();
                }
                return returnable(false);
            };

            //------------------------------------------------------------------------------------------------------------------------------------------------------


            // Establish starting state
            cursor     = 0;
            data       = new List<List<string>>();
            errors     = new List<Error>();
            row        = new List<string>();
            lastCursor = 0;

            if (String.IsNullOrEmpty(input))
            {
                return returnable(false);
            }

            if (fastMode == true || (fastMode != false && input.IndexOf(quoteChar) == -1))
            {
                string[] rows = input.Split(newlineSplit, StringSplitOptions.None);
                for (int i = 0; i < rows.Length; i++)
                {
                    string rowFast = rows[i];
                    cursor += rowFast.Length;
                    if (i != rows.Length - 1)
                    {
                        cursor += newline.Length;
                    }
                    else if (ignoreLastRow)
                    {
                        return returnable(false);
                    }
                    if (!String.IsNullOrEmpty(comments) && Papa.Substr(rowFast, 0, commentsLen) == comments)
                    {
                        continue;
                    }
                    if (stepIsFunction)
                    {
                        data = new List<List<string>>();
                        pushRow(new List<string>(rowFast.Split(delimSplit, StringSplitOptions.None)));
                        doStep();
                        if (aborted)
                        {
                            return returnable(false);
                        }
                    }
                    else
                    {
                        pushRow(new List<string>(rowFast.Split(delimSplit, StringSplitOptions.None)));
                    }
                    if (preview > 0 && i >= preview)
                    {
                        // data may hold fewer than `preview` rows here (doStep() resets it and
                        // comment rows are skipped); GetRange throws when asked for more
                        // elements than the list contains, so only trim when there is excess.
                        if (data.Count > preview)
                        {
                            data = data.GetRange(0, preview);
                        }
                        return returnable(true);
                    }
                }
                return returnable(false);
            }

            int nextDelim   = input.IndexOf(delim, cursor);
            int nextNewline = input.IndexOf(newline, cursor);

            // Appends the current row to the results. It sets the cursor
            // to newCursor and finds the nextNewline. The caller should
            // take care to execute user's step function and check for
            // preview and end parsing if necessary.
            Action<int> saveRow = (newCursor) =>
            {
                cursor = newCursor;
                pushRow(row);
                row         = new List<string>();
                nextNewline = input.IndexOf(newline, cursor);
            };

            // Parser loop
            for (;;)
            {
                //[CR added so we never look behind the string]
                if (input.Length <= cursor)
                {
                    break;
                }

                // Field has opening quote
                if (input[cursor] == quoteChar)
                {
                    // Start our search for the closing quote where the cursor is
                    int quoteSearch = cursor;

                    // Skip the opening quote
                    cursor++;

                    for (;;)
                    {
                        // Find closing quote
                        quoteSearch = input.IndexOf(quoteChar, quoteSearch + 1);

                        if (quoteSearch == -1)
                        {
                            if (!ignoreLastRow)
                            {
                                // No closing quote... what a pity
                                errors.Add(new Error()
                                {
                                    type    = "Quotes",
                                    code    = "MissingQuotes",
                                    message = "Quoted field unterminated",
                                    row     = data.Count,                                       // row has yet to be inserted
                                    index   = cursor
                                });
                            }
                            return finish(null);
                        }

                        if (quoteSearch == inputLen - 1)
                        {
                            // Closing quote at EOF
                            string value = unescapeQuoted(cursor, quoteSearch);
                            return finish(value);
                        }

                        // If this quote is escaped, it's part of the data; skip it
                        if (input[quoteSearch + 1] == quoteChar)
                        {
                            quoteSearch++;
                            continue;
                        }

                        // Note: compares a single character, so this only matches
                        // when the delimiter is exactly one character long.
                        if (input[quoteSearch + 1].ToString() == delim)
                        {
                            // Closing quote followed by delimiter
                            row.Add(unescapeQuoted(cursor, quoteSearch));
                            cursor      = quoteSearch + 1 + delimLen;
                            nextDelim   = input.IndexOf(delim, cursor);
                            nextNewline = input.IndexOf(newline, cursor);
                            break;
                        }

                        if (Papa.Substr(input, quoteSearch + 1, newlineLen) == newline)
                        {
                            // Closing quote followed by newline
                            row.Add(unescapeQuoted(cursor, quoteSearch));
                            saveRow(quoteSearch + 1 + newlineLen);
                            nextDelim = input.IndexOf(delim, cursor);                                   // because we may have skipped the nextDelim in the quoted field

                            if (stepIsFunction)
                            {
                                doStep();
                                if (aborted)
                                {
                                    return returnable(false);
                                }
                            }

                            if (preview > 0 && data.Count >= preview)
                            {
                                return returnable(true);
                            }

                            break;
                        }
                    }

                    continue;
                }

                // Comment found at start of new line
                if (!String.IsNullOrEmpty(comments) && row.Count == 0 && Papa.Substr(input, cursor, commentsLen) == comments)
                {
                    if (nextNewline == -1)                      // Comment ends at EOF
                    {
                        return returnable(false);
                    }
                    cursor      = nextNewline + newlineLen;
                    nextNewline = input.IndexOf(newline, cursor);
                    nextDelim   = input.IndexOf(delim, cursor);
                    continue;
                }

                // Next delimiter comes before next newline, so we've reached end of field
                if (nextDelim != -1 && (nextDelim < nextNewline || nextNewline == -1))
                {
                    row.Add(Papa.Substring(input, cursor, nextDelim));
                    cursor    = nextDelim + delimLen;
                    nextDelim = input.IndexOf(delim, cursor);
                    continue;
                }

                // End of row
                if (nextNewline != -1)
                {
                    row.Add(Papa.Substring(input, cursor, nextNewline));
                    saveRow(nextNewline + newlineLen);

                    if (stepIsFunction)
                    {
                        doStep();
                        if (aborted)
                        {
                            return returnable(false);
                        }
                    }

                    if (preview > 0 && data.Count >= preview)
                    {
                        return returnable(true);
                    }

                    continue;
                }

                break;
            }

            return finish(null);
        }
        /// <summary>
        /// Parses one chunk of input. The chunk is prefixed with the partial line left
        /// over from the previous chunk, parsed through the handle, and the outcome is
        /// delivered to the configured <c>chunk</c> callback or accumulated into the
        /// complete results. Calls the <c>complete</c> callback once finished (or once the
        /// preview row count is reached) and otherwise requests the next chunk.
        /// </summary>
        /// <param name="chunk">The raw text of this chunk.</param>
        /// <returns>
        /// <c>null</c> when parsing was paused or aborted, or after the results were handed
        /// to the <c>chunk</c> callback; otherwise the results of this chunk.
        /// </returns>
        public Result parseChunk(string chunk)
        {
            // Give the caller a chance to rewrite the very first chunk
            if (this.isFirstChunk && Papa.isFunction(this._config.beforeFirstChunk))
            {
                string rewritten = this._config.beforeFirstChunk(chunk);
                if (rewritten != null)
                {
                    chunk = rewritten;
                }
            }
            this.isFirstChunk = false;

            // Rejoin the line we likely just split in two by chunking the file
            string combined = this._partialLine + chunk;
            this._partialLine = "";

            // While more chunks are expected the last row is ignored by the parser
            bool   moreExpected = !this._finished;
            Result results      = this._handle.parse(combined, this._baseIndex, moreExpected);

            if (this._handle.paused() || this._handle.aborted())
            {
                return null;
            }

            int cursorIndex = results.meta.cursor;

            if (!this._finished)
            {
                // Keep whatever lies past the cursor for the next chunk
                this._partialLine = Papa.Substring(combined, cursorIndex - this._baseIndex);
                this._baseIndex   = cursorIndex;
            }

            if (results != null && results.data != null)
            {
                this._rowCount += results.data.Count;
            }

            bool done = this._finished
                        || (this._config.preview > 0 && this._rowCount >= this._config.preview);

            if (Papa.IS_WORKER)
            {
                // Worker messaging is not implemented here. The disabled code posted
                // { results: results, workerId: Papa.WORKER_ID, finished: <done> }
                // through global.postMessage.
            }
            else if (Papa.isFunction(this._config.chunk))
            {
                this._config.chunk(results, this._handle);
                if (this._paused)
                {
                    return null;
                }
                results               = null;
                this._completeResults = null;
            }

            bool accumulate = this._config.step == null && this._config.chunk == null;
            if (accumulate)
            {
                Result total = this._completeResults;

                total.data           = total.data.Concat(results.data).ToList();
                total.dataWithHeader = total.dataWithHeader.Concat(results.dataWithHeader).ToList();
                total.errors         = total.errors.Concat(results.errors).ToList();
                total.meta           = results.meta;
            }

            if (done && Papa.isFunction(this._config.complete) && (results == null || !results.meta.aborted))
            {
                this._config.complete(this._completeResults);
            }

            if (!done && (results == null || !results.meta.paused))
            {
                this._nextChunk();
            }

            return results;
        }
Exemple #8
0
 /// <summary>
 /// Two-argument form of <c>Substr</c>; forwards to
 /// <c>Papa.Substring(input, startIndex)</c> and returns its result unchanged.
 /// </summary>
 /// <param name="input">The string to take the part from.</param>
 /// <param name="startIndex">Start position passed through to <c>Papa.Substring</c>.</param>
 /// <returns>Whatever <c>Papa.Substring</c> returns for the two arguments.</returns>
 public static string Substr(this string input, int startIndex)
 {
     string tail = Papa.Substring(input, startIndex);
     return tail;
 }
Exemple #9
0
        /// <summary>
        /// Parses input. Most users won't need, and shouldn't mess with, the
        /// <paramref name="baseIndex"/> and <paramref name="ignoreLastRow"/> parameters.
        /// They are used by streamers (wrapper functions) when an input comes in
        /// multiple chunks, like from a file.
        /// </summary>
        /// <remarks>
        /// Before parsing, a missing newline or delimiter setting is guessed from the
        /// input and a quote character is chosen when none is configured. A configured
        /// step callback is wrapped so that the header row and empty lines are processed
        /// before the user's callback is invoked.
        /// </remarks>
        /// <param name="input">Text to parse.</param>
        /// <param name="baseIndex">Passed through to the inner parser.</param>
        /// <param name="ignoreLastRow">Passed through to the inner parser.</param>
        /// <returns>
        /// A result whose <c>meta.paused</c> is true when parsing was paused; otherwise the
        /// processed results, or an empty result with <c>meta.paused</c> false when there
        /// are none.
        /// </returns>
        public Result parse(string input, int baseIndex = 0, bool ignoreLastRow = false)
        {
            // True while a header row is wanted but no field names were collected yet.
            Func<bool> needsHeaderRow = () =>
            {
                return _config.header && _fields.Count == 0;
            };

            // Collects the field names from the first non-empty row and drops the first row.
            Action fillHeaderFields = () =>
            {
                if (_results == null || _results.data.Count == 0)
                {
                    return;
                }
                for (int i = 0; needsHeaderRow() && i < _results.data.Count; i++)
                {
                    for (int j = 0; j < _results.data[i].Count; j++)
                    {
                        _fields.Add(_results.data[i][j]);
                    }
                }
                _results.data.RemoveRange(0, 1);
            };

            // Builds the header-keyed rows (dataWithHeader) and records field-count mismatches.
            Func<Result> applyHeaderAndDynamicTyping = () =>
            {
                if (_results == null || (!_config.header && !_config.dynamicTyping))
                {
                    return _results;
                }

                for (int i = 0; i < _results.data.Count; i++)
                {
                    Dictionary<string, string> rowWithHeader = new Dictionary<string, string>();

                    int j;
                    for (j = 0; j < _results.data[i].Count; j++)
                    {
                        //[TODO] dynamic typing is not implemented yet
                        //if (_config.dynamicTyping)
                        //{
                        //    var value = _results.data[i][j];
                        //    if (value == "true" || value == "TRUE")
                        //        _results.data[i][j] = true;
                        //    else if (value == "false" || value == "FALSE")
                        //        _results.data[i][j] = false;
                        //    else
                        //        _results.data[i][j] = tryParseFloat(value);
                        //}

                        if (_config.header)
                        {
                            if (j >= _fields.Count)
                            {
                                if (!rowWithHeader.ContainsKey("__parsed_extra"))
                                {
                                    rowWithHeader.Add("__parsed_extra", "");
                                }
                                rowWithHeader["__parsed_extra"] += _results.data[i][j];
                                //[CR we cannot simply put an array into __parsed_extra, so the extra values are pipe-separated]
                                if (j < _results.data[i].Count - 1)
                                {
                                    rowWithHeader["__parsed_extra"] += "|";
                                }
                            }
                            else
                            {
                                rowWithHeader[_fields[j]] = _results.data[i][j];
                            }
                        }
                    }

                    if (_config.header)
                    {
                        _results.dataWithHeader.Add(rowWithHeader); //[CR we are not overwriting _results.data here but instead fill another List]
                        if (j > _fields.Count)
                        {
                            addError("FieldMismatch", "TooManyFields", "Too many fields: expected " + _fields.Count + " fields but parsed " + j, i);
                        }
                        else if (j < _fields.Count)
                        {
                            addError("FieldMismatch", "TooFewFields", "Too few fields: expected " + _fields.Count + " fields but parsed " + j, i);
                        }
                    }
                }

                if (_config.header && _results.meta != null)
                {
                    _results.meta.fields = _fields;
                }
                return _results;
            };

            // Post-processes _results: delimiter error, empty-line removal, header handling.
            Func<Result> processResults = () =>
            {
                if (_results != null && _delimiterError)
                {
                    addError("Delimiter", "UndetectableDelimiter", "Unable to auto-detect delimiting character; defaulted to '" + Papa.DefaultDelimiter + "'");
                    _delimiterError = false;
                }

                if (_config.skipEmptyLines)
                {
                    for (int i = 0; i < _results.data.Count; i++)
                    {
                        if (_results.data[i].Count == 1 && _results.data[i][0] == "")
                        {
                            // Step the index back so the row that moves into slot i is checked too
                            _results.data.RemoveRange(i--, 1);
                        }
                    }
                }

                if (needsHeaderRow())
                {
                    fillHeaderFields();
                }

                return applyHeaderAndDynamicTyping();
            };

            //------------------------------------------------------------------------------------------------------------------------------------------------------

            if (String.IsNullOrEmpty(_config.newline))
            {
                _config.newline = guessLineEndings(input);
            }

            _delimiterError = false;
            if (String.IsNullOrEmpty(_config.delimiter))
            {
                DelimiterResult delimGuess = guessDelimiter(input);
                if (delimGuess.successful)
                {
                    _config.delimiter = delimGuess.bestDelimiter;
                }
                else
                {
                    _delimiterError   = true;                   // add error after parsing (otherwise it would be overwritten)
                    _config.delimiter = Papa.DefaultDelimiter;
                }
                _results.meta.delimiter = _config.delimiter;
            }

            if (_config.quoteChar == Char.MinValue)
            {
                // Use a single quote only when the first field appears to be wrapped in single quotes
                if (Papa.Substr(input, 0, 1) == "'" && Papa.Substr(input, input.IndexOf(_config.delimiter, 0) - 1, 1) == "'")
                {
                    _config.quoteChar = '\'';
                }
                else
                {
                    _config.quoteChar = '"';
                }
            }

            Config parserConfig = Papa.copy(_config);

            if (_config.preview > 0 && _config.header)
            {
                parserConfig.preview++;                 // to compensate for header row
            }
            if (Papa.isFunction(_config.step))
            {
                Action<Result, ParserHandle> userStep = _config.step;
                parserConfig.step = (results, parser) =>
                {
                    _results = results;

                    if (needsHeaderRow())
                    {
                        processResults();
                    }
                    else        // only call user's step function after header row
                    {
                        processResults();

                        // It's possible that this line was empty and there's no row here after all
                        if (_results.data.Count == 0)
                        {
                            return;
                        }

                        _stepCounter += results.data.Count;
                        if (parserConfig.preview > 0 && _stepCounter > parserConfig.preview)
                        {
                            _parser.abort();
                        }
                        else
                        {
                            userStep(_results, this);
                        }
                    }
                };
            }
            //----------------------------------------------------------------------

            _input   = input;
            _parser  = new Parser(parserConfig);
            _results = _parser.parse(_input, baseIndex, ignoreLastRow);
            processResults();

            if (_paused)
            {
                return new Result()
                {
                    meta = new Meta()
                    {
                        paused = true
                    }
                };
            }
            else if (_results != null)
            {
                return _results;
            }
            else
            {
                return new Result()
                {
                    meta = new Meta()
                    {
                        paused = false
                    }
                };
            }
        }
Exemple #10
0
 /// <summary>
 /// Pauses parsing: sets the paused flag, calls abort on the inner parser, and
 /// reduces the stored input to the part starting at the parser's current
 /// character index.
 /// </summary>
 public void pause()
 {
     _paused = true;
     _parser.abort();

     int resumeAt = _parser.getCharIndex();
     _input = Papa.Substr(_input, resumeAt);
 }