/// <summary>
/// Aborts the current parse: sets the aborted flag on this handle, forwards the
/// abort to the underlying parser, marks the current results' meta as aborted,
/// invokes the configured <c>complete</c> callback (when one is set) with those
/// results, and finally clears the retained input.
/// </summary>
public void abort()
{
    _aborted = true;
    _parser.abort();
    _results.meta.aborted = true;

    bool hasCompleteCallback = Papa.isFunction(_config.complete);
    if (hasCompleteCallback)
    {
        _config.complete(_results);
    }

    // Drop the input so nothing is left to resume from.
    _input = "";
}
/// <summary>
/// Takes the next slice of the remaining input and hands it to <c>parseChunk</c>.
/// </summary>
/// <returns>
/// <c>null</c> when the streamer is already finished; otherwise whatever
/// <c>parseChunk</c> returns for the slice.
/// </returns>
protected override Result _nextChunk()
{
    if (base._finished)
    {
        return null;
    }

    int size = base._config.chunkSize;
    string chunk;

    if (size > 0)
    {
        // Slice off the first `size` characters and keep the tail for later.
        chunk = Papa.Substr(remaining, 0, size);
        remaining = Papa.Substr(remaining, size);
    }
    else
    {
        // No chunk size configured: everything goes out in a single chunk.
        chunk = remaining;
        remaining = "";
    }

    // The finished flag must be set before parseChunk runs, because parseChunk reads it.
    base._finished = String.IsNullOrEmpty(remaining);
    return base.parseChunk(chunk);
}
/// <summary>
/// Installs a private copy of <paramref name="config"/> on this streamer and
/// creates a fresh parser handle bound to that copy.
/// </summary>
/// <param name="config">Caller-supplied configuration; it is copied, not modified.</param>
private void replaceConfig(Config config)
{
    // Work on a copy so the edits below never touch the caller's object.
    Config configCopy = Papa.copy(config);

    bool isStreaming = config.step != null || config.chunk != null;
    if (!isStreaming)
    {
        // disable Range header if not streaming; bad values break IIS - see issue #196
        configCopy.chunkSize = 0;
    }

    this._handle = new ParserHandle(configCopy);
    this._handle.streamer = this;

    // Persist the copy as this streamer's active configuration.
    this._config = configCopy;
}
/// <summary>
/// Reports <paramref name="error"/> through the configured <c>error</c> callback,
/// when one is set.
/// </summary>
/// <param name="error">The error to hand to the callback.</param>
protected void _sendError(Error error)
{
    if (Papa.isFunction(this._config.error))
    {
        this._config.error(error);
        return;
    }

    if (Papa.IS_WORKER && this._config.error != null)
    {
        // Intentionally empty: the worker path would post
        // { workerId: Papa.WORKER_ID, error: error, finished: false }
        // to the host, but worker messaging is not implemented here.
    }
}
/// <summary>
/// Guesses the line-ending sequence used by <paramref name="input"/> by splitting
/// on '\r' and counting how many of the resulting segments start with '\n'.
/// Only the first 1 MB of the input is inspected.
/// </summary>
/// <param name="input">The text to inspect.</param>
/// <returns>
/// "\n" when the inspected text contains no '\r'; otherwise "\r\n" when at least
/// half (integer division) of the segments start with '\n', else "\r".
/// </returns>
private string guessLineEndings(string input)
{
    input = Papa.Substr(input, 0, 1024 * 1024); // max length 1 MB

    string[] r = input.Split('\r');
    if (r.Length == 1)
    {
        return "\n";
    }

    int numWithN = 0;
    for (int i = 0; i < r.Length; i++)
    {
        // Segments are empty when the text ends with '\r' or contains "\r\r".
        // Indexing [0] on an empty string throws, so check the length first.
        if (r[i].Length > 0 && r[i][0] == '\n')
        {
            numWithN++;
        }
    }

    return numWithN >= r.Length / 2 ? "\r\n" : "\r";
}
/// <summary>
/// Splits <paramref name="input"/> into rows of fields using the configured
/// delimiter, newline, quote character and comment prefix. When a step callback
/// is configured it is invoked after each completed row and the accumulated
/// data/errors are reset afterwards.
/// </summary>
/// <param name="input">The text to parse. Null or empty input yields an empty result.</param>
/// <param name="baseIndex">Offset added to the reported <c>meta.cursor</c>.</param>
/// <param name="ignoreLastRow">
/// When true, the trailing (possibly incomplete) row is not emitted and an
/// unterminated quoted field is not reported as an error.
/// </param>
/// <returns>A result holding the parsed rows, the errors and the meta information.</returns>
public Result parse(string input, int baseIndex = 0, bool ignoreLastRow = false)
{
    // We don't need to compute some of these every time parse() is called,
    // but having them in a more local scope seems to perform better.
    // The null guards keep the IsNullOrEmpty checks further down reachable.
    int inputLen = input == null ? 0 : input.Length;
    int delimLen = delim.Length;
    int newlineLen = newline.Length;
    int commentsLen = comments == null ? 0 : comments.Length;
    bool stepIsFunction = step != null;
    string[] delimSplit = new string[] { delim };
    string[] newlineSplit = new string[] { newline };

    // A doubled quote character inside a quoted field stands for one literal quote.
    // Plain string replacement is used (not a regex built from the quote character)
    // so a quote character that happens to be a regex metacharacter still works.
    string singleQuote = quoteChar.ToString();
    string doubledQuote = singleQuote + singleQuote;
    Func<string, string> unescapeQuotes = (text) => text.Replace(doubledQuote, singleQuote);

    // Returns an object with the results, errors, and meta.
    Func<bool, Result> returnable = (stopped) =>
    {
        return new Result()
        {
            data = data,
            dataWithHeader = new List<Dictionary<string, string>>(),
            errors = errors,
            meta = new Meta()
            {
                delimiter = delim,
                linebreak = newline,
                aborted = aborted,
                truncated = stopped,
                cursor = lastCursor + baseIndex
            }
        };
    };

    // Executes the user's step function and resets data & errors.
    Action doStep = () =>
    {
        step(returnable(false), null);
        data = new List<List<string>>();
        errors = new List<Error>();
    };

    // Stores a finished row and remembers how far the input has been consumed.
    Action<List<string>> pushRow = (newRow) =>
    {
        data.Add(newRow);
        lastCursor = cursor;
    };

    // Appends the remaining input from cursor to the end into
    // row, saves the row, calls step, and returns the results.
    Func<string, Result> finish = (newValue) =>
    {
        if (ignoreLastRow)
        {
            return returnable(false);
        }
        if (newValue == null)
        {
            newValue = Papa.Substr(input, cursor);
        }
        row.Add(newValue);
        cursor = inputLen; // important in case parsing is paused
        pushRow(row);
        if (stepIsFunction)
        {
            doStep();
        }
        return returnable(false);
    };

    // Establish starting state
    cursor = 0;
    data = new List<List<string>>();
    errors = new List<Error>();
    row = new List<string>();
    lastCursor = 0;

    if (String.IsNullOrEmpty(input))
    {
        return returnable(false);
    }

    // Fast mode: forced when fastMode is true, or chosen automatically (fastMode not
    // explicitly false) when the input contains no quote character at all.
    if (fastMode == true || (fastMode != false && input.IndexOf(quoteChar) == -1))
    {
        string[] rows = input.Split(newlineSplit, StringSplitOptions.None);
        for (int i = 0; i < rows.Length; i++)
        {
            string rowFast = rows[i];
            cursor += rowFast.Length;
            if (i != rows.Length - 1)
            {
                cursor += newlineLen;
            }
            else if (ignoreLastRow)
            {
                return returnable(false);
            }

            if (!String.IsNullOrEmpty(comments) && Papa.Substr(rowFast, 0, commentsLen) == comments)
            {
                continue;
            }

            if (stepIsFunction)
            {
                data = new List<List<string>>();
                pushRow(new List<string>(rowFast.Split(delimSplit, StringSplitOptions.None)));
                doStep();
                if (aborted)
                {
                    return returnable(false);
                }
            }
            else
            {
                pushRow(new List<string>(rowFast.Split(delimSplit, StringSplitOptions.None)));
            }

            if (preview > 0 && i >= preview)
            {
                // Fewer than `preview` rows may be held here (step mode resets data,
                // comment rows are skipped), so clamp the count: GetRange throws when
                // asked for more elements than the list contains.
                data = data.GetRange(0, Math.Min(preview, data.Count));
                return returnable(true);
            }
        }
        return returnable(false);
    }

    // Ordinal searches keep this path consistent with the ordinal Split used by
    // fast mode and independent of the current culture.
    int nextDelim = input.IndexOf(delim, cursor, StringComparison.Ordinal);
    int nextNewline = input.IndexOf(newline, cursor, StringComparison.Ordinal);

    // Appends the current row to the results. It sets the cursor
    // to newCursor and finds the nextNewline. The caller should
    // take care to execute user's step function and check for
    // preview and end parsing if necessary.
    Action<int> saveRow = (newCursor) =>
    {
        cursor = newCursor;
        pushRow(row);
        row = new List<string>();
        nextNewline = input.IndexOf(newline, cursor, StringComparison.Ordinal);
    };

    // Parser loop
    for (;;)
    {
        // Never read past the end of the input.
        if (input.Length <= cursor)
        {
            break;
        }

        // Field has opening quote
        if (input[cursor] == quoteChar)
        {
            // Start our search for the closing quote where the cursor is
            int quoteSearch = cursor;

            // Skip the opening quote
            cursor++;

            for (;;)
            {
                // Find closing quote
                quoteSearch = input.IndexOf(quoteChar, quoteSearch + 1);

                if (quoteSearch == -1)
                {
                    if (!ignoreLastRow)
                    {
                        // No closing quote... what a pity
                        errors.Add(new Error()
                        {
                            type = "Quotes",
                            code = "MissingQuotes",
                            message = "Quoted field unterminated",
                            row = data.Count, // row has yet to be inserted
                            index = cursor
                        });
                    }
                    return finish(null);
                }

                if (quoteSearch == inputLen - 1)
                {
                    // Closing quote at EOF
                    string value = unescapeQuotes(Papa.Substring(input, cursor, quoteSearch));
                    return finish(value);
                }

                // If this quote is escaped, it's part of the data; skip it
                if (input[quoteSearch + 1] == quoteChar)
                {
                    quoteSearch++;
                    continue;
                }

                if (input[quoteSearch + 1].ToString() == delim)
                {
                    // Closing quote followed by delimiter
                    row.Add(unescapeQuotes(Papa.Substring(input, cursor, quoteSearch)));
                    cursor = quoteSearch + 1 + delimLen;
                    nextDelim = input.IndexOf(delim, cursor, StringComparison.Ordinal);
                    nextNewline = input.IndexOf(newline, cursor, StringComparison.Ordinal);
                    break;
                }

                if (Papa.Substr(input, quoteSearch + 1, newlineLen) == newline)
                {
                    // Closing quote followed by newline
                    row.Add(unescapeQuotes(Papa.Substring(input, cursor, quoteSearch)));
                    saveRow(quoteSearch + 1 + newlineLen);
                    // because we may have skipped the nextDelim in the quoted field
                    nextDelim = input.IndexOf(delim, cursor, StringComparison.Ordinal);

                    if (stepIsFunction)
                    {
                        doStep();
                        if (aborted)
                        {
                            return returnable(false);
                        }
                    }

                    if (preview > 0 && data.Count >= preview)
                    {
                        return returnable(true);
                    }

                    break;
                }
            }

            continue;
        }

        // Comment found at start of new line
        if (!String.IsNullOrEmpty(comments) && row.Count == 0 && Papa.Substr(input, cursor, commentsLen) == comments)
        {
            if (nextNewline == -1) // Comment ends at EOF
            {
                return returnable(false);
            }
            cursor = nextNewline + newlineLen;
            nextNewline = input.IndexOf(newline, cursor, StringComparison.Ordinal);
            nextDelim = input.IndexOf(delim, cursor, StringComparison.Ordinal);
            continue;
        }

        // Next delimiter comes before next newline, so we've reached end of field
        if (nextDelim != -1 && (nextDelim < nextNewline || nextNewline == -1))
        {
            row.Add(Papa.Substring(input, cursor, nextDelim));
            cursor = nextDelim + delimLen;
            nextDelim = input.IndexOf(delim, cursor, StringComparison.Ordinal);
            continue;
        }

        // End of row
        if (nextNewline != -1)
        {
            row.Add(Papa.Substring(input, cursor, nextNewline));
            saveRow(nextNewline + newlineLen);

            if (stepIsFunction)
            {
                doStep();
                if (aborted)
                {
                    return returnable(false);
                }
            }

            if (preview > 0 && data.Count >= preview)
            {
                return returnable(true);
            }

            continue;
        }

        break;
    }

    return finish(null);
}
/// <summary>
/// Parses one chunk of input: prepends the partial line left over from the
/// previous chunk, runs the parser handle, updates the streaming bookkeeping,
/// dispatches the chunk/complete callbacks and requests the next chunk when
/// parsing is not finished.
/// </summary>
/// <param name="chunk">The next piece of raw input.</param>
/// <returns>
/// <c>null</c> when the handle reports paused/aborted, when this streamer was
/// paused by the chunk callback, or after a chunk callback consumed the results;
/// otherwise the results of parsing this chunk.
/// </returns>
public Result parseChunk(string chunk)
{
    // Give the caller one chance to rewrite the very first chunk.
    if (isFirstChunk && Papa.isFunction(_config.beforeFirstChunk))
    {
        string rewritten = _config.beforeFirstChunk(chunk);
        chunk = rewritten ?? chunk;
    }
    isFirstChunk = false;

    // Rejoin the line we likely just split in two by chunking the file
    string combined = _partialLine + chunk;
    _partialLine = "";

    Result results = _handle.parse(combined, _baseIndex, !_finished);

    if (_handle.paused() || _handle.aborted())
    {
        return null;
    }

    int cursorAfterParse = results.meta.cursor;

    if (!_finished)
    {
        // Keep whatever was not consumed so it can be prepended to the next chunk.
        _partialLine = Papa.Substring(combined, cursorAfterParse - _baseIndex);
        _baseIndex = cursorAfterParse;
    }

    if (results != null && results.data != null)
    {
        _rowCount += results.data.Count;
    }

    bool previewReached = _config.preview > 0 && _rowCount >= _config.preview;
    bool done = _finished || previewReached;

    // In worker mode the results would be posted to the host
    // ({ results, workerId: Papa.WORKER_ID, finished }); that path is not
    // implemented here, so worker mode dispatches nothing at this point.
    if (!Papa.IS_WORKER && Papa.isFunction(_config.chunk))
    {
        _config.chunk(results, _handle);
        if (_paused)
        {
            return null;
        }
        results = null;
        _completeResults = null;
    }

    // Without step/chunk callbacks, accumulate everything for the complete callback.
    if (_config.step == null && _config.chunk == null)
    {
        _completeResults.data = _completeResults.data.Concat(results.data).ToList();
        _completeResults.dataWithHeader = _completeResults.dataWithHeader.Concat(results.dataWithHeader).ToList();
        _completeResults.errors = _completeResults.errors.Concat(results.errors).ToList();
        _completeResults.meta = results.meta;
    }

    bool notAborted = results == null || !results.meta.aborted;
    if (done && Papa.isFunction(_config.complete) && notAborted)
    {
        _config.complete(_completeResults);
    }

    if (!done && (results == null || !results.meta.paused))
    {
        _nextChunk();
    }

    return results;
}
/// <summary>
/// Returns the part of <paramref name="input"/> starting at
/// <paramref name="startIndex"/>, by delegating to <c>Papa.Substring</c>.
/// </summary>
/// <param name="input">The source string.</param>
/// <param name="startIndex">Index of the first character to include.</param>
/// <returns>Whatever <c>Papa.Substring(input, startIndex)</c> returns.</returns>
public static string Substr(this string input, int startIndex)
{
    string tail = Papa.Substring(input, startIndex);
    return tail;
}
/// <summary>
/// Parses input. Most users won't need, and shouldn't mess with, the
/// <paramref name="baseIndex"/> and <paramref name="ignoreLastRow"/> parameters.
/// They are used by streamers (wrapper functions) when an input comes in
/// multiple chunks, like from a file.
/// </summary>
/// <param name="input">The text to parse.</param>
/// <param name="baseIndex">Passed through unchanged to the underlying parser.</param>
/// <param name="ignoreLastRow">Passed through unchanged to the underlying parser.</param>
/// <returns>
/// A result whose meta has <c>paused = true</c> when this handle is paused;
/// otherwise the processed results, or a result whose meta has
/// <c>paused = false</c> when the parser produced none.
/// </returns>
public Result parse(string input, int baseIndex = 0, bool ignoreLastRow = false)
{
    // True while header mode is on and no header fields have been captured yet.
    Func<bool> needsHeaderRow = () =>
    {
        return _config.header && _fields.Count == 0;
    };

    // Copies header names from the leading row(s) into _fields, then drops the first row.
    Action fillHeaderFields = () =>
    {
        if (_results == null || _results.data.Count == 0)
        {
            return;
        }
        for (int i = 0; needsHeaderRow() && i < _results.data.Count; i++)
        {
            for (int j = 0; j < _results.data[i].Count; j++)
            {
                _fields.Add(_results.data[i][j]);
            }
        }
        _results.data.RemoveRange(0, 1);
    };

    // Builds the keyed (header -> value) rows and records field-count mismatches.
    Func<Result> applyHeaderAndDynamicTyping = () =>
    {
        if (_results == null || (!_config.header && !_config.dynamicTyping))
        {
            return _results;
        }

        for (int i = 0; i < _results.data.Count; i++)
        {
            Dictionary<string, string> rowWithHeader = new Dictionary<string, string>();

            int j;
            for (j = 0; j < _results.data[i].Count; j++)
            {
                // TODO: dynamicTyping conversion (true/false/number) is not
                // implemented; values stay strings.

                if (_config.header)
                {
                    if (j >= _fields.Count)
                    {
                        if (!rowWithHeader.ContainsKey("__parsed_extra"))
                        {
                            rowWithHeader.Add("__parsed_extra", "");
                        }
                        rowWithHeader["__parsed_extra"] += _results.data[i][j];
                        // We cannot simply put an array into __parsed_extra,
                        // so the extra values are joined with a pipe.
                        if (j < _results.data[i].Count - 1)
                        {
                            rowWithHeader["__parsed_extra"] += "|";
                        }
                    }
                    else
                    {
                        rowWithHeader[_fields[j]] = _results.data[i][j];
                    }
                }
            }

            if (_config.header)
            {
                // _results.data is not overwritten; the keyed rows go into a separate list.
                _results.dataWithHeader.Add(rowWithHeader);
                if (j > _fields.Count)
                {
                    addError("FieldMismatch", "TooManyFields", "Too many fields: expected " + _fields.Count + " fields but parsed " + j, i);
                }
                else if (j < _fields.Count)
                {
                    addError("FieldMismatch", "TooFewFields", "Too few fields: expected " + _fields.Count + " fields but parsed " + j, i);
                }
            }
        }

        if (_config.header && _results.meta != null)
        {
            _results.meta.fields = _fields;
        }
        return _results;
    };

    // Post-processes the current _results: delimiter error, empty lines, header handling.
    Func<Result> processResults = () =>
    {
        if (_results != null && _delimiterError)
        {
            addError("Delimiter", "UndetectableDelimiter", "Unable to auto-detect delimiting character; defaulted to '" + Papa.DefaultDelimiter + "'");
            _delimiterError = false;
        }

        if (_config.skipEmptyLines)
        {
            for (int i = 0; i < _results.data.Count; i++)
            {
                if (_results.data[i].Count == 1 && _results.data[i][0] == "")
                {
                    // Step the index back so the row that slides into slot i is checked too.
                    _results.data.RemoveRange(i--, 1);
                }
            }
        }

        if (needsHeaderRow())
        {
            fillHeaderFields();
        }

        return applyHeaderAndDynamicTyping();
    };

    if (String.IsNullOrEmpty(_config.newline))
    {
        _config.newline = guessLineEndings(input);
    }

    _delimiterError = false;
    if (String.IsNullOrEmpty(_config.delimiter))
    {
        DelimiterResult delimGuess = guessDelimiter(input);
        if (delimGuess.successful)
        {
            _config.delimiter = delimGuess.bestDelimiter;
        }
        else
        {
            _delimiterError = true; // add error after parsing (otherwise it would be overwritten)
            _config.delimiter = Papa.DefaultDelimiter;
        }
        _results.meta.delimiter = _config.delimiter;
    }

    if (_config.quoteChar == Char.MinValue)
    {
        // No quote character configured: use a single quote when the first field
        // both starts with one and has one just before the first delimiter;
        // otherwise use a double quote. The delimiter search is ordinal so it does
        // not depend on the current culture.
        if (Papa.Substr(input, 0, 1) == "'" && Papa.Substr(input, input.IndexOf(_config.delimiter, 0, StringComparison.Ordinal) - 1, 1) == "'")
        {
            _config.quoteChar = '\'';
        }
        else
        {
            _config.quoteChar = '"';
        }
    }

    Config parserConfig = Papa.copy(_config);
    if (_config.preview > 0 && _config.header)
    {
        parserConfig.preview++; // to compensate for header row
    }

    if (Papa.isFunction(_config.step))
    {
        Action<Result, ParserHandle> userStep = _config.step;
        parserConfig.step = (results, parser) =>
        {
            _results = results;

            if (needsHeaderRow())
            {
                processResults();
            }
            else // only call user's step function after header row
            {
                processResults();

                // It's possible that this line was empty and there's no row here after all
                if (_results.data.Count == 0)
                {
                    return;
                }

                _stepCounter += results.data.Count;
                if (parserConfig.preview > 0 && _stepCounter > parserConfig.preview)
                {
                    _parser.abort();
                }
                else
                {
                    userStep(_results, this);
                }
            }
        };
    }

    _input = input;
    _parser = new Parser(parserConfig);
    _results = _parser.parse(_input, baseIndex, ignoreLastRow);
    processResults();

    if (_paused)
    {
        return new Result() { meta = new Meta() { paused = true } };
    }

    if (_results != null)
    {
        return _results;
    }

    return new Result() { meta = new Meta() { paused = false } };
}
/// <summary>
/// Pauses parsing: sets the paused flag, aborts the underlying parser, and trims
/// the retained input so it starts at the character index the parser reports.
/// </summary>
public void pause()
{
    _paused = true;
    _parser.abort();

    string pending = _input;
    var resumeIndex = _parser.getCharIndex();
    _input = Papa.Substr(pending, resumeIndex);
}