/// <summary>
/// Reads the next token from the BibTeX stream.
/// </summary>
/// <returns>
/// A <c>BibTeXToken</c> whose line and column are seeded from the current
/// tokenizer position and then adjusted by <c>FormatToken</c>.
/// </returns>
public BibTeXToken GetNextToken()
{
    BibTeXToken result = new BibTeXToken();
    result.LineNumber = this.currentTokenLineNumber;
    result.ColumnNumber = this.currentTokenColumnNumber + 1;

    try
    {
        // A field value follows either "= {" / "= \"" or a concatenation
        // operator followed by a quotation mark.
        bool fieldValueFollows =
            (this.prevToPrevToken.Equals(BibTeXHelper.BT_ENTRY_EQUAL)
                && (this.prevToken.Equals(BibTeXHelper.BT_ENTRY_LEFT_BRACE)
                    || this.prevToken.Equals(BibTeXHelper.BT_ENTRY_QUOTATION)))
            || (this.prevToPrevToken.Equals(BibTeXHelper.BT_ENTRY_CONCATENATION)
                && this.prevToken.Equals(BibTeXHelper.BT_ENTRY_QUOTATION));

        if (fieldValueFollows)
        {
            // The field value arrives in pieces; each piece is trimmed and
            // the pieces are joined without a separator.
            StringBuilder joined = new StringBuilder();
            foreach (string piece in this.ProcessFieldValue())
            {
                joined.Append(piece.Trim());
            }

            result.Value = joined.ToString();
        }
        else if (!this.lastTokenEncountered)
        {
            result.Value = this.GetToken(this.entityRegEx);
        }
    }
    catch (BibTeXParserException ex)
    {
        // Format the offending token first so that the re-thrown exception
        // carries the adjusted line and column numbers.
        result.Value = ex.ErrorToken;
        FormatToken(result);
        throw new BibTeXParserException(ex.Message, result.LineNumber, result.ColumnNumber, result.Value);
    }

    // FormatToken updates the token in place, so nothing needs to be returned from it.
    FormatToken(result);
    return result;
}
/// <summary>
/// Parses BibTeX content from the given stream.
/// </summary>
/// <remarks>
/// Outline:
/// 1. Clear the recorded parser exceptions and create a tokenizer over the stream.
/// 2. Repeatedly collect the tokens of one entry: the set starts at an '@' token
///    and ends just before the token that follows the entry.
/// 3. Dispatch on the entry type: variable declarations go to <c>ParseSymbol</c>,
///    comment entries are ignored, everything else goes to the parser's
///    <c>GetBibTeXEntry</c>.
/// The parsed entries and the symbol table are stored on this instance only when
/// the behavior is IgnoreParseErrors, or StopOnFirstError with no recorded exception.
/// </remarks>
/// <param name="inStream">Stream passed to the tokenizer.</param>
/// <returns><c>true</c> when no parser exception was recorded; otherwise <c>false</c>.</returns>
public bool Load(Stream inStream)
{
    List<BibTeXEntry> parsedEntries = new List<BibTeXEntry>();
    Dictionary<string, string> symbolTable = new Dictionary<string, string>();
    BibTeXTokenizer tokenProvider = new BibTeXTokenizer(inStream);
    BibTeXParser parser = new BibTeXParser(symbolTable);
    this._parserExceptions.Clear();

    // Tokens read while finishing one entry that belong to the next one.
    Queue<BibTeXToken> carriedOver = new Queue<BibTeXToken>();

    while (!tokenProvider.LastTokenEncountered)
    {
        int openBraces = 0;
        int openParentheses = 0;
        List<BibTeXToken> tokenSet = new List<BibTeXToken>();

        try
        {
            // Start the set with a carried-over '@'; any other carried-over token is dropped.
            while (carriedOver.Count > 0)
            {
                BibTeXToken pending = carriedOver.Dequeue();
                if (string.Equals(pending.Value, BibTeXHelper.BT_ENTRY_AT, StringComparison.OrdinalIgnoreCase))
                {
                    tokenSet.Add(pending);
                }
            }

            while (!tokenProvider.LastTokenEncountered)
            {
                BibTeXToken current = tokenProvider.GetNextToken();
                bool isAtSign = string.Equals(current.Value, BibTeXHelper.BT_ENTRY_AT, StringComparison.OrdinalIgnoreCase);

                if (tokenSet.Count == 0 && !isAtSign)
                {
                    // Everything outside an '@' entry is treated as a comment.
                    continue;
                }

                if (String.IsNullOrEmpty(current.Value))
                {
                    continue;
                }

                bool entryIsClosed = tokenSet.Count > 2 && openBraces == 0 && openParentheses == 0;
                bool nextEntryStarts = isAtSign && tokenSet.Count != 0;
                if (entryIsClosed || nextEntryStarts)
                {
                    // This token is not part of the current entry; keep it for the next pass.
                    carriedOver.Enqueue(current);
                    break;
                }

                tokenSet.Add(current);

                // Track nesting so the end of the entry can be detected.
                if (string.Equals(current.Value, BibTeXHelper.BT_ENTRY_LEFT_BRACE, StringComparison.OrdinalIgnoreCase))
                {
                    openBraces++;
                }
                else if (string.Equals(current.Value, BibTeXHelper.BT_ENTRY_RIGHT_BRACE, StringComparison.OrdinalIgnoreCase))
                {
                    openBraces--;
                }
                else if (string.Equals(current.Value, BibTeXHelper.BT_ENTRY_LEFT_PARENTHESIS, StringComparison.OrdinalIgnoreCase))
                {
                    openParentheses++;
                }
                else if (string.Equals(current.Value, BibTeXHelper.BT_ENTRY_RIGHT_PARENTHESIS, StringComparison.OrdinalIgnoreCase))
                {
                    openParentheses--;
                }
            }

            if (tokenSet.Count == 0)
            {
                // No entry was found; the outer loop condition ends the parse.
                continue;
            }

            if (tokenSet.Count == 1)
            {
                // A lone '@' without an entry name.
                throw new BibTeXParserException(Properties.Resources.BIBTEXPARSER_NOT_WELL_FORMED, tokenSet[0].LineNumber, tokenSet[0].ColumnNumber, String.Empty);
            }

            // Dispatch on the entry type. Parser exceptions raised by either call
            // are handled by the catch block below.
            string entryType = tokenSet[1].Value.ToUpperInvariant();
            if (entryType == BibTeXHelper.BT_ENTRY_VARIABLE_TYPE)
            {
                // Variable declaration entry.
                ParseSymbol(tokenSet, ref symbolTable);
            }
            else if (entryType != BibTeXHelper.BT_ENTRY_COMMENT_TYPE)
            {
                // Regular entry; comment entries are simply ignored.
                parsedEntries.Add(parser.GetBibTeXEntry(tokenSet));
            }
        }
        catch (BibTeXParserException parseError)
        {
            // Only parser exceptions are handled; anything else (for example an
            // IOException) propagates to the caller.
            bool stopNow = this._behavior == BibTeXParserBehavior.StopOnFirstError;
            if (stopNow || this._behavior == BibTeXParserBehavior.IgnoreParseErrors)
            {
                // Keep the exception for later notification.
                this._parserExceptions.Add(parseError);
            }

            if (stopNow)
            {
                return false;
            }
        }
    }

    // Store the result only on success (StopOnFirstError) or always (IgnoreParseErrors).
    bool storeResult =
        (this._behavior == BibTeXParserBehavior.StopOnFirstError && this._parserExceptions.Count == 0)
        || this._behavior == BibTeXParserBehavior.IgnoreParseErrors;
    if (storeResult)
    {
        this._allBibTexEntries = parsedEntries;
        this._symbolTable = symbolTable;
    }

    return this._parserExceptions.Count == 0;
}
/// <summary>
/// Normalizes the value, line number and column number of a token in place.
/// </summary>
/// <remarks>
/// If the value, ignoring spaces and tabs, starts with a line break, the column
/// restarts at 1. The column is then advanced by the whitespace that precedes the
/// first character on its line, and the line number is advanced by the number of
/// leading line breaks. The value has its trailing whitespace removed, and its
/// lines are trimmed and joined with the SPACE resource.
/// A token whose value is <c>null</c> is left untouched.
/// </remarks>
/// <param name="bibToken">The bib token.</param>
private static void FormatToken(BibTeXToken bibToken)
{
    if (bibToken.Value == null)
    {
        // Nothing to normalize; avoids a NullReferenceException on the calls below.
        return;
    }

    // Use the real length of the platform line break instead of assuming two characters.
    string newLine = System.Environment.NewLine;
    string tokenWithoutNewLine;

    if (bibToken.Value.Replace(Properties.Resources.SPACE, string.Empty).Replace(BibTeXHelper.BT_ENTRY_TAB, string.Empty).StartsWith(newLine, StringComparison.OrdinalIgnoreCase))
    {
        // The token text starts on a later line, so the column restarts.
        bibToken.ColumnNumber = 1;

        // Index of the first non-whitespace character of the value.
        int indexOfFirstCharacter = bibToken.Value.Length - bibToken.Value.TrimStart().Length;

        // Last line break located before that character.
        int indexOfNewLineInToken = bibToken.Value.LastIndexOf(newLine, indexOfFirstCharacter, indexOfFirstCharacter + 1, StringComparison.OrdinalIgnoreCase);
        tokenWithoutNewLine = bibToken.Value.Substring(indexOfNewLineInToken + newLine.Length);
    }
    else
    {
        tokenWithoutNewLine = bibToken.Value;
    }

    // Advance the column past the whitespace in front of the first character.
    bibToken.ColumnNumber = bibToken.ColumnNumber + tokenWithoutNewLine.Length - tokenWithoutNewLine.TrimStart().Length;

    int numberOfNewLineInToken = 0;
    StringBuilder formatedInput = new StringBuilder();
    bibToken.Value = bibToken.Value.TrimEnd();

    // Separate cursor used only to count the leading line breaks.
    string bibTexTokenValue = bibToken.Value;

    while (bibToken.Value.Contains(newLine))
    {
        if (bibTexTokenValue.Replace(Properties.Resources.SPACE, string.Empty).Replace(BibTeXHelper.BT_ENTRY_TAB, string.Empty).StartsWith(newLine, StringComparison.OrdinalIgnoreCase))
        {
            numberOfNewLineInToken++;
            bibTexTokenValue = bibTexTokenValue.Substring(bibTexTokenValue.IndexOf(newLine, StringComparison.OrdinalIgnoreCase) + newLine.Length);
        }

        // The loop condition guarantees that a line break is present.
        int indexOfNewLine = bibToken.Value.IndexOf(newLine, StringComparison.OrdinalIgnoreCase);

        if (formatedInput.Length != 0)
        {
            formatedInput.Append(Properties.Resources.SPACE);
        }

        formatedInput.Append(bibToken.Value.Substring(0, indexOfNewLine).Trim());

        // Continue right after the line break that was just consumed. Advancing from
        // the same index that was used for the cut keeps both in step even when a
        // lone line-feed character appears earlier in the value.
        bibToken.Value = bibToken.Value.Substring(indexOfNewLine + newLine.Length);
    }

    bibToken.LineNumber = bibToken.LineNumber + numberOfNewLineInToken;
    formatedInput.Append(Properties.Resources.SPACE + bibToken.Value.Trim());
    bibToken.Value = formatedInput.ToString().Trim();
}