/// <summary>
/// Force-combines <paramref name="aNewToken"/> into the previous output token, or
/// enqueues it as a new output token when the cache is empty.
/// </summary>
/// <remarks>
/// When the cache is not empty, the token is first handed to
/// CheckIfStateChangeRequiredForEnqueuedToken. If that call returns true, this
/// method does nothing further with the token (presumably the check has already
/// dealt with it - confirm against that method).
/// </remarks>
/// <param name="aNewToken">The token to merge or enqueue.</param>
private void ForceMergeWithPreviousToken( SymToken aNewToken )
{
    // Nothing cached yet, so there is no previous token to merge into.
    if ( iCache.Count <= 0 )
    {
        EnqueueNewOutputToken( aNewToken );
        return;
    }

    bool stateChangeRequired = CheckIfStateChangeRequiredForEnqueuedToken( aNewToken );
    if ( !stateChangeRequired )
    {
        PreviousOutputToken.ForceCombine( aNewToken );
    }
}
/// <summary>
/// Merges a run of tokens into a single token and then removes the tokens that
/// were merged in.
/// </summary>
/// <remarks>
/// The work is done in two passes (combine, then remove) so that the merged
/// text keeps its left-to-right order. The token at <paramref name="aEndIndex"/>
/// itself is neither merged nor removed.
/// </remarks>
/// <param name="aStartIndex">
/// Index of the first candidate target token. When <paramref name="aForceMerge"/>
/// is false and that token does not allow combining, later tokens (below
/// <paramref name="aEndIndex"/>) are tried in order until one does.
/// Must be less than Count (checked by a debug assertion).
/// </param>
/// <param name="aEndIndex">
/// Exclusive upper bound of the range. Must be less than Count (checked by a
/// debug assertion).
/// </param>
/// <param name="aMergeInContinuations">
/// When false, tokens of class EClassContinuation are left out of the merged
/// text; they are still removed from the container by the second pass.
/// </param>
/// <param name="aForceMerge">
/// When true the token at <paramref name="aStartIndex"/> is always the target
/// and ForceCombine is used; otherwise Combine is used.
/// </param>
public void MergeAllTokensWithinRange( int aStartIndex, int aEndIndex, bool aMergeInContinuations, bool aForceMerge )
{
    int count = Count;
    System.Diagnostics.Debug.Assert( count > aStartIndex );
    System.Diagnostics.Debug.Assert( aEndIndex < count );

    // Invariant from here on: aStartIndex is always one past the index of
    // startingToken, i.e. it addresses the first token to be merged in.
    SymToken startingToken = this[ aStartIndex++ ];
    if ( aForceMerge == false )
    {
        // Not force-merging, so find a valid combinable starting element.
        // Post-increment keeps the invariant above: every candidate is
        // examined in turn and the chosen target is never part of the
        // merge/remove range.
        while ( startingToken.CombiningAllowed == false && aStartIndex < aEndIndex )
        {
            startingToken = this[ aStartIndex++ ];
        }
    }

    // First pass - join tokens. The upper bound is exclusive so that exactly
    // the tokens discarded by the second pass are the ones merged here.
    for ( int i = aStartIndex; i < aEndIndex; i++ )
    {
        SymToken thisToken = this[ i ];

        // Ignore continuations during merging unless asked to include them
        if ( thisToken.Class != SymToken.TClass.EClassContinuation || aMergeInContinuations )
        {
            if ( aForceMerge == false )
            {
                startingToken.Combine( thisToken );
            }
            else
            {
                startingToken.ForceCombine( thisToken );
            }
        }
    }

    // Second pass - discard merged tokens, back to front so that the
    // remaining indices stay valid while removing.
    for ( int i = aEndIndex - 1; i >= aStartIndex; i-- )
    {
        Remove( i );
    }
}
/// <summary>
/// Processes a token that arrives while a quotation is being collected in the cache.
/// </summary>
/// <remarks>
/// Asserts (debug builds only) that the cache is not empty on entry. The token is
/// handled according to its class:
/// quotation marks are either folded into a preceding backslash (escape) or
/// checked against the opening mark at the head of the cache to detect the end
/// of the quotation; new lines either turn a preceding backslash into a
/// continuation or terminate the quotation; backslashes are paired up when
/// doubled; everything else is re-classed as quotation content and enqueued.
/// </remarks>
/// <param name="aToken">
/// The incoming token. Its Class is overwritten with EClassQuotation when it is
/// treated as ordinary quoted content.
/// </param>
private void ProcessTokenDuringQuotation( SymToken aToken )
{
    System.Diagnostics.Debug.Assert( iCache.Count > 0 );

    // Inputs this routine is expected to cope with:
    //
    //  1) ""
    //  2) "\""
    //  3) "\"\""
    //  4) ''
    //  5) '\''
    //  6) '\'\''
    //  7) "\'\'\'\"\""
    //  8) "abc def ghi"
    //
    //  9) #define WIBBLE " this is a test string \
    //                     This too" " - and this!"
    //
    // 10) #define WIBBLE2 " this is a test string \\ abc \
    //                     This too" " - and this!"
    //
    // 11) #pragma message("Quotation with brackets (;') and other \'nasty\' things! inside it__\\");

    // ---- Quotation mark seen while already inside a quotation ----
    // Either an escaped character or (possibly) the closing mark.
    // See examples 2, 3, 5, 6, 7, 10 and 11.
    if ( aToken.Class == SymToken.TClass.EClassQuotation )
    {
        SymToken tokenBeforeQuote = PreviousOutputToken;
        if ( IsBackslashSymbolToken( tokenBeforeQuote ) )
        {
            // Escaped \' or \" - glue it onto the backslash.
            tokenBeforeQuote.ForceCombine( aToken );
            System.Diagnostics.Debug.Assert( iCache.Count > 0 );
            return;
        }

        // A new cache run is started whenever a quotation is first seen in
        // the "normal" state, so the head of the cache is the opening mark
        // and defines which character closes the quotation.
        SymToken openingQuote = iCache.PeekHead;
        System.Diagnostics.Debug.Assert( openingQuote.Value.Length == 1 );
        System.Diagnostics.Debug.Assert( openingQuote.Class == SymToken.TClass.EClassQuotation );
        System.Diagnostics.Debug.Assert( openingQuote.Type == SymToken.TType.ETypeQuotationDouble || openingQuote.Type == SymToken.TType.ETypeQuotationSingle );

        // The quotation is complete when this mark matches the opening one
        // and the cache currently holds an odd number of such marks (adding
        // this one balances the set).
        bool closesQuotation = openingQuote.Value == aToken.Value
                            && ( iCache.CountByType( openingQuote ) % 2 ) == 1;
        if ( !closesQuotation )
        {
            // A different quote character, or still unbalanced - queue it up.
            EnqueueNewOutputToken( aToken );
            return;
        }

        System.Diagnostics.Debug.Assert( aToken.Value == openingQuote.Value );
        EnqueueNewOutputToken( aToken );

        // Try to group the quoted text into one logical string. For example
        // "marker.h" is held as five tokens:
        //
        //   0 ["]       1 [marker]       2 [.]       3 [h]       4 ["]
        //
        // and the tokens at indices 1, 2 and 3 should become a single token.
        // With three tokens or fewer there is nothing to group.
        int cachedTokenCount = iCache.Count;
        if ( cachedTokenCount > 3 )
        {
            iCache.MergeAllTokensWithinRange( 1, cachedTokenCount - 1, false, true );
        }

        FlushCache();
        return;
    }

    // ---- New line inside a quotation ----
    if ( aToken.Class == SymToken.TClass.EClassNewLine )
    {
        SymToken tokenBeforeNewLine = PreviousOutputToken;
        if ( IsBackslashSymbolToken( tokenBeforeNewLine ) )
        {
            // Backslash followed by end of line (examples 9 and 10): the
            // backslash becomes a continuation and the new line is dropped.
            tokenBeforeNewLine.Class = SymToken.TClass.EClassContinuation;
        }
        else
        {
            // Ordinary end of line inside a quotation. Strictly speaking the
            // content is invalid; add the token and flush the cache.
            EnqueueNewOutputToken( aToken );
            FlushCache();
        }
        return;
    }

    // ---- Backslash inside a quotation ----
    if ( IsBackslashSymbolToken( aToken ) )
    {
        if ( IsBackslashSymbolToken( PreviousOutputToken ) )
        {
            // Example 10 - an escaped backslash. Merge the pair and treat
            // the result as quotation content.
            MergeWithPreviousToken( aToken );
            PreviousOutputToken.Class = SymToken.TClass.EClassQuotation;
        }
        else
        {
            // Cannot be combined until the next character is known.
            EnqueueNewOutputToken( aToken );
        }
        return;
    }

    // ---- Anything else ----
    // Whatever class the token had, it is now part of the quotation.
    aToken.Class = SymToken.TClass.EClassQuotation;
    EnqueueNewOutputToken( aToken );
}

/// <summary>
/// Returns true when <paramref name="aToken"/> is of class EClassSymbol and its
/// value is exactly a single backslash.
/// </summary>
private static bool IsBackslashSymbolToken( SymToken aToken )
{
    return aToken.Class == SymToken.TClass.EClassSymbol && aToken.Value == @"\";
}