/* Function: IsHorizontalLine
 *
 * Reports whether the content of the passed <LineIterator> is nothing but a horizontal line.  Only the portion of
 * the line inside <LineBoundsMode.CommentContent> is examined, so comment symbols and decoration are not
 * part of the test.
 *
 * The decision is made from the three symbol counts reported by CountSymbolLine().  The line qualifies when either:
 *
 * - The first count is at least four and the second is zero, or the second is at most three and the third is zero.
 * - The first count is at most three, the second is at least four, and the third is at most three.
 *
 * In both cases CountSymbolLine() must succeed and must have consumed the entire content of the line.
 */
public static bool IsHorizontalLine(LineIterator line)
{
    TokenIterator position, limit;
    line.GetBounds(LineBoundsMode.CommentContent, out position, out limit);

    char firstSymbol, secondSymbol, thirdSymbol;
    int firstCount, secondCount, thirdCount;

    bool countedSymbols = CountSymbolLine(ref position, limit,
                                          out firstSymbol, out secondSymbol, out thirdSymbol,
                                          out firstCount, out secondCount, out thirdCount);

    // The symbols have to be the only thing on the line, so the iterator must have reached the end of the content.
    if (!countedSymbols || position != limit)
    {
        return false;
    }

    // Four or more of the first symbol, optionally followed by up to three of a second symbol and nothing else.
    bool longThenOptionalShort = firstCount >= 4 &&
                                 (secondCount == 0 || (secondCount <= 3 && thirdCount == 0));

    // Up to three of the first symbol, four or more of the second, and up to three of a third.
    bool shortThenLong = firstCount <= 3 && secondCount >= 4 && thirdCount <= 3;

    return longThenOptionalShort || shortThenLong;
}
/* Function: CountEdgeSymbols
 *
 * An internal function that checks whether the content of the <LineIterator> has symbols on its left and/or right
 * edge.  An edge only counts if its symbols are no more than three characters long and are separated by
 * whitespace from any other content on the line.  Returns true if at least one edge qualifies, along with the symbol
 * and count for each edge.  An edge that doesn't qualify is reported as the null character with a count of zero.
 *
 * symbolIsAloneOnLine is set to true when the left symbols were followed by nothing but whitespace, meaning there
 * was a single group of symbols and no other content.  That group is always reported as the left side even though
 * it may have been intended as the right.  For example, look at this comment box:
 *
 * > ////////////
 * > // text   //
 * > //        //
 * > // text   //
 * > ////////////
 *
 * Once the comment symbols are marked, the content seen through <LineBoundsMode.CommentContent> is this:
 *
 * > //////////
 * > text   //
 * > //
 * > text   //
 * > //////////
 *
 * The slashes on the middle line are returned as the left symbol but are meant to be the right one.  Callers have
 * to account for this situation themselves.
 */
private static bool CountEdgeSymbols(LineIterator line, out char leftSymbol, out char rightSymbol,
                                     out int leftSymbolCount, out int rightSymbolCount,
                                     out bool symbolIsAloneOnLine)
{
    symbolIsAloneOnLine = false;

    TokenIterator cursor, limit;
    line.GetBounds(LineBoundsMode.CommentContent, out cursor, out limit);

    // Left edge.  CountSymbols() leaves the cursor just past whatever symbols it counted.
    bool foundLeftSymbols = CountSymbols(ref cursor, limit, out leftSymbol, out leftSymbolCount);

    if (foundLeftSymbols)
    {
        bool leftTouchesContent = (cursor.FundamentalType != FundamentalType.Whitespace && cursor != limit);

        if (leftTouchesContent || leftSymbolCount > 3)
        {
            leftSymbol = '\0';
            leftSymbolCount = 0;
        }

        while (cursor.FundamentalType == FundamentalType.Whitespace && cursor < limit)
        {
            cursor.Next();
        }

        // If skipping the whitespace took us to the end, the symbols were the only thing on the line.
        symbolIsAloneOnLine = (cursor == limit);
    }

    // Right edge, searched backwards from the end of the content down to wherever the left pass stopped.
    bool foundRightSymbols = ReverseCountSymbols(cursor, ref limit, out rightSymbol, out rightSymbolCount);

    if (foundRightSymbols)
    {
        // Step onto the token immediately before the right symbols to see what they are attached to.
        limit.Previous();

        bool rightTouchesContent = (limit >= cursor && limit.FundamentalType != FundamentalType.Whitespace);

        if (rightTouchesContent || rightSymbolCount > 3)
        {
            rightSymbol = '\0';
            rightSymbolCount = 0;
        }
    }

    return !(leftSymbolCount == 0 && rightSymbolCount == 0);
}
// Group: Support Functions
// __________________________________________________________________________


/* Function: TryToGetBlockComment
 *
 * If the iterator is on a line that begins with the opening symbol of a block comment, moves the iterator past the
 * whole comment and returns true.  If the line doesn't begin with the opening symbol it returns false, sets comment
 * to null, and leaves the iterator alone.
 *
 * When the comment is a candidate for documentation it is also returned as a <PossibleDocumentationComment> and
 * its opening and closing symbols are marked as <CommentParsingType.CommentSymbol>.  A comment is not a
 * candidate if anything other than whitespace follows the closing symbol on its line, or if it is never closed before
 * the end of the file.  In those cases the function still returns true and still moves the iterator, but comment is null.
 * Moving past rejected comments matters because in Lua the block comment symbol is --[[ and the line comment
 * symbol is --, so a block comment left in place could be picked up again as a line comment.
 *
 * If openingMustBeAlone is set, the token immediately after the opening symbol may not be a symbol.  If it is, the
 * function returns false and does not move.  This lets you detect something like /** without also matching /******.
 */
protected bool TryToGetBlockComment(ref LineIterator lineIterator, string openingSymbol, string closingSymbol,
                                    bool openingMustBeAlone, out PossibleDocumentationComment comment)
{
    comment = null;

    TokenIterator searchStart, searchEnd;
    lineIterator.GetBounds(LineBoundsMode.ExcludeWhitespace, out searchStart, out searchEnd);

    if (!searchStart.MatchesAcrossTokens(openingSymbol))
    {
        return false;
    }

    // Begin the search for the closing symbol after the opening one, since they can be identical, as with
    // Python's ''' and """ strings.
    searchStart.NextByCharacters(openingSymbol.Length);

    if (openingMustBeAlone && searchStart.FundamentalType == FundamentalType.Symbol)
    {
        return false;
    }

    PossibleDocumentationComment candidate = new PossibleDocumentationComment();
    candidate.Start = lineIterator;

    var tokenizer = lineIterator.Tokenizer;
    LineIterator scanLine = lineIterator;

    while (true)
    {
        TokenIterator closingPosition;
        bool closesOnThisLine = tokenizer.FindTokensBetween(closingSymbol, false, searchStart, searchEnd,
                                                            out closingPosition);

        // The scan line moves forward whether or not the comment closed here.  If it did, this puts it on the
        // line after the comment, which is where we want to end up even if the comment gets rejected.
        scanLine.Next();

        if (closesOnThisLine)
        {
            // Only whitespace may follow the closing symbol on its line.
            closingPosition.NextByCharacters(closingSymbol.Length);
            closingPosition.NextPastWhitespace();

            bool nothingTrails = (closingPosition.FundamentalType == FundamentalType.LineBreak ||
                                  closingPosition.FundamentalType == FundamentalType.Null);

            if (nothingTrails)
            {
                candidate.End = scanLine;
            }
            else
            {
                candidate = null;
            }

            break;
        }

        // Running out of lines means the comment was never closed.  Skip it, but it isn't a documentation
        // candidate.
        if (!scanLine.IsInBounds)
        {
            candidate = null;
            break;
        }

        scanLine.GetBounds(LineBoundsMode.ExcludeWhitespace, out searchStart, out searchEnd);
    }

    if (candidate != null)
    {
        // Mark the opening symbol on the first line...
        TokenIterator openingToken = candidate.Start.FirstToken(LineBoundsMode.ExcludeWhitespace);
        openingToken.SetCommentParsingTypeByCharacters(CommentParsingType.CommentSymbol, openingSymbol.Length);

        // ...and the closing symbol, which is the last thing before the trailing whitespace on the final line.
        LineIterator closingLine = scanLine;
        closingLine.Previous();

        TokenIterator closingLineStart, closingToken;
        closingLine.GetBounds(LineBoundsMode.ExcludeWhitespace, out closingLineStart, out closingToken);

        closingToken.PreviousByCharacters(closingSymbol.Length);
        closingToken.SetCommentParsingTypeByCharacters(CommentParsingType.CommentSymbol, closingSymbol.Length);
    }

    // Reaching this point means a comment was found, so the iterator moves and we return true regardless of
    // whether the comment was suitable for documentation.  The comment variable tells the caller which it was.
    comment = candidate;
    lineIterator = scanLine;
    return true;
}
// Function: GetPossibleDocumentationComments
//
// Scans the file for comments that could possibly contain documentation and returns them as a list.  The comments
// are only acceptable candidates; they are not guaranteed to actually contain documentation.  An empty list is
// returned if there aren't any.
//
// Every comment in the returned list has its comment symbols marked as <CommentParsingType.CommentSymbol> in
// the tokenizer so later stages can work on them in a language independent manner.  If you also want to filter out
// text boxes and lines, use <Comments.LineFinder>.
//
// Default Implementation:
//
// The default implementation uses the comment symbols found in <Language> or passed to the constructor.  Override
// this function if you need something more sophisticated, such as interpreting the POD directives in Perl.
//
// A comment must be alone on its lines to be a candidate.  The comment symbol has to be the first non-whitespace
// character on the line, and for block comments nothing but whitespace may follow the closing symbol.  The latter
// rule matters because a comment can start correctly but not end so, as in this prototype with a Splint annotation:
//
// > int get_array(integer_t id,
// >               /*@out@*/ array_t array);
//
// Block comments surrounded by @ symbols are also left out because they're Splint comments.  Keeping them out of
// the list means the Splint comment below won't end prototype detection.
//
// > void initialize ()
// >    /*@globals undef globnum,
// >               undef globname @*/
// >    { ... }
//
// The file is processed line by line in a simple manner that doesn't account for things like strings, so a multiline
// string whose content looks like a comment will be interpreted as one.  This is accepted as a conscious tradeoff
// because there are many different string formats (literal quotes denoted with \", literal quotes denoted with "",
// Perl's q{} forms and here doc) and they can't all be handled in a generalized way.  Keeping this as an independent
// stage also means comments don't disappear the way prototypes do if a full language parser gets tripped up on
// something like an unmatched brace.
//
virtual public List <PossibleDocumentationComment> GetPossibleDocumentationComments(Tokenizer source)
{
    List<PossibleDocumentationComment> results = new List<PossibleDocumentationComment>();
    LineIterator lineIterator = source.FirstLine;

    while (lineIterator.IsInBounds)
    {
        bool matched = false;
        PossibleDocumentationComment candidate = null;


        // Javadoc block comments

        // These come before regular block comments because they are usually extended versions of them, such
        // as /** and /*.
        if (javadocBlockCommentStringPairs != null)
        {
            for (int i = 0; !matched && i < javadocBlockCommentStringPairs.Length; i += 2)
            {
                matched = TryToGetBlockComment(ref lineIterator,
                                               javadocBlockCommentStringPairs[i],
                                               javadocBlockCommentStringPairs[i + 1],
                                               true, out candidate);
            }

            if (candidate != null)
            {
                candidate.Javadoc = true;
            }
        }


        // Plain block comments

        // Block comments come before line comments because in Lua the line comment symbol is a substring of
        // the block comment symbols: -- versus --[[ and ]]--.
        if (!matched && blockCommentStringPairs != null)
        {
            for (int i = 0; !matched && i < blockCommentStringPairs.Length; i += 2)
            {
                matched = TryToGetBlockComment(ref lineIterator,
                                               blockCommentStringPairs[i],
                                               blockCommentStringPairs[i + 1],
                                               false, out candidate);
            }

            // Drop Splint comments so that they can appear in prototypes.  They're recognized by having @ as
            // both the first and last character of the comment content.
            if (candidate != null &&
                candidate.Start.FirstToken(LineBoundsMode.CommentContent).Character == '@')
            {
                LineIterator finalLine = candidate.End;
                finalLine.Previous();

                TokenIterator finalToken, unused;
                finalLine.GetBounds(LineBoundsMode.CommentContent, out unused, out finalToken);
                finalToken.Previous();

                if (finalToken.Character == '@')
                {
                    candidate = null;
                }
            }
        }


        // XML line comments

        if (!matched && xmlLineCommentStrings != null)
        {
            for (int i = 0; !matched && i < xmlLineCommentStrings.Length; i++)
            {
                matched = TryToGetLineComment(ref lineIterator,
                                              xmlLineCommentStrings[i], xmlLineCommentStrings[i],
                                              true, out candidate);
            }

            if (candidate != null)
            {
                candidate.XML = true;
            }
        }


        // Ambiguous XML/Javadoc line comments

        // When an XML comment was found, the same position is also tried as Javadoc because the two formats
        // may share an opening symbol, such as ///.
        if (candidate != null && candidate.XML && javadocLineCommentStringPairs != null)
        {
            LineIterator javadocLineIterator = candidate.Start;
            PossibleDocumentationComment javadocCandidate = null;
            bool matchedJavadoc = false;

            for (int i = 0; !matchedJavadoc && i < javadocLineCommentStringPairs.Length; i += 2)
            {
                matchedJavadoc = TryToGetLineComment(ref javadocLineIterator,
                                                     javadocLineCommentStringPairs[i],
                                                     javadocLineCommentStringPairs[i + 1],
                                                     true, out javadocCandidate);
            }

            if (javadocCandidate != null)
            {
                int javadocEndLine = javadocCandidate.End.LineNumber;
                int xmlEndLine = candidate.End.LineNumber;

                // A longer Javadoc comment wins, since the XML pass may have accepted only the first line and
                // ignored the rest for not having the same symbol.  For example:
                //
                // ## Comment
                // #
                // #
                //
                // This is detected as a one line XML comment and a three line Javadoc comment.
                if (javadocEndLine > xmlEndLine)
                {
                    candidate = javadocCandidate;
                    candidate.Javadoc = true;
                    lineIterator = javadocLineIterator;
                }

                // If they're the same length and only one line long it's genuinely ambiguous.  For example:
                //
                // ## Comment
                //
                // That could be a one line XML comment or a one line Javadoc comment, so mark it as
                // potentially either.  XML is already set to true.
                //
                // If they're the same length but longer than one line, the Javadoc pass was just reading the XML
                // symbols as a Javadoc start followed by a vertical line, so it stays XML only.  For example:
                //
                // ## Comment
                // ##
                // ##
                //
                // That's clearly a three line XML comment and not a Javadoc comment with a vertical line.
                else if (javadocEndLine == xmlEndLine && candidate.Start.LineNumber == xmlEndLine - 1)
                {
                    candidate.Javadoc = true;
                }

                // If the XML comment is longer it's kept as is and the Javadoc one is ignored.
            }
        }


        // Javadoc line comments

        if (!matched && javadocLineCommentStringPairs != null)
        {
            for (int i = 0; !matched && i < javadocLineCommentStringPairs.Length; i += 2)
            {
                matched = TryToGetLineComment(ref lineIterator,
                                              javadocLineCommentStringPairs[i],
                                              javadocLineCommentStringPairs[i + 1],
                                              true, out candidate);
            }

            if (candidate != null)
            {
                candidate.Javadoc = true;
            }
        }


        // Plain line comments

        if (!matched && lineCommentStrings != null)
        {
            for (int i = 0; !matched && i < lineCommentStrings.Length; i++)
            {
                matched = TryToGetLineComment(ref lineIterator,
                                              lineCommentStrings[i], lineCommentStrings[i],
                                              false, out candidate);
            }
        }


        // Wrap up

        if (!matched)
        {
            // Nothing starts on this line.
            lineIterator.Next();
        }
        else if (candidate != null)
        {
            // XML can actually use the Javadoc comment format in addition to its own.
            if (candidate.Javadoc)
            {
                candidate.XML = true;
            }

            results.Add(candidate);
        }

        // When a comment was matched, lineIterator has already been moved past it.
    }

    return results;
}
// Function: GetPossibleDocumentationComments
//
// Scans the file for comments that could possibly contain documentation and returns them as a list.  The comments
// are only acceptable candidates; they are not guaranteed to actually contain documentation.  An empty list is
// returned if there aren't any.
//
// Every comment in the returned list has its comment symbols marked as <CommentParsingType.CommentSymbol> in
// the tokenizer so later stages can work on them in a language independent manner.  If you also want to filter out
// text boxes and lines, use <Comments.LineFinder>.
//
// This override recognizes two kinds of comments: runs of consecutive lines starting with #, and POD sections
// that begin with a Natural Docs or Javadoc start line as reported by TryToSkipPODLine().
//
override public List <PossibleDocumentationComment> GetPossibleDocumentationComments(Tokenizer source)
{
    List<PossibleDocumentationComment> results = new List<PossibleDocumentationComment>();

    LineIterator lineIterator = source.FirstLine;
    PODLineType podLineType;

    while (lineIterator.IsInBounds)
    {
        TokenIterator tokenIterator = lineIterator.FirstToken(LineBoundsMode.ExcludeWhitespace);


        // Hash comments

        if (tokenIterator.Character == '#')
        {
            PossibleDocumentationComment hashComment = new PossibleDocumentationComment();
            hashComment.Start = lineIterator;

            // First line.  "###" makes it a possible XML comment, "##" a possible Javadoc comment, and a
            // single "#" a plain one.
            bool xmlPossible = tokenIterator.MatchesAcrossTokens("###");
            bool javadocPossible = !xmlPossible && tokenIterator.MatchesAcrossTokens("##");

            lineIterator.Next();

            // Subsequent lines.  XML only survives if every line uses "###", and Javadoc only survives if
            // every line after the first uses a single "#".
            while (lineIterator.IsInBounds)
            {
                tokenIterator = lineIterator.FirstToken(LineBoundsMode.ExcludeWhitespace);

                if (tokenIterator.Character != '#')
                {
                    break;
                }

                if (tokenIterator.MatchesAcrossTokens("###"))
                {
                    javadocPossible = false;
                }
                else if (tokenIterator.MatchesAcrossTokens("##"))
                {
                    javadocPossible = false;
                    xmlPossible = false;
                }
                else
                {
                    xmlPossible = false;
                }

                lineIterator.Next();
            }

            hashComment.End = lineIterator;

            // Go back and mark the symbols.  How many characters get marked depends on the format.
            int firstLineSymbolLength, laterLineSymbolLength;

            if (xmlPossible)
            {
                firstLineSymbolLength = 3;
                laterLineSymbolLength = 3;
            }
            else if (javadocPossible)
            {
                firstLineSymbolLength = 2;
                laterLineSymbolLength = 1;
            }
            else
            {
                firstLineSymbolLength = 1;
                laterLineSymbolLength = 1;
            }

            LineIterator markLine = hashComment.Start;
            int symbolLength = firstLineSymbolLength;

            do
            {
                tokenIterator = markLine.FirstToken(LineBoundsMode.ExcludeWhitespace);
                tokenIterator.SetCommentParsingTypeByCharacters(CommentParsingType.CommentSymbol, symbolLength);

                symbolLength = laterLineSymbolLength;
                markLine.Next();
            }
            while (markLine < hashComment.End);

            // XML can actually appear in Javadoc comments, so a Javadoc comment is flagged as both.
            hashComment.Javadoc = javadocPossible;
            hashComment.XML = (xmlPossible || javadocPossible);

            results.Add(hashComment);
        }


        // POD comments

        else if (TryToSkipPODLine(ref tokenIterator, out podLineType))
        {
            // The POD line itself is marked as a comment symbol no matter what type it is.
            TokenIterator podLineStart, podLineEnd;
            lineIterator.GetBounds(LineBoundsMode.CommentContent, out podLineStart, out podLineEnd);

            lineIterator.Tokenizer.SetCommentParsingTypeBetween(podLineStart, podLineEnd,
                                                                CommentParsingType.CommentSymbol);

            bool startsDocumentation = (podLineType == PODLineType.StartNaturalDocs ||
                                        podLineType == PODLineType.StartJavadoc);

            if (startsDocumentation)
            {
                PossibleDocumentationComment podComment = new PossibleDocumentationComment();
                podComment.Start = lineIterator;

                if (podLineType == PODLineType.StartJavadoc)
                {
                    podComment.Javadoc = true;
                }

                // The comment runs until the next POD line or the end of the file.
                lineIterator.Next();

                while (lineIterator.IsInBounds)
                {
                    tokenIterator = lineIterator.FirstToken(LineBoundsMode.CommentContent);

                    if (TryToSkipPODLine(ref tokenIterator, out podLineType))
                    {
                        // An end line is marked and included in the comment.  Any other POD line is left for
                        // the next pass of the outer loop.
                        if (podLineType == PODLineType.End)
                        {
                            lineIterator.GetBounds(LineBoundsMode.CommentContent, out podLineStart, out podLineEnd);
                            lineIterator.Tokenizer.SetCommentParsingTypeBetween(podLineStart, podLineEnd,
                                                                                CommentParsingType.CommentSymbol);
                            lineIterator.Next();
                        }

                        break;
                    }

                    lineIterator.Next();
                }

                podComment.End = lineIterator;
                results.Add(podComment);
            }
            else
            {
                lineIterator.Next();
            }
        }


        // Neither

        else
        {
            lineIterator.Next();
        }
    }

    return results;
}
/* Function: MarkTextBoxes
 *
 * Finds text boxes in a comment and marks their tokens as <Tokenization.CommentParsingType.CommentDecoration>.
 * Vertical lines are only detected if they are continuous throughout the comment, and horizontal lines only if they
 * are connected to it.  Freestanding horizontal lines are *not* detected here.  Differing symbols are tolerated on
 * corners and where embedded horizontal lines meet the vertical ones, as are differing tokens marked with
 * <Tokenization.CommentParsingType.CommentSymbol>.
 *
 * Examples:
 *
 *     The box below will be marked completely, including the middle horizontal line.
 *
 *     > // +----------+
 *     > // | Title    |
 *     > // +----------+
 *     > // | Text     |
 *     > // +----------+
 *
 *     The middle horizontal line below will not be marked, because it is not attached.
 *
 *     > // +----------+
 *     > // | Title    |
 *     > // | -------- |
 *     > // | Text     |
 *     > // +----------+
 *
 *     Nor will the horizontal line below since there is no vertical.
 *
 *     > // Title
 *     > // ----------
 *     > // Text
 *
 * Freestanding horizontal lines are not detected because they may be intended literally, such as when they're part
 * of a code section.  If you're not in such a section use <IsHorizontalLine()> before parsing a line to filter it out.
 *
 *     > // (start code)
 *     > // +-----+
 *     > // | box |
 *     > // +-----+
 *     > // (end code)
 */
public static void MarkTextBoxes(PossibleDocumentationComment comment)
{
    // The edge symbols every tested line has agreed on so far.
    char leftSymbol = '\0';
    char rightSymbol = '\0';
    int leftSymbolCount = 0;
    int rightSymbolCount = 0;
    bool symbolIsAloneOnLine = false;

    // Whether the values above have been seeded from a line yet.
    bool haveBaselineEdges = false;

    // Line numbers of the horizontal lines found.  The original author notes a number set should be okay to use
    // since line numbers start at one.
    IDObjects.NumberSet horizontalLineNumbers = new IDObjects.NumberSet();

    LineIterator line = comment.Start;
    TokenIterator contentStart, contentEnd;


    // Detection pass

    // Leading blank lines are skipped since it's okay if they're not part of the text box.
    while (line < comment.End && line.IsEmpty(LineBoundsMode.CommentContent))
    {
        line.Next();
    }

    while (line < comment.End && !line.IsEmpty(LineBoundsMode.CommentContent))
    {
        line.GetBounds(LineBoundsMode.ExcludeWhitespace, out contentStart, out contentEnd);

        // Shrink the bounds to exclude the comment symbols, if any.  This is done by hand instead of with the
        // line bounds mode because we need to know whether whitespace separated them from the content.
        bool symbolTouchesStart = false;
        bool symbolTouchesEnd = false;

        if (contentStart.CommentParsingType == CommentParsingType.CommentSymbol)
        {
            do
            {
                contentStart.Next();
            }
            while (contentStart.CommentParsingType == CommentParsingType.CommentSymbol);

            if (contentStart.FundamentalType == FundamentalType.Whitespace)
            {
                do
                {
                    contentStart.Next();
                }
                while (contentStart.FundamentalType == FundamentalType.Whitespace);
            }
            else
            {
                symbolTouchesStart = true;
            }
        }

        // The end iterator is one past the last token, so step back onto it, trim, then step forward again.
        contentEnd.Previous();

        if (contentEnd.CommentParsingType == CommentParsingType.CommentSymbol)
        {
            do
            {
                contentEnd.Previous();
            }
            while (contentEnd.CommentParsingType == CommentParsingType.CommentSymbol);

            if (contentEnd.FundamentalType == FundamentalType.Whitespace)
            {
                do
                {
                    contentEnd.Previous();
                }
                while (contentEnd.FundamentalType == FundamentalType.Whitespace);
            }
            else
            {
                symbolTouchesEnd = true;
            }
        }

        contentEnd.Next();


        // Horizontal line detection

        char symbolA, symbolB, symbolC;
        int countA, countB, countC;

        CountSymbolLine(ref contentStart, contentEnd, out symbolA, out symbolB, out symbolC,
                        out countA, out countB, out countC);

        bool shortFirstRun = (countA >= 1 && countA <= 3);

        bool longOnly = (countA >= 4 && countB == 0);
        bool longThenOptionalShort = (countA >= 4 && (countB == 0 || (countB <= 3 && countC == 0)));
        bool shortThenLong = (shortFirstRun && countB >= 4 && countC == 0);
        bool shortThenLongThenOptionalShort = (shortFirstRun && countB >= 4 && countC <= 3);

        bool isHorizontalLine =
            (symbolTouchesStart && symbolTouchesEnd && longOnly) ||
            (symbolTouchesStart && longThenOptionalShort) ||
            (symbolTouchesEnd && (shortThenLong || longOnly)) ||
            longOnly ||
            shortThenLongThenOptionalShort;

        // The horizontal line has to be the only thing on the line to count.
        if (isHorizontalLine && contentStart == contentEnd)
        {
            horizontalLineNumbers.Add(line.LineNumber);
        }


        // Vertical line detection, first tested line

        else if (!haveBaselineEdges)
        {
            // The very first line is permitted to be different to allow for this:
            //    /** text
            //     * text
            //     */
            //
            // However, it isn't skipped when it's a one line comment or we couldn't handle this:
            //    ### text
            //
            if (line != comment.Start || (comment.End.LineNumber - comment.Start.LineNumber) == 1)
            {
                bool hasEdges = CountEdgeSymbols(line, out leftSymbol, out rightSymbol,
                                                 out leftSymbolCount, out rightSymbolCount,
                                                 out symbolIsAloneOnLine);

                // Without edge symbols there can't be a text box at all.
                if (!hasEdges)
                {
                    return;
                }

                haveBaselineEdges = true;
            }
        }


        // Vertical line detection, every line after that

        else
        {
            char thisLeftSymbol, thisRightSymbol;
            int thisLeftCount, thisRightCount;
            bool thisIsAlone;

            CountEdgeSymbols(line, out thisLeftSymbol, out thisRightSymbol,
                             out thisLeftCount, out thisRightCount, out thisIsAlone);

            // This line has a lone symbol, reported as the left one, while the box so far only has a right edge.
            // Compare it against the right edge instead.
            if (thisIsAlone && !symbolIsAloneOnLine && leftSymbolCount == 0 && rightSymbolCount > 0)
            {
                if (thisLeftSymbol != rightSymbol || thisLeftCount != rightSymbolCount)
                {
                    return;
                }
            }

            // The reverse.  Everything so far was a lone symbol recorded as the left edge, and this line shows
            // it to be the right one.  Compare and then move the recorded edge over to the right.
            else if (!thisIsAlone && symbolIsAloneOnLine && thisLeftCount == 0 && thisRightCount > 0)
            {
                if (thisRightSymbol != leftSymbol || thisRightCount != leftSymbolCount)
                {
                    return;
                }

                rightSymbol = leftSymbol;
                leftSymbol = '\0';
                rightSymbolCount = leftSymbolCount;
                leftSymbolCount = 0;
            }

            // Otherwise a straight comparison works.  An edge that doesn't match is dropped, and the comment
            // is abandoned once neither edge is left.
            else
            {
                if (thisLeftSymbol != leftSymbol || thisLeftCount != leftSymbolCount)
                {
                    leftSymbol = '\0';
                    leftSymbolCount = 0;
                }

                if (thisRightSymbol != rightSymbol || thisRightCount != rightSymbolCount)
                {
                    rightSymbol = '\0';
                    rightSymbolCount = 0;
                }

                if (leftSymbolCount == 0 && rightSymbolCount == 0)
                {
                    return;
                }
            }

            // The overall alone flag only stays on while every line has it.
            symbolIsAloneOnLine = symbolIsAloneOnLine && thisIsAlone;
        }

        line.Next();
    }

    // If the loop stopped on a blank line, the comment is only acceptable if all the remaining lines are blank too.
    while (line < comment.End && line.IsEmpty(LineBoundsMode.CommentContent))
    {
        line.Next();
    }

    if (line != comment.End)
    {
        return;
    }


    // Marking pass

    // Getting this far means there's a valid text box, so go back and mark it as comment decoration.
    line = comment.Start;

    while (line < comment.End)
    {
        line.GetBounds(LineBoundsMode.CommentContent, out contentStart, out contentEnd);

        if (horizontalLineNumbers.Contains(line.LineNumber))
        {
            // Horizontal lines are marked in their entirety.
            while (contentStart < contentEnd)
            {
                contentStart.CommentParsingType = CommentParsingType.CommentDecoration;
                contentStart.Next();
            }
        }
        else if (contentEnd > contentStart)
        {
            // Each character is checked against the expected symbol to account for the exceptions that were
            // allowed through earlier, such as a differing first line.
            for (int marked = 0; marked < leftSymbolCount; marked++)
            {
                if (contentStart.Character == leftSymbol)
                {
                    contentStart.CommentParsingType = CommentParsingType.CommentDecoration;
                    contentStart.Next();
                }
            }

            contentEnd.Previous();

            for (int marked = 0; marked < rightSymbolCount; marked++)
            {
                if (contentEnd.Character == rightSymbol)
                {
                    contentEnd.CommentParsingType = CommentParsingType.CommentDecoration;
                    contentEnd.Previous();
                }
            }
        }

        line.Next();
    }
}
// Group: Support Functions
// __________________________________________________________________________


/* Function: TryToGetPDBlockComment
 *
 * If the line iterator is on the starting symbol of a block comment, returns it as a <PossibleDocumentationComment>
 * and marks the opening and closing symbols as <CommentParsingType.CommentSymbol>.  Returns null if the line
 * doesn't start with the opening symbol, if there is content after the closing symbol making it unsuitable as a
 * documentation comment, or if the comment is never closed before the end of the file.
 *
 * The iterator is passed by value, so the caller's copy is not moved.  Use the End property of the returned
 * comment to find the line following it.
 *
 * If openingMustBeAlone is set, the token immediately after the opening symbol may not be a symbol for this
 * function to succeed.  This lets you detect something like /** without also matching /******.
 *
 * On the first line the closing symbol is only searched for after the opening symbol.  Otherwise an opening symbol
 * that is identical to the closing one (such as Python's ''' and """) or that overlaps it (such as the /*/ sequence)
 * would be mistaken for the end of the comment.
 */
protected PossibleDocumentationComment TryToGetPDBlockComment(LineIterator lineIterator, string openingSymbol,
                                                              string closingSymbol, bool openingMustBeAlone)
{
    TokenIterator firstToken = lineIterator.FirstToken(LineBoundsMode.ExcludeWhitespace);

    if (firstToken.MatchesAcrossTokens(openingSymbol) == false)
    {
        return(null);
    }

    // Kept as a separate copy because firstToken is still needed to mark the opening symbol at the end.
    TokenIterator afterOpeningSymbol = firstToken;
    afterOpeningSymbol.NextByCharacters(openingSymbol.Length);

    if (openingMustBeAlone && afterOpeningSymbol.FundamentalType == FundamentalType.Symbol)
    {
        return(null);
    }

    PossibleDocumentationComment comment = new PossibleDocumentationComment();
    comment.Start = lineIterator;

    bool onFirstLine = true;

    for (;;)
    {
        // Running out of lines means the comment was never closed.
        if (!lineIterator.IsInBounds)
        {
            return(null);
        }

        TokenIterator closingSymbolIterator;
        bool foundClosingSymbol;

        if (onFirstLine)
        {
            // Walk the rest of the first line token by token, starting after the opening symbol so it can't be
            // matched as the closing one.
            // NOTE(review): this assumes MatchesAcrossTokens() is case sensitive by default, matching the
            // "false" passed to FindAcrossTokens() below -- confirm against TokenIterator.
            closingSymbolIterator = afterOpeningSymbol;
            foundClosingSymbol = false;

            while (closingSymbolIterator.FundamentalType != FundamentalType.LineBreak &&
                   closingSymbolIterator.FundamentalType != FundamentalType.Null)
            {
                if (closingSymbolIterator.MatchesAcrossTokens(closingSymbol))
                {
                    foundClosingSymbol = true;
                    break;
                }

                closingSymbolIterator.Next();
            }

            onFirstLine = false;
        }
        else
        {
            foundClosingSymbol = lineIterator.FindAcrossTokens(closingSymbol, false, LineBoundsMode.Everything,
                                                               out closingSymbolIterator);
        }

        if (foundClosingSymbol)
        {
            // Only whitespace may follow the closing symbol on its line.
            closingSymbolIterator.NextByCharacters(closingSymbol.Length);
            closingSymbolIterator.NextPastWhitespace();

            if (closingSymbolIterator.FundamentalType != FundamentalType.LineBreak &&
                closingSymbolIterator.FundamentalType != FundamentalType.Null)
            {
                return(null);
            }

            lineIterator.Next();
            comment.End = lineIterator;
            break;
        }

        lineIterator.Next();
    }

    // Success.  Mark the symbols before returning.
    firstToken.SetCommentParsingTypeByCharacters(CommentParsingType.CommentSymbol, openingSymbol.Length);

    // The closing symbol is the last thing before any trailing whitespace on the comment's final line.
    TokenIterator lastToken;
    lineIterator.Previous();
    lineIterator.GetBounds(LineBoundsMode.ExcludeWhitespace, out firstToken, out lastToken);

    lastToken.PreviousByCharacters(closingSymbol.Length);
    lastToken.SetCommentParsingTypeByCharacters(CommentParsingType.CommentSymbol, closingSymbol.Length);

    return(comment);
}
// Function: GetPossibleDocumentationComments
//
// Scans the file line by line for comments that are acceptable candidates for documentation and returns them as a list.
// Nothing guarantees the candidates actually contain documentation.  If the file has no comments the list is empty.
//
// Every comment in the returned list has its comment symbols marked as <CommentParsingType.CommentSymbol> in the
// tokenizer so later stages can work on them in a language independent manner.  If you also want to filter out text boxes
// and lines, use <Comments.LineFinder>.
//
// Default Implementation:
//
// The default implementation uses the comment symbols found in <Language> or passed to the constructor.  Override this
// function if you need something more sophisticated, such as interpreting the POD directives in Perl.
//
// A comment must be alone on its lines to be a candidate: the comment symbol must be the first non-whitespace content
// on the line, and for block comments only whitespace may follow the closing symbol.  The second rule matters because
// a comment can start acceptably but not end so, as in this prototype with a Splint annotation:
//
// > int get_array(integer_t id,
// >               /*@out@*/ array_t array);
//
// Block comments whose content is surrounded by @ symbols are Splint comments and are left out of the list.  Leaving
// them out means the Splint comment below won't end prototype detection:
//
// > void initialize ()
// >    /*@globals undef globnum,
// >               undef globname @*/
// >    { ... }
//
// The scan is deliberately simple and does not account for things like strings, so a multiline string whose content looks
// like a comment will be treated as one.  This is a conscious tradeoff: there are many different string formats (literal
// quotes denoted with \", literal quotes denoted with "", Perl's q{} forms and here doc) so they can't all be handled in a
// generalized way.  Keeping this as an independent stage even when using full language support also means comments
// don't disappear the way prototypes do if the parser gets tripped up on something like an unmatched brace.
//
virtual public List<PossibleDocumentationComment> GetPossibleDocumentationComments(Tokenizer source)
{
    List<PossibleDocumentationComment> candidates = new List<PossibleDocumentationComment>();
    LineIterator lineIterator = source.FirstLine;

    while (lineIterator.IsInBounds)
    {
        PossibleDocumentationComment comment = null;


        // Javadoc block comments
        //
        // These are tried ahead of plain block comments because they are usually extended versions of them, such as
        // /** and /*.  Block comments in general are tried ahead of line comments because in Lua the line comment
        // symbol is a substring of the block ones: -- versus --[[ and ]]--.

        if (javadocBlockCommentStringPairs != null)
        {
            for (int i = 0; i < javadocBlockCommentStringPairs.Length; i += 2)
            {
                comment = TryToGetPDBlockComment(lineIterator, javadocBlockCommentStringPairs[i],
                                                 javadocBlockCommentStringPairs[i + 1], true);

                if (comment != null)
                {
                    comment.Javadoc = true;
                    break;
                }
            }
        }


        // Plain block comments

        if (comment == null && blockCommentStringPairs != null)
        {
            for (int i = 0; i < blockCommentStringPairs.Length; i += 2)
            {
                comment = TryToGetPDBlockComment(lineIterator, blockCommentStringPairs[i],
                                                 blockCommentStringPairs[i + 1], false);

                if (comment != null)
                {
                    break;
                }
            }

            // Drop Splint comments, meaning content that both opens and closes with @, so that they can appear in
            // prototypes.
            if (comment != null && comment.Start.FirstToken(LineBoundsMode.CommentContent).Character == '@')
            {
                LineIterator closingLine = comment.End;
                closingLine.Previous();

                TokenIterator contentStart, contentEnd;
                closingLine.GetBounds(LineBoundsMode.CommentContent, out contentStart, out contentEnd);

                // Step back from the end of the content onto its final token.
                contentEnd.Previous();

                if (contentEnd.Character == '@')
                {
                    comment = null;
                }
            }
        }


        // XML line comments

        if (comment == null && xmlLineCommentStrings != null)
        {
            for (int i = 0; i < xmlLineCommentStrings.Length; i++)
            {
                comment = TryToGetPDLineComment(lineIterator, xmlLineCommentStrings[i], xmlLineCommentStrings[i], true);

                if (comment != null)
                {
                    comment.XML = true;
                    break;
                }
            }
        }


        // Javadoc line comments
        //
        // These are tried even when an XML comment was found because the two may share an opening symbol, such as ///.
        // The Javadoc reading wins if it is longer.  If the two are the same length the Javadoc reading is just the XML
        // comment seen as a Javadoc start with a vertical line for the remainder, so it stays XML.  The exception is when
        // the comment is only one line long, in which case it is genuinely ambiguous and is flagged as both.

        if ((comment == null || comment.XML == true) && javadocLineCommentStringPairs != null)
        {
            PossibleDocumentationComment javadocComment = null;

            for (int i = 0; i < javadocLineCommentStringPairs.Length; i += 2)
            {
                javadocComment = TryToGetPDLineComment(lineIterator, javadocLineCommentStringPairs[i],
                                                       javadocLineCommentStringPairs[i + 1], true);

                if (javadocComment != null)
                {
                    break;
                }
            }

            if (javadocComment != null)
            {
                javadocComment.Javadoc = true;

                if (comment == null)
                {
                    comment = javadocComment;
                }
                else
                {
                    int javadocLength = javadocComment.End.LineNumber - javadocComment.Start.LineNumber;
                    int xmlLength = comment.End.LineNumber - comment.Start.LineNumber;

                    if (javadocLength > xmlLength)
                    {
                        comment = javadocComment;
                    }
                    else if (javadocLength == 1 && xmlLength == 1)
                    {
                        comment.Javadoc = true;
                    }
                    // Otherwise stay with the XML comment.
                }
            }
        }


        // Plain line comments

        if (comment == null && lineCommentStrings != null)
        {
            for (int i = 0; i < lineCommentStrings.Length; i++)
            {
                comment = TryToGetPDLineComment(lineIterator, lineCommentStrings[i], lineCommentStrings[i], false);

                if (comment != null)
                {
                    break;
                }
            }
        }


        // Nothing starts on this line, so move on to the next one.

        if (comment == null)
        {
            lineIterator.Next();
            continue;
        }

        // XML can actually use the Javadoc comment format in addition to its own.
        if (comment.Javadoc)
        {
            comment.XML = true;
        }

        candidates.Add(comment);
        lineIterator = comment.End;
    }

    return candidates;
}
/* Function: TryToGetBlockComment
 *
 * If the iterator is on a line that starts with the opening symbol of a block comment, this function moves the iterator
 * past the entire comment and returns true.  If the comment is a candidate for documentation it will also return it as
 * a <PossibleDocumentationComment> and mark the symbols as <CommentParsingType.CommentSymbol>.  If the
 * line does not start with an opening comment symbol it will return false and leave the iterator where it is.
 *
 * Parameters:
 *
 *    lineIterator - The line to test.  On a true result it is moved to the line after the comment, or out of bounds if the
 *                   comment was never closed.  On a false result it is left unchanged.
 *    comment - Receives the comment if it is a documentation candidate.  It is null when the function returns false,
 *              when content other than an optional "--" and whitespace trails the closing symbol, and when the
 *              comment is unclosed at the end of the file.
 *
 * Returns:
 *
 *    Whether the line started a block comment, regardless of whether that comment is a documentation candidate.
 */
protected bool TryToGetBlockComment(ref LineIterator lineIterator, out PossibleDocumentationComment comment)
{
    TokenIterator firstToken, endOfLine;
    lineIterator.GetBounds(LineBoundsMode.ExcludeWhitespace, out firstToken, out endOfLine);


    // Are we on a block comment?

    TokenIterator lookahead = firstToken;
    string closingSymbol;

    if (!TryToSkipOpeningBlockCommentSymbol(ref lookahead, out closingSymbol))
    {
        comment = null;
        return false;
    }


    // We are.  Create a possible documentation comment.

    comment = new PossibleDocumentationComment();
    comment.Start = lineIterator;

    // Check if we're on a Javadoc comment, which will be an extra [ that isn't followed by another symbol.
    if (lookahead.Character == '[')
    {
        lookahead.Next();

        if (lookahead.FundamentalType != FundamentalType.Symbol)
        {
            comment.Javadoc = true;
        }
    }


    // Find the end of the comment, which could be on the same line as the start.

    var tokenizer = lineIterator.Tokenizer;
    var lineLookahead = lineIterator;
    bool hadTrailingDashes = false;

    for (;;)
    {
        TokenIterator closingSymbolIterator;

        if (tokenizer.FindTokensBetween(closingSymbol, false, firstToken, endOfLine, out closingSymbolIterator))
        {
            // Move past the end of the comment regardless of whether it's acceptable for documentation or not.
            lineLookahead.Next();

            // Make sure nothing appears after the closing symbol on the line.
            closingSymbolIterator.NextByCharacters(closingSymbol.Length);

            // We'll allow -- though since some people use --[[ and ]]-- for balance even though the latter is actually
            // the closing comment symbol followed by a line comment.
            if (closingSymbolIterator.MatchesAcrossTokens("--"))
            {
                hadTrailingDashes = true;
                closingSymbolIterator.Next(2);
            }

            closingSymbolIterator.NextPastWhitespace();

            if (closingSymbolIterator.FundamentalType != FundamentalType.LineBreak &&
                closingSymbolIterator.FundamentalType != FundamentalType.Null)
            {
                comment = null;
            }
            else
            {
                comment.End = lineLookahead;
            }

            break;
        }

        lineLookahead.Next();

        // If we're not in bounds that means there was an unclosed comment at the end of the file.  Skip it but don't
        // treat it as a documentation candidate.
        //
        // This has to test lineLookahead, the iterator this loop advances.  The token iterator "lookahead" still sits just
        // past the opening symbol and never moves in this loop, so testing it would not detect running off the end of
        // the file and the loop would keep going with an out of bounds line.
        if (!lineLookahead.IsInBounds)
        {
            comment = null;
            break;
        }

        lineLookahead.GetBounds(LineBoundsMode.ExcludeWhitespace, out firstToken, out endOfLine);
    }


    // Mark the symbols before returning.

    if (comment != null)
    {
        // Opening symbol.  Skip it again from the start of the first line to find where it ends, and include the extra [
        // if it's a Javadoc comment.
        firstToken = comment.Start.FirstToken(LineBoundsMode.ExcludeWhitespace);
        lookahead = firstToken;

        TryToSkipOpeningBlockCommentSymbol(ref lookahead, out closingSymbol);

        if (comment.Javadoc)
        {
            lookahead.Next();
        }

        firstToken.SetCommentParsingTypeBetween(lookahead, CommentParsingType.CommentSymbol);

        // Closing symbol.  It's the last non-whitespace content on the comment's final line, along with the trailing --
        // if there was one, so back up from the end of that line to find where it starts.
        LineIterator lastLine = comment.End;
        lastLine.Previous();
        lastLine.GetBounds(LineBoundsMode.ExcludeWhitespace, out firstToken, out endOfLine);

        lookahead = endOfLine;

        if (hadTrailingDashes)
        {
            lookahead.Previous(2);
        }

        lookahead.PreviousByCharacters(closingSymbol.Length);
        lookahead.SetCommentParsingTypeBetween(endOfLine, CommentParsingType.CommentSymbol);
    }


    // If we made it this far that means we found a comment and can move the line iterator and return true.  Whether
    // that comment was suitable for documentation will be determined by the comment variable, but we are moving the
    // iterator and returning true either way.

    lineIterator = lineLookahead;
    return true;
}