Beispiel #1
0
        /* Function: HashPath
         *
         * Builds the hash path for a topic from its title.  If you're appending the result to the hash path of a file or class,
         * put a colon between them.
         *
         * Parameters:
         *
         *		topic - The topic to generate the hash path for.
         *		includeClass - Whether to prefix the result with the part of the topic's symbol that doesn't come from its title.
         *
         * Examples:
         *
         *		topic - Member
         *		topic + includeClass - Module.Module.Member
         */
        static public string HashPath(Engine.Topics.Topic topic, bool includeClass = true)
        {
            // The path is built from Topic.Title rather than Topic.Symbol so the separator characters stay the way they were
            // originally written instead of being normalized and condensed.

            string title = topic.Title;
            int parametersStart = ParameterString.GetParametersIndex(title);

            // Only the part of the title before any parameters is used.
            StringBuilder path;

            if (parametersStart != -1)
            {
                path = new StringBuilder(parametersStart);
                path.Append(title, 0, parametersStart);
            }
            else
            {
                path = new StringBuilder(title);
            }

            path.Replace('\t', ' ');

            // Strip every space except the ones that sit directly between two text characters.  The position only advances
            // when a character is kept, so after a removal the character that slid into this slot gets examined next.
            int position = 0;

            while (position < path.Length)
            {
                if (path[position] != ' ')
                {
                    position++;
                    continue;
                }

                bool atEdge = (position == 0 || position == path.Length - 1);

                // Neighbors are only inspected when both of them exist.
                bool betweenText = !atEdge &&
                                   Tokenizer.FundamentalTypeOf(path[position - 1]) == FundamentalType.Text &&
                                   Tokenizer.FundamentalTypeOf(path[position + 1]) == FundamentalType.Text;

                if (betweenText)
                {
                    position++;
                }
                else
                {
                    path.Remove(position, 1);
                }
            }

            // A definition number other than one is appended in parentheses so multiple symbols in the same file can be told
            // apart.
            // xxx this will be a problem when doing class hash paths as symboldefnumber is only unique to a file
            if (topic.SymbolDefinitionNumber != 1)
            {
                path.Append('(').Append(topic.SymbolDefinitionNumber).Append(')');
            }

            // Prefix the class if there is one and the caller asked for it.
            // xxx when class id is included in topic test for that here, maybe instead of having a flag
            if (includeClass)
            {
                // Whatever part of the full symbol precedes the symbol generated from the title is treated as the class.
                string unusedParameters;
                string symbolFromTitle = SymbolString.FromPlainText(title, out unusedParameters).ToString();
                string completeSymbol  = topic.Symbol.ToString();

                int prefixLength = completeSymbol.Length - symbolFromTitle.Length;

                if (prefixLength > 0 && completeSymbol.Substring(prefixLength) == symbolFromTitle)
                {
                    // The prefix is expected to already end with a member operator, so nothing is added between it and the
                    // rest of the path.
                    string classPrefix = completeSymbol.Substring(0, prefixLength).Replace(SymbolString.SeparatorChar, '.');
                    path.Insert(0, classPrefix);
                }
            }

            return Utilities.Sanitize(path.ToString());
        }
Beispiel #2
0
        // Group: Link Scoring Functions
        // __________________________________________________________________________


        /* Function: Score
         *
         * Generates a numeric score representing how well the <Topic> serves as a match for the <Link>.  Higher scores are
         * better, and zero means they don't match at all.
         *
         * If a score has to beat a certain threshold to be relevant, you can pass it to lessen the processing load.  This function
         * may be able to tell it can't beat the score early and return without performing later steps.  In these cases it will return
         * -1.
         *
         * If scoring a Natural Docs link you must pass a list of interpretations.  It must include the literal form.  The list is
         * used without a null check for Natural Docs links, so leaving it null for one of them will throw.
         *
         * Parameters:
         *
         *		link - The link being scored.
         *		topic - The candidate target the link is being scored against.
         *		minimumScore - The threshold the score has to reach to be relevant.  Zero disables the early exits.
         *		interpretations - The interpretations of the link.  Only read when the link type is LinkType.NaturalDocs.
         *
         * Returns:
         *
         *		- Zero if the topic is not a match for the link.
         *		- -1 if scoring stopped early because the best score still achievable was below minimumScore.
         *		- Otherwise a positive value in the bit format described at the top of the function body.
         */
        public long Score(Link link, Topic topic, long minimumScore = 0, List <LinkInterpretation> interpretations = null)
        {
            // DEPENDENCY: These things depend on the score's internal format:
            //   - EngineTests.LinkScoring
            //   - Link.TargetInterepretationIndex

            // Other than that the score's format should be treated as opaque.  Nothing beyond this class should try to
            // interpret the value other than to know that higher is better, zero is not a match, and -1 means we quit early.

            // It's a 64-bit value so we'll assign bits to the different characteristics.  Higher order bits obviously result in higher
            // numeric values so the characteristics are ordered by priority.

            // Format:
            // 0LCETPPP PPPPPPPP PPPPPPPP PSSSSSSS SSSIIIII IBFFFFFF Rbbbbbbb brrrrrr1

            // 0 - The first bit is zero to make sure the number is positive.

            // L - Whether the topic matches the link's language.
            // C - Whether the topic and link's capitalization match if it matters to the language.
            // E - Whether the text is an exact match with no plural or possessive conversions applied.
            // T - Whether the link parameters exactly match the topic title parameters.
            // P - How well the parameters match.
            // S - How high on the scope list the symbol match is.
            // I - How high on the interpretation list (named/plural/possessive) the match is.
            // B - Whether the topic has a body
            // F - How high on the list of topics that define the same symbol in the same file this is.
            // R - Whether the topic has a prototype.
            // b - The length of the body divided by 16.
            // r - The length of the prototype divided by 16.

            // 1 - The final bit is one to make sure a match will never be zero.


            // For type and class parent links, the comment type MUST have the relevant attribute set to be possible.

            var commentType = EngineInstance.CommentTypes.FromID(topic.CommentTypeID);
            var language    = EngineInstance.Languages.FromID(topic.LanguageID);

            if ((link.Type == LinkType.ClassParent && commentType.InClassHierarchy == false) ||
                (link.Type == LinkType.Type && commentType.IsVariableType == false))
            {
                return(0);
            }


            // 0------- -------- -------- -------- -------- -------- -------- -------1
            // Our baseline.

            long score = 0x0000000000000001;


            // =L------ -------- -------- -------- -------- -------- -------- -------=
            // L - Whether the topic's language matches the link's language.  For type and class parent links this is mandatory.  For
            // Natural Docs links this is the highest priority criterion as links should favor any kind of match within their own
            // language over matches from another.

            if (link.LanguageID == topic.LanguageID)
            {
                score |= 0x4000000000000000;
            }
            else if (link.Type == LinkType.ClassParent || link.Type == LinkType.Type)
            {
                return(0);
            }
            else if (minimumScore > 0x3FFFFFFFFFFFFFFF)
            {
                // Without the L bit the score can't exceed 0x3FFFFFFFFFFFFFFF, so the minimum is already out of reach.
                return(-1);
            }


            // ==CE---- -------- -------- -SSSSSSS SSSIIIII I------- -------- -------=
            // Now we have to go through the interpretations to figure out the fields that could change based on them.
            // C and S will be handled by ScoreInterpretation().  E and I will be handled here.

            // C - Whether the topic and link's capitalization match if it matters to the language.  This depends on the
            //     interpretation because it can be affected by how named links are split.
            // E - Whether the text is an exact match with no plural or possessive conversions applied.  Named links are
            //     okay.
            // S - How high on the scope list the symbol match is.
            // I - How high on the interpretation list (named/plural/possessive) the match is.

            long bestInterpretationScore = 0;
            int  bestInterpretationIndex = 0;

            if (link.Type == LinkType.NaturalDocs)
            {
                // The interpretations list is used here without a null check.
                for (int i = 0; i < interpretations.Count; i++)
                {
                    long interpretationScore = ScoreInterpretation(topic, link, SymbolString.FromPlainText_NoParameters(interpretations[i].Target));

                    if (interpretationScore != 0)
                    {
                        // Add E if there were no plurals or possessives.  Named links are okay.
                        if (interpretations[i].PluralConversion == false && interpretations[i].PossessiveConversion == false)
                        {
                            interpretationScore |= 0x1000000000000000;
                        }

                        // The comparison is strict, so when two interpretations tie the one earlier in the list is kept.
                        if (interpretationScore > bestInterpretationScore)
                        {
                            bestInterpretationScore = interpretationScore;
                            bestInterpretationIndex = i;
                        }
                    }
                }
            }

            else             // type or class parent link
            {
                // These links are scored with the link's own symbol and the interpretations list isn't consulted.
                bestInterpretationScore = ScoreInterpretation(topic, link, link.Symbol);
                bestInterpretationIndex = 0;

                // Add E if there was a match.
                if (bestInterpretationScore != 0)
                {
                    bestInterpretationScore |= 0x1000000000000000;
                }
            }

            // If none of the symbol interpretations matched the topic, we're done.
            if (bestInterpretationScore == 0)
            {
                return(0);
            }

            // Combine C, E, and S into the main score.
            score |= bestInterpretationScore;

            // Calculate I so that lower indexes are higher scores.  Since these are the lowest order bits it's okay to leave
            // this for the end instead of calculating it for every interpretation.
            // I is a six bit field, so every index past 63 shares the lowest value.
            if (bestInterpretationIndex > 63)
            {
                bestInterpretationIndex = 63;
            }

            long bestInterpretationBits = 63 - bestInterpretationIndex;

            // The lowest bit of the I field is bit 23.
            bestInterpretationBits <<= 23;

            score |= bestInterpretationBits;

            // The mask turns on every bit that hasn't been determined yet (T, P, and everything from B down), which gives the
            // highest score still possible from here.  If even that is below the minimum there's no point in continuing.
            if ((score | 0x0FFFFF80007FFFFF) < minimumScore)
            {
                return(-1);
            }


            // ====TPPP PPPPPPPP PPPPPPPP P======= ======== =------- -------- -------=
            // T - Whether the link parameters exactly match the topic title parameters.
            // P - How well the parameters match.

            // Both of these only apply to Natural Docs links that have parameters.
            if (link.Type == LinkType.NaturalDocs)
            {
                int parametersIndex = ParameterString.GetParametersIndex(link.Text);

                if (parametersIndex != -1)
                {
                    string          linkParametersString = link.Text.Substring(parametersIndex);
                    ParameterString linkParameters       = ParameterString.FromPlainText(linkParametersString);

                    // If the topic title has parameters as well, the link parameters must match them exactly.  We
                    // don't do fuzzy matching with topic title parameters.
                    // NOTE(review): when the title has parameters but they don't match, this falls through to the
                    // prototype scoring below rather than rejecting the topic.  Confirm that's intended.
                    if (topic.HasTitleParameters && string.Compare(linkParameters, topic.TitleParameters, !language.CaseSensitive) == 0)
                    {
                        score |= 0x0800000000000000;
                        // We can skip the prototype match since this outweighs it.  Also, we don't want two link targets
                        // where the topic title parameters are matched to be distinguished by the prototype parameters.
                        // We'll let it fall through to lower properties in the score.
                    }
                    else
                    {
                        // Score the first nine parameters.  Each one is shifted into its own two bit slot, with the first
                        // parameter in the highest one (bits 57-58) and the ninth at bits 41-42.
                        // NOTE(review): this assumes ScoreParameter() returns -1 or a value that fits in two bits.  Verify
                        // against its definition.
                        for (int i = 0; i < 9; i++)
                        {
                            long paramScore = ScoreParameter(topic.ParsedPrototype, linkParameters, i, !language.CaseSensitive);

                            // -1 rejects the topic outright.
                            if (paramScore == -1)
                            {
                                return(0);
                            }

                            paramScore <<= 39 + ((9 - i) * 2);
                            score       |= paramScore;
                        }

                        // The tenth is special.  It's possible that functions may have more than ten parameters, so we go
                        // through the rest of them and use the lowest score we get.

                        long lastParamScore = ScoreParameter(topic.ParsedPrototype, linkParameters, 9, !language.CaseSensitive);
                        int  maxParameters  = linkParameters.NumberOfParameters;

                        // Go up to whichever has more parameters, the link or the prototype if there is one.
                        if (topic.ParsedPrototype != null && topic.ParsedPrototype.NumberOfParameters > maxParameters)
                        {
                            maxParameters = topic.ParsedPrototype.NumberOfParameters;
                        }

                        // Because the lowest value is kept, a -1 from any of these carries through to the check below
                        // (presumably no score is lower than -1).
                        for (int i = 10; i < maxParameters; i++)
                        {
                            long paramScore = ScoreParameter(topic.ParsedPrototype, linkParameters, i, !language.CaseSensitive);

                            if (paramScore < lastParamScore)
                            {
                                lastParamScore = paramScore;
                            }
                        }

                        if (lastParamScore == -1)
                        {
                            return(0);
                        }

                        // The last slot is the lowest two bits of the P field, bits 39-40.
                        lastParamScore <<= 39;
                        score           |= lastParamScore;
                    }
                }
            }


            // ======== ======== ======== ======== ======== =BFFFFFF Rbbbbbbb brrrrrr=
            // Finish off the score with the topic properties.

            // B - Whether the topic has a body
            // F - How high on the list of topics that define the same symbol in the same file this is.
            // R - Whether the topic has a prototype.
            // b - The length of the body divided by 16.
            // r - The length of the prototype divided by 16.

            score |= ScoreTopic(topic);

            return(score);
        }