TryCountVertexTermsByGroup
    (
        IGraph oGraph,
        WordMetricUserSettings oWordMetricUserSettings,
        WordCounter oWordCounter,
        WordPairCounter oWordPairCounter,
        HashSet<String> oUniqueImportedIDs,
        out GraphMetricColumn [] oGraphMetricColumns
    )
    {
        // Counts words and word pairs in the vertices of each group and
        // packages the per-group results as graph metric columns.
        //
        // oWordCounter and oWordPairCounter are cleared and reused for every
        // group.  oUniqueImportedIDs is not asserted here; it is simply
        // forwarded to EnumerateEdgesOrVertices().  The results are stored at
        // oGraphMetricColumns, and the method always returns true.

        Debug.Assert(oGraph != null);
        Debug.Assert(oWordMetricUserSettings != null);
        Debug.Assert(oWordCounter != null);
        Debug.Assert(oWordPairCounter != null);
        AssertValid();

        // The group-name columns are created here; the remaining columns are
        // created by CreateGraphMetricValueLists().

        List<GraphMetricValueOrdered> oGroupNamesForWords =
            new List<GraphMetricValueOrdered>();

        List<GraphMetricValueOrdered> oGroupNamesForWordPairs =
            new List<GraphMetricValueOrdered>();

        List<GraphMetricValueOrdered> oWords, oWordCounts, oWordSaliences;

        List<GraphMetricValueOrdered> oPairFirstWords, oPairSecondWords,
            oPairCounts, oPairSaliences, oPairMutualInformations;

        CreateGraphMetricValueLists(
            out oWords,
            out oWordCounts,
            out oWordSaliences,
            out oPairFirstWords,
            out oPairSecondWords,
            out oPairCounts,
            out oPairSaliences,
            out oPairMutualInformations);

        // Walk the graph's groups.  The enumeration is expected to include a
        // dummy group for the entire graph and another one that holds any
        // non-grouped vertices.

        foreach ( GroupInfo oCurrentGroup in
            EnumerateGroupsForCountingVertexTerms(oGraph) )
        {
            // Start this group with empty counters.

            oWordCounter.Clear();
            oWordPairCounter.Clear();

            // Count the terms in each of the group's vertices.  The "false"
            // argument indicates that vertices, not edges, are enumerated.

            foreach ( IVertex oGroupVertex in EnumerateEdgesOrVertices(
                oCurrentGroup.Vertices, false, oGraph, oUniqueImportedIDs) )
            {
                CountTermsInEdgeOrVertex(
                    oGroupVertex,
                    oWordMetricUserSettings.TextColumnName,
                    oWordCounter,
                    oWordPairCounter);
            }

            // The derived metrics are calculated only after every vertex in
            // the group has been counted.

            oWordCounter.CalculateSalienceOfCountedTerms();
            oWordPairCounter.CalculateSalienceOfCountedTerms();
            oWordPairCounter.CalculateMutualInformationOfCountedTerms();

            // Append this group's words, then its word pairs, to the
            // column value lists.

            AddCountedWordsToValueLists(
                oWordCounter.CountedTerms,
                oWordMetricUserSettings,
                oCurrentGroup.Name,
                oWords,
                oWordCounts,
                oWordSaliences,
                oGroupNamesForWords);

            AddCountedWordPairsToValueLists(
                oWordPairCounter.CountedTerms,
                oWordMetricUserSettings,
                oCurrentGroup.Name,
                oPairFirstWords,
                oPairSecondWords,
                oPairCounts,
                oPairSaliences,
                oPairMutualInformations,
                oGroupNamesForWordPairs);
        }

        // Word columns come first, followed by the word-pair columns.

        oGraphMetricColumns = CreateGraphMetricColumns(
            oWords,
            oWordCounts,
            oWordSaliences,
            oGroupNamesForWords,
            oPairFirstWords,
            oPairSecondWords,
            oPairCounts,
            oPairSaliences,
            oPairMutualInformations,
            oGroupNamesForWordPairs);

        return true;
    }
    ConcatenateTopWordsAndWordPairs
    (
        IEnumerable<IEdge> oEdges,
        String sStatusEdgeColumnName,
        Int32 iMaximumTopStrings,
        WordCounter oWordCounter,
        WordPairCounter oWordPairCounter,
        out String sTopWordsInTweetByCount,
        out String sTopWordsInTweetBySalience,
        out String sTopWordPairsInTweetByCount,
        out String sTopWordPairsInTweetBySalience
    )
    {
        // Counts the words and word pairs in the status text stored on a
        // collection of edges, then builds four delimited strings: the top
        // words ranked by count and by salience, and the top word pairs
        // ranked by count and by salience.
        //
        // Both counters are cleared first, so anything they previously held
        // is discarded.  iMaximumTopStrings is passed on to
        // TakeTopStringsAsArray() for each of the four strings.

        Debug.Assert(oEdges != null);
        Debug.Assert( !String.IsNullOrEmpty(sStatusEdgeColumnName) );
        Debug.Assert(iMaximumTopStrings > 0);
        Debug.Assert(oWordCounter != null);
        Debug.Assert(oWordPairCounter != null);

        oWordCounter.Clear();
        oWordPairCounter.Clear();

        foreach (IEdge oCurrentEdge in oEdges)
        {
            String sStatusText;

            // Edges without a non-empty value in the status column
            // contribute nothing.

            if ( !oCurrentEdge.TryGetNonEmptyStringValue(
                sStatusEdgeColumnName, out sStatusText) )
            {
                continue;
            }

            oWordCounter.CountTermsInDocument(sStatusText);
            oWordPairCounter.CountTermsInDocument(sStatusText);
        }

        // Salience is calculated once, after all the edges have been
        // counted.

        oWordCounter.CalculateSalienceOfCountedTerms();
        oWordPairCounter.CalculateSalienceOfCountedTerms();

        // Words, highest count first.

        sTopWordsInTweetByCount = String.Join(
            TwitterSearchNetworkWordMetricUtil.WordSeparator,
            TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
                oWordCounter.CountedTerms
                    .Cast<CountedWord>()
                    .OrderByDescending(oWord => oWord.Count)
                    .Select(oWord => oWord.Word),
                iMaximumTopStrings) );

        // Words, highest salience first.

        sTopWordsInTweetBySalience = String.Join(
            TwitterSearchNetworkWordMetricUtil.WordSeparator,
            TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
                oWordCounter.CountedTerms
                    .Cast<CountedWord>()
                    .OrderByDescending(oWord => oWord.Salience)
                    .Select(oWord => oWord.Word),
                iMaximumTopStrings) );

        // Word pairs, highest count first.  Each pair is turned into a
        // string by FormatWordPair().

        sTopWordPairsInTweetByCount = String.Join(
            TwitterSearchNetworkWordMetricUtil.WordPairSeparator,
            TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
                oWordPairCounter.CountedTerms
                    .Cast<CountedWordPair>()
                    .OrderByDescending(oPair => oPair.Count)
                    .Select(oPair =>
                        TwitterSearchNetworkWordMetricUtil.FormatWordPair(
                            oPair) ),
                iMaximumTopStrings) );

        // Word pairs, highest salience first.

        sTopWordPairsInTweetBySalience = String.Join(
            TwitterSearchNetworkWordMetricUtil.WordPairSeparator,
            TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
                oWordPairCounter.CountedTerms
                    .Cast<CountedWordPair>()
                    .OrderByDescending(oPair => oPair.Salience)
                    .Select(oPair =>
                        TwitterSearchNetworkWordMetricUtil.FormatWordPair(
                            oPair) ),
                iMaximumTopStrings) );
    }
    TryCountEdgeTermsByGroup
    (
        IGraph oGraph,
        WordMetricUserSettings oWordMetricUserSettings,
        WordCounter oWordCounter,
        WordPairCounter oWordPairCounter,
        HashSet<String> oUniqueImportedIDs,
        out GraphMetricColumn [] oGraphMetricColumns
    )
    {
        // Counts words and word pairs in the edges of each group and
        // packages the per-group results as graph metric columns.
        //
        // oWordCounter and oWordPairCounter are cleared and reused for every
        // group.  oUniqueImportedIDs may be null; when it is not, it is
        // cleared after the entire-graph group has been processed.  The
        // results are stored at oGraphMetricColumns, and the method always
        // returns true.

        Debug.Assert(oGraph != null);
        Debug.Assert(oWordMetricUserSettings != null);
        Debug.Assert(oWordCounter != null);
        Debug.Assert(oWordPairCounter != null);
        AssertValid();

        // The group-name columns are created here; the remaining columns are
        // created by CreateGraphMetricValueLists().

        List<GraphMetricValueOrdered> oGroupNamesForWords =
            new List<GraphMetricValueOrdered>();

        List<GraphMetricValueOrdered> oGroupNamesForWordPairs =
            new List<GraphMetricValueOrdered>();

        List<GraphMetricValueOrdered> oWords, oWordCounts, oWordSaliences;

        List<GraphMetricValueOrdered> oPairFirstWords, oPairSecondWords,
            oPairCounts, oPairSaliences, oPairMutualInformations;

        CreateGraphMetricValueLists(
            out oWords,
            out oWordCounts,
            out oWordSaliences,
            out oPairFirstWords,
            out oPairSecondWords,
            out oPairCounts,
            out oPairSaliences,
            out oPairMutualInformations);

        // Walk the edges of each of the graph's groups.  The sorted result
        // is expected to include a "dummy" group holding the edges that
        // aren't contained in any real group.

        foreach ( GroupEdgeInfo oCurrentGroupEdges in
            GroupEdgeSorter.SortGroupEdges(
                oGraph, Int32.MaxValue, true, true) )
        {
            // Start this group with empty counters.

            oWordCounter.Clear();
            oWordPairCounter.Clear();

            // Count the terms in each of the group's edges.  The "true"
            // argument indicates that edges, not vertices, are enumerated.

            foreach ( IEdge oGroupEdge in EnumerateEdgesOrVertices(
                oCurrentGroupEdges.Edges, true, oGraph, oUniqueImportedIDs) )
            {
                CountTermsInEdgeOrVertex(
                    oGroupEdge,
                    oWordMetricUserSettings.TextColumnName,
                    oWordCounter,
                    oWordPairCounter);
            }

            // The derived metrics are calculated only after every edge in
            // the group has been counted.

            oWordCounter.CalculateSalienceOfCountedTerms();
            oWordPairCounter.CalculateSalienceOfCountedTerms();
            oWordPairCounter.CalculateMutualInformationOfCountedTerms();

            // Append this group's words, then its word pairs, to the
            // column value lists.

            String sCurrentGroupName = oCurrentGroupEdges.GroupName;

            AddCountedWordsToValueLists(
                oWordCounter.CountedTerms,
                oWordMetricUserSettings,
                sCurrentGroupName,
                oWords,
                oWordCounts,
                oWordSaliences,
                oGroupNamesForWords);

            AddCountedWordPairsToValueLists(
                oWordPairCounter.CountedTerms,
                oWordMetricUserSettings,
                sCurrentGroupName,
                oPairFirstWords,
                oPairSecondWords,
                oPairCounts,
                oPairSaliences,
                oPairMutualInformations,
                oGroupNamesForWordPairs);

            // The dummy group for the entire graph stores all the edges in
            // the graph.  SortGroupEdges() is relied on to return it as the
            // first group, so the imported IDs get cleared only once within
            // this loop.

            Boolean bIsEntireGraphGroup =
                (sCurrentGroupName ==
                    GroupEdgeSorter.DummyGroupNameForEntireGraph);

            if (bIsEntireGraphGroup && oUniqueImportedIDs != null)
            {
                oUniqueImportedIDs.Clear();
            }
        }

        // Word columns come first, followed by the word-pair columns.

        oGraphMetricColumns = CreateGraphMetricColumns(
            oWords,
            oWordCounts,
            oWordSaliences,
            oGroupNamesForWords,
            oPairFirstWords,
            oPairSecondWords,
            oPairCounts,
            oPairSaliences,
            oPairMutualInformations,
            oGroupNamesForWordPairs);

        return true;
    }
Ejemplo n.º 4
0
        ConcatenateTopWordsAndWordPairs
        (
            IEnumerable <IEdge> oEdges,
            String sStatusEdgeColumnName,
            Int32 iMaximumTopStrings,
            WordCounter oWordCounter,
            WordPairCounter oWordPairCounter,
            out String sTopWordsInTweetByCount,
            out String sTopWordsInTweetBySalience,
            out String sTopWordPairsInTweetByCount,
            out String sTopWordPairsInTweetBySalience
        )
        {
            // Builds four delimited "top terms" strings from the status text
            // found on the given edges: top words by count, top words by
            // salience, top word pairs by count, and top word pairs by
            // salience.
            //
            // The supplied counters are cleared before counting starts.
            // iMaximumTopStrings is handed to TakeTopStringsAsArray() for
            // each string.

            Debug.Assert(oEdges != null);
            Debug.Assert(!String.IsNullOrEmpty(sStatusEdgeColumnName));
            Debug.Assert(iMaximumTopStrings > 0);
            Debug.Assert(oWordCounter != null);
            Debug.Assert(oWordPairCounter != null);

            oWordCounter.Clear();
            oWordPairCounter.Clear();

            foreach (IEdge oStatusEdge in oEdges)
            {
                String sEdgeStatus;

                // Skip edges that have no non-empty status value.

                if (!oStatusEdge.TryGetNonEmptyStringValue(
                        sStatusEdgeColumnName, out sEdgeStatus))
                {
                    continue;
                }

                oWordCounter.CountTermsInDocument(sEdgeStatus);
                oWordPairCounter.CountTermsInDocument(sEdgeStatus);
            }

            // Salience can be calculated only after every status has been
            // counted.

            oWordCounter.CalculateSalienceOfCountedTerms();
            oWordPairCounter.CalculateSalienceOfCountedTerms();

            // Top words, ordered by descending count.

            sTopWordsInTweetByCount = String.Join(
                TwitterSearchNetworkWordMetricUtil.WordSeparator,
                TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
                    oWordCounter.CountedTerms
                        .Cast<CountedWord>()
                        .OrderByDescending(oTerm => oTerm.Count)
                        .Select(oTerm => oTerm.Word),
                    iMaximumTopStrings));

            // Top words, ordered by descending salience.

            sTopWordsInTweetBySalience = String.Join(
                TwitterSearchNetworkWordMetricUtil.WordSeparator,
                TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
                    oWordCounter.CountedTerms
                        .Cast<CountedWord>()
                        .OrderByDescending(oTerm => oTerm.Salience)
                        .Select(oTerm => oTerm.Word),
                    iMaximumTopStrings));

            // Top word pairs, ordered by descending count.  FormatWordPair()
            // converts each pair to its string form.

            sTopWordPairsInTweetByCount = String.Join(
                TwitterSearchNetworkWordMetricUtil.WordPairSeparator,
                TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
                    oWordPairCounter.CountedTerms
                        .Cast<CountedWordPair>()
                        .OrderByDescending(oTermPair => oTermPair.Count)
                        .Select(oTermPair =>
                            TwitterSearchNetworkWordMetricUtil.FormatWordPair(
                                oTermPair)),
                    iMaximumTopStrings));

            // Top word pairs, ordered by descending salience.

            sTopWordPairsInTweetBySalience = String.Join(
                TwitterSearchNetworkWordMetricUtil.WordPairSeparator,
                TwitterSearchNetworkStringUtil.TakeTopStringsAsArray(
                    oWordPairCounter.CountedTerms
                        .Cast<CountedWordPair>()
                        .OrderByDescending(oTermPair => oTermPair.Salience)
                        .Select(oTermPair =>
                            TwitterSearchNetworkWordMetricUtil.FormatWordPair(
                                oTermPair)),
                    iMaximumTopStrings));
        }