Пример #1
0
        /// <summary>
        /// Builds a summary with continuous LexRank: the phrase-to-phrase cosine similarity
        /// matrix is turned into a damped, row-stochastic transition matrix, the stationary
        /// distribution is obtained with the power method, and the resulting phrase weights
        /// are used to select the phrases of the summary.
        /// </summary>
        /// <param name="mySummaryParameters">
        /// Parameters of the run; cast to <c>ContinuousLexRankParameters</c>, so any other
        /// type raises <see cref="InvalidCastException"/>.
        /// </param>
        /// <param name="newsDirectory">Directory handed to the <c>TDM</c> constructor.</param>
        /// <param name="cacheFileName">Cache file name handed to <c>TDM</c> and <c>SimilarityMatrix</c>.</param>
        /// <remarks>
        /// The matrix returned by <c>CosineSimilarityBetweenPhrases</c> is modified in place.
        /// The result is stored in <c>TextSummary</c> and <c>SummaryByPhrases</c>.
        /// </remarks>
        public override void Summarize(SummaryParameters mySummaryParameters, string newsDirectory, string cacheFileName)
        {
            // Cast once and reuse the typed reference.
            var parameters = (ContinuousLexRankParameters)mySummaryParameters;
            Mis = parameters;

            Debug.WriteLine("Starting execution of ContinuousLexRank.");
            // Stopwatch is monotonic; wall-clock differences can jump with clock adjustments.
            var stopwatch = Stopwatch.StartNew();

            var myTDM              = new TDM(newsDirectory, Mis.MyTDMParameters, cacheFileName);
            var mySimilarityMatrix = new SimilarityMatrix(myTDM, cacheFileName, parameters.SimilarityNormalized);

            var totalPhrases         = myTDM.PhrasesList.Count;
            var myCosineSimilarities = mySimilarityMatrix.CosineSimilarityBetweenPhrases;

            // Loop-invariant parts of the damping formula d/N + (1 - d) * p(i, j).
            var uniformJump = Mis.DampingFactor / totalPhrases;
            var followLink  = 1 - Mis.DampingFactor;

            // Turn the similarity matrix into the transition matrix of a Markov chain.
            // Each row is first scaled so that it adds up to 1 (row i holds the probabilities
            // of moving from state i to every state j), then damped. Damping makes the chain
            // irreducible (every state reachable from every other state) and aperiodic
            // (gcd { n : P^n(x, x) > 0 } = 1).
            for (var i = 0; i < totalPhrases; i++)
            {
                var row = myCosineSimilarities[i];

                var rowSum = 0.0d;
                for (var j = 0; j < totalPhrases; j++)
                {
                    rowSum += row[j];
                }

                // A row with no similarity mass would produce 0/0 = NaN and poison the whole
                // power iteration; treat it as a uniform jump to any phrase instead.
                var hasMass = rowSum > 0.0d;

                for (var j = 0; j < totalPhrases; j++)
                {
                    var probability = hasMass ? row[j] / rowSum : 1.0d / totalPhrases;
                    row[j] = uniformJump + followLink * probability;
                }
            }

            // Based on the Perron-Frobenius theorem, an irreducible and aperiodic Markov chain
            // always converges to a single stationary distribution.
            var weights = UtilLexRank.PowerMethod(myCosineSimilarities, Mis.ErrorTolerance);

            // Candidate phrases paired with their weight (relevance).
            var phrasesList = new List<PositionValue>(totalPhrases);
            for (var i = 0; i < totalPhrases; i++)
            {
                phrasesList.Add(new PositionValue(i, weights[i]));
            }

            // Highest weight first; weights closer than 1e-07 are considered tied and the
            // phrase that appears earlier in its document wins.
            // NOTE(review): a tolerance-based comparison is not transitive, which List.Sort
            // does not guarantee to handle gracefully - consider a strict two-key ordering.
            phrasesList.Sort((x, y) =>
            {
                if (Math.Abs(x.Value - y.Value) < 1e-07)
                {
                    var positionOfX = myTDM.PhrasesList[x.Position].PositionInDocument;
                    var positionOfY = myTDM.PhrasesList[y.Position].PositionInDocument;
                    return positionOfX.CompareTo(positionOfY);
                }
                return -1 * x.Value.CompareTo(y.Value);
            });

            TextSummary = Util.SummarizeByCompressionRatio(myTDM, phrasesList, mySummaryParameters.MySummaryType,
                                                           Mis.MaximumLengthOfSummaryForRouge, out SummaryByPhrases);

            stopwatch.Stop();

            Debug.WriteLine("Minutes of ContinuousLexRank: " + stopwatch.Elapsed.TotalMinutes);
        }
Пример #2
0
        /// <summary>
        /// Builds a summary with thresholded LexRank: cosine similarities above the configured
        /// threshold become edges of an unweighted graph, the adjacency matrix is turned into a
        /// damped, row-stochastic transition matrix, the stationary distribution is obtained
        /// with the power method, and the resulting phrase weights are used to select the
        /// phrases of the summary.
        /// </summary>
        /// <param name="mySummaryParameters">
        /// Parameters of the run; cast to <c>LexRankWithThresholdParameters</c>, so any other
        /// type raises <see cref="InvalidCastException"/>.
        /// </param>
        /// <param name="newsDirectory">Directory handed to the <c>TDM</c> constructor.</param>
        /// <param name="cacheFileName">Cache file name handed to <c>TDM</c> and <c>SimilarityMatrix</c>.</param>
        /// <remarks>
        /// The matrix returned by <c>CosineSimilarityBetweenPhrases</c> is modified in place.
        /// The result is stored in <c>TextSummary</c> and <c>SummaryByPhrases</c>.
        /// </remarks>
        public override void Summarize(SummaryParameters mySummaryParameters, string newsDirectory, string cacheFileName)
        {
            // Cast once and reuse the typed reference.
            var parameters = (LexRankWithThresholdParameters)mySummaryParameters;
            Mis = parameters;

            Debug.WriteLine("Starting execution of LexRankWithThreshold.");
            // Stopwatch is monotonic; wall-clock differences can jump with clock adjustments.
            var stopwatch = Stopwatch.StartNew();

            var myTDM              = new TDM(newsDirectory, Mis.MyTDMParameters, cacheFileName);
            var mySimilarityMatrix = new SimilarityMatrix(myTDM, cacheFileName, parameters.SimilarityNormalized);

            var totalPhrases         = myTDM.PhrasesList.Count;
            var myCosineSimilarities = mySimilarityMatrix.CosineSimilarityBetweenPhrases;

            // Loop-invariant parts of the damping formula d/N + (1 - d) * p(i, j).
            var uniformJump = Mis.DampingFactor / totalPhrases;
            var followLink  = 1 - Mis.DampingFactor;

            // Turn the similarity matrix into the transition matrix of a Markov chain.
            // Entries above the threshold become 1 and the rest 0; each row is then divided by
            // its number of ones so that it adds up to 1, and finally damped so that the chain
            // is irreducible and aperiodic.
            for (var i = 0; i < totalPhrases; i++)
            {
                var row = myCosineSimilarities[i];

                var neighbours = 0;
                for (var j = 0; j < totalPhrases; j++)
                {
                    if (row[j] > Mis.Threshold)
                    {
                        row[j] = 1;
                        neighbours++;
                    }
                    else
                    {
                        row[j] = 0;
                    }
                }

                // A phrase with no similarity above the threshold would produce 0/0 = NaN and
                // poison the whole power iteration; treat it as a uniform jump to any phrase.
                var hasNeighbours = neighbours > 0;

                for (var j = 0; j < totalPhrases; j++)
                {
                    var probability = hasNeighbours ? row[j] / neighbours : 1.0d / totalPhrases;
                    row[j] = uniformJump + followLink * probability;
                }
            }

            var weights = UtilLexRank.PowerMethod(myCosineSimilarities, Mis.ErrorTolerance);

            // Candidate phrases paired with their weight (relevance).
            var phrasesList = new List<PositionValue>(totalPhrases);
            for (var i = 0; i < totalPhrases; i++)
            {
                phrasesList.Add(new PositionValue(i, weights[i]));
            }

            // Highest weight first; weights closer than 1e-07 are considered tied and the
            // phrase that appears earlier in its document wins.
            // NOTE(review): a tolerance-based comparison is not transitive, which List.Sort
            // does not guarantee to handle gracefully - consider a strict two-key ordering.
            phrasesList.Sort((x, y) =>
            {
                if (Math.Abs(x.Value - y.Value) < 1e-07)
                {
                    var positionOfX = myTDM.PhrasesList[x.Position].PositionInDocument;
                    var positionOfY = myTDM.PhrasesList[y.Position].PositionInDocument;
                    return positionOfX.CompareTo(positionOfY);
                }
                return -1 * x.Value.CompareTo(y.Value);
            });

            TextSummary = Util.SummarizeByCompressionRatio(myTDM, phrasesList, mySummaryParameters.MySummaryType,
                                                           Mis.MaximumLengthOfSummaryForRouge, out SummaryByPhrases);

            stopwatch.Stop();

            Debug.WriteLine("Minutes of LexRankWithThreshold " + stopwatch.Elapsed.TotalMinutes);
        }