public void assignLabels( LingoProcessingContext context, DoubleMatrix2D stemCos, IntIntOpenHashMap filteredRowToStemIndex, DoubleMatrix2D phraseCos )
        {
            // Assigns a unique label to each cluster: repeatedly takes the globally
            // best remaining stem or phrase candidate and zeroes its row/column so
            // neither the label nor the cluster column can be chosen again.
            PreprocessingContext preprocessingContext = context.preprocessingContext;
            int firstPhraseIndex = preprocessingContext.allLabels.firstPhraseIndex;
            int [] labelsFeatureIndex = preprocessingContext.allLabels.featureIndex;
            int [] mostFrequentOriginalWordIndex = preprocessingContext.allStems.mostFrequentOriginalWordIndex;
            int desiredClusterCount = stemCos.columns();

            IntArrayList clusterLabelFeatureIndex = new IntArrayList(
                desiredClusterCount);
            DoubleArrayList clusterLabelScore = new DoubleArrayList(desiredClusterCount);
            for (int label = 0; label < desiredClusterCount; label++)
            {
                // Java generics require reference type arguments; Pair<int, int>
                // does not compile -- use boxed Integer.
                Pair<Integer, Integer> stemMax = max(stemCos);
                // phraseCos may be null when there is no phrase matrix (the
                // else-branch below already guards for it); without this check
                // max(phraseCos) would throw an NPE.
                Pair<Integer, Integer> phraseMax = phraseCos != null ? max(phraseCos) : null;

                if (stemMax == null && phraseMax == null)
                {
                    // No candidates left -- emit fewer labels than requested.
                    break;
                }

                double stemScore = stemMax != null ? stemCos.getQuick(stemMax.objectA,
                    stemMax.objectB) : -1;
                double phraseScore = phraseMax != null ? phraseCos.getQuick(
                    phraseMax.objectA, phraseMax.objectB) : -1;

                if (phraseScore > stemScore)
                {
                    // Consume the phrase label and block its cluster column in
                    // both candidate matrices.
                    phraseCos.viewRow(phraseMax.objectA).assign(0);
                    phraseCos.viewColumn(phraseMax.objectB).assign(0);
                    stemCos.viewColumn(phraseMax.objectB).assign(0);

                    clusterLabelFeatureIndex.add(labelsFeatureIndex[phraseMax.objectA
                        + firstPhraseIndex]);
                    clusterLabelScore.add(phraseScore);
                }
                else
                {
                    // Consume the stem label; block the cluster column in the
                    // phrase matrix too, if one exists.
                    stemCos.viewRow(stemMax.objectA).assign(0);
                    stemCos.viewColumn(stemMax.objectB).assign(0);
                    if (phraseCos != null)
                    {
                        phraseCos.viewColumn(stemMax.objectB).assign(0);
                    }

                    clusterLabelFeatureIndex
                        .add(mostFrequentOriginalWordIndex[filteredRowToStemIndex
                            .get(stemMax.objectA)]);
                    clusterLabelScore.add(stemScore);
                }
            }

            context.clusterLabelFeatureIndex = clusterLabelFeatureIndex.toArray();
            context.clusterLabelScore = clusterLabelScore.toArray();
        }
        public void assignLabels( LingoProcessingContext context, DoubleMatrix2D stemCos, IntIntOpenHashMap filteredRowToStemIndex, DoubleMatrix2D phraseCos )
        {
            // For every cluster column independently, picks either the strongest
            // phrase candidate or, failing that, the most frequent original word
            // of the strongest stem candidate.
            final PreprocessingContext preprocessing = context.preprocessingContext;
            final int phraseOffset = preprocessing.allLabels.firstPhraseIndex;
            final int[] featureIndexOfLabel = preprocessing.allLabels.featureIndex;
            final int[] stemTopWordIndex = preprocessing.allStems.mostFrequentOriginalWordIndex;
            final int clusterCount = stemCos.columns();

            // Strongest stem row (by absolute value) per cluster column.
            final int[] bestStemRow = new int[clusterCount];
            final double[] bestStemScore = new double[clusterCount];
            MatrixUtils.maxInColumns( stemCos, bestStemRow, bestStemScore, Functions.ABS );

            // Strongest phrase row per cluster column; -1 marks "no phrase".
            final int[] bestPhraseRow = new int[clusterCount];
            final double[] bestPhraseScore = new double[clusterCount];
            for ( int column = 0; column < clusterCount; column++ )
            {
                bestPhraseRow[column] = -1;
            }
            if ( phraseCos != null )
            {
                MatrixUtils.maxInColumns( phraseCos, bestPhraseRow, bestPhraseScore,
                    Functions.ABS );
            }

            final int[] labelFeature = new int[clusterCount];
            final double[] labelScore = new double[clusterCount];
            for ( int column = 0; column < clusterCount; column++ )
            {
                final int phraseRow = bestPhraseRow[column];
                final int stemIndex = filteredRowToStemIndex.get( bestStemRow[column] );
                final double phraseScore = bestPhraseScore[column];

                if ( phraseRow >= 0 && phraseScore > bestStemScore[column] )
                {
                    labelFeature[column] = featureIndexOfLabel[phraseRow + phraseOffset];
                    labelScore[column] = phraseScore;
                }
                else
                {
                    labelFeature[column] = stemTopWordIndex[stemIndex];
                    labelScore[column] = bestStemScore[column];
                }
            }

            context.clusterLabelFeatureIndex = labelFeature;
            context.clusterLabelScore = labelScore;
        }
        void assignDocuments( LingoProcessingContext context )
        {
            // Assigns to each cluster the document set of its chosen label.
            int[] clusterLabelFeatureIndex = context.clusterLabelFeatureIndex;
            // Java arrays expose .length, not the C#-style .Length property.
            BitSet[] clusterDocuments = new BitSet[clusterLabelFeatureIndex.length];
            int[] labelsFeatureIndex = context.preprocessingContext.allLabels.featureIndex;
            BitSet[] documentIndices = context.preprocessingContext.allLabels.documentIndices;

            // Inverse mapping: feature index value -> position in labelsFeatureIndex,
            // so each cluster's label can be resolved to its document set.
            IntIntOpenHashMap featureValueToIndex = new IntIntOpenHashMap();
            for ( int i = 0; i < labelsFeatureIndex.length; i++ )
            {
                featureValueToIndex.put( labelsFeatureIndex[i], i );
            }

            for ( int clusterIndex = 0; clusterIndex < clusterDocuments.length; clusterIndex++ )
            {
                clusterDocuments[clusterIndex] = documentIndices[featureValueToIndex.get( clusterLabelFeatureIndex[clusterIndex] )];
            }

            context.clusterDocuments = clusterDocuments;
        }
        void buildLabels( LingoProcessingContext context, ITermWeighting termWeighting )
        {
            PreprocessingContext preprocessingContext = context.preprocessingContext;
            VectorSpaceModelContext vsmContext = context.vsmContext;
            DoubleMatrix2D reducedTdMatrix = context.reducedVsmContext.baseMatrix;
            int[] wordsStemIndex = preprocessingContext.allWords.stemIndex;
            int[] labelsFeatureIndex = preprocessingContext.allLabels.featureIndex;
            int[] mostFrequentOriginalWordIndex = preprocessingContext.allStems.mostFrequentOriginalWordIndex;
            int[][] phrasesWordIndices = preprocessingContext.allPhrases.wordIndices;
            BitSet[] labelsDocumentIndices = preprocessingContext.allLabels.documentIndices;
            int wordCount = preprocessingContext.allWords.image.length;
            int documentCount = preprocessingContext.documents.Size();
            BitSet oneWordCandidateStemIndices = new BitSet();
            for ( int i = 0; i < labelsFeatureIndex.Length; i++ )
            {
                int featureIndex = labelsFeatureIndex[i];
                if ( featureIndex >= wordCount )
                {
                    break;
                }
                oneWordCandidateStemIndices.set( wordsStemIndex[featureIndex] );
            }
            IntIntOpenHashMap stemToRowIndex = vsmContext.stemToRowIndex;
            IntIntOpenHashMap filteredRowToStemIndex = new IntIntOpenHashMap();
            IntArrayList filteredRows = new IntArrayList();
            int filteredRowIndex = 0;
            foreach ( IntIntCursor it in stemToRowIndex )
            {
                if ( oneWordCandidateStemIndices.get( it.key ) )
                {
                    filteredRowToStemIndex.put( filteredRowIndex++, it.key );
                    filteredRows.add( it.value );
                }
            }
            double[] featureScores = featureScorer != null ? featureScorer.getFeatureScores( context ) : null;
            int[] wordLabelIndex = new int[wordCount];
            for ( int i = 0; i < wordCount; i++ )
            {
                wordLabelIndex[i] = -1;
            }
            for ( int i = 0; i < labelsFeatureIndex.Length; i++ )
            {
                int featureIndex = labelsFeatureIndex[i];
                if ( featureIndex < wordCount )
                {
                    wordLabelIndex[featureIndex] = i;
                }
            }
            DoubleMatrix2D stemCos = reducedTdMatrix.viewSelection(
            filteredRows.toArray(), null ).copy();
            for ( int r = 0; r < stemCos.rows(); r++ )
            {
                int labelIndex = wordLabelIndex[mostFrequentOriginalWordIndex[filteredRowToStemIndex.get( r )]];
                double penalty = getDocumentCountPenalty( labelIndex, documentCount, labelsDocumentIndices );
                if ( featureScores != null )
                {
                    penalty *= featureScores[labelIndex];
                }
                stemCos.viewRow( r ).assign( Functions.mult( penalty ) );
            }
            DoubleMatrix2D phraseMatrix = vsmContext.termPhraseMatrix;
            int firstPhraseIndex = preprocessingContext.allLabels.firstPhraseIndex;
            DoubleMatrix2D phraseCos = null;
            if ( phraseMatrix != null )
            {
                phraseCos = phraseMatrix.zMult( reducedTdMatrix, null, 1, 0, false, false );
                if ( phraseLengthPenaltyStop < phraseLengthPenaltyStart )
                {
                    phraseLengthPenaltyStop = phraseLengthPenaltyStart;
                }
                double penaltyStep = 1.0 / ( phraseLengthPenaltyStop - phraseLengthPenaltyStart + 1 );
                for ( int row = 0; row < phraseCos.rows(); row++ )
                {
                    int phraseFeature = labelsFeatureIndex[row + firstPhraseIndex];
                    int[] phraseWordIndices = phrasesWordIndices[phraseFeature - wordCount];

                    double penalty;
                    if ( phraseWordIndices.Length >= phraseLengthPenaltyStop )
                    {
                        penalty = 0;
                    }
                    else
                    {
                        penalty = getDocumentCountPenalty( row + firstPhraseIndex,
                            documentCount, labelsDocumentIndices );

                        if ( phraseWordIndices.Length >= phraseLengthPenaltyStart )
                        {
                            penalty *= 1 - penaltyStep
                                * ( phraseWordIndices.Length - phraseLengthPenaltyStart + 1 );
                        }
                        if ( featureScores != null )
                        {
                            penalty *= featureScores[row + firstPhraseIndex];
                        }
                    }
                    phraseCos.viewRow( row ).assign( Functions.mult( penalty * phraseLabelBoost ) );
                }
            }
            labelAssigner.assignLabels( context, stemCos, filteredRowToStemIndex, phraseCos );
        }