/// <summary>
/// Converts a labeled test set of tweets into a labeled dataset of sparse vectors,
/// using the entry of <c>mFoldBowSpaces</c> registered under the given fold key.
/// </summary>
/// <param name="foldN">Key used to look up the fold's entry in <c>mFoldBowSpaces</c>.</param>
/// <param name="testSet">Labeled tweets whose <c>Text</c> is passed to <c>ProcessDocument</c>.</param>
/// <returns>
/// A new <c>LabeledDataset</c> built from the projected examples; each keeps its original label.
/// </returns>
protected override ILabeledDataset<SentimentLabel, SparseVector<double>> MapTestSet(int foldN, ILabeledDataset<SentimentLabel, Tweet> testSet)
{
    // The fold entry is looked up inside the projection, i.e. once per example
    // while the sequence is being enumerated, not once up front.
    var vectorized = testSet.Select(labeledTweet =>
    {
        SparseVector<double> bow = mFoldBowSpaces[foldN].ProcessDocument(labeledTweet.Example.Text);
        return new LabeledExample<SentimentLabel, SparseVector<double>>(labeledTweet.Label, bow);
    });

    return new LabeledDataset<SentimentLabel, SparseVector<double>>(vectorized);
}
Exemple #2
0
        /// <summary>
        /// Creates a new space via <c>BowSpaceFunc</c>, registers it in <c>mFoldBowSpaces</c> under
        /// the fold key, initializes it on the training set, and returns the resulting vectors
        /// paired with the training labels.
        /// </summary>
        /// <param name="foldN">Fold key; must not already be present in <c>mFoldBowSpaces</c>.</param>
        /// <param name="trainSet">Labeled training texts; item i supplies the label for vector i.</param>
        /// <returns>A new <c>LabeledDataset</c> with one entry per vector returned by <c>Initialize</c>.</returns>
        protected override ILabeledDataset<LblT, SparseVector<double>> MapTrainSet(int foldN, ILabeledDataset<LblT, string> trainSet)
        {
            BowSpace foldSpace;

            // State checks: nothing may be registered for this fold yet, and
            // registering the freshly created space must succeed.
            Preconditions.CheckState(!mFoldBowSpaces.TryGetValue(foldN, out foldSpace));
            foldSpace = BowSpaceFunc();
            Preconditions.CheckState(mFoldBowSpaces.TryAdd(foldN, foldSpace));

            // A DeltaBowSpace is initialized with the labeled set itself; any other
            // space only receives the example texts.
            List<SparseVector<double>> vectors;
            if (foldSpace is DeltaBowSpace<LblT>)
            {
                vectors = ((DeltaBowSpace<LblT>)foldSpace).Initialize(trainSet);
            }
            else
            {
                vectors = foldSpace.Initialize(trainSet.Select(d => d.Example));
            }

            var result = new LabeledDataset<LblT, SparseVector<double>>();
            int position = 0;

            foreach (SparseVector<double> vector in vectors)
            {
                result.Add(trainSet[position].Label, vector);
                position++;
            }

            return result;
        }
Exemple #3
0
        /// <summary>
        /// Initializes the base space on the example texts, derives one delta weight per word from
        /// how the word's occurrence counts are distributed over the labels, stores those weights in
        /// <c>mWordDeltas</c>, and applies <c>CalcDeltaBow</c> to every vector.
        /// </summary>
        /// <param name="labeledDataset">
        /// Labeled texts. Item i is used as the label source for vector i returned by the base
        /// <c>Initialize</c>.
        /// </param>
        /// <param name="largeScale">Passed through unchanged to the base <c>Initialize</c>.</param>
        /// <returns>
        /// A new list holding the same vector instances returned by the base <c>Initialize</c>,
        /// after <c>CalcDeltaBow</c> has been called on each.
        /// </returns>
        public List<SparseVector<double>> Initialize(ILabeledDataset<LabelT, string> labeledDataset, bool largeScale)
        {
            bool normalizeVectors = NormalizeVectors;
            List<SparseVector<double>> bowData;

            // The base pass runs with NormalizeVectors switched off; the saved setting is
            // handed to CalcDeltaBow below instead.
            NormalizeVectors = false;
            try
            {
                bowData = base.Initialize(labeledDataset.Select(d => d.Example), largeScale);
            }
            finally
            {
                // Restore the caller's setting even when base.Initialize throws, so a failed
                // initialization does not leave the flag permanently cleared.
                NormalizeVectors = normalizeVectors;
            }

            // count word label frequencies: label -> (word index -> number of vector entries seen)
            var labelWordCounts = new Dictionary<LabelT, Dictionary<int, int>>();

            for (int i = 0; i < bowData.Count; i++)
            {
                // The label is the same for every entry of vector i, so fetch it once.
                LabelT label = labeledDataset[i].Label;

                foreach (IdxDat<double> idxDat in bowData[i])
                {
                    // A label only gets a dictionary once at least one entry is seen for it,
                    // which is what labelCount below is based on.
                    Dictionary<int, int> wordCounts;
                    if (!labelWordCounts.TryGetValue(label, out wordCounts))
                    {
                        labelWordCounts.Add(label, wordCounts = new Dictionary<int, int>());
                    }

                    // TryGetValue leaves count at 0 for an unseen index, so this both adds and increments.
                    int count;
                    wordCounts.TryGetValue(idxDat.Idx, out count);
                    wordCounts[idxDat.Idx] = count + 1;
                }
            }

            // calc deltas
            int labelCount = labelWordCounts.Count;
            var counts = new List<double>();

            foreach (Word word in Words)
            {
                // Collect this word's count under every label that saw it.
                counts.Clear();
                foreach (KeyValuePair<LabelT, Dictionary<int, int>> kv in labelWordCounts)
                {
                    int count;
                    if (kv.Value.TryGetValue(word.mIdx, out count))
                    {
                        counts.Add(count);
                    }
                }

                if (counts.Count > 0)
                {
                    // |log2( max / max(sum of the other counts, 1) * (labelCount - 1) )|
                    // NOTE(review): with exactly one label, (labelCount - 1) is 0, the argument
                    // of Math.Log is 0 and the stored delta becomes +Infinity - confirm intended.
                    double max = counts.Max();
                    mWordDeltas.Add(word.mIdx, Math.Abs(Math.Log(
                        max / Math.Max(counts.Sum() - max, 1) * (labelCount - 1), 2)));
                }
                else
                {
                    // Word was not seen under any label: use a weight of 1.
                    mWordDeltas.Add(word.mIdx, 1);
                }
            }

            // transform vectors using deltas
            var bowDataset = new List<SparseVector<double>>();

            foreach (SparseVector<double> bow in bowData)
            {
                CalcDeltaBow(bow, normalizeVectors);
                bowDataset.Add(bow);
            }

            return bowDataset;
        }