/// <summary>
/// Builds the index map for a subset of <paramref name="labelFeatureData"/>: the index of
/// every data point accepted by <paramref name="subset"/> is recorded, in ascending order.
/// </summary>
/// <param name="labelFeatureData">the full data set; also handed to the base constructor</param>
/// <param name="subset">filter whose Keep(index) result decides whether a data point is recorded</param>
public CLabelFeatureDataSubset(CLabelFeatureData labelFeatureData, Subset subset)
    : base(labelFeatureData)
{
    this.cSubSet = 0;

    // mapTbl is sized for the case where every point is kept;
    // only its first cSubSet entries are filled in below.
    int total = labelFeatureData.NumDataPoint;
    this.mapTbl = new int[total];

    int index = 0;
    while (index < total)
    {
        if (subset.Keep(index))
        {
            // position within the subset -> index in the original data
            this.mapTbl[this.cSubSet] = index;
            this.cSubSet++;
        }

        index++;
    }
}
Exemple #2
0
        /// <summary>
        /// Command-line entry point: loads a tsv file, builds the uncoded and then the coded
        /// LabelFeatureData from it, and saves each one when an output file name was supplied.
        /// </summary>
        /// <param name="args">command-line arguments, parsed by DataProcArgs</param>
        static void Main(string[] args)
        {
            DataProcArgs options = new DataProcArgs(args);

            // QueryBoundary keeps track of the queries (needed for ranking);
            // OnelineGroup is used when there is no data boundary.
            IGroupBoundary groupBoundary = options.queryBoundary
                ? (IGroupBoundary)new QueryBoundary()
                : new OnelineGroup();

            string[] labelNames = new string[] { options.labelName };
            IParser<float> labelParser = new MsnLabelParser(labelNames, options.labelNameValueFile);

            Console.WriteLine("Loading data from tsv file " + options.tsvFile);

            // Restrict reading/processing to the features listed in the active-feature file,
            // if one was given; otherwise the feature parser stays null.
            MsnFeatureParser featureParser = null;
            if (options.activeFeatureFile != null)
            {
                string[] activeNames = TsvFileLoader.ReadFeatureNames(options.activeFeatureFile);
                featureParser = new MsnFeatureParser(activeNames);
            }

            TsvFileLoader loader = new TsvFileLoader(options.tsvFile, null, labelParser, featureParser, groupBoundary);
            Console.WriteLine("Finishing loading the tsv file");

            Console.WriteLine("Create LabelFeatureData uncoded ...");
            CLabelFeatureData uncodedData = new CLabelFeatureData(loader.FeatureName, loader.Labels, loader.GroupId, loader.Feature);

            // The "Save" message is printed whether or not an output file was requested.
            Console.WriteLine("Save LabelFeatureData uncoded ...");
            if (options.binFile != null)
            {
                uncodedData.Save(options.binFile);
            }

            Console.WriteLine("Create LabelFeatureData coded ...");
            CLabelFeatureDataCoded codedData = new CLabelFeatureDataCoded(uncodedData, options.cThreads, options.storeCodedFeature, options.fCodedFeatureSparse);

            // Same as above: the message does not depend on binFileCoded being set.
            Console.WriteLine("Save LabelFeatureData coded ...");
            if (options.binFileCoded != null)
            {
                codedData.Save(options.binFileCoded);
            }
        }
 /// <summary>
 /// Convenience overload: forwards to the (labelFeatureData, cThreads, bool) constructor,
 /// passing true as the third argument. The body itself does nothing else.
 /// </summary>
 /// <param name="labelFeatureData">forwarded unchanged</param>
 /// <param name="cThreads">forwarded unchanged</param>
 public CLabelFeatureDataCoded(CLabelFeatureData labelFeatureData, int cThreads)
     : this(labelFeatureData, cThreads, true)
 {
 }
 /// <summary>
 /// Convenience overload: forwards to the (labelFeatureData, cThreads, bool, bool) constructor,
 /// passing true as the fourth argument. The body itself does nothing else.
 /// </summary>
 /// <param name="labelFeatureData">forwarded unchanged</param>
 /// <param name="cThreads">forwarded unchanged</param>
 /// <param name="fStoreCodedFeature">forwarded unchanged</param>
 public CLabelFeatureDataCoded(CLabelFeatureData labelFeatureData, int cThreads, bool fStoreCodedFeature)
     : this(labelFeatureData, cThreads, fStoreCodedFeature, true)
 {
 }
        /// <summary>
        /// Builds coded data from uncoded data: computes the code book over this.feature and,
        /// when requested, also stores the encoded feature values.
        /// </summary>
        /// <param name="labelFeatureData">source data; forwarded to the base constructor</param>
        /// <param name="cThreads">thread count handed to ComputeCodeBook and EncodeFeatureValues</param>
        /// <param name="fStoreCodedFeature">
        /// when true, the encoded features are computed and kept in featureCoded;
        /// when false, featureCoded is not assigned by this constructor
        /// </param>
        /// <param name="fSparse">
        /// forwarded to EncodeFeatureValues; presumably selects a sparse coded representation
        /// (TODO confirm against EncodeFeatureValues)
        /// </param>
        public CLabelFeatureDataCoded(CLabelFeatureData labelFeatureData, int cThreads, bool fStoreCodedFeature, bool fSparse)
            : base(labelFeatureData)
        {
            // compute the code book for each feature
            // (this.feature is expected to have been set up by the base constructor)
            this.codeBook = ComputeCodeBook(this.feature, cThreads);

            // encode the original feature values and store them for speed if required
            if (fStoreCodedFeature)
            {
                this.featureCoded = EncodeFeatureValues(this.feature, this.codeBook, cThreads, fSparse);
            }
        }
 /// <summary>
 /// Copy-style constructor: passes <paramref name="labelFeatureData"/> to the base constructor
 /// and then takes over its feature field.
 /// </summary>
 /// <param name="labelFeatureData">the instance whose feature field is reused</param>
 public CLabelFeatureData(CLabelFeatureData labelFeatureData)
     : base(labelFeatureData)
 {                        
     // Plain assignment, no copy is made here; if feature is a reference type
     // (it looks like an array - confirm), both instances share the same storage.
     this.feature = labelFeatureData.feature;
 }
        /// <summary>
        /// Load LabelFeatureData file and distribute - static function for both CLabelFeatureData and CLabelFeatureDataCoded.
        /// Loads the file and returns a LabelFeatureData object of the requested type
        /// (CLabelFeatureData or CLabelFeatureDataCoded).
        /// Only text input (suffix "tsv" or "gz", compared case-insensitively) is loaded at present;
        /// the binary suffixes "bin" and "dp" are recognised but rejected, as is any other suffix.
        /// </summary>
        /// <param name="inFileName">the name of the file; the text after its last '.' selects the format</param>
        /// <param name="featureParser">parser that understands the feature values</param>
        /// <param name="labelParser">parser that understands the label values</param>
        /// <param name="dataGroupBoundary">data group boundaries</param>
        /// <param name="outDataType">the output data type: typeof(CLabelFeatureDataCoded) requests coded data, anything else leaves the data uncoded; must not be null</param>
        /// <param name="activeFeatureNames">intended to restrict loading to these features; currently unused because binary loading is disabled</param>
        /// <param name="cThreads">number of threads used to code the original data</param>
        /// <param name="fCacheCodedFeature">whether the encoded feature values are computed and stored when coded data is produced</param>
        /// <param name="fSparseCoded">forwarded to the coding step; presumably selects a sparse coded representation (TODO confirm)</param>
        /// <param name="r">forwarded to TsvFileLoader; meaning not visible here (TODO confirm)</param>
        /// <param name="rangeLower">forwarded to TsvFileLoader together with rangeUpper; meaning not visible here (TODO confirm)</param>
        /// <param name="rangeUpper">forwarded to TsvFileLoader together with rangeLower; meaning not visible here (TODO confirm)</param>
        /// <returns>the desired LabelFeatureData if the file could be loaded; otherwise (null file name, binary or unknown suffix), null</returns>
        public static LabelFeatureData DistributeLoad(string inFileName, IParser<float> featureParser, IParser<float> labelParser, IGroupBoundary dataGroupBoundary,
                                            Type outDataType, string[] activeFeatureNames, int cThreads, bool fCacheCodedFeature, bool fSparseCoded, Random r, double rangeLower, double rangeUpper)
        {
            if (inFileName == null)
            {
                return null;
            }

            // Text after the last '.', or the whole name when it contains no '.'
            // (LastIndexOf yields -1 in that case, so Substring starts at 0).
            string suffix = inFileName.Substring(inFileName.LastIndexOf('.') + 1);

            CLabelFeatureData labelFeatureData = null;

            // Ordinal comparison: file suffixes are identifiers, not linguistic text,
            // so the match must not depend on the current culture.
            if (string.Equals(suffix, "tsv", StringComparison.OrdinalIgnoreCase)
                || string.Equals(suffix, "gz", StringComparison.OrdinalIgnoreCase))
            {
                TsvFileLoader tsvFileLoader = new TsvFileLoader(inFileName, null, labelParser, featureParser, dataGroupBoundary, r, rangeLower, rangeUpper);

                labelFeatureData = new CLabelFeatureData(tsvFileLoader.FeatureName, tsvFileLoader.Labels, tsvFileLoader.GroupId, tsvFileLoader.Feature);
            }
            else if (string.Equals(suffix, "bin", StringComparison.OrdinalIgnoreCase)
                     || string.Equals(suffix, "dp", StringComparison.OrdinalIgnoreCase))
            {
                //initially, only accept tsv file.
                //Planned binary loading, kept for reference:
                //BinaryReaderEx binReaderEx = new BinaryReaderEx(inFileName);
                //Type t = binReaderEx.Read<Type>();
                //labelFeatureData = (CLabelFeatureData)binReaderEx.Read(t);
                //binReaderEx.Close();
                //labelFeatureData.SetActiveFeatures(activeFeatureNames);
                return null;
            }

            if (labelFeatureData == null)
            {
                // Unknown suffix: nothing was loaded. Bail out here so the type checks
                // below never dereference a null data object.
                return null;
            }

            if (outDataType.Equals(typeof(CLabelFeatureDataCoded)))
            {
                if (labelFeatureData.GetType().Equals(typeof(CLabelFeatureDataCoded)))
                {
                    // Already coded (only possible once binary loading is enabled):
                    // just (re)build the cached coded features when asked to.
                    if (fCacheCodedFeature)
                    {
                        ((CLabelFeatureDataCoded)labelFeatureData).EncodeFeatureValues(cThreads, fSparseCoded);
                    }
                }
                else
                {
                    //need to upgrade to coded
                    labelFeatureData = new CLabelFeatureDataCoded(labelFeatureData, cThreads, fCacheCodedFeature, fSparseCoded);
                }
            }

            return labelFeatureData;
        }
Exemple #8
0
        /// <summary>
        /// Runs boostTree on a single feature vector and copies one predicted value
        /// per output slot into <paramref name="results"/>.
        /// </summary>
        /// <param name="features">feature values of the one data point to evaluate</param>
        /// <param name="results">output buffer; entry i receives element [i][0] of the prediction</param>
        /// <returns>always true</returns>
        public override bool Evaluate(float[] features, float[] results)
        {
            // Wrap the single feature vector as a one-row data set.
            float[][] singleRow = new float[][] { features };
            LabelFeatureData labelFeatureData = new CLabelFeatureData(singleRow);

            float[][] prediction = boostTree.Predict(labelFeatureData);

            // NOTE(review): the prediction is read as [i][0] for every slot of results, so it
            // presumably is indexed [output][data point] - confirm against boostTree.Predict.
            int slot = 0;
            while (slot < results.Length)
            {
                results[slot] = prediction[slot][0];
                slot++;
            }

            return true;
        }