/// <summary>
/// Creates a subset view of <paramref name="labelFeatureData"/> by recording, in order,
/// the indices of the data points that <paramref name="subset"/> decides to keep.
/// </summary>
/// <param name="labelFeatureData">the data set the subset is drawn from</param>
/// <param name="subset">selector queried once per data point index via <c>Keep</c></param>
public CLabelFeatureDataSubset(CLabelFeatureData labelFeatureData, Subset subset)
    : base(labelFeatureData)
{
    int totalPoints = labelFeatureData.NumDataPoint;

    // The table is sized for the worst case (every point kept); only the first
    // cSubSet entries are meaningful once the scan below has finished.
    this.mapTbl = new int[totalPoints];
    this.cSubSet = 0;

    int pointIndex = 0;
    while (pointIndex < totalPoints)
    {
        if (subset.Keep(pointIndex))
        {
            this.mapTbl[this.cSubSet] = pointIndex;
            this.cSubSet++;
        }

        pointIndex++;
    }
}
/// <summary>
/// Command-line entry point: loads a tsv file, builds the uncoded LabelFeatureData,
/// optionally saves it, then builds the coded LabelFeatureData and optionally saves that too.
/// </summary>
/// <param name="args">command-line arguments, parsed by <c>DataProcArgs</c></param>
static void Main(string[] args)
{
    DataProcArgs cmd = new DataProcArgs(args);

    // QueryBoundary: we need to keep track of the queries for ranking.
    // OnelineGroup: data boundary, i.e. no boundary.
    IGroupBoundary boundary = cmd.queryBoundary
        ? (IGroupBoundary)new QueryBoundary()
        : (IGroupBoundary)new OnelineGroup();

    string[] labelName = new string[] { cmd.labelName };
    IParser<float> labelParser = new MsnLabelParser(labelName, cmd.labelNameValueFile);

    Console.WriteLine("Loading data from tsv file " + cmd.tsvFile);

    // Read and process only the subset of activated features listed in the
    // activeFeatureFile; when no such file is given the parser stays null.
    MsnFeatureParser featureParser = cmd.activeFeatureFile != null
        ? new MsnFeatureParser(TsvFileLoader.ReadFeatureNames(cmd.activeFeatureFile))
        : null;

    TsvFileLoader tsvFileLoader = new TsvFileLoader(cmd.tsvFile, null, labelParser, featureParser, boundary);
    Console.WriteLine("Finishing loading the tsv file");

    Console.WriteLine("Create LabelFeatureData uncoded ...");
    CLabelFeatureData labelFeatureData = new CLabelFeatureData(
        tsvFileLoader.FeatureName,
        tsvFileLoader.Labels,
        tsvFileLoader.GroupId,
        tsvFileLoader.Feature);

    // The progress message is printed even when no output file was requested.
    Console.WriteLine("Save LabelFeatureData uncoded ...");
    if (cmd.binFile != null)
    {
        labelFeatureData.Save(cmd.binFile);
    }

    Console.WriteLine("Create LabelFeatureData coded ...");
    CLabelFeatureDataCoded labelFeatureDataCoded = new CLabelFeatureDataCoded(
        labelFeatureData,
        cmd.cThreads,
        cmd.storeCodedFeature,
        cmd.fCodedFeatureSparse);

    Console.WriteLine("Save LabelFeatureData coded ...");
    if (cmd.binFileCoded != null)
    {
        labelFeatureDataCoded.Save(cmd.binFileCoded);
    }
}
/// <summary>
/// Convenience overload: forwards to the three-argument constructor with
/// <c>fStoreCodedFeature</c> set to <c>true</c>.
/// </summary>
/// <param name="labelFeatureData">the uncoded data to build from</param>
/// <param name="cThreads">thread count forwarded to the chained constructor</param>
public CLabelFeatureDataCoded(CLabelFeatureData labelFeatureData, int cThreads)
    : this(labelFeatureData, cThreads, fStoreCodedFeature: true)
{
}
/// <summary>
/// Convenience overload: forwards to the four-argument constructor with
/// <c>fSparse</c> set to <c>true</c>.
/// </summary>
/// <param name="labelFeatureData">the uncoded data to build from</param>
/// <param name="cThreads">thread count forwarded to the chained constructor</param>
/// <param name="fStoreCodedFeature">forwarded unchanged to the chained constructor</param>
public CLabelFeatureDataCoded(CLabelFeatureData labelFeatureData, int cThreads, bool fStoreCodedFeature)
    : this(labelFeatureData, cThreads, fStoreCodedFeature, fSparse: true)
{
}
/// <summary>
/// Builds coded label/feature data from uncoded data: computes the code book for each
/// feature and, when requested, also encodes and stores the feature values.
/// </summary>
/// <param name="labelFeatureData">the uncoded data, passed to the base constructor</param>
/// <param name="cThreads">thread count handed to ComputeCodeBook and EncodeFeatureValues</param>
/// <param name="fStoreCodedFeature">when true, the encoded feature values are computed and kept in <c>featureCoded</c>; when false, only the code book is built</param>
/// <param name="fSparse">forwarded to EncodeFeatureValues; only consulted when <paramref name="fStoreCodedFeature"/> is true</param>
public CLabelFeatureDataCoded(CLabelFeatureData labelFeatureData, int cThreads, bool fStoreCodedFeature, bool fSparse)
    : base(labelFeatureData)
{
    // compute the code book for each feature
    this.codeBook = ComputeCodeBook(this.feature, cThreads);

    // encode the original feature values and store them for speed if required
    if (fStoreCodedFeature)
    {
        this.featureCoded = EncodeFeatureValues(this.feature, this.codeBook, cThreads, fSparse);
    }
}
/// <summary>
/// Copy-style constructor: initialises the base from <paramref name="labelFeatureData"/>
/// and takes over its <c>feature</c> member.
/// </summary>
/// <param name="labelFeatureData">the instance to copy from</param>
public CLabelFeatureData(CLabelFeatureData labelFeatureData)
    : base(labelFeatureData)
{
    // Plain assignment: the new instance refers to the same feature object as the
    // source rather than a duplicate of it.
    feature = labelFeatureData.feature;
}
/// <summary>
/// Load LabelFeatureData file and distribute - static function for both CLabelFeatureData and CLabelFeatureDataCoded.
/// Loads the file and returns a LabelFeatureData object of the requested type
/// (CLabelFeatureData or CLabelFeatureDataCoded).
/// </summary>
/// <param name="inFileName">the name of the file; the text after the last '.' selects the format.
/// Only "tsv" and "gz" are currently loaded (both through TsvFileLoader); "bin" and "dp" are
/// recognised but not yet supported and yield null, as does any other suffix.</param>
/// <param name="featureParser">parser that understands the feature values</param>
/// <param name="labelParser">parser that understands the label values</param>
/// <param name="dataGroupBoundary">data group boundaries</param>
/// <param name="outDataType">the output data type; when it equals typeof(CLabelFeatureDataCoded) the loaded data is upgraded to coded form, otherwise the uncoded data is returned</param>
/// <param name="activeFeatureNames">currently unused: it was only consumed by the disabled binary-loading path</param>
/// <param name="cThreads">number of threads used to code the original data</param>
/// <param name="fCacheCodedFeature">whether the coded feature values are computed and stored when producing coded data</param>
/// <param name="fSparseCoded">forwarded to the feature encoding when producing coded data</param>
/// <param name="r">random number generator forwarded to TsvFileLoader (presumably used together with the range below - TODO confirm)</param>
/// <param name="rangeLower">lower range bound forwarded to TsvFileLoader</param>
/// <param name="rangeUpper">upper range bound forwarded to TsvFileLoader</param>
/// <returns>the desired LabelFeatureData if no errors in loading; otherwise, null</returns>
public static LabelFeatureData DistributeLoad(string inFileName, IParser<float> featureParser, IParser<float> labelParser, IGroupBoundary dataGroupBoundary, Type outDataType, string[] activeFeatureNames, int cThreads, bool fCacheCodedFeature, bool fSparseCoded, Random r, double rangeLower, double rangeUpper)
{
    if (inFileName == null)
    {
        return null;
    }

    // Split always yields at least one element, so the last field is always available;
    // for a name without any '.' it is the whole name.
    string[] fields = inFileName.Split('.');
    string sufix = fields[fields.Length - 1];

    CLabelFeatureData labelFeatureData = null;

    // File suffixes are identifiers, not linguistic text: compare ordinally so the result
    // does not depend on the current culture (e.g. "BIN" vs "bin" under Turkish casing rules).
    if (string.Equals(sufix, "tsv", StringComparison.OrdinalIgnoreCase)
        || string.Equals(sufix, "gz", StringComparison.OrdinalIgnoreCase))
    {
        TsvFileLoader tsvFileLoader = new TsvFileLoader(inFileName, null, labelParser, featureParser, dataGroupBoundary, r, rangeLower, rangeUpper);
        labelFeatureData = new CLabelFeatureData(tsvFileLoader.FeatureName, tsvFileLoader.Labels, tsvFileLoader.GroupId, tsvFileLoader.Feature);
    }
    else if (string.Equals(sufix, "bin", StringComparison.OrdinalIgnoreCase)
        || string.Equals(sufix, "dp", StringComparison.OrdinalIgnoreCase))
    {
        //initially, only accept tsv file.
        //BinaryReaderEx binReaderEx = new BinaryReaderEx(inFileName);
        //Type t = binReaderEx.Read<Type>();
        //labelFeatureData = (CLabelFeatureData)binReaderEx.Read(t);
        //binReaderEx.Close();
        //labelFeatureData.SetActiveFeatures(activeFeatureNames);
        return null;
    }

    // Unknown suffix: nothing was loaded. Report failure with null as documented instead of
    // dereferencing the null reference below.
    if (labelFeatureData == null)
    {
        return null;
    }

    if (outDataType.Equals(typeof(CLabelFeatureDataCoded)))
    {
        if (labelFeatureData.GetType().Equals(typeof(CLabelFeatureDataCoded)))
        {
            // Already coded: only (re)build the cached coded features when asked to.
            if (fCacheCodedFeature)
            {
                ((CLabelFeatureDataCoded)labelFeatureData).EncodeFeatureValues(cThreads, fSparseCoded);
            }
        }
        else
        {
            //need to upgrade to coded
            labelFeatureData = new CLabelFeatureDataCoded(labelFeatureData, cThreads, fCacheCodedFeature, fSparseCoded);
        }
    }

    return labelFeatureData;
}
/// <summary>
/// Scores a single feature vector with the boosted tree and copies the predictions
/// into <paramref name="results"/>.
/// </summary>
/// <param name="features">the feature values of the one data point to evaluate</param>
/// <param name="results">output buffer; every element is overwritten</param>
/// <returns>always true</returns>
public override bool Evaluate(float[] features, float[] results)
{
    // Wrap the single vector as a one-row data set so it can be fed to Predict.
    float[][] singleRow = new float[][] { features };
    LabelFeatureData wrapped = new CLabelFeatureData(singleRow);

    float[][] prob = boostTree.Predict(wrapped);

    // NOTE(review): prob is read as [output index][data point], with data point 0 being
    // the only row supplied above - confirm against Predict's return layout.
    int slot = 0;
    while (slot < results.Length)
    {
        results[slot] = prob[slot][0];
        slot++;
    }

    return true;
}