/// <summary>
/// Loads a label/feature data file and returns it as the requested data type.
/// Static entry point shared by CLabelFeatureData and CLabelFeatureDataCoded.
/// Only TSV input is accepted at present; the binary formats are recognized but not loaded.
/// </summary>
/// <param name="inFileName">
/// name of the file; the text after the last '.' selects the format (case-insensitive):
/// "tsv" or "gz" are read through TsvFileLoader; "bin" (binary uncoded) and "dp" (binary coded)
/// are not supported yet and yield null, as does any other suffix
/// </param>
/// <param name="featureParser">parser that understands the feature values</param>
/// <param name="labelParser">parser that understands the label values</param>
/// <param name="dataGroupBoundary">data group boundaries</param>
/// <param name="outDataType">
/// the output data type; typeof(CLabelFeatureDataCoded) requests coded data, any other value
/// (including null) returns the data exactly as loaded
/// </param>
/// <param name="activeFeatureNames">
/// currently unused: it was only consumed by the binary loading path, which is not implemented
/// </param>
/// <param name="cThreads">number of threads used to code the original data</param>
/// <param name="fCacheCodedFeature">forwarded to the coded data; when the loaded data is already coded it decides whether EncodeFeatureValues is called</param>
/// <param name="fSparseCoded">forwarded to the coding step (EncodeFeatureValues / CLabelFeatureDataCoded constructor)</param>
/// <param name="r">forwarded unchanged to the TsvFileLoader constructor</param>
/// <param name="rangeLower">forwarded unchanged to the TsvFileLoader constructor</param>
/// <param name="rangeUpper">forwarded unchanged to the TsvFileLoader constructor</param>
/// <returns>the desired LabelFeatureData if the file could be loaded; otherwise, null</returns>
public static LabelFeatureData DistributeLoad(string inFileName, IParser<float> featureParser, IParser<float> labelParser, IGroupBoundary dataGroupBoundary, Type outDataType, string[] activeFeatureNames, int cThreads, bool fCacheCodedFeature, bool fSparseCoded, Random r, double rangeLower, double rangeUpper)
{
    if (inFileName == null)
    {
        return null;
    }

    // Text after the last '.'; when there is no '.', LastIndexOf returns -1 and the whole
    // name is used, which is the same value the last element of Split('.') would give.
    string suffix = inFileName.Substring(inFileName.LastIndexOf('.') + 1);

    // File extensions are identifiers, not linguistic text: compare ordinally so the result
    // does not depend on the current culture.
    bool isTsv = string.Equals(suffix, "tsv", StringComparison.OrdinalIgnoreCase)
              || string.Equals(suffix, "gz", StringComparison.OrdinalIgnoreCase);

    if (!isTsv)
    {
        // Initially only TSV files are accepted. The binary formats ("bin" uncoded, "dp" coded)
        // are not implemented yet, and unknown suffixes cannot be loaded at all. Returning here
        // also guarantees the conversion step below never sees a null data object.
        return null;
    }

    TsvFileLoader tsvFileLoader = new TsvFileLoader(inFileName, null, labelParser, featureParser, dataGroupBoundary, r, rangeLower, rangeUpper);
    CLabelFeatureData labelFeatureData = new CLabelFeatureData(tsvFileLoader.FeatureName, tsvFileLoader.Labels, tsvFileLoader.GroupId, tsvFileLoader.Feature);

    // typeof(...).Equals(outDataType) is null-safe, unlike outDataType.Equals(...).
    if (typeof(CLabelFeatureDataCoded).Equals(outDataType))
    {
        // Exact-type check (not "is"), matching the original behavior.
        if (labelFeatureData.GetType().Equals(typeof(CLabelFeatureDataCoded)))
        {
            if (fCacheCodedFeature)
            {
                ((CLabelFeatureDataCoded)labelFeatureData).EncodeFeatureValues(cThreads, fSparseCoded);
            }
        }
        else
        {
            // need to upgrade to coded
            labelFeatureData = new CLabelFeatureDataCoded(labelFeatureData, cThreads, fCacheCodedFeature, fSparseCoded);
        }
    }

    return labelFeatureData;
}
/// <summary>
/// Convenience overload of Load: forwards every argument to the nine-argument Load overload,
/// inserting a literal true between <paramref name="cThreads"/> and <paramref name="fSparseCoded"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the inserted true is presumably the "cache coded feature" flag; the
/// nine-argument overload is not visible here, so confirm against its signature.
/// </remarks>
/// <param name="inFileName">forwarded unchanged</param>
/// <param name="featureParser">forwarded unchanged</param>
/// <param name="labelParser">forwarded unchanged</param>
/// <param name="dataGroupBoundary">forwarded unchanged</param>
/// <param name="outDataType">forwarded unchanged</param>
/// <param name="activeFeatureNames">forwarded unchanged</param>
/// <param name="cThreads">forwarded unchanged</param>
/// <param name="fSparseCoded">forwarded unchanged</param>
/// <returns>whatever the nine-argument Load overload returns</returns>
static public LabelFeatureData Load(string inFileName, IParser<float> featureParser, IParser<float> labelParser, IGroupBoundary dataGroupBoundary, Type outDataType, string[] activeFeatureNames, int cThreads, bool fSparseCoded) { return Load(inFileName, featureParser, labelParser, dataGroupBoundary, outDataType, activeFeatureNames, cThreads, true, fSparseCoded); }
/// <summary>
/// Creates a loader for a TSV file: builds a RankingTSVFile with CreateTsvFile from the given
/// arguments and hands it to the single-argument constructor. This constructor has no body of its own.
/// </summary>
/// <param name="tsvFileName">name of the TSV file, passed to CreateTsvFile</param>
/// <param name="metaParser">parser for the meta values, passed to CreateTsvFile</param>
/// <param name="labelParser">parser for the label values, passed to CreateTsvFile</param>
/// <param name="featureParser">parser for the feature values, passed to CreateTsvFile</param>
/// <param name="groupBoundary">group boundary, passed to CreateTsvFile</param>
public TsvFileLoader(string tsvFileName, IParser<string> metaParser, IParser<float> labelParser, IParser<float> featureParser, IGroupBoundary groupBoundary) : this(CreateTsvFile(tsvFileName, metaParser, labelParser, featureParser, groupBoundary)) { }
/// <summary>
/// Creates a loader for a TSV file: builds a RankingTSVFile with CreateTsvFile from the first
/// five arguments and hands it, together with <paramref name="r"/>, <paramref name="rangeLower"/>
/// and <paramref name="rangeUpper"/>, to the four-argument constructor. This constructor has no body of its own.
/// </summary>
/// <remarks>
/// NOTE(review): what the random generator and the lower/upper range are used for is decided by
/// the four-argument constructor, which is not visible here — confirm there.
/// </remarks>
/// <param name="tsvFileName">name of the TSV file, passed to CreateTsvFile</param>
/// <param name="metaParser">parser for the meta values, passed to CreateTsvFile</param>
/// <param name="labelParser">parser for the label values, passed to CreateTsvFile</param>
/// <param name="featureParser">parser for the feature values, passed to CreateTsvFile</param>
/// <param name="groupBoundary">group boundary, passed to CreateTsvFile</param>
/// <param name="r">forwarded unchanged to the four-argument constructor</param>
/// <param name="rangeLower">forwarded unchanged to the four-argument constructor</param>
/// <param name="rangeUpper">forwarded unchanged to the four-argument constructor</param>
public TsvFileLoader(string tsvFileName, IParser<string> metaParser, IParser<float> labelParser, IParser<float> featureParser, IGroupBoundary groupBoundary, Random r, double rangeLower, double rangeUpper) : this(CreateTsvFile(tsvFileName, metaParser, labelParser, featureParser, groupBoundary), r, rangeLower, rangeUpper) { }
/// <summary>
/// Builds the RankingTSVFile for <paramref name="tsvFileName"/> on top of a fresh MsnData whose
/// meta, label and feature parsers are configured from the arguments.
/// </summary>
/// <param name="tsvFileName">name of the TSV file, passed to the RankingTSVFile constructor</param>
/// <param name="metaParser">parser for the meta values; DefaultMetaParser is used when null</param>
/// <param name="labelParser">parser for the label values; DefaultLabelParser is used when null</param>
/// <param name="featureParser">parser for the feature values; when null the feature parser of the new MsnData is left untouched</param>
/// <param name="groupBoundary">group boundary for the file; DefaultGroupBoundary is used when null</param>
/// <returns>the configured RankingTSVFile</returns>
public static RankingTSVFile<MsnData> CreateTsvFile(string tsvFileName, IParser<string> metaParser, IParser<float> labelParser, IParser<float> featureParser, IGroupBoundary groupBoundary)
{
    MsnData data = new MsnData();

    // Meta and label parsers always get a value: the caller's, or the default.
    data.Meta.Parser = metaParser ?? DefaultMetaParser;
    data.Labels.Parser = labelParser ?? DefaultLabelParser;

    // There is no default feature parser here: a null argument keeps whatever MsnData set up.
    if (featureParser != null)
    {
        data.Feature.Parser = featureParser;
    }

    // The parsers are configured before the file object is constructed, as in the original order.
    RankingTSVFile<MsnData> tsvFile = new RankingTSVFile<MsnData>(tsvFileName, data);
    tsvFile.GroupBoundary = groupBoundary ?? DefaultGroupBoundary;
    return tsvFile;
}