예제 #1
0
        /// <summary>
        /// Loads a label/feature data file and returns it as the requested data type.
        /// Static entry point shared by CLabelFeatureData and CLabelFeatureDataCoded.
        /// Only files whose name ends in ".tsv" or ".gz" (case-insensitive) are loaded at present;
        /// ".bin", ".dp" and every other suffix yield null.
        /// </summary>
        /// <param name="inFileName">the name of the file: xxx.tsv == tsv file format; xxx.bin == binary uncoded data format; xxx.dp == binary coded data format (the binary formats are not accepted yet)</param>
        /// <param name="featureParser">parser that understands the feature values</param>
        /// <param name="labelParser">parser that understands the label values</param>
        /// <param name="dataGroupBoundary">data group boundaries</param>
        /// <param name="outDataType">the output data type; when it equals typeof(CLabelFeatureDataCoded) the loaded data is converted to the coded form</param>
        /// <param name="activeFeatureNames">intended to restrict which features are loaded; not used by the tsv path implemented here</param>
        /// <param name="cThreads">number of threads used to code the original data</param>
        /// <param name="fCacheCodedFeature">forwarded to the CLabelFeatureDataCoded constructor; for data that is already coded it decides whether EncodeFeatureValues is called</param>
        /// <param name="fSparseCoded">forwarded to the coding step together with <paramref name="cThreads"/></param>
        /// <param name="r">random source handed to the TsvFileLoader constructor</param>
        /// <param name="rangeLower">lower range value handed to the TsvFileLoader constructor</param>
        /// <param name="rangeUpper">upper range value handed to the TsvFileLoader constructor</param>
        /// <returns>the desired LabelFeatureData if no errors in loading; otherwise, null (null file name, null output type, or unsupported suffix)</returns>
        public static LabelFeatureData DistributeLoad(string inFileName, IParser<float> featureParser, IParser<float> labelParser, IGroupBoundary dataGroupBoundary,
                                            Type outDataType, string[] activeFeatureNames, int cThreads, bool fCacheCodedFeature, bool fSparseCoded, Random r, double rangeLower, double rangeUpper)
        {
            // Reject unusable arguments before doing any file work; this method reports failure by returning null.
            if (inFileName == null || outDataType == null)
            {
                return null;
            }

            // The suffix is whatever follows the last '.'. Split always yields at least one element,
            // so indexing the last entry is safe even for a name without a dot.
            string[] fields = inFileName.Split('.');
            string suffix = fields[fields.Length - 1];

            // File suffixes are identifiers, not linguistic text, so compare them ordinally.
            bool isTextFile = string.Equals(suffix, "tsv", StringComparison.OrdinalIgnoreCase)
                           || string.Equals(suffix, "gz", StringComparison.OrdinalIgnoreCase);
            if (!isTextFile)
            {
                // Initially, only tsv input is accepted. The binary formats ("bin", "dp") and any
                // unknown suffix produce null instead of falling through with nothing loaded.
                return null;
            }

            TsvFileLoader tsvFileLoader = new TsvFileLoader(inFileName, null, labelParser, featureParser, dataGroupBoundary, r, rangeLower, rangeUpper);

            CLabelFeatureData labelFeatureData = new CLabelFeatureData(tsvFileLoader.FeatureName, tsvFileLoader.Labels, tsvFileLoader.GroupId, tsvFileLoader.Feature);

            if (outDataType.Equals(typeof(CLabelFeatureDataCoded)))
            {
                if (labelFeatureData.GetType().Equals(typeof(CLabelFeatureDataCoded)))
                {
                    // Already coded: only (re)build the cached codes when asked to.
                    if (fCacheCodedFeature)
                    {
                        ((CLabelFeatureDataCoded)labelFeatureData).EncodeFeatureValues(cThreads, fSparseCoded);
                    }
                }
                else
                {
                    // Need to upgrade the uncoded data to the coded representation.
                    labelFeatureData = new CLabelFeatureDataCoded(labelFeatureData, cThreads, fCacheCodedFeature, fSparseCoded);
                }
            }

            return labelFeatureData;
        }
예제 #2
0
 /// <summary>
 /// Convenience overload: forwards every argument unchanged to the Load overload that takes one
 /// additional boolean immediately before <paramref name="fSparseCoded"/>, passing true for it.
 /// </summary>
 /// <param name="inFileName">name of the file to load</param>
 /// <param name="featureParser">parser for the feature values</param>
 /// <param name="labelParser">parser for the label values</param>
 /// <param name="dataGroupBoundary">data group boundaries</param>
 /// <param name="outDataType">the requested output data type</param>
 /// <param name="activeFeatureNames">feature names handed through to the longer overload</param>
 /// <param name="cThreads">thread count handed through to the longer overload</param>
 /// <param name="fSparseCoded">sparse-coding flag handed through to the longer overload</param>
 /// <returns>whatever the longer Load overload returns</returns>
 public static LabelFeatureData Load(
     string inFileName,
     IParser<float> featureParser,
     IParser<float> labelParser,
     IGroupBoundary dataGroupBoundary,
     Type outDataType,
     string[] activeFeatureNames,
     int cThreads,
     bool fSparseCoded)
 {
     // NOTE(review): presumably this is the "cache coded feature" switch of the longer overload - confirm against its signature.
     const bool extraFlag = true;

     return Load(
         inFileName,
         featureParser,
         labelParser,
         dataGroupBoundary,
         outDataType,
         activeFeatureNames,
         cThreads,
         extraFlag,
         fSparseCoded);
 }
예제 #3
0
 /// <summary>
 /// Creates a loader for the named tsv file: the arguments are handed to CreateTsvFile and the
 /// resulting file object is passed on to the constructor of this class that accepts it.
 /// </summary>
 /// <param name="tsvFileName">name of the tsv file</param>
 /// <param name="metaParser">parser for the meta columns</param>
 /// <param name="labelParser">parser for the label values</param>
 /// <param name="featureParser">parser for the feature values</param>
 /// <param name="groupBoundary">group boundary definition</param>
 public TsvFileLoader(
     string tsvFileName,
     IParser<string> metaParser,
     IParser<float> labelParser,
     IParser<float> featureParser,
     IGroupBoundary groupBoundary)
     : this(CreateTsvFile(
         tsvFileName: tsvFileName,
         metaParser: metaParser,
         labelParser: labelParser,
         featureParser: featureParser,
         groupBoundary: groupBoundary))
 {
 }
예제 #4
0
 /// <summary>
 /// Creates a loader for the named tsv file: the file-related arguments are handed to CreateTsvFile
 /// and the resulting file object is passed, together with <paramref name="r"/>,
 /// <paramref name="rangeLower"/> and <paramref name="rangeUpper"/>, to the constructor of this class that accepts them.
 /// </summary>
 /// <param name="tsvFileName">name of the tsv file</param>
 /// <param name="metaParser">parser for the meta columns</param>
 /// <param name="labelParser">parser for the label values</param>
 /// <param name="featureParser">parser for the feature values</param>
 /// <param name="groupBoundary">group boundary definition</param>
 /// <param name="r">random source forwarded to the chained constructor</param>
 /// <param name="rangeLower">lower range value forwarded to the chained constructor</param>
 /// <param name="rangeUpper">upper range value forwarded to the chained constructor</param>
 public TsvFileLoader(
     string tsvFileName,
     IParser<string> metaParser,
     IParser<float> labelParser,
     IParser<float> featureParser,
     IGroupBoundary groupBoundary,
     Random r,
     double rangeLower,
     double rangeUpper)
     : this(
         CreateTsvFile(
             tsvFileName: tsvFileName,
             metaParser: metaParser,
             labelParser: labelParser,
             featureParser: featureParser,
             groupBoundary: groupBoundary),
         r,
         rangeLower,
         rangeUpper)
 {
 }
예제 #5
0
        /// <summary>
        /// Builds a RankingTSVFile over the named tsv file, backed by a fresh MsnData whose parsers are configured
        /// from the arguments. A null meta parser, label parser or group boundary is replaced by
        /// DefaultMetaParser, DefaultLabelParser or DefaultGroupBoundary respectively; a null feature
        /// parser leaves the MsnData feature parser as it was constructed.
        /// </summary>
        /// <param name="tsvFileName">name of the tsv file</param>
        /// <param name="metaParser">parser for the meta columns, or null for the default</param>
        /// <param name="labelParser">parser for the label values, or null for the default</param>
        /// <param name="featureParser">parser for the feature values, or null to keep the existing one</param>
        /// <param name="groupBoundary">group boundary definition, or null for the default</param>
        /// <returns>the configured tsv file object</returns>
        public static RankingTSVFile<MsnData> CreateTsvFile(string tsvFileName,
                                IParser<string> metaParser, IParser<float> labelParser, IParser<float> featureParser,
                                IGroupBoundary groupBoundary)
        {
            MsnData rowData = new MsnData();

            // Meta and label parsers always get a value: the caller's, or the class default.
            rowData.Meta.Parser = metaParser ?? DefaultMetaParser;
            rowData.Labels.Parser = labelParser ?? DefaultLabelParser;

            // There is no default for the feature parser; only override when one is supplied.
            if (featureParser != null)
            {
                rowData.Feature.Parser = featureParser;
            }

            RankingTSVFile<MsnData> rankingFile = new RankingTSVFile<MsnData>(tsvFileName, rowData);
            rankingFile.GroupBoundary = groupBoundary ?? DefaultGroupBoundary;

            return rankingFile;
        }