Beispiel #1
0
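        //this version of the loader is for cluster distribution (random partitioning of the data)
        //it takes a random number generator and a [rangeLower, rangeUpper) interval
        //(presumably each node is handed its own sub-interval - confirm with callers)
        //we add in a query only if the random number drawn for it falls inside that interval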
        /// <summary>
        /// Loads a randomly selected subset of the items streamed from <paramref name="tsvFile"/>.
        /// For every item yielded by the stream (a query, per the original comments) one value is drawn
        /// from <paramref name="r"/>; the item is kept only when that value lies in
        /// [<paramref name="rangeLower"/>, <paramref name="rangeUpper"/>).
        /// Kept items contribute their labels, a group id and their feature data to this loader.
        /// </summary>
        /// <param name="tsvFile">File to stream from; its column names are used to fill the feature names.</param>
        /// <param name="r">Random source. Exactly one <c>NextDouble()</c> call is made per streamed item, kept or skipped.</param>
        /// <param name="rangeLower">Inclusive lower bound a draw must reach for the item to be kept.</param>
        /// <param name="rangeUpper">Exclusive upper bound a draw must stay below for the item to be kept.</param>
        /// <exception cref="ArgumentNullException"><paramref name="tsvFile"/> or <paramref name="r"/> is null.</exception>
        public TsvFileLoader(RankingTSVFile<MsnData> tsvFile, Random r, double rangeLower, double rangeUpper)
        {
            // Fail fast with a descriptive exception instead of a late NullReferenceException.
            if (tsvFile == null)
            {
                throw new ArgumentNullException("tsvFile");
            }
            if (r == null)
            {
                throw new ArgumentNullException("r");
            }

            DataNullProc<MsnData> nullProc = new DataNullProc<MsnData>();
            IDataEnum<MsnData, MsnData, DataNullProc<MsnData>> stream =
                new TsvDataStream<MsnData, MsnData, DataNullProc<MsnData>>(tsvFile, nullProc);

            featureDataMatrix = new DataMatrixArray<float>();
            List<float> labelList = new List<float>();
            List<int> groupIdList = new List<int>();

            // Group ids are handed out consecutively to the kept items only,
            // so skipped items leave no gaps in the numbering.
            int gId = 0;

            foreach (MsnData d in stream)
            {
                // Draw once per item so the random sequence advances identically
                // no matter which interval this loader was given.
                double draw = r.NextDouble();
                bool selected = draw >= rangeLower && draw < rangeUpper;
                if (!selected)
                {
                    // Outside the requested interval: skip this item.
                    continue;
                }

                // Resolve the feature column names once, from the first kept item.
                if (featureNames == null)
                {
                    featureNames = new string[d.Feature.NumColumns];
                    for (int i = 0; i < d.Feature.NumColumns; i++)
                    {
                        featureNames[i] = tsvFile.ColumnNames[d.Feature.Parser.columnIndex(i)];
                    }
                }

                // One label (column 0) and one group id per row of the item.
                for (int i = 0; i < d.Labels.Data.NumRows; i++)
                {
                    labelList.Add(d.Labels.Data.GetValue(i, 0));
                    groupIdList.Add(gId);
                }

                featureDataMatrix.Add(d.Feature.Data);

                gId++;
            }

            // Both lists were filled in lock-step, so the resulting arrays have equal length.
            labels = labelList.ToArray();
            groupId = groupIdList.ToArray();
        }
Beispiel #2
0
        /// <summary>
        /// Creates a RankingTSVFile over <paramref name="tsvFileName"/> whose MsnData template is
        /// configured with the given parsers and whose group boundary is set.
        /// </summary>
        /// <param name="tsvFileName">File name passed to the RankingTSVFile constructor.</param>
        /// <param name="metaParser">Parser for the meta part; DefaultMetaParser is used when null.</param>
        /// <param name="labelParser">Parser for the labels; DefaultLabelParser is used when null.</param>
        /// <param name="featureParser">Parser for the features; when null the parser already present on the new MsnData is left as is.</param>
        /// <param name="groupBoundary">Group boundary; DefaultGroupBoundary is used when null.</param>
        /// <returns>The newly created and configured file object.</returns>
        public static RankingTSVFile<MsnData> CreateTsvFile(string tsvFileName,
                                IParser<string> metaParser, IParser<float> labelParser, IParser<float> featureParser,
                                IGroupBoundary groupBoundary)
        {
            MsnData template = new MsnData();

            // Fall back to the defaults whenever the caller passes no parser.
            template.Meta.Parser = metaParser ?? DefaultMetaParser;
            template.Labels.Parser = labelParser ?? DefaultLabelParser;

            // Unlike meta and labels there is no fallback here: a null feature parser
            // keeps whatever parser the freshly constructed MsnData carries.
            if (featureParser != null)
            {
                template.Feature.Parser = featureParser;
            }

            RankingTSVFile<MsnData> result = new RankingTSVFile<MsnData>(tsvFileName, template);
            result.GroupBoundary = groupBoundary ?? DefaultGroupBoundary;

            return result;
        }
Beispiel #3
0
        /// <summary>
        /// Loads every item streamed from <paramref name="tsvFile"/>.
        /// Each item contributes its labels, a group id and its feature data to this loader;
        /// group ids are assigned consecutively, starting at 0, in stream order.
        /// </summary>
        /// <param name="tsvFile">File to stream from; its column names are used to fill the feature names.</param>
        /// <exception cref="ArgumentNullException"><paramref name="tsvFile"/> is null.</exception>
        public TsvFileLoader(RankingTSVFile<MsnData> tsvFile)
        {
            // Fail fast with a descriptive exception instead of a late NullReferenceException.
            if (tsvFile == null)
            {
                throw new ArgumentNullException("tsvFile");
            }

            DataNullProc<MsnData> nullProc = new DataNullProc<MsnData>();
            IDataEnum<MsnData, MsnData, DataNullProc<MsnData>> stream =
                new TsvDataStream<MsnData, MsnData, DataNullProc<MsnData>>(tsvFile, nullProc);

            featureDataMatrix = new DataMatrixArray<float>();
            List<float> labelList = new List<float>();
            List<int> groupIdList = new List<int>();

            int gId = 0;

            foreach (MsnData d in stream)
            {
                // Resolve the feature column names once, from the first item.
                if (featureNames == null)
                {
                    featureNames = new string[d.Feature.NumColumns];
                    for (int i = 0; i < d.Feature.NumColumns; i++)
                    {
                        featureNames[i] = tsvFile.ColumnNames[d.Feature.Parser.columnIndex(i)];
                    }
                }

                // One label (column 0) and one group id per row of the item.
                for (int i = 0; i < d.Labels.Data.NumRows; i++)
                {
                    labelList.Add(d.Labels.Data.GetValue(i, 0));
                    groupIdList.Add(gId);
                }

                featureDataMatrix.Add(d.Feature.Data);

                gId++;
            }

            // Both lists were filled in lock-step, so the resulting arrays have equal length.
            labels = labelList.ToArray();
            groupId = groupIdList.ToArray();
        }