Beispiel #1
0
        /// <summary>
        /// Cuts <paramref name="x"/> into pieces and appends every piece to <paramref name="X2"/>.
        /// The walking direction (from the front or from the back) is chosen randomly per call.
        /// </summary>
        /// <param name="x">Sequence to be segmented.</param>
        /// <param name="X2">Data set that receives the produced segments.</param>
        static void getSegments(dataSeq x, dataSet X2)
        {
            // A non-positive draw walks forward, a positive draw walks backward.
            bool walkForward = randomTool.getOneRandom_int(-100, 100) <= 0;

            if (walkForward)
            {
                int start = 0;
                while (start < x.Count)
                {
                    // A fresh step is drawn for every segment; the segment is built
                    // from (x, start, step + Global.overlapLength).
                    int stride = getStep();
                    X2.Add(new dataSeq(x, start, stride + Global.overlapLength));
                    start += stride;
                }
                return;
            }

            int cursor = x.Count - 1;
            while (cursor >= 0)
            {
                // Same as above, but the constructor receives an extra 'false' flag
                // (presumably "not forward" -- confirm against dataSeq).
                int stride = getStep();
                X2.Add(new dataSeq(x, cursor, stride + Global.overlapLength, false));
                cursor -= stride;
            }
        }
Beispiel #2
0
        /// <summary>
        /// Builds a new data set from <paramref name="X"/>. When structure regularization is
        /// switched on (Global.structReg is true and Global.miniSize is non-zero) every sequence
        /// is replaced by the segments produced by getSegments; otherwise the sequence is copied
        /// over unchanged.
        /// </summary>
        /// <param name="X">Source data set; it is only read.</param>
        /// <returns>A new data set created with the NTag and NFeature of <paramref name="X"/>.</returns>
        public static dataSet structSplit(dataSet X)
        {
            dataSet fractions = new dataSet(X.NTag, X.NFeature);

            for (int idx = 0; idx < X.Count; idx++)
            {
                dataSeq seq = X[idx];

                bool segmentIt = Global.structReg && Global.miniSize != 0;
                if (!segmentIt)
                {
                    fractions.Add(seq);
                    continue;
                }

                // Note: an earlier, disabled variant first cut long sequences into two parts at a
                // random position and segmented each part separately. Only direct segmentation
                // is active.
                getSegments(seq, fractions);
            }

            return fractions;
        }
Beispiel #3
0
        /// <summary>
        /// Splits <paramref name="X"/> by position: items whose index lies in
        /// [(int)(X.Count * v1), (int)(X.Count * v2)) go to <paramref name="X1"/>,
        /// all remaining items go to <paramref name="X2"/>.
        /// </summary>
        /// <param name="X">Source data set.</param>
        /// <param name="v1">Lower bound of the selected slice, as a multiple of X.Count.</param>
        /// <param name="v2">Upper bound (exclusive) of the selected slice, as a multiple of X.Count.</param>
        /// <param name="X1">Receives the selected slice; cleared first.</param>
        /// <param name="X2">Receives everything outside the slice; cleared first.</param>
        /// <exception cref="ArgumentException">Thrown when <paramref name="v2"/> is smaller than <paramref name="v1"/>.</exception>
        public static void dataSplit(dataSet X, double v1, double v2, dataSet X1, dataSet X2)
        {
            if (v2 < v1)
            {
                // ArgumentException derives from Exception, so existing catch sites keep working,
                // while the message now tells the caller what was actually wrong.
                throw new ArgumentException(
                    "v2 (" + v2.ToString() + ") must not be smaller than v1 (" + v1.ToString() + ").",
                    "v2");
            }

            X1.Clear();
            X2.Clear();
            X1.setDataInfo(X);
            X2.setDataInfo(X);

            int n1 = (int)(X.Count * v1);
            int n2 = (int)(X.Count * v2);

            for (int i = 0; i < X.Count; i++)
            {
                if (i >= n1 && i < n2)
                {
                    X1.Add(X[i]);
                }
                else
                {
                    X2.Add(X[i]);
                }
            }
        }
        /// <summary>
        /// Prepares the multi-task state: resets the task-similarity matrix to the identity and,
        /// when task data is present, rebuilds _newXList so that every task's data set is padded
        /// with null entries up to the size of the largest one.
        /// </summary>
        void initForMulti()
        {
            // Fresh nTask x nTask matrix with ones on the diagonal.
            _simiBiAry = new double[Global.nTask, Global.nTask];
            int diag = 0;
            while (diag < Global.nTask)
            {
                _simiBiAry[diag, diag] = 1;
                diag++;
            }

            if (_XList == null)
            {
                return;
            }

            _newXList = new List <dataSet>();

            // First pass: remember each data set's size and find the largest.
            int[] sizes   = new int[_XList.Count];
            int   longest = 0;
            for (int t = 0; t < _XList.Count; t++)
            {
                int len = _XList[t].Count;
                sizes[t] = len;
                if (len > longest)
                {
                    longest = len;
                }
            }

            // Second pass: copy every task's sequences and pad with nulls to the common length.
            for (int t = 0; t < Global.nTask; t++)
            {
                dataSet padded = new dataSet();
                dataSet source = _XList[t];
                foreach (dataSeq seq in source)
                {
                    padded.Add(seq);
                }
                for (int fill = sizes[t]; fill < longest; fill++)
                {
                    padded.Add(null);
                }
                _newXList.Add(padded);
            }
        }
Beispiel #5
0
        /// <summary>
        /// Returns a new data set holding this set's items in the order given by a shuffled
        /// index list obtained from randomTool. This instance is not modified here.
        /// </summary>
        /// <returns>A new data set created with this set's NTag and NFeature.</returns>
        public dataSet randomShuffle()
        {
            List <int> order = randomTool <int> .getShuffledIndexList(this.Count);

            dataSet shuffled = new dataSet(this.NTag, this.NFeature);

            for (int k = 0; k < order.Count; k++)
            {
                shuffled.Add(this[order[k]]);
            }

            return shuffled;
        }
Beispiel #6
0
        /// <summary>
        /// Splits <paramref name="X"/> at position (int)(X.Count * v): items before that index go
        /// to <paramref name="X1"/>, the rest go to <paramref name="X2"/>.
        /// </summary>
        /// <param name="X">Source data set.</param>
        /// <param name="v">Split point as a multiple of X.Count.</param>
        /// <param name="X1">Receives the leading part; cleared first.</param>
        /// <param name="X2">Receives the trailing part; cleared first.</param>
        public static void dataSplit(dataSet X, double v, dataSet X1, dataSet X2)
        {
            X1.Clear();
            X2.Clear();
            X1.setDataInfo(X);
            X2.setDataInfo(X);

            int cut = (int)(X.Count * v);

            for (int pos = 0; pos < X.Count; pos++)
            {
                // Pick the destination first, then append the current item to it.
                dataSet target = pos < cut ? X1 : X2;
                target.Add(X[pos]);
            }
        }
Beispiel #7
0
        /// <summary>
        /// Rescales <paramref name="X"/> in place to (int)(X.Count * Global.trainSizeScale) items.
        /// A scale below 1 keeps only the leading items; a scale above 1 repeats the items
        /// cyclically from the beginning.
        /// </summary>
        /// <param name="X">Data set to resize; its contents are replaced.</param>
        public static void dataSizeScale(dataSet X)
        {
            // Keep a snapshot of the original items, because X is emptied below.
            dataSet XX = new dataSet();

            XX.setDataInfo(X);
            foreach (dataSeq im in X)
            {
                XX.Add(im);
            }
            X.Clear();

            int n = (int)(XX.Count * Global.trainSizeScale);

            for (int i = 0; i < n; i++)
            {
                // Wrap around over the full item count. The previous code took the remainder by
                // (Count - 1), which divided by zero for a single-item set and never revisited
                // the last item when cycling.
                X.Add(XX[i % XX.Count]);
            }
            X.setDataInfo(XX);
        }
Beispiel #8
0
        /// <summary>
        /// Loads the per-task training and test data sets for all Global.nTask tasks.
        /// Training sets are rescaled with dataSizeScale, checked for a consistent NFeatureTemp,
        /// aligned to a common NTag and merged into <paramref name="X"/>; test sets are loaded and
        /// aligned to the same NTag. Per-task training sizes are written to Global.swLog.
        /// </summary>
        /// <param name="XList">Receives one training set per task; cleared first.</param>
        /// <param name="X">Receives the sequences of all training sets; it is not cleared here.</param>
        /// <param name="XXList">Receives one test set per task; cleared first.</param>
        /// <exception cref="Exception">Thrown when the tasks report different NFeatureTemp values.</exception>
        public static void loadData_multi(List <dataSet> XList, dataSet X, List <dataSet> XXList)
        {
            XList.Clear();
            XXList.Clear();

            // --- training data: file names are the task index followed by the configured suffix ---
            baseHashSet <int> featureSizes = new baseHashSet <int>();

            for (int task = 0; task < Global.nTask; task++)
            {
                string  featureFile = task.ToString() + Global.fFeatureTrain;
                string  goldFile    = task.ToString() + Global.fGoldTrain;
                dataSet trainSet    = new dataSet(featureFile, goldFile);
                dataSizeScale(trainSet);
                featureSizes.Add(trainSet.NFeatureTemp);
                XList.Add(trainSet);
            }

            // More than one distinct value means the tasks disagree on the feature set.
            if (featureSizes.Count > 1)
            {
                throw new Exception("inconsistent features among multi tasks!");
            }

            // --- use the largest NTag for every task ---
            int commonNTag = 0;

            foreach (dataSet trainSet in XList)
            {
                if (trainSet.NTag > commonNTag)
                {
                    commonNTag = trainSet.NTag;
                }
            }
            for (int task = 0; task < Global.nTask; task++)
            {
                XList[task].NTag = commonNTag;
            }

            // --- merged training data takes its info from the first task ---
            X.NTag         = XList[0].NTag;
            X.NFeatureTemp = XList[0].NFeatureTemp;
            foreach (dataSet trainSet in XList)
            {
                foreach (dataSeq seq in trainSet)
                {
                    X.Add(seq);
                }
            }

            // --- log the size of each task's training set ---
            Global.swLog.WriteLine("data sizes (1, ..., T):");
            for (int task = 0; task < Global.nTask; task++)
            {
                Global.swLog.WriteLine(" " + XList[task].Count.ToString());
            }
            Global.swLog.WriteLine();

            // --- test data: loaded as-is (no size scaling), then aligned to the common NTag ---
            for (int task = 0; task < Global.nTask; task++)
            {
                string featureFile = task.ToString() + Global.fFeatureTest;
                string goldFile    = task.ToString() + Global.fGoldTest;
                XXList.Add(new dataSet(featureFile, goldFile));
            }
            for (int task = 0; task < Global.nTask; task++)
            {
                XXList[task].NTag = commonNTag;
            }
        }