Beispiel #1
0
        /// <summary>
        /// Reconstructs a feature from its serialized form in <paramref name="buffer"/>.
        /// </summary>
        /// <param name="buffer">Byte array holding the serialized feature.</param>
        /// <param name="position">Read offset into <paramref name="buffer"/>; advanced as data is consumed.</param>
        /// <returns>The deserialized feature.</returns>
        public static Feature New(byte[] buffer, ref int position)
        {
            using (Timer.Time(TimerEvent.ConstructFromByteArray))
            {
                // The first value read from the buffer is the feature type tag.
                FeatureType featureType = (FeatureType)buffer.ToInt(ref position);

                // Raw is the only type this factory knows how to build.
                if (featureType != FeatureType.Raw)
                {
                    throw Contracts.Except("Impossible!");
                }

                TsvFeature rawFeature = new TsvFeature(buffer, ref position);
#if !NO_STORE
                // Attach the default object store as the bins cache of the new feature.
                rawFeature.BinsCache = FileObjectStore<IntArrayFormatter>.GetDefaultInstance();
#endif
                return rawFeature;
            }
        }
        /// <summary>
        /// Looks up an object store by instance name, creating and registering it on first use.
        /// </summary>
        /// <param name="instanceName">
        /// Object store instance name. If null or empty, the first entry of the registered
        /// instances (in their enumeration order) is returned instead.
        /// </param>
        /// <returns>
        /// For a non-empty name: the store registered under that name; if none exists yet, a new
        /// store is created, initialized via <c>InitializeAsFileStream</c>, registered and returned.
        /// For a null or empty name: the first registered store, or null when nothing is registered.
        /// </returns>
        public static FileObjectStore<T> GetInstance(string instanceName)
        {
            // The whole lookup-or-create sequence runs under the lock on the instance table.
            lock (instances)
            {
                if (string.IsNullOrEmpty(instanceName))
                {
                    // No name given: fall back to the first registered store, if there is one.
                    return instances.Count > 0 ? instances.ElementAt(0).Value : null;
                }

                FileObjectStore<T> store;
                if (instances.TryGetValue(instanceName, out store))
                {
                    return store;
                }

                // Unknown name: create, initialize and register a new store under it.
                store = new FileObjectStore<T>();
                store.InitializeAsFileStream(instanceName);
                instances.Add(instanceName, store);
                return store;
            }
        }
Beispiel #3
0
        /// <summary>
        /// Per-feature worker used when building a sub-dataset: fills slot <paramref name="f"/> of
        /// <paramref name="features"/> with the first part obtained by splitting this dataset's feature.
        /// </summary>
        /// <param name="features">Output array; only element <paramref name="f"/> is written.</param>
        /// <param name="f">Index of the feature to process, used for both the source and the output array.</param>
        /// <param name="docAssignment">Document assignment passed to <c>Split</c>; only the first resulting part is kept.</param>
        /// <param name="destroyThisDataset">When true, the source feature slot of this dataset is set to null afterwards.</param>
        /// <param name="newBinsCache">Bins cache assigned to the new feature; when non-null, the feature's <c>Bins</c> are also set to null.</param>
        private void GetSubDataset_ThreadWorker(DerivedFeature[] features, int f, int[][] docAssignment, bool destroyThisDataset, FileObjectStore<IntArrayFormatter> newBinsCache)
        {
            // Store the split result first, then configure it through a local alias.
            features[f] = Features[f].Split(docAssignment)[0];

            DerivedFeature subset = features[f];
            subset.BinsCache = newBinsCache;
            if (newBinsCache != null)
            {
                // A cache was supplied, so the in-memory bins reference is dropped.
                subset.Bins = null;
            }

            if (!destroyThisDataset)
            {
                return;
            }

            // Caller asked to tear down this dataset: drop the reference to the source feature.
            Features[f] = null;
        }
Beispiel #4
0
        /// <summary>
        /// Builds a new <see cref="Dataset"/> restricted to the documents listed in <paramref name="docIndices"/>.
        /// The skeleton (ratings, query boundaries, query ids, doc ids), the feature flocks, the dupe ids
        /// and the auxiliary data are all subset accordingly.
        /// </summary>
        /// <param name="docIndices">
        /// Indices of the documents to keep, in the order they should appear in the new dataset.
        /// NOTE(review): the boundary computation assumes documents of the same query are adjacent
        /// in this array - confirm against callers.
        /// </param>
        /// <param name="destroyThisDataset">Forwarded unchanged to the per-flock worker.</param>
        /// <param name="newBinsCache">Forwarded to the per-flock worker; only used when NO_STORE is not defined.</param>
        /// <returns>The newly constructed sub-dataset.</returns>
        public Dataset GetSubDataset(int[] docIndices, bool destroyThisDataset, FileObjectStore <IntArrayFormatter> newBinsCache)
        {
#endif
            int[] queryIndices = docIndices.Select(d => DocToQuery[d]).ToArray();

            // Distinct query indices are needed both for the query ids and for every query-level
            // auxiliary entry below, so they are materialized once here instead of per use.
            int[]   distinctQueryIndices = queryIndices.Distinct().ToArray();
            ulong[] uniqueQueryIds       = distinctQueryIndices.Select(q => QueryIds[q]).ToArray();

            // calculate boundaries: a new query starts wherever the query index changes
            int[] boundaries = new int[uniqueQueryIds.Length + 1];
            boundaries[0] = 0;
            int queryIndex = 1;
            for (int q = 1; q < queryIndices.Length; ++q)
            {
                if (queryIndices[q] != queryIndices[q - 1])
                {
                    boundaries[queryIndex++] = q;
                }
            }
            // the last boundary is the total number of selected documents
            boundaries[uniqueQueryIds.Length] = queryIndices.Length;

            // construct skeleton
            DatasetSkeleton datasetSkeleton = new DatasetSkeleton(docIndices.Select(d => Ratings[d]).ToArray(),
                                                                  boundaries,
                                                                  uniqueQueryIds,
                                                                  docIndices.Select(d => DocIds[d]).ToArray());

            // create features: one worker invocation per flock, each writing its own slot
            FeatureFlockBase[] features   = new FeatureFlockBase[NumFlocks];
            int[][]            assignment = new int[][] { docIndices };
            Parallel.For(0, NumFlocks, new ParallelOptions {
                MaxDegreeOfParallelism = BlockingThreadPool.NumThreads
            },
                         (int flockIndex) =>
            {
#if !NO_STORE
                GetSubDataset_ThreadWorker(features, flockIndex, assignment, destroyThisDataset, newBinsCache);
#else
                GetSubDatasetThreadWorker(features, flockIndex, assignment, destroyThisDataset);
#endif
            });

            uint[] filteredDupeIds = null;

            // Filter the dupe ids, if any
            if (DupeIds != null)
            {
                uint[] dupeIds = DupeIds;
                filteredDupeIds = docIndices.Select(i => dupeIds[i]).ToArray();
            }

            // auxiliary data: query-level entries are subset by distinct query index,
            // all other entries by document index
            Dictionary <string, DatasetSkeletonQueryDocData> auxData    = _datasetSkeleton.AuxiliaryData;
            Dictionary <string, DatasetSkeletonQueryDocData> newAuxData = new Dictionary <string, DatasetSkeletonQueryDocData>();
            foreach (KeyValuePair <string, DatasetSkeletonQueryDocData> pair in auxData)
            {
                newAuxData[pair.Key] = pair.Value.GetSubset(pair.Value.IsQueryLevel ? distinctQueryIndices : docIndices);
            }
            datasetSkeleton.AuxiliaryData = newAuxData;

            // create new Dataset
            Dataset dataset = new Dataset(datasetSkeleton, features);
            dataset.DupeIds = filteredDupeIds;
            return(dataset);
        }