/// <summary>
/// Deserializes a feature from <paramref name="buffer"/>, reading from <paramref name="position"/>.
/// </summary>
/// <param name="buffer">Serialized bytes. The feature type tag is read first (via <c>ToInt</c>), followed by the feature payload.</param>
/// <param name="position">Read offset into <paramref name="buffer"/>. It is passed by reference to every reader; presumably each reader advances it past what it consumed (TODO confirm).</param>
/// <returns>The <see cref="TsvFeature"/> constructed from the buffer when the tag is <see cref="FeatureType.Raw"/>.</returns>
/// <remarks>
/// Any tag other than <see cref="FeatureType.Raw"/> throws the exception produced by <c>Contracts.Except</c>.
/// The whole operation runs inside the <c>TimerEvent.ConstructFromByteArray</c> timer scope.
/// </remarks>
public static Feature New(byte[] buffer, ref int position)
{
    using (Timer.Time(TimerEvent.ConstructFromByteArray))
    {
        FeatureType featureType = (FeatureType)buffer.ToInt(ref position);

        // Raw is the only tag this factory can materialize; reject everything else up front.
        if (featureType != FeatureType.Raw)
        {
            throw Contracts.Except("Impossible!");
        }

        TsvFeature rawFeature = new TsvFeature(buffer, ref position);
#if !NO_STORE
        // When the store is compiled in, attach the default bins cache to the new feature.
        rawFeature.BinsCache = FileObjectStore<IntArrayFormatter>.GetDefaultInstance();
#endif
        return rawFeature;
    }
}
/// <summary>
/// Per-index worker used while building a sub-dataset: splits this dataset's feature at index
/// <paramref name="f"/> by <paramref name="docAssignment"/> and stores the first resulting part
/// in <paramref name="features"/> at the same index.
/// </summary>
/// <param name="features">Output array; only element <paramref name="f"/> is written.</param>
/// <param name="f">Index of the feature to process, used for both <c>Features</c> and <paramref name="features"/>.</param>
/// <param name="docAssignment">Document assignment passed to <c>Split</c>; only the first part of the split result is kept.</param>
/// <param name="destroyThisDataset">When true, this dataset's own entry <c>Features[f]</c> is set to null after the split.</param>
/// <param name="newBinsCache">Assigned to the new feature's <c>BinsCache</c>; when non-null, the new feature's <c>Bins</c> is also set to null.</param>
private void GetSubDataset_ThreadWorker(DerivedFeature[] features, int f, int[][] docAssignment, bool destroyThisDataset, FileObjectStore<IntArrayFormatter> newBinsCache)
{
    // NOTE(review): holding the split result in a local assumes DerivedFeature is a reference type — confirm.
    DerivedFeature subFeature = Features[f].Split(docAssignment)[0];
    features[f] = subFeature;

    subFeature.BinsCache = newBinsCache;
    if (newBinsCache != null)
    {
        // A cache was supplied, so the directly held bins reference is dropped.
        subFeature.Bins = null;
    }

    if (destroyThisDataset)
    {
        // Drop this dataset's reference to the source feature once it has been split.
        Features[f] = null;
    }
}
/// <summary>
/// Builds a new <see cref="Dataset"/> restricted to the documents listed in <paramref name="docIndices"/>.
/// </summary>
/// <param name="docIndices">
/// Indices, into this dataset, of the documents to keep. Query boundaries are derived from changes between
/// consecutive entries, so the documents of any one query are expected to be listed contiguously; this is
/// not validated here.
/// </param>
/// <param name="destroyThisDataset">Forwarded unchanged to the per-flock worker.</param>
/// <param name="newBinsCache">Forwarded to the per-flock worker when <c>NO_STORE</c> is not defined; unused otherwise.</param>
/// <returns>
/// A dataset built from the sub-skeleton (ratings, boundaries, query ids, doc ids, auxiliary data) and the
/// per-flock results. Its <c>DupeIds</c> is this dataset's <c>DupeIds</c> filtered by
/// <paramref name="docIndices"/>, or null when this dataset has none.
/// </returns>
public Dataset GetSubDataset(int[] docIndices, bool destroyThisDataset, FileObjectStore<IntArrayFormatter> newBinsCache)
{
#endif
    // NOTE(review): the #if closed by the #endif above is not visible in this section; the directive is kept as found.

    // Map every requested document to the index of the query that owns it.
    int[] queryIndices = docIndices.Select(d => DocToQuery[d]).ToArray();

    // The distinct query indices are needed for the query ids and again for every query-level
    // auxiliary entry below, so they are materialized once and reused.
    int[] distinctQueryIndices = queryIndices.Distinct().ToArray();
    ulong[] uniqueQueryIds = distinctQueryIndices.Select(q => QueryIds[q]).ToArray();

    // calculate boundaries: one entry per query start, plus a final entry equal to the document count
    int[] boundaries = new int[uniqueQueryIds.Length + 1];
    boundaries[0] = 0;
    int queryIndex = 1;
    for (int q = 1; q < queryIndices.Length; ++q)
    {
        // A change of query index between neighbouring documents marks the start of the next query.
        if (queryIndices[q] != queryIndices[q - 1])
        {
            boundaries[queryIndex++] = q;
        }
    }
    boundaries[uniqueQueryIds.Length] = queryIndices.Length;

    // construct skeleton
    DatasetSkeleton datasetSkeleton = new DatasetSkeleton(
        docIndices.Select(d => Ratings[d]).ToArray(),
        boundaries,
        uniqueQueryIds,
        docIndices.Select(d => DocIds[d]).ToArray());

    // create features: each flock index is handled independently and writes only its own slot
    FeatureFlockBase[] features = new FeatureFlockBase[NumFlocks];
    int[][] assignment = new int[][] { docIndices };
    Parallel.For(0, NumFlocks, new ParallelOptions { MaxDegreeOfParallelism = BlockingThreadPool.NumThreads },
        (int flockIndex) =>
        {
#if !NO_STORE
            GetSubDataset_ThreadWorker(features, flockIndex, assignment, destroyThisDataset, newBinsCache);
#else
            GetSubDatasetThreadWorker(features, flockIndex, assignment, destroyThisDataset);
#endif
        });

    uint[] filteredDupeIds = null;

    // Filter the dupe ids, if any
    if (DupeIds != null)
    {
        // Read the property once into a local so the lambda indexes a single array instance.
        uint[] dupeIds = DupeIds;
        filteredDupeIds = docIndices.Select(i => dupeIds[i]).ToArray();
    }

    // auxiliary data: query-level entries are subset by query index, all others by document index
    Dictionary<string, DatasetSkeletonQueryDocData> auxData = _datasetSkeleton.AuxiliaryData;
    Dictionary<string, DatasetSkeletonQueryDocData> newAuxData = new Dictionary<string, DatasetSkeletonQueryDocData>();
    foreach (KeyValuePair<string, DatasetSkeletonQueryDocData> pair in auxData)
    {
        // The same index array is handed to every entry of a given level, as was already the case for docIndices.
        newAuxData[pair.Key] = pair.Value.GetSubset(pair.Value.IsQueryLevel ? distinctQueryIndices : docIndices);
    }
    datasetSkeleton.AuxiliaryData = newAuxData;

    // create new Dataset
    Dataset dataset = new Dataset(datasetSkeleton, features);
    dataset.DupeIds = filteredDupeIds;
    return dataset;
}