Esempio n. 1
0
            /// <summary>
            /// Builds the auxiliary data of a concatenated dataset from the auxiliary data of its parts.
            /// For every auxiliary data name present in at least one part, the per-part arrays are
            /// combined through <c>ConcatArrays</c> and stored on <paramref name="concat"/>.
            /// </summary>
            /// <param name="parts">The individual parts of the dataset</param>
            /// <param name="concat">The concatenated version of this dataset; receives one <c>SetData</c> call per name</param>
            /// <remarks>
            /// Throws (via <c>Contracts.Except</c>) when the parts disagree on whether a name is query-level
            /// or document-level. If no part holds a non-null array for a name, the <c>First</c> call
            /// throws <c>InvalidOperationException</c>.
            /// </remarks>
            private static void SetConcatenatedAuxiliaryData(DatasetSkeleton[] parts, DatasetSkeleton concat)
            {
                // Get the union of all the auxiliary data names.
                HashSet<string> auxNames = new HashSet<string>(parts.SelectMany(part => part.AuxiliaryData.Keys));

                // Per-part document and query counts, handed to ConcatArrays alongside the arrays.
                int[] docLengths   = parts.Select(x => x.NumDocs).ToArray();
                int[] queryLengths = parts.Select(x => x.NumQueries).ToArray();

                // Scratch buffer reused for every name; each slot is overwritten on every pass.
                DatasetSkeletonQueryDocData[] partsDatas = new DatasetSkeletonQueryDocData[parts.Length];
                foreach (string name in auxNames)
                {
                    for (int p = 0; p < parts.Length; ++p)
                    {
                        // A single lookup: when the part lacks this name, TryGetValue writes the
                        // default value into the slot, which is what the missing case requires.
                        parts[p].AuxiliaryData.TryGetValue(name, out partsDatas[p]);
                    }

                    // The first part that actually carries data decides the level; all others must agree.
                    bool isQuery = partsDatas.First(pd => pd.Data != null).IsQueryLevel;
                    if (partsDatas.Any(pd => pd.Data != null && pd.IsQueryLevel != isQuery))
                    {
                        throw Contracts.Except("On auxiliary data {0}, disagreement on whether this is query/doc", name);
                    }

                    Array concatArray = ConcatArrays(partsDatas.Select(pd => pd.Data).ToArray(), isQuery ? queryLengths : docLengths, name);
                    concat.SetData(name, concatArray, isQuery);
                }
            }
Esempio n. 2
0
            /// <summary>
            /// Splits this skeleton into several parts. The query-to-part assignment comes from
            /// <c>GetAssignments</c>; each part receives the ratings, boundaries, query ids, document ids
            /// and auxiliary data selected by that assignment.
            /// </summary>
            /// <param name="fraction">Forwarded unchanged to <c>GetAssignments</c>.</param>
            /// <param name="randomSeed">Forwarded unchanged to <c>GetAssignments</c>.</param>
            /// <param name="assignment">Produced by <c>GetAssignments</c>; used here as, for each part,
            /// the indices into the document-level arrays.</param>
            /// <returns>One <c>DatasetSkeleton</c> per part, in part order.</returns>
            public DatasetSkeleton[] Split(double[] fraction, int randomSeed, out int[][] assignment)
            {
                int[][] queriesByPart = GetAssignments(fraction, randomSeed, out assignment);
                int partCount = queriesByPart.Length;

                // Rebuild the boundary array of every part as a running sum of the sizes
                // (Boundaries[q + 1] - Boundaries[q]) of the queries assigned to it.
                int[][] partBoundaries = new int[partCount][];
                for (int part = 0; part < partCount; ++part)
                {
                    int[] partQueries = queriesByPart[part];
                    int[] offsets = new int[partQueries.Length + 1]; // offsets[0] stays 0
                    for (int i = 0; i < partQueries.Length; ++i)
                    {
                        int query = partQueries[i];
                        offsets[i + 1] = offsets[i] + Boundaries[query + 1] - Boundaries[query];
                    }
                    partBoundaries[part] = offsets;
                }

                // Gather ratings and doc ids through the document assignment, query ids through the query assignment.
                short[][] partRatings  = new short[partCount][];
                ulong[][] partQueryIds = new ulong[partCount][];
                ulong[][] partDocIds   = new ulong[partCount][];
                for (int part = 0; part < partCount; ++part)
                {
                    int[] docs = assignment[part];
                    partRatings[part]  = Array.ConvertAll(docs, doc => Ratings[doc]);
                    partQueryIds[part] = Array.ConvertAll(queriesByPart[part], query => QueryIds[query]);
                    partDocIds[part]   = Array.ConvertAll(docs, doc => DocIds[doc]);
                }

                // Wrap each part's arrays in its own skeleton.
                DatasetSkeleton[] result = new DatasetSkeleton[partCount];
                for (int part = 0; part < partCount; ++part)
                {
                    result[part] = new DatasetSkeleton(partRatings[part],
                                                       partBoundaries[part],
                                                       partQueryIds[part],
                                                       partDocIds[part]);
                }

                // Carry the auxiliary data over, selecting entries with the query or the
                // document index set depending on the level of each auxiliary array.
                foreach (KeyValuePair<string, DatasetSkeletonQueryDocData> entry in AuxiliaryData)
                {
                    string auxName = entry.Key;
                    Array source = entry.Value.Data;
                    bool queryLevel = entry.Value.IsQueryLevel;
                    Type elementType = source.GetType().GetElementType();
                    int[][] indexSets = queryLevel ? queriesByPart : assignment;

                    for (int part = 0; part < partCount; ++part)
                    {
                        int[] indices = indexSets[part];
                        Array copy = Array.CreateInstance(elementType, indices.Length);
                        for (int k = 0; k < indices.Length; ++k)
                        {
                            copy.SetValue(source.GetValue(indices[k]), k);
                        }
                        result[part].SetData(auxName, copy, queryLevel);
                    }
                }

                return result;
            }
Esempio n. 3
0
            /// <summary>
            /// Creates a new instance whose array contains the elements of <c>Data</c> found at the
            /// given indices, in the order the indices are listed. The query/document level flag is copied as is.
            /// </summary>
            /// <param name="docArray">Indices into <c>Data</c> of the elements to keep.</param>
            /// <returns>A new <c>DatasetSkeletonQueryDocData</c> holding the selected elements.</returns>
            public DatasetSkeletonQueryDocData GetSubset(int[] docArray)
            {
                // The new array has the same element type as the current one.
                Type elementType = Data.GetType().GetElementType();
                Array picked = Array.CreateInstance(elementType, docArray.Length);

                int target = 0;
                foreach (int sourceIndex in docArray)
                {
                    picked.SetValue(Data.GetValue(sourceIndex), target++);
                }

                return new DatasetSkeletonQueryDocData
                {
                    IsQueryLevel = IsQueryLevel,
                    Data = picked,
                };
            }