Beispiel #1
0
        /// <summary>
        /// Deletes every segment and version document stored for the symbol.
        /// Returns the number of deleted *version* documents (the segment
        /// delete count is intentionally discarded, as before).
        /// </summary>
        public async Task <long> DeleteAsync(string symbol)
        {
            // Remove the data segments first, then the versions that referenced them.
            var segResult = await _segments.DeleteManyAsync(BF.Eq("symbol", symbol));

            //Log.Information ("Deleted {count} segments for {symbol}", segResult.DeletedCount, symbol);
            var verResult = await _versions.DeleteManyAsync(BF.Eq("symbol", symbol));

            //Log.Information ("Deleted {count} versions for {symbol}", verResult.DeletedCount, symbol);
            return(verResult.DeletedCount);
        }
Beispiel #2
0
        /// <summary>
        /// Stamps the given version document (or a fresh one) with the next
        /// version number for the symbol, a new "_id" if missing, and the symbol
        /// itself, then returns it.
        /// </summary>
        public async Task <BsonDocument> GetNewVersion(string symbol, BsonDocument version = null)
        {
            version = version ?? new BsonDocument();

            // Atomically bump the per-symbol counter; the upsert creates the
            // counter document on first use and After returns the incremented value.
            var options = new FindOneAndUpdateOptions <BsonDocument> {
                IsUpsert       = true,
                ReturnDocument = ReturnDocument.After
            };
            var counterDoc = await _version_numbers.FindOneAndUpdateAsync(
                BF.Eq("symbol", symbol),
                BU.Inc("version", 1),
                options);

            version ["version"] = counterDoc.Unwrap(v => v["version"], 1);

            // Keep a caller-supplied _id; otherwise mint a fresh ObjectId.
            if (version.GetValue("_id", null) == null)
            {
                version ["_id"] = new BsonObjectId(ObjectId.GenerateNewId());
            }
            version ["symbol"] = symbol;
            return(version);
        }
Beispiel #3
0
        /// <summary>
        /// Reads the series stored for <paramref name="symbol"/> back into a
        /// DataFrame, optionally restricted to a date range and/or pinned to a
        /// specific version document. Returns null when no version exists.
        /// </summary>
        public async Task <DataFrame> ReadAsync(string symbol, DateRange range = null, BsonDocument version = null)
        {
            // Resolve the latest version when the caller did not pin one.
            version = version ?? await ReadVersionAsync(symbol);

            if (version == null)
            {
                return(null);
            }
            Log.Debug("version: {0}".Args(version));

            var buf = new ByteBuffer();

            // Per-segment (end-date, segment-number) index used below for
            // date-range pruning.
            var index  = GetSegmentsIndex(version);
            var id     = version["_id"];
            var parent = version.GetValue("base_version_id", null);

            // Segments are keyed by their parent version; a version without an
            // explicit base version is its own parent.
            if (parent == null)
            {
                parent = id;
            }

            var filter        = BF.Eq("symbol", symbol) & BF.Eq("parent", parent);
            int start_segment = 0;
            int end_segment   = -1;   // -1 == "no upper bound chosen yet"

            if (range != null)
            {
                // Narrow [start_segment, end_segment] by scanning the segment index.
                foreach (var t in index)
                {
                    if (range.StartDate != default(DateTime) && range.StartDate > t.Item1) // t.Item1 is inclusive end date of segment
                    {
                        start_segment = (int)t.Item2 + 1;                                  // should start from next segment
                    }
                    if (range.StopDate != default(DateTime) && range.StopDate <= t.Item1 && end_segment == -1)
                    {
                        end_segment = (int)t.Item2;      // should stop at this segment
                    }
                }
                if (start_segment != 0)
                {
                    filter = filter & BF.Gte("segment", start_segment);
                }
                if (end_segment != -1)
                {
                    filter = filter & BF.Lte("segment", end_segment);
                }
            }
            // No upper bound from the range: read through the version's last row.
            // NOTE(review): "up_to" appears to be the total stored row count and
            // "segment" the index of a segment's last row — confirm against AppendAsync.
            if (end_segment == -1)
            {
                end_segment = version["up_to"].AsInt32 - 1;
            }

            var segments = await this._segments.FindAsync(filter);

            int segcount = 0;

            // Concatenate all matching segments' payloads into one raw buffer,
            // decompressing each segment flagged as compressed.
            while (await segments.MoveNextAsync())
            {
                foreach (var segment in segments.Current)
                {
#if DEBUG
                    //Log.Debug ("read segment: {0}".Args(segment));
#endif
                    var chunk = segment["data"].AsByteArray;
                    if (segment ["compressed"].AsBoolean)
                    {
                        buf.AppendDecompress(chunk);
                    }
                    else
                    {
                        buf.Append(chunk);
                    }
                    segcount++;
                }
            }
            var metadata = version.GetValue("metadata", new BsonDocument()).AsBsonDocument;
            if (segcount == 0)
            {
                // NOTE(review): zero matching segments currently falls through and
                // builds a frame from an empty buffer; earlier attempts to return
                // early or throw were left here for reference.
                //var df1 = new DataFrame();
                //df1.Metadata = metadata;
                //return df1;
                //throw new InvalidOperationException("No segments found for {0}".Args(version));
            }

            // Row count is inferred from the segment numbering rather than from
            // the decoded buffer length.
            var nrows   = end_segment - start_segment + 1;
            var dtype   = version ["dtype"].AsString;
            var buftype = new DType(dtype);
            var bytes   = buf.GetBytes();
            Log.Debug("converting to dataframe up_to={0} dtype={1} len={2}".Args(nrows, dtype, bytes.Length));
            var df         = DataFrame.FromBuffer(buf.GetBytes(), buftype, nrows);
            var meta       = version["dtype_metadata"].AsBsonDocument;
            // Restore the index column recorded at write time (first entry of
            // dtype_metadata.index). NOTE(review): indexing [0] on the default
            // empty BsonArray would throw — this assumes the field always exists.
            var index_name = meta.GetValue("index", new BsonArray()).AsBsonArray[0];
            if (index_name != null)
            {
                df.Index = df.Columns[index_name.AsString];
            }
            df.Metadata    = metadata;
            df.Name        = symbol;
            df.FilledCount = df.Rows.Count;
            // TODO: Filter first/last segment
            return(df);
        }
Beispiel #4
0
        /// <summary>
        /// Appends the rows of <paramref name="df"/> to the series stored under
        /// <paramref name="symbol"/>: writes one new compressed segment and
        /// upserts a new version document describing the series.
        /// </summary>
        /// <param name="symbol">Series identifier stored in the "symbol" field.</param>
        /// <param name="df">Frame to append; <c>df.Index</c> must be assigned.</param>
        /// <param name="chunksize">
        /// When positive and the frame has more rows than this, the frame is
        /// split into chunksize-row windows appended one by one.
        /// </param>
        /// <param name="skipAlreadyWrittenDates">
        /// For datetime-indexed series, drop leading rows whose dates were
        /// already written (according to the stored segment index).
        /// </param>
        /// <returns>
        /// The version document that was written, or null when every row was
        /// skipped as already written.
        /// </returns>
        /// <exception cref="ArgumentException">Thrown when <c>df.Index</c> is null.</exception>
        /// <exception cref="NotImplementedException">
        /// Thrown when the stored dtype differs from the frame's dtype.
        /// </exception>
        public async Task <BsonDocument> AppendAsync(string symbol, DataFrame df, int chunksize = 0, bool skipAlreadyWrittenDates = true)
        {
            if (df.Index == null)
            {
                throw new ArgumentException("Please specify DataFrame.Index column before saving");
            }

            if (chunksize > 0 && df.Rows.Count > chunksize)
            {
                // Chunked mode: append fixed-size windows recursively.
                // FIX: skipAlreadyWrittenDates is now forwarded to the recursive
                // call (previously it was silently reset to its default).
                var          rng = Range.R(0, chunksize);
                BsonDocument ver = null;
                while (rng.First < df.Rows.Count)
                {
                    var chunk = df[rng];
                    ver = await AppendAsync(symbol, chunk, 0, skipAlreadyWrittenDates);

                    rng = Range.R(rng.First + chunksize, rng.Last + chunksize);
                }
                return(ver);
            }
            int attemptNo = 0;

            // Optimistic-concurrency loop: on a write conflict while upserting
            // the version, re-read the current state and try again.
            // NOTE(review): the retry count is unbounded, as originally designed.
            for (;;)
            {
                var previous_version = await ReadVersionAsync(symbol);

                // Allocate the next version number, seeding the new version
                // document from the previous one.
                var version = await GetNewVersion(symbol, previous_version);

                // A stored dtype that differs from the incoming frame would
                // require reloading and repacking the old data.
                var dtype = version.GetValue("dtype", "").ToString();
                Log.Debug("loaded dtype {0}", dtype);
                if (dtype != "" && df.DType.ToString() != dtype)
                {
                    throw new NotImplementedException("old dtype {0}, new dtype {1}: not implemented".Args(dtype, df.DType));
                }

                var sdt = df.DType.ToString();

                version["metadata"] = df.Metadata;

                version["dtype"] = sdt;
                Log.Debug("saved dtype {0}", sdt);

                version["shape"] = new BsonArray {
                    { -1 }
                };
                version["dtype_metadata"] = new BsonDocument {
                    { "index", new BsonArray {
                          { df.Index.Name }
                      } },
                    { "columns", new BsonArray(df.Columns.Select(c => c.Name).ToList()) }
                };
                version["type"]          = "pandasdf";
                version["segment_count"] = previous_version != null ? previous_version["segment_count"].AsInt32 + 1 : 1;
                version["append_count"]  = previous_version != null ? previous_version["append_count"].AsInt32 + 1 : 0;

                var  seg_ind_buf        = new ByteBuffer();
                int  segment_offset     = 0;
                bool is_date_time_index = DType.DateTime64.ToString().Equals(df.Index.DType.ToString());

                if (previous_version != null)
                {
                    // Carry the existing (compressed) segment index forward and
                    // continue row numbering from the previous "up_to".
                    var seg_ind = previous_version["segment_index"].AsByteArray;
                    seg_ind_buf.AppendDecompress(seg_ind);
                    segment_offset = previous_version["up_to"].AsInt32;
                    if (is_date_time_index && skipAlreadyWrittenDates)
                    {
                        // The last 16 bytes of the segment index hold (date, row)
                        // of the final stored record; keep only incoming rows
                        // strictly after that date.
                        long     date  = seg_ind_buf.Read <long>(seg_ind_buf.Length - 16);
                        DateTime dt    = DateTime64.ToDateTime(date);
                        var      range = df.Index.AsDateTime().RangeOf(dt, 0, df.FilledCount - 1, Location.GT);
                        if (range.Last <= range.First)
                        {
                            Log.Information($"Skipped DataFrame.Append because date {dt} already written for {symbol}");
                            return(null); // Hey all was skipped
                        }
                        else if (range.First != 0)
                        {
                            Log.Information($"Skipped DataFrame.Append initial {range.First} elements date {dt} already written for {symbol}");
                        }
                        df = df[range];
                    }
                }

                var up_to = segment_offset + df.Rows.Count;
                var buf   = new ByteBuffer();

                // add index that is last datetime + 0-based int64 index of last appended record like (segment_count-1)
                if (is_date_time_index && df.Rows.Count > 0)
                {
                    var date = df.Index.AsDateTime().Source[-1];
                    seg_ind_buf.Append <long>(date);
                    seg_ind_buf.Append <long>(up_to - 1);
                }

                var seg_ind_buf2 = new ByteBuffer();
                seg_ind_buf2.AppendCompress(seg_ind_buf.GetBytes());
                version["segment_index"] = seg_ind_buf2.GetBytes();

                version["up_to"] = up_to;
                var bin = df.ToBuffer();
                buf.AppendCompress(bin);

                var segment = new BsonDocument {
                    { "symbol", symbol },
                    { "data", new BsonBinaryData(buf.GetBytes()) },
                    { "compressed", true },
                    { "segment", segment_offset + df.Rows.Count - 1 },
                    { "parent", new BsonArray {
                          version["_id"]
                      } },
                };

                // FIX: the SHA1 instance is now disposed; previously one leaked
                // per retry iteration.
                using (var sha1 = SHA1.Create())
                {
                    // First append of this version: record a content hash of the
                    // raw (uncompressed) data.
                    var sha = version.GetValue("sha", null);
                    if (sha == null)
                    {
                        byte[] hashBytes = sha1.ComputeHash(bin);
                        version["sha"] = new BsonBinaryData(hashBytes);
                    }

                    // Segment hash: symbol bytes plus every field value, fields
                    // taken in descending name order. This ordering must stay
                    // stable to remain hash-compatible with existing data.
                    var hash = new ByteBuffer();
                    hash.Append(Encoding.ASCII.GetBytes(symbol));
                    foreach (var key in segment.Names.OrderByDescending(x => x))
                    {
                        var value = segment.GetValue(key);
                        if (value is BsonBinaryData)
                        {
                            hash.Append(value.AsByteArray);
                        }
                        else
                        {
                            var str = value.ToString();
                            hash.Append(Encoding.ASCII.GetBytes(str));
                        }
                    }
                    segment["sha"] = sha1.ComputeHash(hash.GetBytes());
                }

                try {
                    await _versions.ReplaceOneAsync(BF.Eq("symbol", symbol), version, Upsert);
                }catch (MongoWriteException)   // FIX: unused exception variable removed
                {
                    Log.Information("Retrying append symbol {symbol}, attempt {attemptNo}", symbol, attemptNo++);
                    continue;
                }
                await _segments.InsertOneAsync(segment);

                return(version);
            }
        }