Exemplo n.º 1
0
        /// <summary>
        /// Writes every branch and leaf node of the given write sequence out to the
        /// block servers and returns the node ids the nodes were stored under.
        /// </summary>
        /// <remarks>
        /// The work is done in four steps: (1) one address per node is requested from
        /// the manager, (2) every node is serialized (with a CRC32 over everything after
        /// the 8 byte header prefix) and placed in the local cache, (3) the serialized
        /// nodes are grouped into one message stream per block, and (4) each stream is
        /// sent to every server that holds the block. If a block server answers with an
        /// error, the writes that already succeeded are rolled back and the whole
        /// operation is retried while <paramref name="tryCount"/> is greater than zero.
        /// </remarks>
        /// <param name="sequence">The branch and leaf nodes to persist, together with
        /// the child reference lookup used to resolve in-memory child ids.</param>
        /// <param name="tryCount">The number of retries still permitted after a block
        /// server write failure.</param>
        /// <returns>The node ids assigned to the nodes, branches first and then leafs,
        /// in the same order as they appear in <paramref name="sequence"/>.</returns>
        /// <exception cref="ApplicationException">If the manager reports an error while
        /// allocating, or does not return exactly one address per node.</exception>
        /// <exception cref="NetworkWriteException">If a block server write fails and no
        /// retries remain, or if a rollback fails.</exception>
        private NodeId[] InternalPersist(TreeWrite sequence, int tryCount)
        {
            // NOTE: nodes are written in order of branches and then leaf nodes. All
            //   branch nodes and leafs are grouped together.

            // The list of nodes to be allocated,
            IList<ITreeNode> allBranches = sequence.BranchNodes;
            IList<ITreeNode> allLeafs = sequence.LeafNodes;
            List<ITreeNode> nodes = new List<ITreeNode>(allBranches.Count + allLeafs.Count);
            nodes.AddRange(allBranches);
            nodes.AddRange(allLeafs);
            int sz = nodes.Count;
            // The list of allocated references for the nodes,
            DataAddress[] refs = new DataAddress[sz];
            NodeId[] outNodeIds = new NodeId[sz];

            MessageStream allocateMessageStream = new MessageStream();

            // Allocate the space first,
            for (int i = 0; i < sz; ++i) {
                ITreeNode node = nodes[i];
                // Is it a branch node?
                if (node is TreeBranch) {
                    // Branch nodes are 1K in size,
                    allocateMessageStream.AddMessage(new Message("allocateNode", 1024));
                }
                    // Otherwise, it must be a leaf node,
                else {
                    // Leaf nodes are 4k in size,
                    allocateMessageStream.AddMessage(new Message("allocateNode", 4096));
                }
            }

            // Process a command on the manager,
            IEnumerable<Message> resultStream = ProcessManager(allocateMessageStream);

            // The unique list of blocks (in first-seen order), plus an index from
            // block id to its position in that list so lookups below are not linear
            // scans.
            List<BlockId> uniqueBlocks = new List<BlockId>();
            Dictionary<BlockId, int> blockIndex = new Dictionary<BlockId, int>();

            // Parse the result stream one message at a time, the order will be the
            // order of the allocation messages,
            int n = 0;
            foreach (Message m in resultStream) {
                if (m.HasError)
                    throw new ApplicationException(m.ErrorMessage);

                // Guard against the manager answering with more addresses than nodes,
                // which would otherwise index past the end of 'refs'.
                if (n >= sz)
                    throw new ApplicationException("Manager returned more allocated addresses than were requested.");

                DataAddress addr = (DataAddress) m.Arguments[0].Value;
                refs[n] = addr;
                // Make a list of unique block identifiers,
                BlockId allocatedBlock = addr.BlockId;
                if (!blockIndex.ContainsKey(allocatedBlock)) {
                    blockIndex.Add(allocatedBlock, uniqueBlocks.Count);
                    uniqueBlocks.Add(allocatedBlock);
                }
                ++n;
            }

            // Every node must have received an address, otherwise 'refs' would hold
            // unset entries that fail later in a much less obvious way.
            if (n != sz)
                throw new ApplicationException("Manager returned fewer allocated addresses than were requested.");

            // Get the block to server map for each of the blocks,

            IDictionary<BlockId, IList<BlockServerElement>> blockToServerMap =
                GetServerListForBlocks(uniqueBlocks);

            // Make message streams for each unique block
            int ubidCount = uniqueBlocks.Count;
            MessageStream[] ubidStream = new MessageStream[ubidCount];
            for (int i = 0; i < ubidStream.Length; ++i) {
                ubidStream[i] = new MessageStream();
            }

            // Scan all the blocks and create the message streams,
            for (int i = 0; i < sz; ++i) {

                byte[] nodeBuf;

                ITreeNode node = nodes[i];
                // Is it a branch node?
                if (node is TreeBranch) {
                    TreeBranch branch = (TreeBranch) node;
                    // Make a copy of the branch (NOTE; we clone() the array here).
                    long[] curNodeData = (long[]) branch.NodeData.Clone();
                    int curNdsz = branch.NodeDataSize;
                    branch = new TreeBranch(refs[i].Value, curNodeData, curNdsz);

                    // The number of children
                    int chsz = branch.ChildCount;
                    // For each child, if it's a heap node, look up the child id and
                    // reference map in the sequence and set the reference accordingly,
                    for (int o = 0; o < chsz; ++o) {
                        NodeId childId = branch.GetChild(o);
                        if (childId.IsInMemory) {
                            // The ref is currently on the heap, so adjust accordingly
                            int refId = sequence.LookupRef(i, o);
                            branch.SetChildOverride(refs[refId].Value, o);
                        }
                    }

                    // Turn the branch into a 'node_buf' byte[] array object for
                    // serialization.
                    long[] nodeData = branch.NodeData;
                    int ndsz = branch.NodeDataSize;
                    using (MemoryStream bout = new MemoryStream(1024))
                    using (BinaryWriter dout = new BinaryWriter(bout)) {
                        dout.Write(StoreBranchType);
                        dout.Write((short) 0); // Reserved for future
                        dout.Write(0); // The crc32 checksum will be written here,
                        dout.Write(ndsz);
                        for (int o = 0; o < ndsz; ++o) {
                            dout.Write(nodeData[o]);
                        }
                        dout.Flush();

                        // Turn it into a byte array,
                        nodeBuf = bout.ToArray();
                    }

                    // Write the crc32 of the data (everything from offset 8 onwards)
                    // into the placeholder at offset 4,
                    Crc32 checksum = new Crc32();
                    checksum.ComputeHash(nodeBuf, 8, nodeBuf.Length - 8);
                    ByteBuffer.WriteInt4((int) checksum.CrcValue, nodeBuf, 4);

                    // Put this branch into the local cache,
                    networkCache.SetNode(refs[i], branch);

                }
                    // If it's a leaf node,
                else {
                    TreeLeaf leaf = (TreeLeaf) node;
                    int lfsz = leaf.Length;

                    // 12 byte header: type (2), reserved (2), crc32 (4), length (4)
                    nodeBuf = new byte[lfsz + 12];

                    // Format the data,
                    ByteBuffer.WriteInt2(StoreLeafType, nodeBuf, 0);
                    ByteBuffer.WriteInt2(0, nodeBuf, 2); // Reserved for future
                    ByteBuffer.WriteInt4(lfsz, nodeBuf, 8);
                    leaf.Read(0, nodeBuf, 12, lfsz);

                    // Calculate and set the checksum,
                    Crc32 checksum = new Crc32();
                    checksum.ComputeHash(nodeBuf, 8, nodeBuf.Length - 8);
                    ByteBuffer.WriteInt4((int) checksum.CrcValue, nodeBuf, 4);

                    // Put this leaf into the local cache,
                    leaf = new MemoryTreeLeaf(refs[i].Value, nodeBuf);
                    networkCache.SetNode(refs[i], leaf);

                }

                // The DataAddress this node is being written to,
                DataAddress address = refs[i];
                // Get the block id and the stream that collects writes for it,
                BlockId blockId = address.BlockId;
                int bid = blockIndex[blockId];
                ubidStream[bid].AddMessage(new Message("writeToBlock", address, nodeBuf, 0, nodeBuf.Length));

                // Update 'out_refs' array,
                outNodeIds[i] = refs[i].Value;
            }

            // A log of successfully processed operations, stored as alternating
            // (service address, message stream) pairs,
            List<object> successProcess = new List<object>(64);

            // Now process the streams on the servers,
            for (int i = 0; i < ubidStream.Length; ++i) {
                // The output message,
                MessageStream outputStream = ubidStream[i];
                // Get the servers this message needs to be sent to,
                BlockId blockId = uniqueBlocks[i];
                IList<BlockServerElement> blockServers = blockToServerMap[blockId];
                // Format a message for writing this node out,
                int bssz = blockServers.Count;
                IMessageProcessor[] blockServerProcs = new IMessageProcessor[bssz];
                // Make the block server connections,
                for (int o = 0; o < bssz; ++o) {
                    IServiceAddress address = blockServers[o].Address;
                    blockServerProcs[o] = connector.Connect(address, ServiceType.Block);
                    IEnumerable<Message> inputStream = blockServerProcs[o].Process(outputStream);
                    ++NetworkCommCount;

                    foreach (Message m in inputStream) {
                        if (m.HasError) {
                            // If this is an error, we need to report the failure to the
                            // manager server,
                            ReportBlockServerFailure(address);
                            // Remove the block id from the server list cache,
                            networkCache.RemoveServersWithBlock(blockId);

                            // Rollback any server writes already successfully made,
                            for (int p = 0; p < successProcess.Count; p += 2) {
                                IServiceAddress blocksAddr = (IServiceAddress) successProcess[p];
                                MessageStream toRollback = (MessageStream) successProcess[p + 1];

                                // The first argument of each 'writeToBlock' message is
                                // the address that was written,
                                List<DataAddress> rollbackNodes = new List<DataAddress>(128);
                                foreach (Message rm in toRollback) {
                                    DataAddress raddr = (DataAddress) rm.Arguments[0].Value;
                                    rollbackNodes.Add(raddr);
                                }
                                // Create the rollback message,
                                MessageStream rollbackMsg = new MessageStream();
                                rollbackMsg.AddMessage(new Message("rollbackNodes", new object[] {rollbackNodes.ToArray()}));

                                // Send it to the block server,
                                IEnumerable<Message> responseStream = connector.Connect(blocksAddr, ServiceType.Block).Process(rollbackMsg);
                                ++NetworkCommCount;
                                foreach (Message rbm in responseStream) {
                                    // If rollback generated an error we throw the error now
                                    // because this likely is a serious network error.
                                    if (rbm.HasError) {
                                        throw new NetworkWriteException("Write failed (rollback failed): " + rbm.ErrorMessage);
                                    }
                                }

                            }

                            // Retry,
                            if (tryCount > 0)
                                return InternalPersist(sequence, tryCount - 1);

                            // Otherwise we fail the write
                            throw new NetworkWriteException(m.ErrorMessage);
                        }
                    }

                    // If we succeeded without an error, add to the log
                    successProcess.Add(address);
                    successProcess.Add(outputStream);

                }
            }

            // Return the references,
            return outNodeIds;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Resolves the given node ids into tree nodes, using the local cache where
        /// possible and fetching the remainder from the block servers.
        /// </summary>
        /// <remarks>
        /// Special node ids are materialized locally. Ids found in the network cache
        /// are taken from there. All remaining ids are grouped by block and requested
        /// from the servers holding that block, one server at a time, until one of
        /// them answers without error. Every fetched node is verified against its
        /// stored CRC32 before it is accepted and cached; a failed server is reported
        /// and the next server for the block is tried.
        /// </remarks>
        /// <param name="nids">The ids of the nodes to fetch.</param>
        /// <returns>A list with one node per entry of <paramref name="nids"/>, in the
        /// same order.</returns>
        /// <exception cref="ArgumentNullException">If <paramref name="nids"/> is
        /// null.</exception>
        /// <exception cref="ApplicationException">If a block could not be read from any
        /// of its servers, if no node was read at all, or if the result could not be
        /// completely populated.</exception>
        public IList<ITreeNode> FetchNodes(NodeId[] nids)
        {
            if (nids == null)
                throw new ArgumentNullException("nids");

            // The number of nodes,
            int nodeCount = nids.Length;
            // The array of read nodes,
            ITreeNode[] resultNodes = new ITreeNode[nodeCount];

            // Resolve special nodes first,
            {
                int i = 0;
                foreach (NodeId nodeId in nids) {
                    if (nodeId.IsSpecial) {
                        resultNodes[i] = nodeId.CreateSpecialTreeNode();
                    }
                    ++i;
                }
            }

            // Group all the nodes to the same block,
            List<BlockId> uniqueBlocks = new List<BlockId>();
            List<List<NodeId>> uniqueBlockList = new List<List<NodeId>>();
            {
                int i = 0;
                foreach (NodeId nodeId in nids) {
                    // If it's not a special node,
                    if (!nodeId.IsSpecial) {
                        // Get the block id and add it to the list of unique blocks,
                        DataAddress address = new DataAddress(nodeId);
                        // Check if the node is in the local cache,
                        ITreeNode node = networkCache.GetNode(address);
                        if (node != null) {
                            resultNodes[i] = node;
                        } else {
                            // Not in the local cache so we need to bundle this up in a node
                            // request on the block servers,
                            // Group this node request by the block identifier
                            BlockId blockId = address.BlockId;
                            int ind = uniqueBlocks.IndexOf(blockId);
                            if (ind == -1) {
                                ind = uniqueBlocks.Count;
                                uniqueBlocks.Add(blockId);
                                uniqueBlockList.Add(new List<NodeId>());
                            }
                            List<NodeId> blist = uniqueBlockList[ind];
                            blist.Add(nodeId);
                        }
                    }
                    ++i;
                }
            }

            // Exit early if no blocks,
            if (uniqueBlocks.Count == 0) {
                return resultNodes;
            }

            // Resolve server records for the given block identifiers,
            IDictionary<BlockId, IList<BlockServerElement>> serversMap = GetServerListForBlocks(uniqueBlocks);

            // The result nodes list,
            List<ITreeNode> nodes = new List<ITreeNode>();

            // Checksumming objects (created lazily and reused across nodes)
            byte[] checksumBuf = null;
            Crc32 crc32 = null;

            // For each unique block list,
            foreach (List<NodeId> blist in uniqueBlockList) {
                // Make a block server request for each node in the block,
                MessageStream blockServerMsg = new MessageStream();
                BlockId blockId = null;
                foreach (NodeId nodeId in blist) {
                    DataAddress address = new DataAddress(nodeId);
                    blockServerMsg.AddMessage(new Message("readFromBlock", address));
                    blockId = address.BlockId;
                }

                if (blockId == null) {
                    throw new ApplicationException("block_id == null");
                }

                // Get the shuffled list of servers the block is stored on,
                IList<BlockServerElement> servers = serversMap[blockId];

                // Go through the servers one at a time to fetch the block,
                bool success = false;
                for (int z = 0; z < servers.Count && !success; ++z) {
                    BlockServerElement server = servers[z];
                    // If the server is up,
                    if (server.IsStatusUp) {
                        // Open a connection with the block server,
                        IMessageProcessor blockServerProc = connector.Connect(server.Address, ServiceType.Block);
                        IEnumerable<Message> messageIn = blockServerProc.Process(blockServerMsg);
                        ++NetworkCommCount;
                        ++NetworkFetchCommCount;

                        bool isError = false;
                        bool severeError = false;
                        bool crcError = false;
                        bool connectionError = false;

                        // Turn each non-error message into a node
                        foreach (Message m in messageIn) {
                            if (m.HasError) {
                                // See if this error is a block read error. If it is, we don't
                                // tell the manager server to lock this server out completely.
                                bool isBlockReadError = m.Error.Source.Equals("Deveel.Data.Net.BlockReadException");
                                // If it's a connection fault,
                                if (IsConnectionFailMessage(m)) {
                                    connectionError = true;
                                } else if (!isBlockReadError) {
                                    // If it's something other than a block read error or
                                    // connection failure, we set the severe flag,
                                    severeError = true;
                                }
                                isError = true;
                            } else if (isError == false) {
                                // The reply contains the block of data read.
                                NodeSet nodeSet = (NodeSet) m.Arguments[0].Value;

                                DataAddress address = null;

                                // Catch any IOExceptions (corrupt zips, etc)
                                try {
                                    // Decode the node items into node objects,
                                    foreach (Node nodeItem in nodeSet) {
                                        NodeId nodeId = nodeItem.Id;

                                        address = new DataAddress(nodeId);
                                        // Wrap a BinaryReader around the node input for reading
                                        // values from the store.
                                        BinaryReader input = new BinaryReader(nodeItem.Input);
                                        short nodeType = input.ReadInt16();

                                        ITreeNode readNode = null;

                                        if (crc32 == null)
                                            crc32 = new Crc32();
                                        crc32.Initialize();

                                        // Is the node type a leaf node?
                                        if (nodeType == StoreLeafType) {
                                            // Read the checksum,
                                            input.ReadInt16(); // For future use...
                                            int checksum = input.ReadInt32();
                                            // Read the size
                                            int leafSize = input.ReadInt32();

                                            // A negative size can only come from a corrupt
                                            // header; raise it as an IO error so the read is
                                            // retried on another server instead of aborting.
                                            if (leafSize < 0)
                                                throw new IOException("Invalid leaf size: " + leafSize);

                                            byte[] buf = StreamUtil.AsBuffer(nodeItem.Input);
                                            if (buf == null) {
                                                buf = new byte[leafSize + 12];
                                                ByteBuffer.WriteInt4(leafSize, buf, 8);
                                                // A single Read call may return fewer bytes
                                                // than requested, so loop until the whole leaf
                                                // payload has been read.
                                                int leafRead = 0;
                                                while (leafRead < leafSize) {
                                                    int got = input.Read(buf, 12 + leafRead, leafSize - leafRead);
                                                    if (got <= 0)
                                                        throw new EndOfStreamException("Unexpected end of stream while reading leaf data");
                                                    leafRead += got;
                                                }
                                            }

                                            // Check the checksum (covers the size field and
                                            // the leaf data)...
                                            crc32.ComputeHash(buf, 8, leafSize + 4);
                                            int calcChecksum = (int) crc32.CrcValue;
                                            if (checksum != calcChecksum) {
                                                // If there's a CRC failure, we reject this node,
                                                log.Warning(String.Format("CRC failure on node {0} @ {1}", nodeId, server.Address));
                                                isError = true;
                                                crcError = true;
                                                // This causes the read to retry on a different server
                                                // with this block id
                                            } else {
                                                // Create a leaf that's mapped to this data
                                                ITreeNode leaf = new MemoryTreeLeaf(nodeId, buf);
                                                readNode = leaf;
                                            }

                                        }
                                            // Is the node type a branch node?
                                        else if (nodeType == StoreBranchType) {
                                            // Read the checksum,
                                            input.ReadInt16(); // For future use...
                                            int checksum = input.ReadInt32();

                                            // Check the checksum objects,
                                            if (checksumBuf == null)
                                                checksumBuf = new byte[8];

                                            // Note that the entire branch is loaded into memory,
                                            int childDataSize = input.ReadInt32();

                                            // As with leafs, a negative size means the header
                                            // is corrupt; fail over to another server.
                                            if (childDataSize < 0)
                                                throw new IOException("Invalid branch data size: " + childDataSize);

                                            ByteBuffer.WriteInt4(childDataSize, checksumBuf, 0);
                                            crc32.ComputeHash(checksumBuf, 0, 4);
                                            long[] dataArr = new long[childDataSize];
                                            for (int n = 0; n < childDataSize; ++n) {
                                                long item = input.ReadInt64();
                                                ByteBuffer.WriteInt8(item, checksumBuf, 0);
                                                crc32.ComputeHash(checksumBuf, 0, 8);
                                                dataArr[n] = item;
                                            }

                                            // The calculated checksum value,
                                            int calcChecksum = (int) crc32.CrcValue;
                                            if (checksum != calcChecksum) {
                                                // If there's a CRC failure, we reject this node,
                                                log.Warning(String.Format("CRC failure on node {0} @ {1}", nodeId, server.Address));
                                                isError = true;
                                                crcError = true;
                                                // This causes the read to retry on a different server
                                                // with this block id
                                            } else {
                                                // Create the branch node,
                                                TreeBranch branch =
                                                    new TreeBranch(nodeId, dataArr, childDataSize);
                                                readNode = branch;
                                            }

                                        } else {
                                            log.Error(String.Format("Unknown node {0} type: {1}", address, nodeType));
                                            isError = true;
                                        }

                                        // Is the node already in the list? If so we don't add it.
                                        if (readNode != null && !IsInNodeList(nodeId, nodes)) {
                                            // Put the read node in the cache and add it to the 'nodes'
                                            // list.
                                            networkCache.SetNode(address, readNode);
                                            nodes.Add(readNode);
                                        }

                                    } // foreach (Node nodeItem in nodeSet)

                                } catch (IOException e) {
                                    // This catches compression errors, truncated or corrupt
                                    // node data, as well as any other misc IO errors.
                                    if (address != null) {
                                        log.Error(String.Format("IO Error reading node {0}", address));
                                    }
                                    log.Error(e.Message, e);
                                    isError = true;
                                }

                            }

                        } // foreach (Message m in messageIn)

                        // If there was no error while reading the result, we assume the node
                        // requests were successfully read.
                        if (isError == false) {
                            success = true;
                        } else {
                            // If this is a connection failure, we report the block failure.
                            if (connectionError) {
                                // If this is an error, we need to report the failure to the
                                // manager server,
                                ReportBlockServerFailure(server.Address);
                                // Remove the block id from the server list cache,
                                networkCache.RemoveServersWithBlock(blockId);
                            } else {
                                String failType = "General";
                                if (crcError) {
                                    failType = "CRC Failure";
                                } else if (severeError) {
                                    failType = "Exception during process";
                                }

                                // Report to the first manager the block failure, so it may
                                // investigate and hopefully correct.
                                ReportBlockIdCorruption(server.Address, blockId, failType);

                                // Otherwise, not a severe error (probably a corrupt block on a
                                // server), so shuffle the server list for this block_id so next
                                // time there's less chance of hitting this bad block.
                                IEnumerable<BlockServerElement> srvs = networkCache.GetServersWithBlock(blockId);
                                if (srvs != null) {
                                    List<BlockServerElement> serverList = new List<BlockServerElement>();
                                    serverList.AddRange(srvs);
                                    CollectionsUtil.Shuffle(serverList);
                                    networkCache.SetServersForBlock(blockId, serverList, 15*60*1000);
                                }
                            }
                            // We will now go retry the query on the next block server,
                        }

                    }
                }

                // If the nodes were not successfully read, we generate an exception,
                if (!success) {
                    // Remove from the cache,
                    networkCache.RemoveServersWithBlock(blockId);
                    throw new ApplicationException(
                        "Unable to fetch node from a block server" +
                        " (block = " + blockId + ")");
                }
            }

            int sz = nodes.Count;
            if (sz == 0) {
                throw new ApplicationException("Empty nodes list");
            }

            // Place every fetched node into each result slot that asked for its id
            // (the same id may have been requested more than once),
            for (int i = 0; i < sz; ++i) {
                ITreeNode node = nodes[i];
                NodeId nodeId = node.Id;
                for (int n = 0; n < nids.Length; ++n) {
                    if (nids[n].Equals(nodeId)) {
                        resultNodes[n] = node;
                    }
                }
            }

            // Check the result_nodes list is completely populated,
            for (int n = 0; n < resultNodes.Length; ++n) {
                if (resultNodes[n] == null) {
                    throw new ApplicationException("Assertion failed: result_nodes not completely populated.");
                }
            }

            return resultNodes;
        }