/// <summary>
/// Attaches a node.
/// </summary>
/// <param name="node">The file node</param>
/// <returns>The configured block size.</returns>
public int AddNode(NodeInfo node)
{
    if (node != null && node.PartialFiles != null)
    {
        // A node that reports no partial files gets a small placeholder entry so it is still tracked.
        if (!node.PartialFiles.Any())
        {
            node.PartialFiles.Add(new FileMeta(9)
            {
                FileName = "INIT_FILE",
                Indexes = new List<int> { 0 },
                TotalBlockCount = 1,
                FileSize = 9
            });
        }

        foreach (var file in node.PartialFiles)
        {
            NodeFileInfo nodeFileInfo;
            if (!Files.TryGetValue(file.FileName, out nodeFileInfo))
            {
                // First time this file is seen: register it.
                nodeFileInfo = new NodeFileInfo(file.TotalBlockCount, file.FileSize);
                Files.Add(file.FileName, nodeFileInfo);
            }
            else
            {
                // Drop any stale block records previously reported by this host.
                foreach (var info in nodeFileInfo.Nodes.ToList())
                {
                    if (info.HostName == node.NodeUrl)
                    {
                        nodeFileInfo.Nodes.Remove(info);
                    }
                }
            }

            nodeFileInfo.Nodes.Add(new NodeBlockInfo
            {
                HostName = node.NodeUrl,
                Indexes = file.Indexes
            });
        }
    }

    return _configuration.BlockSize;
}
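// Hypothetical usage sketch (not part of the original source): registers a node that reports
// its partial files and keeps the returned block size for later transfers. The "_fileTracker"
// field and the NodeInfo/FileMeta initializers are assumptions based only on the members that
// AddNode above actually reads; the FileMeta constructor argument is assumed to mirror FileSize,
// as in the INIT_FILE placeholder.
public void RegisterNodeExample()
{
    var node = new NodeInfo
    {
        NodeUrl = "http://node-01:5000",          // illustrative host
        PartialFiles = new List<FileMeta>
        {
            new FileMeta(3072)
            {
                FileName = "report.pdf",          // illustrative file
                Indexes = new List<int> { 0, 1, 2 },
                TotalBlockCount = 3,
                FileSize = 3072
            }
        }
    };

    // A node with an empty PartialFiles list would receive the INIT_FILE placeholder instead.
    int blockSize = _fileTracker.AddNode(node);
}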
/// <summary>
/// Imports node and edge data into GraphView.
/// Run the following command beforehand to enable minimal logging,
/// which greatly improves the performance of bulk loading:
/// USE master; ALTER DATABASE database_name SET RECOVERY BULK_LOGGED;
/// </summary>
/// <param name="nodesFileName">The list of node file name(s)</param>
/// <param name="edgesFileName">The list of edge file name(s)</param>
/// <param name="directory">The directory of the node and edge data files</param>
/// <param name="skipScanLabel">When true, notifies GraphView that every node file has only one label and
/// every edge file has only one type, which improves import performance.</param>
/// <param name="fieldTerminator">The field terminator of the data files</param>
/// <param name="byDefaultType">The default data type</param>
public void Import(IList<string> nodesFileName, IList<string> edgesFileName, string directory,
    bool skipScanLabel = false, string fieldTerminator = ",", string byDefaultType = "string")
{
    if (!string.IsNullOrEmpty(directory))
    {
        if (Directory.Exists(directory))
        {
            nodesFileName = nodesFileName.Select(x => directory + "\\" + x).ToList();
            edgesFileName = edgesFileName.Select(x => directory + "\\" + x).ToList();
        }
        else
        {
            throw new BulkInsertNodeException(String.Format("The directory {0} does not exist.", directory));
        }
    }

    if (FileInfo.TypeDict.ContainsKey(byDefaultType.ToLower()))
    {
        byDefaultType = FileInfo.TypeDict[byDefaultType.ToLower()];
    }
    else
    {
        throw new BulkInsertNodeException("The default type is not supported. The supported types include:\n" +
                                          "int, long, float, double, boolean, byte, short, char, string\n");
    }

    FileInfo.FieldTerminator = fieldTerminator;
    FileInfo.RowTerminator = "\r\n";
    FileInfo.ByDefaultType = byDefaultType;
    FileInfo.SkipScanLabel = skipScanLabel;

    // Collects the file headers' information
    var nodeFileToInfo = new Dictionary<string, NodeFileInfo>();
    foreach (var it in nodesFileName)
    {
        if (!File.Exists(it))
        {
            throw new BulkInsertNodeException(String.Format("The file {0} does not exist.", it));
        }
        else
        {
            var temp = new NodeFileInfo(it);
            temp.getHeader();
            temp.ParseHeader();
            nodeFileToInfo[it] = temp;
        }
    }

    var edgeFileToInfo = new Dictionary<string, EdgeFileInfo>();
    foreach (var it in edgesFileName)
    {
        if (!File.Exists(it))
        {
            throw new BulkInsertEdgeException(String.Format("The file {0} does not exist.", it));
        }
        else
        {
            var temp = new EdgeFileInfo(it);
            temp.getHeader();
            temp.ParseHeader();
            edgeFileToInfo[it] = temp;
        }
    }

    var nameSpaceToNodeTableSet = new Dictionary<string, HashSet<string>>();

    // Generates the node tables' information
    var nodeTableToInfo = new Dictionary<string, NodeInfo>();
    foreach (var it in nodeFileToInfo)
    {
        NodeFileInfo nodeFile = it.Value;
        foreach (var iterator in nodeFile.Labels)
        {
            NodeInfo temp;
            if (nodeTableToInfo.ContainsKey(iterator))
            {
                temp = nodeTableToInfo[iterator];
            }
            else
            {
                temp = new NodeInfo();
            }

            // Assigns properties
            foreach (var VARIABLE in nodeFile.ColumnToType)
            {
                if (!temp.AddProperty(VARIABLE.Key, VARIABLE.Value))
                {
                    throw new BulkInsertNodeException(
                        String.Format(
                            "The label \"{0}\" contains column \"{1}\" in different types in two different files",
                            iterator, VARIABLE.Key));
                }
            }

            // Assigns the user id
            var userid = Tuple.Create(nodeFile.UserId.ToLower(), byDefaultType.ToLower());
            if (temp.UserId != null &&
                !(userid.Item1 == temp.UserId.Item1 || userid.Item2 == temp.UserId.Item2))
            {
                throw new BulkInsertNodeException(
                    String.Format("The label \"{0}\" contains two different ids in two node files", iterator));
            }
            temp.UserId = userid;
            temp.tableName = iterator;
            nodeTableToInfo[iterator] = temp;

            // Updates the namespace dictionary
            if (!nameSpaceToNodeTableSet.ContainsKey(nodeFile.NameSpace))
            {
                nameSpaceToNodeTableSet[nodeFile.NameSpace] = new HashSet<string>();
            }
            HashSet<string> nodeTableSet = nameSpaceToNodeTableSet[nodeFile.NameSpace];
            if (!nodeTableSet.Contains(iterator))
            {
                nodeTableSet.Add(iterator);
            }
        }
    }

    // Generates the edge files' information
    foreach (var it in edgeFileToInfo)
    {
        EdgeFileInfo edgeFile = it.Value;
        HashSet<string> startNodeTable = nameSpaceToNodeTableSet[edgeFile.StartNameSpace];
        HashSet<string> endNodeTable = nameSpaceToNodeTableSet[edgeFile.EndNameSpace];
        var edge = new EdgeInfo();
        if (endNodeTable.Count > 1)
        {
            throw new BulkInsertEdgeException("One edge cannot refer to two different node tables");
        }
        else if (endNodeTable.Count < 1)
        {
            throw new BulkInsertEdgeException(
                string.Format("Cannot find the namespace \"{0}\" in node files", edgeFile.EndNameSpace));
        }
        foreach (var VARIABLE in endNodeTable)
        {
            edgeFile.sinkTable = edge.Sink = VARIABLE;
        }
        foreach (var VARIABLE in edgeFile.ColumnToType)
        {
            if (!edge.AddAtrribute(VARIABLE.Key, VARIABLE.Value))
            {
                throw new BulkInsertEdgeException(
                    string.Format("The edge data file \"{0}\" contains two attributes of the same name.", it.Key));
            }
        }
        foreach (var iterator in edgeFile.Labels)
        {
            foreach (var VARIABLE in startNodeTable)
            {
                if (!nodeTableToInfo[VARIABLE].AddEdge(iterator, edge))
                {
                    throw new BulkInsertEdgeException(
                        string.Format("The edge type \"{0}\" conflicts on node table \"{1}\"",
                            iterator, VARIABLE));
                }
            }
        }
    }

    var transaction = Conn.BeginTransaction();
    var command = Conn.CreateCommand();
    command.Transaction = transaction;
    command.CommandTimeout = 0;
    try
    {
        // Creates the node tables and drops their constraints so bulk loading is not slowed down
        foreach (var pair in nodeTableToInfo)
        {
            CreateNodeTable(pair.Value.ToString(), transaction);
            const string dropConstraint = @"
                ALTER TABLE {0}
                DROP CONSTRAINT {1}";
            string constraintName = "dbo" + pair.Value.tableName + "_PK_GlobalNodeId";
            command.CommandText = string.Format(dropConstraint, pair.Value.tableName, constraintName);
            command.ExecuteNonQuery();
            string indexName = "dbo" + pair.Value.tableName + "_UQ_" + pair.Value.UserId.Item1;
            command.CommandText = string.Format(dropConstraint, pair.Value.tableName, indexName);
            command.ExecuteNonQuery();
        }

        // Bulk inserts nodes
        foreach (var pair in nodeFileToInfo)
        {
            var nodeFile = pair.Value;

            // Builds the column name/type lists in file-header order
            var dataColumnName = new List<string>(nodeFile.FileHeader.Count);
            var columnDataType = new List<string>(nodeFile.FileHeader.Count);
            using (var it = nodeFile.ColumnToType.GetEnumerator())
            {
                for (int i = 0; i < nodeFile.FileHeader.Count; i++)
                {
                    if (i == nodeFile.UserIdOffset)
                    {
                        dataColumnName.Add(nodeFile.UserId);
                        columnDataType.Add(convertSqlType(byDefaultType));
                    }
                    else if (i == nodeFile.LabelOffset)
                    {
                        dataColumnName.Add("label");
                        columnDataType.Add(convertSqlType("nvarchar(4000)"));
                    }
                    else
                    {
                        if (it.MoveNext())
                        {
                            dataColumnName.Add(it.Current.Key);
                            columnDataType.Add(convertSqlType(it.Current.Value));
                        }
                    }
                }
            }

            foreach (var it in nodeFile.Labels)
            {
                var tableNameWithSchema = "dbo." + it;
                using (var sqlBulkCopy = new SqlBulkCopy(Conn, SqlBulkCopyOptions.TableLock, transaction))
                {
                    sqlBulkCopy.BulkCopyTimeout = 0;
                    using (var reader = skipScanLabel
                        ? new BulkInsertFileDataReader(nodeFile.FileName, fieldTerminator, "\r\n",
                            dataColumnName, columnDataType, true)
                        : new BulkInsertFileDataReader(nodeFile.FileName, fieldTerminator, "\r\n",
                            dataColumnName, columnDataType, true, nodeFile.LabelOffset, it))
                    {
                        foreach (var variable in dataColumnName)
                        {
                            if (variable != "label")
                            {
                                sqlBulkCopy.ColumnMappings.Add(new SqlBulkCopyColumnMapping(variable, variable));
                            }
                        }
                        sqlBulkCopy.DestinationTableName = tableNameWithSchema;
                        sqlBulkCopy.WriteToServer(reader);
                    }
                }
            }
        }

        // Rebuilds the clustered index and unique constraint on each node table
        foreach (var pair in nodeTableToInfo)
        {
            const string createPrimaryKey = @"
                ALTER TABLE {0}
                ADD CONSTRAINT {1} PRIMARY KEY (GlobalNodeId)";
            string constraintName = "dbo" + pair.Value.tableName + "_PK_GlobalNodeId";
            command.CommandText = string.Format(createPrimaryKey, pair.Value.tableName, constraintName);
            command.ExecuteNonQuery();

            const string createUniqueConstraint = @"
                ALTER TABLE {0}
                ADD CONSTRAINT {1} UNIQUE ({2})";
            string indexName = "dbo" + pair.Value.tableName + "_UQ_" + pair.Value.UserId.Item1;
            command.CommandText = string.Format(createUniqueConstraint, pair.Value.tableName, indexName,
                pair.Value.UserId.Item1);
            command.ExecuteNonQuery();
        }

        // Bulk inserts edges
        foreach (var pair in edgeFileToInfo)
        {
            var edgeFile = pair.Value;
            var dataColumnName = new List<string>(edgeFile.FileHeader.Count);
            var columnDataType = new List<string>(edgeFile.FileHeader.Count);
            using (var it = edgeFile.ColumnToType.GetEnumerator())
            {
                for (int i = 0; i < edgeFile.FileHeader.Count; i++)
                {
                    if (i == edgeFile.StartIdOffset)
                    {
                        dataColumnName.Add("startid");
                        columnDataType.Add(convertSqlType(byDefaultType));
                    }
                    else if (i == edgeFile.EndIdOffset)
                    {
                        dataColumnName.Add("endid");
                        columnDataType.Add(convertSqlType(byDefaultType));
                    }
                    else if (i == edgeFile.LabelOffset)
                    {
                        dataColumnName.Add("type");
                        columnDataType.Add(convertSqlType(byDefaultType));
                    }
                    else
                    {
                        if (it.MoveNext())
                        {
                            dataColumnName.Add(it.Current.Key);
                            columnDataType.Add(convertSqlType(it.Current.Value));
                        }
                    }
                }
            }

            HashSet<string> startNodeTable = nameSpaceToNodeTableSet[edgeFile.StartNameSpace];
            foreach (var edgeColumnName in edgeFile.Labels)
            {
                // Creates a temp table for bulk inserting the edge data
                var randomTempTableName = "dbo." + edgeColumnName + edgeFile.sinkTable + "_" + RandomString();
                var attributes = string.Join(",\n", edgeFile.ColumnToType.Select(x => x.Key + " " + x.Value));
                const string createTempTable = @"
                    Create table {0} (
                        startid {1},
                        endid {1},
                        {2}
                    )";
                command.CommandText = string.Format(createTempTable, randomTempTableName, byDefaultType, attributes);
                command.ExecuteNonQuery();

                // Bulk inserts the edge rows into the temp table
                using (var sqlBulkCopy = new SqlBulkCopy(Conn, SqlBulkCopyOptions.TableLock, transaction))
                {
                    sqlBulkCopy.BulkCopyTimeout = 0;
                    using (var reader = skipScanLabel
                        ? new BulkInsertFileDataReader(edgeFile.FileName, fieldTerminator, "\r\n",
                            dataColumnName, columnDataType, true)
                        : new BulkInsertFileDataReader(edgeFile.FileName, fieldTerminator, "\r\n",
                            dataColumnName, columnDataType, true, edgeFile.LabelOffset, edgeColumnName))
                    {
                        foreach (var it in dataColumnName)
                        {
                            if (it != "type")
                            {
                                sqlBulkCopy.ColumnMappings.Add(it, it);
                            }
                        }
                        sqlBulkCopy.DestinationTableName = randomTempTableName;
                        sqlBulkCopy.WriteToServer(reader);
                    }
                }

                // Creates a clustered index on the sink node id in the temp table
                string clusteredIndexName = "sinkIndex_" + RandomString();
                const string createClusteredIndex = @"
                    create clustered index [{0}] on {1}([endid])";
                command.Parameters.Clear();
                command.CommandText = string.Format(createClusteredIndex, clusteredIndexName, randomTempTableName);
                command.ExecuteNonQuery();

                foreach (var sourceTableName in startNodeTable)
                {
                    // Writes the encoded edges and out-degrees into the source node table
                    string aggregateFunctionName = "dbo_" + sourceTableName + '_' + edgeColumnName + '_' + "Encoder";
                    var tempStringForVariable = string.Join(", ", edgeFile.ColumnToType.Select(x => x.Key));
                    if (!string.IsNullOrEmpty(tempStringForVariable))
                    {
                        tempStringForVariable = "," + tempStringForVariable;
                    }
                    string aggregateFunction = aggregateFunctionName + "([sinkTable].[GlobalNodeId]" +
                                               tempStringForVariable + ")";
                    const string updateEdgeData = @"
                        Select [{0}].globalnodeid, [GraphView_InsertEdgeInternalTable].binary,
                               [GraphView_InsertEdgeInternalTable].sinkCount
                        into #ParallelOptimalTempTable
                        From
                        (
                            Select tempTable.[{2}] source, [{3}].{4} as binary,
                                   count([sinkTable].[GlobalNodeId]) as sinkCount
                            From {5} tempTable
                            Join [{3}].[{6}] sinkTable
                            On sinkTable.[{7}] = tempTable.[{8}]
                            Group by tempTable.[{2}]
                        ) as [GraphView_InsertEdgeInternalTable], [{3}].[{0}]
                        Where [{0}].[{9}] = [GraphView_InsertEdgeInternalTable].source;
                        UPDATE [{3}].[{0}] SET {1} .WRITE(temp.[binary], null, null), {1}OutDegree += sinkCount
                        from #ParallelOptimalTempTable temp
                        where temp.globalnodeid = [{0}].globalnodeid;
                        drop table #ParallelOptimalTempTable;";
                    command.Parameters.Clear();
                    var sinkTableId = nodeTableToInfo[edgeFile.sinkTable].UserId.Item1;
                    var sourceTableId = nodeTableToInfo[sourceTableName].UserId.Item1;
                    command.CommandText = string.Format(updateEdgeData, sourceTableName, edgeColumnName, "startid",
                        "dbo", aggregateFunction, randomTempTableName, edgeFile.sinkTable, sinkTableId, "endid",
                        sourceTableId);
                    command.ExecuteNonQuery();

                    // Updates the in-degrees on the sink node table
                    const string updateReversedEdgeData = @"
                        UPDATE [{3}].[{0}] SET [InDegree] += sourceCount
                        From
                        (
                            Select tempTable.[{1}] as Sink, count(*) as sourceCount
                            From {2} tempTable
                            Join [{5}]
                            On [{5}].[{6}] = tempTable.[{7}]
                            Group by tempTable.[{1}]
                        ) as [GraphView_InsertEdgeInternalTable]
                        Where [GraphView_InsertEdgeInternalTable].Sink = [{0}].[{4}]";
                    command.CommandText = string.Format(updateReversedEdgeData, edgeFile.sinkTable, "endid",
                        randomTempTableName, "dbo", sinkTableId, sourceTableName, sourceTableId, "startid");
                    command.ExecuteNonQuery();

                    // Drops the temp table
                    const string dropTempTable = @"
                        drop table {0}";
                    command.CommandText = string.Format(dropTempTable, randomTempTableName);
                    command.ExecuteNonQuery();
                }
            }
        }
        transaction.Commit();
    }
    catch (Exception error)
    {
        transaction.Rollback();
        throw new BulkInsertNodeException(error.Message);
    }
}
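// Hypothetical usage sketch (not part of the original source): imports two node files and one
// edge file from a data directory. The enclosing connection type is assumed to be the GraphView
// connection class that declares Import above, and the connection string, file names and
// directory are illustrative placeholders. As the summary suggests, large loads benefit from
// switching the database to minimal logging first:
//   USE master; ALTER DATABASE MyGraphDb SET RECOVERY BULK_LOGGED;
public static void ImportExample()
{
    using (var connection = new GraphViewConnection(
        "Data Source=.;Initial Catalog=MyGraphDb;Integrated Security=true"))
    {
        connection.Open();
        connection.Import(
            nodesFileName: new List<string> { "people.csv", "companies.csv" },
            edgesFileName: new List<string> { "works_at.csv" },
            directory: @"C:\graph_data",
            skipScanLabel: true,          // every file carries a single label/type
            fieldTerminator: ",",
            byDefaultType: "string");
    }
}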
/// <summary>
/// Uploads a new file.
/// </summary>
/// <param name="file">The file</param>
/// <param name="forceOwerwritte">Allow overwriting an existing file</param>
/// <returns>A task that completes when all blocks have been uploaded.</returns>
public async Task UploadFile(SourceFile file, bool forceOwerwritte = false)
{
    string checkSum = CalculateMD5(file.Data);
    List<Block> parts = SplitFile(file);

    if (Files.TryGetValue(file.Name, out var nodeFile))
    {
        if (forceOwerwritte)
        {
            // Replace the existing metadata entry with a fresh one.
            nodeFile = new NodeFileInfo(parts.Count, file.Data.Length)
            {
                ContentType = file.ContentType,
                CheckSum = checkSum,
            };
            Files[file.Name] = nodeFile;
        }
        else
        {
            // The file already exists and overwriting was not requested.
            return;
        }
    }
    else
    {
        nodeFile = new NodeFileInfo(parts.Count, file.Data.Length)
        {
            ContentType = file.ContentType,
            CheckSum = checkSum
        };
        Files.Add(file.Name, nodeFile);
    }

    // Distribute the blocks across the selected hosts and record where each block lives.
    Dictionary<string, List<Block>> hostBlocksList = OptimalHosts(parts);
    foreach (var hostBlocks in hostBlocksList)
    {
        nodeFile.Nodes.Add(new NodeBlockInfo
        {
            HostName = hostBlocks.Key,
            Priority = 0,
            Indexes = hostBlocks.Value.Select(v => v.Info.Index).ToList()
        });

        foreach (var block in hostBlocks.Value)
        {
            await _nodeGateway.AddOrOverwritteBlock(hostBlocks.Key, file.Name, block.Data,
                block.Info.Index, block.Info.TotalBlockCount, forceOwerwritte);
        }
    }

    //foreach (var hostBlocks in hostBlocksList)
    //{
    //    nodeFile.Nodes.Add(new NodeBlockInfo
    //    {
    //        HostName = hostBlocks.Key,
    //        Priority = 0,
    //        Indexes = hostBlocks.Value.Select(v => v.Info.Index).ToList()
    //    });
    //    await _nodeGateway.UploadBlocks(hostBlocks.Key, hostBlocks.Value, forceOwerwritte);
    //}
}
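// Hypothetical usage sketch (not part of the original source): reads a local file and stores it
// through UploadFile. The SourceFile initializer and the "_fileStorage" field are assumptions;
// they only need to expose the members UploadFile above already relies on (Name, ContentType, Data).
// File name, path and content type are illustrative placeholders.
public async Task UploadLocalFileExample()
{
    var sourceFile = new SourceFile
    {
        Name = "report.pdf",
        ContentType = "application/pdf",
        Data = File.ReadAllBytes(@"C:\temp\report.pdf")
    };

    // The first call stores the file; the second replaces it only because overwriting is forced,
    // otherwise UploadFile returns without touching the existing entry.
    await _fileStorage.UploadFile(sourceFile);
    await _fileStorage.UploadFile(sourceFile, forceOwerwritte: true);
}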