/// <summary>
/// Clusters documents that the DCS fetches itself from a document source
/// registered on the server (e.g. a search engine), identified by
/// <paramref name="sourceId"/>.
/// </summary>
private static void ClusterFromSearchEngine(MultipartFileUpload service, string sourceId, string query)
{
    // Ask for XML output containing both the clusters and the documents.
    service.AddFormValue("dcs.output.format", "XML");
    service.AddFormValue("dcs.clusters.only", "false");

    // The query to forward to the source, how many results to fetch,
    // and which registered source to use.
    service.AddFormValue("query", query);
    service.AddFormValue("results", "20");
    service.AddFormValue("dcs.source", sourceId);

    // Issue the request, parse the XML response and dump group headers.
    byte[] payload = service.Post();
    XmlDocument parsed = new XmlDocument { PreserveWhitespace = true };
    parsed.Load(new MemoryStream(payload));
    PrintResults(parsed);
}
/// <summary>
/// An example of clustering data stored in a local file and passed
/// as part of the HTTP request. A query hint is provided for the
/// clustering algorithm (to avoid trivial clusters).
/// </summary>
private static void ClusterFromFile(MultipartFileUpload service, string filePath, string queryHint)
{
    // File.OpenRead opens the file read-only. The previous
    // File.Open(filePath, FileMode.Open) requested ReadWrite access,
    // which fails unnecessarily on read-only files or shares even
    // though the stream is only ever read from.
    using (FileStream fs = File.OpenRead(filePath))
    {
        ClusterFromStream(service, fs, queryHint);
    }
}
/// <summary>
/// Entry point: demonstrates the three ways of sending data to the DCS —
/// from a file stream, from an in-memory XML string, and by letting the
/// DCS query an external document source itself.
/// </summary>
public static void Main()
{
    MultipartFileUpload service = new MultipartFileUpload(new Uri("http://localhost:8080/dcs/rest"));

    // Single path constant reused below. Previously the file-clustering call
    // duplicated the literal path instead of reusing examplePath, so the
    // existence check and the actual upload could silently diverge.
    string examplePath = "..\\..\\..\\shared\\data-mining.xml";
    if (!File.Exists(examplePath))
    {
        Console.WriteLine("Input path does not exist: " + examplePath);
        return;
    }

    // Cluster directly from file (no buffering).
    Console.WriteLine("## Clustering documents from a file...");
    ClusterFromFile(service, examplePath, "data mining");

    // Cluster from an XML in memory.
    string xml = File.ReadAllText(examplePath, System.Text.Encoding.UTF8);
    Console.WriteLine("## Clustering documents from an XML string...");
    ClusterFromStream(service, new MemoryStream(System.Text.Encoding.UTF8.GetBytes(xml)), "data mining");

    // Cluster from an external document source (on the DCS).
    Console.WriteLine("## Clustering search results from a search engine...");
    ClusterFromSearchEngine(service, "etools", "data mining");
}
/// <summary>
/// An example of clustering data from an arbitrary byte stream holding input
/// XML for the DCS (can be an in-memory stream if clustering from a string).
/// </summary>
private static void ClusterFromStream(MultipartFileUpload service, Stream xmlStream, string queryHint)
{
    // XML output, clusters only — the input documents are not echoed back.
    service.AddFormValue("dcs.output.format", "XML");
    service.AddFormValue("dcs.clusters.only", "true");

    // Hint used by the clustering algorithm to avoid trivial clusters.
    service.AddFormValue("query", queryHint);

    // Explicitly select Lingo and override a few of its parameters.
    // Omitting dcs.algorithm would select the server-side default.
    service.AddFormValue("dcs.algorithm", "lingo");
    service.AddFormValue("LingoClusteringAlgorithm.desiredClusterCountBase", "10");
    service.AddFormValue("LingoClusteringAlgorithm.factorizationQuality", "LOW");
    service.AddFormValue("LingoClusteringAlgorithm.factorizationFactory",
        "org.carrot2.matrix.factorization.PartialSingularValueDecompositionFactory");

    // Attach the documents to cluster as a file part of the multipart request.
    service.AddFormStream("dcs.c2stream", "anything.xml", xmlStream);

    // Send the request, parse the XML response and dump group headers.
    byte[] payload = service.Post();
    XmlDocument parsed = new XmlDocument { PreserveWhitespace = true };
    parsed.Load(new MemoryStream(payload));
    PrintResults(parsed);
}
/// <summary>
/// Starts a new file upload to remote storage. For small files the response
/// carries a single presigned PUT URL; for files at or above
/// AppInfo.FileSizeBeforeMultipartUpload a multipart upload is created and the
/// first batch of presigned chunk URLs is returned instead.
/// </summary>
/// <param name="request">The upload request (name, size, mime type, access levels, optional parent folder).</param>
/// <returns>
/// An UploadFileResponse with the upload URL(s) and a protected verify token,
/// or an error ActionResult when validation or access checks fail.
/// </returns>
public async Task<ActionResult<UploadFileResponse>> StartFileUpload(
    [Required] [FromBody] UploadFileRequestForm request)
{
    // Reject invalid item names up front.
    if (!CheckNewItemName(request.Name, out var badRequest))
    {
        return (badRequest!);
    }

    // Without configured remote storage no presigned URLs can be created.
    if (!remoteStorage.Configured)
    {
        throw new HttpResponseException()
        {
            Status = StatusCodes.Status500InternalServerError,
            Value = "Remote storage is not configured on the server",
        };
    }

    // Disallow extensions with uppercase letters
    if (PathParser.IsExtensionUppercase(request.Name))
    {
        return (BadRequest("File extension can't contain uppercase characters"));
    }

    // TODO: maybe in the future we'll want to allow anonymous uploads to certain folders
    var user = HttpContext.AuthenticatedUserOrThrow();

    // Resolve the target parent folder (null means the root folder).
    StorageItem? parentFolder = null;

    if (request.ParentFolder != null)
    {
        parentFolder = await database.StorageItems.FirstOrDefaultAsync(i =>
            i.Ftype == FileType.Folder && i.Id == request.ParentFolder.Value);

        if (parentFolder == null)
        {
            return (NotFound("Parent folder doesn't exist"));
        }
    }

    // Check if the item already exists (a new version is being uploaded)
    var parentId = parentFolder?.Id;
    var existingItem = await database.StorageItems.FirstOrDefaultAsync(i =>
        i.ParentId == parentId && i.Name == request.Name);

    if (existingItem != null)
    {
        // New version of an existing item: the user needs at least read access to
        // the containing folder. The root folder is publicly readable, so the
        // parentFolder == null case doesn't need to be checked here.
        if (parentFolder != null)
        {
            if (!parentFolder.IsReadableBy(user))
            {
                return (this.WorkingForbid("You don't have read access to the folder"));
            }
        }

        // Disallow file uploads to a folder item
        if (existingItem.Ftype != FileType.File)
        {
            return (BadRequest("Can't upload a new file version to an item that is not a file"));
        }
    }
    else
    {
        // Write access required to make a new item
        if (parentFolder == null)
        {
            // Only admins may create new items directly in the root folder.
            if (!user.HasAccessLevel(UserAccessLevel.Admin))
            {
                return (this.WorkingForbid("Only admins can write to root folder"));
            }
        }
        else
        {
            if (!parentFolder.IsWritableBy(user))
            {
                return (this.WorkingForbid("You don't have write access to the folder"));
            }
        }
    }

    // Create the storage item now when it doesn't exist yet; the uploader
    // becomes the owner and the requested access levels are applied.
    if (existingItem == null)
    {
        existingItem = new StorageItem()
        {
            Name = request.Name,
            Ftype = FileType.File,
            ReadAccess = request.ReadAccess,
            WriteAccess = request.WriteAccess,
            AllowParentless = parentId == null,
            Parent = parentFolder,
            OwnerId = user.Id,
        };

        await database.StorageItems.AddAsync(existingItem);
    }

    // Each upload always creates a new version with its own storage file,
    // which expires if the upload isn't completed in time.
    var version = await existingItem.CreateNextVersion(database);
    var file = await version.CreateStorageFile(database,
        DateTime.UtcNow + AppInfo.RemoteStorageUploadExpireTime, request.Size);

    string? uploadUrl = null;
    MultipartFileUpload? multipart = null;
    long? multipartId = null;
    string? uploadId = null;

    if (request.Size >= AppInfo.FileSizeBeforeMultipartUpload)
    {
        // Multipart upload is recommended for large files, as large files are hard to make go through
        // in a reasonable time with a single PUT request
        try
        {
            uploadId = await remoteStorage.CreateMultipartUpload(file.UploadPath, request.MimeType);
            if (uploadId == null)
            {
                throw new Exception("returned uploadId is null");
            }
        }
        catch (Exception e)
        {
            logger.LogError("Failed to create multipart upload: {@E}", e);
            return (Problem("Failed to create a new multipart upload"));
        }

        // Hand out presigned URLs only for the first batch of chunks; the client
        // fetches URLs for later chunks with the chunk retrieve token below.
        var chunks = ComputeChunksForFile(request.Size).ToList();
        var initialChunksToUpload = AddUploadUrlsToChunks(
            chunks.Take(AppInfo.MultipartSimultaneousUploads * AppInfo.MultipartUploadPartsToReturnInSingleCall),
            file.UploadPath, uploadId, AppInfo.RemoteStorageUploadExpireTime).ToList();

        // Persist the in-progress upload state (also flushes the new item/version/file rows).
        var multipartModel = new InProgressMultipartUpload()
        {
            UploadId = uploadId,
            Path = file.UploadPath,
            NextChunkIndex = initialChunksToUpload.Count,
        };

        await database.InProgressMultipartUploads.AddAsync(multipartModel);
        await database.SaveChangesAsync();

        multipartId = multipartModel.Id;

        // Protected token the client presents to request URLs for further chunks.
        var chunkToken = new ChunkRetrieveToken(multipartModel.Id, file.Id, uploadId);
        var chunkTokenStr = JsonSerializer.Serialize(chunkToken);

        multipart = new MultipartFileUpload()
        {
            ChunkRetrieveToken = chunkDataProtector.Protect(chunkTokenStr, AppInfo.MultipartUploadTotalAllowedTime),
            TotalChunks = chunks.Count,
            NextChunks = initialChunksToUpload,
        };
    }
    else
    {
        // Normal upload (in a single PUT request)
        await database.SaveChangesAsync();
        uploadUrl = remoteStorage.CreatePresignedUploadURL(file.UploadPath,
            AppInfo.RemoteStorageUploadExpireTime);
    }

    // Need to queue a job to calculate the parent folder size
    if (parentId != null)
    {
        jobClient.Enqueue<CountFolderItemsJob>((x) => x.Execute(parentId.Value, CancellationToken.None));
    }

    // Schedule cleanup of the multipart upload in case it is never finished.
    if (uploadId != null)
    {
        jobClient.Schedule<DeleteNonFinishedMultipartUploadJob>(
            (x) => x.Execute(uploadId, CancellationToken.None),
            AppInfo.MultipartUploadTotalAllowedTime * 2);
    }

    // TODO: queue a job to delete the version / UploadPath after a few hours if the upload fails

    // Token the client returns when reporting the upload finished, proving
    // which item/version (and multipart upload, if any) it belongs to.
    var token = new UploadVerifyToken()
    {
        TargetStorageItem = existingItem.Id,
        TargetStorageItemVersion = version.Id,
        MultipartId = multipartId,
    };

    var tokenStr = JsonSerializer.Serialize(token);

    return (new UploadFileResponse()
    {
        UploadURL = uploadUrl,
        Multipart = multipart,
        TargetStorageItem = existingItem.Id,
        TargetStorageItemVersion = version.Id,
        // Multipart uploads get the longer token lifetime to match the
        // total time allowed for the whole multipart upload.
        UploadVerifyToken = dataProtector.Protect(tokenStr,
            multipart == null ?
                AppInfo.RemoteStorageUploadExpireTime :
                AppInfo.MultipartUploadTotalAllowedTime),
    });
}