Пример #1
0
        /// <summary>
        /// Requests clustering of documents fetched by a document source (e.g. a search
        /// engine) registered in the DCS under the given source identifier.
        /// </summary>
        private static void ClusterFromSearchEngine(MultipartFileUpload service, string sourceId, string query)
        {
            // Request XML output; this time include documents along with the clusters.
            service.AddFormValue("dcs.output.format", "XML");
            service.AddFormValue("dcs.clusters.only", "false");

            // The query to run, how many results to fetch, and which source to fetch from.
            service.AddFormValue("query", query);
            service.AddFormValue("results", "20");
            service.AddFormValue("dcs.source", sourceId);

            // Issue the HTTP request.
            var responseBytes = service.Post();

            // Parse the returned XML and dump the group headers.
            var responseStream = new MemoryStream(responseBytes);

            var parsed = new XmlDocument { PreserveWhitespace = true };
            parsed.Load(responseStream);

            PrintResults(parsed);
        }
Пример #2
0
 /// <summary>
 /// Clusters the contents of a local file, streamed as part of the HTTP
 /// request. The query hint steers the clustering algorithm away from
 /// producing trivial clusters.
 /// </summary>
 private static void ClusterFromFile(MultipartFileUpload service, string filePath, string queryHint)
 {
     using (FileStream input = File.Open(filePath, FileMode.Open))
     {
         ClusterFromStream(service, input, queryHint);
     }
 }
Пример #3
0
        /// <summary>
        /// Entry point: demonstrates the three ways of feeding documents to the DCS —
        /// a local file, an in-memory XML string, and an external document source
        /// registered on the DCS side.
        /// </summary>
        public static void Main()
        {
            MultipartFileUpload service = new MultipartFileUpload(new Uri("http://localhost:8080/dcs/rest"));

            string examplePath = "..\\..\\..\\shared\\data-mining.xml";

            if (!File.Exists(examplePath))
            {
                Console.WriteLine("Input path does not exist: " + examplePath);
                return;
            }

            // Cluster directly from file (no buffering).
            // Reuse examplePath instead of repeating the literal, so the existence
            // check above always guards the same path that is actually opened.
            Console.WriteLine("## Clustering documents from a file...");
            ClusterFromFile(service, examplePath, "data mining");

            // Cluster from an XML string held in memory.
            string xml = File.ReadAllText(examplePath, System.Text.Encoding.UTF8);

            Console.WriteLine("## Clustering documents from an XML string...");
            ClusterFromStream(service,
                              new MemoryStream(System.Text.Encoding.UTF8.GetBytes(xml)), "data mining");

            // Cluster from an external document source (resolved on the DCS).
            Console.WriteLine("## Clustering search results from a search engine...");
            ClusterFromSearchEngine(service, "etools", "data mining");
        }
Пример #4
0
        /// <summary>
        /// Clusters documents supplied as an arbitrary byte stream of DCS input XML
        /// (a MemoryStream works for clustering directly from a string).
        /// </summary>
        private static void ClusterFromStream(MultipartFileUpload service, Stream xmlStream, string queryHint)
        {
            // XML output; clusters only, no documents echoed back.
            service.AddFormValue("dcs.output.format", "XML");
            service.AddFormValue("dcs.clusters.only", "true");

            // Hint for the clustering algorithm.
            service.AddFormValue("query", queryHint);

            // Explicitly select Lingo and override a few of its parameters.
            // Omitting dcs.algorithm would select the server-side default.
            service.AddFormValue("dcs.algorithm", "lingo");
            service.AddFormValue("LingoClusteringAlgorithm.desiredClusterCountBase", "10");
            service.AddFormValue("LingoClusteringAlgorithm.factorizationQuality", "LOW");
            service.AddFormValue("LingoClusteringAlgorithm.factorizationFactory",
                "org.carrot2.matrix.factorization.PartialSingularValueDecompositionFactory");

            // The input XML travels as a file part of the multipart request.
            service.AddFormStream("dcs.c2stream", "anything.xml", xmlStream);

            // Execute the request.
            var payload = service.Post();

            // Parse the response and dump the group headers.
            var reader = new MemoryStream(payload);

            var result = new XmlDocument { PreserveWhitespace = true };
            result.Load(reader);

            PrintResults(result);
        }
Пример #5
0
        /// <summary>
        /// Entry point: demonstrates clustering from a local file, from an in-memory
        /// XML string, and from an external document source registered on the DCS.
        /// </summary>
        public static void Main()
        {
            MultipartFileUpload service = new MultipartFileUpload(new Uri("http://localhost:8080/dcs/rest"));

            string examplePath = "..\\..\\..\\shared\\data-mining.xml";
            if (!File.Exists(examplePath))
            {
                Console.WriteLine("Input path does not exist: " + examplePath);
                return;
            }

            // Cluster directly from file (no buffering). Reuse examplePath rather
            // than repeating the literal, so the existence check above guards the
            // exact path that gets opened.
            Console.WriteLine("## Clustering documents from a file...");
            ClusterFromFile(service, examplePath, "data mining");

            // Cluster from an XML string held in memory.
            string xml = File.ReadAllText(examplePath, System.Text.Encoding.UTF8);
            Console.WriteLine("## Clustering documents from an XML string...");
            ClusterFromStream(service,
                new MemoryStream(System.Text.Encoding.UTF8.GetBytes(xml)), "data mining");

            // Cluster from an external document source (resolved on the DCS).
            Console.WriteLine("## Clustering search results from a search engine...");
            ClusterFromSearchEngine(service, "etools", "data mining");
        }
Пример #6
0
        /// <summary>
        /// An example of clustering data from an arbitrary byte stream holding input XML
        /// for the DCS (an in-memory stream may be used when clustering from a string).
        /// </summary>
        private static void ClusterFromStream(MultipartFileUpload service, Stream xmlStream, string queryHint)
        {
            // Plain form fields: XML output, clusters only, the query hint, and an
            // explicit selection of the Lingo algorithm with customized parameters
            // (omitting dcs.algorithm would pick the server default).
            string[,] formValues =
            {
                { "dcs.output.format", "XML" },
                { "dcs.clusters.only", "true" },
                { "query", queryHint },
                { "dcs.algorithm", "lingo" },
                { "LingoClusteringAlgorithm.desiredClusterCountBase", "10" },
                { "LingoClusteringAlgorithm.factorizationQuality", "LOW" },
                {
                    "LingoClusteringAlgorithm.factorizationFactory",
                    "org.carrot2.matrix.factorization.PartialSingularValueDecompositionFactory"
                },
            };

            for (int i = 0; i < formValues.GetLength(0); i++)
            {
                service.AddFormValue(formValues[i, 0], formValues[i, 1]);
            }

            // The input XML travels as a file part of the multipart request.
            service.AddFormStream("dcs.c2stream", "anything.xml", xmlStream);

            // Post the request, then parse the XML response and dump group headers.
            XmlDocument response = new XmlDocument();
            response.PreserveWhitespace = true;
            response.Load(new MemoryStream(service.Post()));

            PrintResults(response);
        }
        /// <summary>
        /// Starts an upload of a new file (or a new version of an existing file) to remote storage.
        /// Validates the name and the caller's permissions, creates the storage item / version / file
        /// records, and returns either a single presigned PUT URL or the first batch of multipart
        /// upload chunks, together with a protected verification token for finishing the upload.
        /// </summary>
        /// <param name="request">Upload parameters: name, parent folder, access levels, size and MIME type.</param>
        /// <returns>
        /// An UploadFileResponse on success; BadRequest / NotFound / Forbid / Problem results on
        /// validation or storage failures.
        /// </returns>
        public async Task <ActionResult <UploadFileResponse> > StartFileUpload(
            [Required][FromBody] UploadFileRequestForm request)
        {
            // Name validation; CheckNewItemName produces the error response itself when it fails.
            if (!CheckNewItemName(request.Name, out var badRequest))
            {
                return(badRequest !);
            }

            // No point proceeding when the server has no remote storage backend configured.
            if (!remoteStorage.Configured)
            {
                throw new HttpResponseException()
                      {
                          Status = StatusCodes.Status500InternalServerError,
                          Value  = "Remote storage is not configured on the server",
                      };
            }

            // Disallow extensions with uppercase letters
            if (PathParser.IsExtensionUppercase(request.Name))
            {
                return(BadRequest("File extension can't contain uppercase characters"));
            }

            // TODO: maybe in the future we'll want to allow anonymous uploads to certain folders
            var user = HttpContext.AuthenticatedUserOrThrow();

            // Check write access
            StorageItem?parentFolder = null;

            if (request.ParentFolder != null)
            {
                // Only items of type Folder are acceptable parents.
                parentFolder = await database.StorageItems.FirstOrDefaultAsync(i =>
                                                                               i.Ftype == FileType.Folder && i.Id == request.ParentFolder.Value);

                if (parentFolder == null)
                {
                    return(NotFound("Parent folder doesn't exist"));
                }
            }

            // Check if the item already exists (a new version is being uploaded)
            // A null parentId means the root folder.
            var parentId     = parentFolder?.Id;
            var existingItem =
                await database.StorageItems.FirstOrDefaultAsync(i => i.ParentId == parentId && i.Name == request.Name);

            if (existingItem != null)
            {
                // New version of an existing item. User needs at least read access to the folder and
                // Root folder is publicly readable so that doesn't need to be checked here
                if (parentFolder != null)
                {
                    if (!parentFolder.IsReadableBy(user))
                    {
                        return(this.WorkingForbid("You don't have read access to the folder"));
                    }
                }

                // Disallow file uploads to a folder item
                if (existingItem.Ftype != FileType.File)
                {
                    return(BadRequest("Can't upload a new file version to an item that is not a file"));
                }
            }
            else
            {
                // Write access required to make a new item
                if (parentFolder == null)
                {
                    if (!user.HasAccessLevel(UserAccessLevel.Admin))
                    {
                        return(this.WorkingForbid("Only admins can write to root folder"));
                    }
                }
                else
                {
                    if (!parentFolder.IsWritableBy(user))
                    {
                        return(this.WorkingForbid("You don't have write access to the folder"));
                    }
                }
            }

            // Brand new item: create the StorageItem record with the requested access levels.
            if (existingItem == null)
            {
                existingItem = new StorageItem()
                {
                    Name            = request.Name,
                    Ftype           = FileType.File,
                    ReadAccess      = request.ReadAccess,
                    WriteAccess     = request.WriteAccess,
                    AllowParentless = parentId == null,
                    Parent          = parentFolder,
                    OwnerId         = user.Id,
                };

                await database.StorageItems.AddAsync(existingItem);
            }

            // Create the next version record and the backing storage file; the file's upload
            // window expires after RemoteStorageUploadExpireTime.
            var version = await existingItem.CreateNextVersion(database);

            var file = await version.CreateStorageFile(database,
                                                       DateTime.UtcNow + AppInfo.RemoteStorageUploadExpireTime, request.Size);

            string?uploadUrl = null;
            MultipartFileUpload?multipart = null;
            long?  multipartId            = null;
            string?uploadId = null;

            if (request.Size >= AppInfo.FileSizeBeforeMultipartUpload)
            {
                // Multipart upload is recommended for large files, as large files are hard to make go through
                // in a reasonable time with a single PUT request
                try
                {
                    uploadId = await remoteStorage.CreateMultipartUpload(file.UploadPath, request.MimeType);

                    if (uploadId == null)
                    {
                        throw new Exception("returned uploadId is null");
                    }
                }
                catch (Exception e)
                {
                    logger.LogError("Failed to create multipart upload: {@E}", e);
                    return(Problem("Failed to create a new multipart upload"));
                }

                // Split the file into chunks and presign upload URLs for only the first batch;
                // the client fetches further chunk URLs later using the chunk retrieve token.
                var chunks = ComputeChunksForFile(request.Size).ToList();
                var initialChunksToUpload = AddUploadUrlsToChunks(chunks.Take(AppInfo.MultipartSimultaneousUploads *
                                                                              AppInfo.MultipartUploadPartsToReturnInSingleCall), file.UploadPath, uploadId,
                                                                  AppInfo.RemoteStorageUploadExpireTime).ToList();

                // Track the multipart upload in the database so unfinished uploads can be cleaned up.
                var multipartModel = new InProgressMultipartUpload()
                {
                    UploadId       = uploadId,
                    Path           = file.UploadPath,
                    NextChunkIndex = initialChunksToUpload.Count,
                };

                await database.InProgressMultipartUploads.AddAsync(multipartModel);

                // Persist everything created so far (item, version, file, multipart record).
                await database.SaveChangesAsync();

                multipartId = multipartModel.Id;

                // Token the client presents to retrieve upload URLs for the remaining chunks.
                var chunkToken = new ChunkRetrieveToken(multipartModel.Id, file.Id, uploadId);

                var chunkTokenStr = JsonSerializer.Serialize(chunkToken);

                multipart = new MultipartFileUpload()
                {
                    ChunkRetrieveToken =
                        chunkDataProtector.Protect(chunkTokenStr, AppInfo.MultipartUploadTotalAllowedTime),
                    TotalChunks = chunks.Count,
                    NextChunks  = initialChunksToUpload,
                };
            }
            else
            {
                // Normal upload (in a single PUT request)
                await database.SaveChangesAsync();

                uploadUrl = remoteStorage.CreatePresignedUploadURL(file.UploadPath,
                                                                   AppInfo.RemoteStorageUploadExpireTime);
            }

            // Need to queue a job to calculate the parent folder size
            if (parentId != null)
            {
                jobClient.Enqueue <CountFolderItemsJob>((x) => x.Execute(parentId.Value,
                                                                         CancellationToken.None));
            }

            // Schedule cleanup of the multipart upload in case the client never finishes it.
            if (uploadId != null)
            {
                jobClient.Schedule <DeleteNonFinishedMultipartUploadJob>((x) => x.Execute(uploadId,
                                                                                          CancellationToken.None), AppInfo.MultipartUploadTotalAllowedTime * 2);
            }

            // TODO: queue a job to delete the version / UploadPath after a few hours if the upload fails

            // Token the client returns when reporting the finished upload; protected so the
            // client cannot tamper with the target item / version ids.
            var token = new UploadVerifyToken()
            {
                TargetStorageItem        = existingItem.Id,
                TargetStorageItemVersion = version.Id,
                MultipartId = multipartId,
            };

            var tokenStr = JsonSerializer.Serialize(token);

            // NOTE(review): the verify token's lifetime matches the upload mode — presumably so
            // it expires together with the presigned URL(s); confirm against the finish endpoint.
            return(new UploadFileResponse()
            {
                UploadURL = uploadUrl,
                Multipart = multipart,
                TargetStorageItem = existingItem.Id,
                TargetStorageItemVersion = version.Id,
                UploadVerifyToken = dataProtector.Protect(tokenStr,
                                                          multipart == null ?
                                                          AppInfo.RemoteStorageUploadExpireTime :
                                                          AppInfo.MultipartUploadTotalAllowedTime),
            });
        }
Пример #8
0
 /// <summary>
 /// Opens a local file and clusters its contents by passing the stream as part
 /// of the HTTP request. The query hint helps the clustering algorithm avoid
 /// producing trivial clusters.
 /// </summary>
 private static void ClusterFromFile(MultipartFileUpload service, string filePath, string queryHint)
 {
     using (FileStream sourceStream = File.Open(filePath, FileMode.Open))
     {
         ClusterFromStream(service, sourceStream, queryHint);
     }
 }
Пример #9
0
        /// <summary>
        /// Clusters data retrieved from a search engine (or any other source registered
        /// in the DCS as a document source) identified by <c>sourceId</c>.
        /// </summary>
        private static void ClusterFromSearchEngine(MultipartFileUpload service, string sourceId, string query)
        {
            // XML output with both clusters and documents included.
            service.AddFormValue("dcs.output.format", "XML");
            service.AddFormValue("dcs.clusters.only", "false");

            // The query, the number of results to fetch, and the source to use.
            service.AddFormValue("query", query);
            service.AddFormValue("results", "20");
            service.AddFormValue("dcs.source", sourceId);

            // Post the request, then parse the XML response and dump group headers.
            var body = service.Post();

            var doc = new XmlDocument { PreserveWhitespace = true };
            doc.Load(new MemoryStream(body));

            PrintResults(doc);
        }