예제 #1
0
        /// <summary>
        /// Loads the data of a dataset, either from its latest version or from a historical one.
        /// </summary>
        /// <param name="id">dataset id; 0 always routes to the history lookup</param>
        /// <param name="versionId">requested version id; 0 means "use the latest version"</param>
        /// <returns>
        /// the latest-version tuples (after calling <c>Strip()</c> on the table) when the latest
        /// version is requested, otherwise the result of <c>getHistoryData(versionId)</c>
        /// </returns>
        private DataTable getData(long id, long versionId = 0)
        {
            using (DatasetManager manager = new DatasetManager())
            {
                // The latest version is served when no explicit version was requested,
                // or when the requested version happens to be the latest one.
                bool wantsLatest = id != 0 &&
                                   (versionId == 0 || manager.GetDatasetLatestVersionId(id).Equals(versionId));

                if (!wantsLatest)
                {
                    // Any other version is resolved through the history lookup.
                    return getHistoryData(versionId);
                }

                DataTable latest = manager.GetLatestDatasetVersionTuples(id);
                latest.Strip();
                return latest;
            }
        }
예제 #2
0
        /// <summary>
        /// Gets the size of a tabular dataset expressed as cell, column, and row counts.
        /// The lookup is best-effort: if the tuples cannot be loaded, all three values are 0.
        /// </summary>
        /// <param name="id">dataset id</param>
        /// <returns>list[0]:rows*cols, [1]:cols, [2]:rows</returns>
        private List<int> GetTabularSize(long id)
        {
            int rowCount    = 0;
            int columnCount = 0;

            // Dispose the manager on every path; the previous version never released it.
            using (DatasetManager dm = new DatasetManager())
            {
                try
                {
                    DataTable table = dm.GetLatestDatasetVersionTuples(id, true);
                    rowCount    = table.Rows.Count;
                    columnCount = table.Columns.Count;
                }
                catch (Exception)
                {
                    // Deliberate fallback: report an empty size rather than failing the caller.
                    rowCount    = 0;
                    columnCount = 0;
                }
            }

            // Built after the lookup so the list always holds exactly three entries.
            return new List<int> { rowCount * columnCount, columnCount, rowCount };
        }
예제 #3
0
        //[MeasurePerformance]
        /// <summary>
        /// Grid binding callback: stores the grid command in the session and returns one page
        /// of the latest dataset version's tuples as a <c>GridModel</c>.
        /// </summary>
        /// <param name="command">grid command carrying the requested page and page size</param>
        /// <param name="datasetID">id of the dataset whose primary data is shown</param>
        /// <returns>
        /// a view over the populated grid model, or over an empty model plus a model error
        /// when the dataset is not checked in
        /// </returns>
        public ActionResult _CustomPrimaryDataBinding(GridCommand command, int datasetID)
        {
            GridModel gridModel = new GridModel();

            // Remember the current grid command for later requests.
            Session["Filter"] = command;

            using (DatasetManager datasetManager = new DatasetManager())
            {
                if (!datasetManager.IsDatasetCheckedIn(datasetID))
                {
                    ModelState.AddModelError(String.Empty, "Dataset is just in processing.");
                    return View(gridModel);
                }

                DatasetVersion latestVersion = datasetManager.GetDatasetLatestVersion(datasetID);

                // The grid page is 1-based, the tuple API takes the page decremented by one.
                DataTable pageOfTuples = datasetManager.GetLatestDatasetVersionTuples(
                    latestVersion.Dataset.Id,
                    command.Page - 1,
                    command.PageSize);

                Session["gridTotal"] = datasetManager.GetDatasetVersionEffectiveTupleCount(latestVersion);

                gridModel = new GridModel(pageOfTuples)
                {
                    Total = Convert.ToInt32(Session["gridTotal"])
                };

                return View(gridModel);
            }
        }
예제 #4
0
        /// <summary>
        /// Integration test for projection: creates a dataset with generated tuples, checks it in,
        /// queries the latest version with a two-field projection, and asserts on the size of the
        /// returned table. The dataset and all data structures are purged at the end of a
        /// successful run.
        /// </summary>
        public void ProjectExpressionTest()
        {
            var dsHelper = new DatasetHelper();
            StructuredDataStructure dataStructure = dsHelper.CreateADataStructure();

            dataStructure.Should().NotBeNull("Failed to meet a precondition: a data strcuture is required.");

            string var1Name = "var" + dataStructure.Variables.First().Id;
            string var3Name = "var" + dataStructure.Variables.Skip(2).First().Id;

            // Create a projection expression that selects the first and the third variable.
            ProjectionExpression projectionExpression = new ProjectionExpression();

            projectionExpression.Items.Add(new ProjectionItemExpression()
            {
                FieldName = var1Name
            });
            projectionExpression.Items.Add(new ProjectionItemExpression()
            {
                FieldName = var3Name
            });

            // Create a dataset and test the projection against it.
            long numberOfTuples = 10;
            var  dm             = new DatasetManager();
            var  rsm            = new ResearchPlanManager();
            var  mdm            = new MetadataStructureManager();

            // No catch block: failures propagate with their original stack trace,
            // while the finally block still releases the managers.
            try
            {
                var rp = dsHelper.CreateResearchPlan();
                rp.Should().NotBeNull("Failed to meet a precondition: a research plan is required.");

                var mds = mdm.Repo.Query().First();
                mds.Should().NotBeNull("Failed to meet a precondition: a metadata strcuture is required.");

                Dataset dataset = dm.CreateEmptyDataset(dataStructure, rp, mds);
                dataset = dsHelper.GenerateTuplesForDataset(dataset, dataStructure, numberOfTuples, "Javad");
                dataset.Should().NotBeNull("The dataset tuple generation has failed!");

                dm.CheckInDataset(dataset.Id, "for testing purposes 2", "Javad", ViewCreationBehavior.None);
                dm.SyncView(dataset.Id, ViewCreationBehavior.Create | ViewCreationBehavior.Refresh);

                dataset.Id.Should().BeGreaterThan(0, "Dataset was not persisted.");
                dataset.LastCheckIOTimestamp.Should().NotBeAfter(DateTime.UtcNow, "The dataset's timestamp is wrong.");
                dataset.DataStructure.Should().NotBeNull("Dataset must have a data structure.");
                dataset.Status.Should().Be(DatasetStatus.CheckedIn, "Dataset must be in the CheckedIn status.");
                dm.GetDatasetLatestVersionEffectiveTupleCount(dataset.Id).Should().Be(numberOfTuples);

                // Query with the projection only (no filter, no ordering).
                // NOTE(review): the trailing 1, 3 are presumably page number and page size - confirm.
                var dst = dm.GetLatestDatasetVersionTuples(dataset.Id, null, null, projectionExpression, 1, 3);
                dst.Should().NotBeNull();
                dst.Rows.Count.Should().BeLessOrEqualTo(3);
                dst.Columns.Count.Should().BeLessOrEqualTo(3, "Projection failed, wrong number of columns");

                // Clean up the test data.
                dm.DatasetVersionRepo.Evict();
                dm.DataTupleRepo.Evict();
                dm.DatasetRepo.Evict();
                dm.PurgeDataset(dataset.Id, true);
                dsHelper.PurgeAllDataStructures();
            }
            finally
            {
                dm.Dispose();
                rsm.Dispose();
                mdm.Dispose();
            }
        }
예제 #5
0
        /// <summary>
        /// Integration test for filter expressions: builds an AND filter, its negation, and an
        /// order-by expression, asserts on their SQL rendering, then runs the AND filter against
        /// a freshly created and checked-in dataset and asserts on the returned row count.
        /// </summary>
        public void CreateAndExpressionForQueryingTest()
        {
            var helper = new DatasetHelper();
            StructuredDataStructure dataStructure = helper.CreateADataStructure();

            dataStructure.Should().NotBeNull("Failed to meet a precondition: a data strcuture is required.");

            string var1Name = "var" + dataStructure.Variables.First().Id;
            string var2Name = "var" + dataStructure.Variables.Skip(1).First().Id;

            // Left operand: numeric comparison on the first variable.
            var numberCondition = new FilterNumberItemExpression()
            {
                Field    = new Field() { DataType = Utils.NH.Querying.DataType.Ineteger, Name = var1Name },
                Operator = NumberOperator.Operation.GreaterThan,
                Value    = 12
            };

            // Right operand: string suffix match on the second variable.
            var stringCondition = new FilterStringItemExpression()
            {
                Field    = new Field() { DataType = Utils.NH.Querying.DataType.String, Name = var2Name },
                Operator = StringOperator.Operation.EndsWith,
                Value    = "Test"
            };

            FilterExpression andFilter = BinaryFilterExpression.And(numberCondition, stringCondition);

            andFilter.ToSQL().Should().Be($"(({var1Name}) > (12)) AND (({var2Name}) ILIKE ('%Test'))");

            // Demonstrates wrapping an arbitrary expression in a NOT operator.
            FilterExpression negatedFilter = UnaryFilterExpression.Not(andFilter);

            negatedFilter.ToSQL().Should().Be($"NOT ((({var1Name}) > (12)) AND (({var2Name}) ILIKE ('%Test')))");
            negatedFilter.ToSQL().Should().Be($"NOT ({andFilter.ToSQL()})");

            var orderItems = new List<OrderItemExpression>()
            {
                new OrderItemExpression(var1Name),
                new OrderItemExpression(var2Name, SortDirection.Descending)
            };
            OrderByExpression ordering = new OrderByExpression(orderItems);

            ordering.ToSQL().Should().Be($"{var1Name} ASC, {var2Name} DESC");

            // Create a dataset and run the filter against it.
            long tupleCount = 100;
            var  datasetManager           = new DatasetManager();
            var  researchPlanManager      = new ResearchPlanManager();
            var  metadataStructureManager = new MetadataStructureManager();

            try
            {
                dataStructure.Should().NotBeNull("Failed to meet a precondition: a data strcuture is required.");

                var researchPlan = helper.CreateResearchPlan();
                researchPlan.Should().NotBeNull("Failed to meet a precondition: a research plan is required.");

                var metadataStructure = metadataStructureManager.Repo.Query().First();
                metadataStructure.Should().NotBeNull("Failed to meet a precondition: a metadata strcuture is required.");

                Dataset dataset = datasetManager.CreateEmptyDataset(dataStructure, researchPlan, metadataStructure);
                dataset = helper.GenerateTuplesForDataset(dataset, dataStructure, tupleCount, "Javad");
                dataset.Should().NotBeNull("The dataset tuple generation has failed!");

                datasetManager.CheckInDataset(dataset.Id, "for testing purposes 2", "Javad", ViewCreationBehavior.None);
                datasetManager.SyncView(dataset.Id, ViewCreationBehavior.Create | ViewCreationBehavior.Refresh);

                dataset.Id.Should().BeGreaterThan(0, "Dataset was not persisted.");
                dataset.LastCheckIOTimestamp.Should().NotBeAfter(DateTime.UtcNow, "The dataset's timestamp is wrong.");
                dataset.DataStructure.Should().NotBeNull("Dataset must have a data structure.");
                dataset.Status.Should().Be(DatasetStatus.CheckedIn, "Dataset must be in the CheckedIn status.");
                datasetManager.GetDatasetLatestVersionEffectiveTupleCount(dataset.Id).Should().Be(tupleCount);

                // Only the filter is passed; ordering and projection arguments are null.
                var filtered = datasetManager.GetLatestDatasetVersionTuples(dataset.Id, andFilter, null, null, 1, 10);
                filtered.Should().NotBeNull();
                filtered.Rows.Count.Should().BeLessOrEqualTo(10);

                // Clean up the test data.
                datasetManager.DatasetVersionRepo.Evict();
                datasetManager.DataTupleRepo.Evict();
                datasetManager.DatasetRepo.Evict();
                datasetManager.PurgeDataset(dataset.Id, true);

                helper.PurgeAllDataStructures();
            }
            finally
            {
                datasetManager.Dispose();
                researchPlanManager.Dispose();
                metadataStructureManager.Dispose();
            }
        }
예제 #6
0
        //[MeasurePerformance]
        /// <summary>
        /// Renders the primary-data partial view for a dataset. Structured datasets get the first
        /// 100 tuples of the latest version; unstructured datasets get their "unstructuredData"
        /// content descriptors.
        /// </summary>
        /// <param name="datasetID">id of the dataset to show</param>
        /// <returns>
        /// the partial view with a <c>ShowPrimaryDataModel</c>, or <c>PartialView(null)</c> when the
        /// dataset is not checked in (a model error is added) or its structure type is not handled
        /// </returns>
        public ActionResult ShowPrimaryData(long datasetID)
        {
            // Reset the per-session grid and download state.
            Session["Filter"]              = null;
            Session["Columns"]             = null;
            Session["DownloadFullDataset"] = false;
            ViewData["DownloadOptions"]    = null;

            DatasetManager          datasetManager    = new DatasetManager();
            DataStructureManager    structureManager  = new DataStructureManager();
            EntityPermissionManager permissionManager = new EntityPermissionManager();

            try
            {
                if (!datasetManager.IsDatasetCheckedIn(datasetID))
                {
                    ModelState.AddModelError(string.Empty, "Dataset is just in processing.");
                    return PartialView(null);
                }

                DatasetVersion latestVersion = datasetManager.GetDatasetLatestVersion(datasetID);
                long           structureId   = latestVersion.Dataset.DataStructure.Id;

                StructuredDataStructure structured = structureManager.StructuredDataStructureRepo.Get(structureId);
                DataStructure           structure  = structureManager.AllTypesDataStructureRepo.Get(structureId);

                // TODO: there is no dedicated download right yet, so the read right is checked instead.
                bool downloadAccess = permissionManager.HasEffectiveRight(
                    HttpContext.User.Identity.Name,
                    "Dataset",
                    typeof(Dataset),
                    datasetID,
                    RightType.Read);

                string title = xmlDatasetHelper.GetInformationFromVersion(latestVersion.Id, NameAttributeValues.title);

                Type structureType = structure.Self.GetType();

                if (structureType == typeof(StructuredDataStructure))
                {
                    // First page (index 0) of up to 100 tuples from the latest version.
                    DataTable firstPage = datasetManager.GetLatestDatasetVersionTuples(latestVersion.Dataset.Id, 0, 100);

                    Session["gridTotal"] = datasetManager.GetDatasetVersionEffectiveTupleCount(latestVersion);

                    ShowPrimaryDataModel tabularModel =
                        ShowPrimaryDataModel.Convert(datasetID, title, structured, firstPage, downloadAccess);
                    return PartialView(tabularModel);
                }

                if (structureType == typeof(UnStructuredDataStructure))
                {
                    ShowPrimaryDataModel fileModel = ShowPrimaryDataModel.Convert(
                        datasetID,
                        title,
                        structure,
                        SearchUIHelper.GetContantDescriptorFromKey(latestVersion, "unstructuredData"),
                        downloadAccess);
                    return PartialView(fileModel);
                }

                // Neither structure type matched.
                return PartialView(null);
            }
            finally
            {
                datasetManager.Dispose();
                structureManager.Dispose();
                permissionManager.Dispose();
            }
        }
예제 #7
0
        /// <summary>
        /// Adds the primary data values of a checked-in, structured dataset to a Lucene document.
        /// Nothing is indexed when the dataset is not checked in or has no structured data
        /// structure. The data structure is indexed first; tuple values are indexed only when
        /// <c>includePrimaryData</c> is set.
        /// </summary>
        /// <param name="id">dataset id</param>
        /// <param name="categoryNodes">
        /// category configuration nodes; the attributes "primitive_type", "lucene_name",
        /// "analysed", "boost" and "type" are read from each node
        /// </param>
        /// <param name="dataset">Lucene document that receives the generated fields</param>
        /// <param name="docId">document id passed on to the auto-complete index</param>
        /// <param name="metadataDoc">not used by this method</param>
        private void indexPrimaryData(long id, List<XmlNode> categoryNodes, ref Document dataset, string docId, XmlDocument metadataDoc)
        {
            DatasetManager       dm  = new DatasetManager();
            DataStructureManager dsm = new DataStructureManager();

            // The try block covers the early returns as well, so both managers are
            // released on every path. Exceptions propagate with their original stack trace.
            try
            {
                if (!dm.IsDatasetCheckedIn(id))
                {
                    return;
                }

                DatasetVersion          dsv = dm.GetDatasetLatestVersion(id);
                StructuredDataStructure sds = dsm.StructuredDataStructureRepo.Get(dsv.Dataset.DataStructure.Id);

                if (sds == null)
                {
                    return;
                }

                indexStructureDataStructcure(sds, ref dataset, docId);

                if (!includePrimaryData)
                {
                    return;
                }

                // Read the tuples page by page using the manager's preferred batch size.
                // The "+ 1" yields one extra (possibly empty) page when the tuple count is an
                // exact multiple of the batch size.
                int  fetchSize  = dm.PreferedBatchSize;
                long tupleSize  = dm.GetDatasetVersionEffectiveTupleCount(dsv);
                long noOfFetchs = tupleSize / fetchSize + 1;

                for (int round = 0; round < noOfFetchs; round++)
                {
                    List<string> primaryDataStringToindex;

                    // The using statement disposes the table; no explicit Dispose call is needed.
                    using (DataTable table = dm.GetLatestDatasetVersionTuples(dsv.Dataset.Id, round, fetchSize))
                    {
                        primaryDataStringToindex = getAllStringValuesFromTable(table);
                    }

                    foreach (XmlNode category in categoryNodes)
                    {
                        String primitiveType = category.Attributes.GetNamedItem("primitive_type").Value;
                        String lucene_name   = category.Attributes.GetNamedItem("lucene_name").Value;
                        String analysing     = category.Attributes.GetNamedItem("analysed").Value;
                        float  boosting      = Convert.ToSingle(category.Attributes.GetNamedItem("boost").Value);
                        var    toAnalyse     = Lucene.Net.Documents.Field.Index.NOT_ANALYZED;

                        if (analysing.ToLower().Equals("yes"))
                        {
                            toAnalyse = Lucene.Net.Documents.Field.Index.ANALYZED;
                        }

                        // Only categories of type "primary_data_field" receive the tuple values.
                        if (category.Attributes.GetNamedItem("type").Value.Equals("primary_data_field"))
                        {
                            if (primaryDataStringToindex != null && primaryDataStringToindex.Count > 0)
                            {
                                foreach (string pDataValue in primaryDataStringToindex)
                                {
                                    // Boosted, non-stored category field.
                                    Field a = new Field("category_" + lucene_name, pDataValue,
                                                        Lucene.Net.Documents.Field.Store.NO, toAnalyse);
                                    a.Boost = boosting;
                                    dataset.Add(a);

                                    // Stored, analyzed "ng_" fields: one per category and one shared "ng_all".
                                    dataset.Add(new Field("ng_" + lucene_name, pDataValue,
                                                          Lucene.Net.Documents.Field.Store.YES, Lucene.Net.Documents.Field.Index.ANALYZED));
                                    dataset.Add(new Field("ng_all", pDataValue, Lucene.Net.Documents.Field.Store.YES,
                                                          Lucene.Net.Documents.Field.Index.ANALYZED));

                                    writeAutoCompleteIndex(docId, lucene_name, pDataValue);
                                    writeAutoCompleteIndex(docId, "ng_all", pDataValue);
                                }
                            }
                        }
                    }
                }
            }
            finally
            {
                dm.Dispose();
                dsm.Dispose();
            }
        }
예제 #8
0
        public ActionResult dqSync()
        {
            using (var dm = new DatasetManager())
            {
                List <long>             datasetIds = dm.GetDatasetLatestIds(); //get latest
                EntityPermissionManager entityPermissionManager = new EntityPermissionManager();
                DataStructureManager    dsm = new DataStructureManager();

                try
                {
                    //datasetManager.SyncView(datasetIds, ViewCreationBehavior.Create | ViewCreationBehavior.Refresh);
                    // if the viewData has a model error, the redirect forgets about it.
                    string       pathPerformers         = @"C:\Data\DatasetQualities\Performers.csv";
                    StreamWriter writerPerformers       = new StreamWriter(pathPerformers);
                    string       pathPerformerDataset   = @"C:\Data\DatasetQualities\PerformerDataset.csv";
                    StreamWriter writerPerformerDataset = new StreamWriter(pathPerformerDataset);
                    string       pathComparison         = @"C:\Data\DatasetQualities\Comparison.csv";
                    StreamWriter writerComparison       = new StreamWriter(pathComparison);
                    string       pathDatasets           = @"C:\Data\DatasetQualities\datasetInfo.csv";
                    StreamWriter writerDatasets         = new StreamWriter(pathDatasets);
                    string       pathVariable           = @"C:\Data\DatasetQualities\Variables.csv";
                    StreamWriter writerVariable         = new StreamWriter(pathVariable);
                    string       pathFiles   = @"C:\Data\DatasetQualities\Files.csv";
                    StreamWriter writerFiles = new StreamWriter(pathFiles);

                    string variableHeader = "datasetId,VarLabel,varType,varDescription,varUse,varMissing";
                    writerVariable.WriteLine(variableHeader);

                    string                   performer;
                    List <string>            performerDataset = new List <string>();
                    Dictionary <string, int> performerCount   = new Dictionary <string, int>();
                    List <int>               metadataRates    = new List <int>();
                    int allValidMetadas = 0;
                    //int publicDatasets = 0; //could not get result
                    //int restrictedDatasets = 0; //could not get result
                    List <int>    dsDescLength       = new List <int>();
                    List <int>    dstrDescLength     = new List <int>();
                    List <int>    dstrUsage          = new List <int>();
                    List <int>    datasetSizeTabular = new List <int>();
                    List <int>    datasetRows        = new List <int>();
                    List <int>    datasetCols        = new List <int>();
                    List <double> datasetSizeFiles   = new List <double>(); //all files in all datasets
                    List <int>    datasetFileNumber  = new List <int>();
                    List <int>    sizeTabular        = new List <int>();    //collect size, column number, and row number for one dataset

                    int fileDatasets    = 0;
                    int tabularDatasets = 0;

                    int           fileNumber       = 0;
                    List <double> datasetTotalSize = new List <double>(); //total file size of each dataset
                    List <double> sizeFile         = new List <double>(); ///////////////////////////


                    foreach (long datasetId in datasetIds)
                    {
                        DatasetVersion datasetLatestVersion = dm.GetDatasetLatestVersion(datasetId);                               //get last dataset versions
                        DataStructure  dataStr = dsm.AllTypesDataStructureRepo.Get(datasetLatestVersion.Dataset.DataStructure.Id); //get data structure


                        #region performers
                        List <string> pers = new List <string>();
                        var           dsvs = dm.GetDatasetVersions(datasetId);
                        foreach (var d in dsvs)
                        {
                            performer = d.ModificationInfo.Performer;
                            if (performer != null && !pers.Contains(performer))
                            {
                                pers.Add(performer);  //a list of performers
                            }
                        }
                        foreach (var p in pers)
                        {
                            writerPerformerDataset.WriteLine(p + "," + datasetId); //fill the file PerformerDataset with a list of 'performer,datasetId'
                            if (performerCount.ContainsKey(p))
                            {
                                performerCount[p] += 1;
                            }
                            else
                            {
                                performerCount.Add(p, 1);
                            }
                        }
                        #endregion

                        #region allValidmetadatas
                        long metadataStructureId = dm.DatasetRepo.Get(datasetId).MetadataStructure.Id;
                        int  validMetadata       = 0;
                        if (datasetLatestVersion.StateInfo != null)
                        {
                            validMetadata = DatasetStateInfo.Valid.ToString().Equals(datasetLatestVersion.StateInfo.State) ? 1 : 0; //1:valid; 0:invalid.
                        }
                        else
                        {
                            validMetadata = 0;
                        }
                        if (validMetadata == 1)  //count how many datasets have valid metadata
                        {
                            allValidMetadas += 1;
                        }

                        #endregion

                        #region metadataRates
                        XmlDocument         metadata = datasetLatestVersion.Metadata;
                        string              xmlFrag  = metadata.OuterXml;
                        List <int>          metaInfo = new List <int>();
                        NameTable           nt       = new NameTable();
                        XmlNamespaceManager nsmgr    = new XmlNamespaceManager(nt);
                        // Create the XmlParserContext.
                        XmlParserContext context = new XmlParserContext(null, nsmgr, null, XmlSpace.None);
                        // Create the reader.
                        XmlTextReader reader = new XmlTextReader(xmlFrag, XmlNodeType.Element, context);

                        int countMetaAttr       = 0;
                        int countMetaComplition = 0;

                        // Parse the XML and display each node.
                        while (reader.Read())
                        {
                            //XmlTextReader myReader = reader;
                            if (reader.NodeType == XmlNodeType.Element)
                            {
                                if (reader.HasAttributes && reader.GetAttribute("type") == "MetadataAttribute")
                                {
                                    countMetaAttr += 1;
                                    reader.Read();
                                    if (reader.NodeType == XmlNodeType.Text)
                                    {
                                        string text = reader.Value;
                                        countMetaComplition += 1;
                                    }
                                }
                            }
                        }

                        // Close the reader.
                        reader.Close();
                        int rate = (countMetaComplition * 100) / countMetaAttr; //percentage of all metadata fields contains information

                        metadataRates.Add(rate);
                        #endregion

                        ////find how many public dataset exist
                        //int publicRights = entityPermissionManager.GetRights(null, 1, datasetId); //1:public; 0:restricted
                        //if (publicRights == 1) { publicDatasets += 1; }
                        //if (publicRights == 0) { restrictedDatasets += 1; }

                        ///issue here is that when a dataset has an empty dataset description field, the datasetLatestVersion.Description has the "not available" as value.
                        int datasetLatestVersionDescriptionLength = 0;
                        if (datasetLatestVersion.Description == "not available")
                        {
                            dsDescLength.Add(0);
                        }
                        else
                        {
                            dsDescLength.Add(datasetLatestVersion.Description.Length); //get dataset description length
                            datasetLatestVersionDescriptionLength = datasetLatestVersion.Description.Length;
                        }

                        dstrDescLength.Add(datasetLatestVersion.Dataset.DataStructure.Description.Length); //get data structure description length
                        dstrUsage.Add(dataStr.Datasets.Count() - 1);                                       //data structure is used in how many other datasets (doesn't contain the current one)

                        string type = "file";
                        if (dataStr.Self.GetType() == typeof(StructuredDataStructure))
                        {
                            type = "tabular";
                        }                                                                                    //get dataset type
                        int colNum = 0;
                        int rowNum = 0;
                        #region tabular dataset
                        if (type == "tabular")
                        {
                            tabularDatasets += 1;
                            try
                            {
                                DataTable               table   = dm.GetLatestDatasetVersionTuples(datasetId, true);
                                DataRowCollection       rowss   = table.Rows;
                                DataColumnCollection    columns = table.Columns;
                                StructuredDataStructure sds     = dsm.StructuredDataStructureRepo.Get(datasetLatestVersion.Dataset.DataStructure.Id); //get data structure
                                var variables = sds.Variables;                                                                                        //get variables
                                //sizeTabular[1] = variables.Count; //columns.Count - 4;
                                //if (sizeTabular[1] < 0) //if data structure has not been designed.
                                //{
                                //    sizeTabular[1] = 0;
                                //}
                                //sizeTabular[2] = rowss.Count;
                                //sizeTabular[0] = sizeTabular[1] * sizeTabular[2];
                                colNum = variables.Count;
                                rowNum = rowss.Count;

                                #region variables

                                int columnNumber = -1; //First four columns are added from system.
                                if (variables.Count() > 0)
                                {
                                    foreach (var variable in variables)
                                    {
                                        columnNumber += 1;
                                        //string missingValue = variable.MissingValue; //MISSING VALUE
                                        List <string> missingValues = new List <string>(); //creat a list contains missing values
                                        DataTable     missTable     = new DataTable();
                                        foreach (var missValue in variable.MissingValues)  //if data is equal missing value
                                        {
                                            missingValues.Add(missValue.Placeholder);
                                        }
                                        var    varUse  = variable.DataAttribute.UsagesAsVariable.Count() - 1;
                                        string varType = variable.DataAttribute.DataType.SystemType;

                                        int varMissing = 100; //suppose 100% is completed
                                        try
                                        {
                                            if (rowss.Count > 0)
                                            {
                                                int missing = rowss.Count;
                                                foreach (DataRow row in rowss)
                                                {
                                                    var value = row.ItemArray[columnNumber];                       //.ToString();
                                                    if (value == null || missingValues.Contains(value.ToString())) //check if cell is emty or contains a missing value
                                                    {
                                                        missing -= 1;
                                                    }
                                                }
                                                varMissing = 100 * missing / rowss.Count; //% of existing values
                                            }
                                            else
                                            {
                                                varMissing = 0;
                                            }
                                        }
                                        catch
                                        {
                                            varMissing = 0;
                                        }
                                        string variableLine = datasetId + ","                      //0: dataset Id
                                                              + variable.Label + ","               //1: variable name
                                                              + varType + ","                      //2: data type
                                                              + variable.Description.Count() + "," //3: variable description length
                                                              + varUse + ","                       //4: variable usage
                                                              + varMissing;                        //5: % completed
                                        writerVariable.WriteLine(variableLine);
                                    }
                                }
                                #endregion
                            }
                            catch
                            {
                                colNum = 0;
                                rowNum = 0;
                                //sizeTabular.Add(0);
                                //sizeTabular.Add(0);
                                //sizeTabular.Add(0);
                            }



                            datasetSizeTabular.Add(colNum * rowNum); //sizeTabular[0]);
                            datasetCols.Add(colNum);                 // sizeTabular[1]); //column number
                            datasetRows.Add(rowNum);                 // sizeTabular[2]); //row number
                        }

                        #endregion

                        #region file dataset
                        else if (type == "file")
                        {
                            fileDatasets += 1;
                            List <ContentDescriptor> contentDescriptors = datasetLatestVersion.ContentDescriptors.ToList();
                            fileNumber = contentDescriptors.Count;
                            //datasetFileNumber.Add(fileNumber);
                            //sizeFile.Add(fileNumber);
                            int    fileNum       = 0;
                            double totalSize     = 0;
                            string fileInDataset = "";
                            if (contentDescriptors.Count > 0)
                            {
                                foreach (ContentDescriptor cd in contentDescriptors)
                                {
                                    if (cd.Name.ToLower().Equals("unstructureddata"))
                                    {
                                        fileNum += 1;

                                        string uri  = cd.URI;
                                        String path = Server.UrlDecode(uri);
                                        path = Path.Combine(AppConfiguration.DataPath, path);
                                        try
                                        {
                                            Stream     fileStream = System.IO.File.OpenRead(path);
                                            FileStream fs         = fileStream as FileStream;
                                            if (fs != null)
                                            {
                                                FileInformation fileInfo = new FileInformation(fs.Name.Split('\\').LastOrDefault(), MimeMapping.GetMimeMapping(fs.Name), (uint)fs.Length, uri);
                                                totalSize    += fileInfo.Size;
                                                fileInDataset = datasetId + "," + fileInfo.Name.Split('.')[0] + "," + fileInfo.Name.Split('.')[1].ToLower() + "," + fileInfo.Size; //datasetId,file name,file extension,file size
                                                writerFiles.WriteLine(fileInDataset);
                                            }
                                        }
                                        catch
                                        {
                                            datasetSizeFiles.Add(0); //file size
                                        }
                                    }
                                }

                                datasetFileNumber.Add(fileNum);
                                datasetTotalSize.Add(totalSize);
                            }
                            else
                            {
                                datasetFileNumber.Add(0);
                                datasetTotalSize.Add(0);
                            }
                        }

                        #endregion

                        //[0]datasetId, [1]dataType, [2]IsValid, [3]metadataComplitionRate,
                        //[4]datasetDescLength, [5]dataStrDescrLength, [6]DataStrUsage,
                        //[7]columns, [8]rows, [9]file numbers, [10]file sizes, [11]performers

                        string datasetInfo = datasetId + ";" + type + ";" + validMetadata + ";" + rate + ";"
                                             + datasetLatestVersionDescriptionLength + ";"
                                             + datasetLatestVersion.Dataset.DataStructure.Description.Length + ";"
                                             + (dataStr.Datasets.Count() - 1);
                        if (type == "tabular")
                        {
                            datasetInfo = datasetInfo + ";" + datasetCols.Last()  //column number
                                          + ";" + datasetRows.Last()              //row number
                                          + ";0;0";                               //file number and size
                        }
                        if (type == "file")
                        {
                            datasetInfo = datasetInfo + ";0;0"             //column and row number
                                          + ";" + datasetFileNumber.Last() //sizeFile[0]             //file number
                                          + ";" + datasetTotalSize.Last(); //sizeFile[1];            //total size
                        }
                        string prfmrs = "";
                        foreach (string p in pers)
                        {
                            prfmrs = prfmrs + FindPerformerNameFromUsername(p) + ",";
                        }
                        prfmrs.Remove(prfmrs.Length - 1, 1);
                        datasetInfo = datasetInfo + ";" + prfmrs;
                        writerDatasets.WriteLine(datasetInfo);
                    }
                    writerDatasets.Close();

                    #region performersInFile
                    //write a list of 'performer,activity' in Performers.csv
                    foreach (string p in performerCount.Keys)
                    {
                        string l = p + "," + performerCount[p];
                        writerPerformers.WriteLine(l);
                    }

                    // performer activities
                    int        performerMin        = performerCount.Values.Min();
                    int        performerMax        = performerCount.Values.Max();
                    List <int> performerActivities = new List <int>();
                    foreach (int s in performerCount.Values)
                    {
                        performerActivities.Add(s);
                    }
                    double performerMedian  = medianCalc(performerActivities);
                    string performerCompare = "performersActivity," + performerMin + "," + performerMedian + "," + performerMax;
                    writerComparison.WriteLine(performerCompare); //performersActivity
                    writerPerformers.Close();
                    writerPerformerDataset.Close();
                    #endregion

                    #region datasetInfo in file

                    #endregion //datasetInfo in file

                    #region compare in file
                    string m = "metadataRates," + metadataRates.Min() + "," + medianCalc(metadataRates) + "," + metadataRates.Max();
                    writerComparison.WriteLine(m);
                    string allValids = "allValidMetadas," + allValidMetadas;
                    writerComparison.WriteLine(allValids);
                    //string pd = "publicDatasets," + publicDatasets;
                    //string rd = "restrictedDatasets," + restrictedDatasets;
                    //writerComparison.WriteLine(pd);
                    //writerComparison.WriteLine(rd);
                    string datasetDescriptionLength = "datasetDescriptionLength," + dsDescLength.Min() + "," + medianCalc(dsDescLength) + "," + dsDescLength.Max();
                    string dataStrDescriptionLength = "dataStrDescriptionLength," + dstrDescLength.Min() + "," + medianCalc(dstrDescLength) + "," + dstrDescLength.Max();
                    string dataStrUsage             = "dataStrUsage," + dstrUsage.Min() + "," + medianCalc(dstrUsage) + "," + dstrUsage.Max();
                    writerComparison.WriteLine(datasetDescriptionLength);
                    writerComparison.WriteLine(dataStrDescriptionLength);
                    writerComparison.WriteLine(dataStrUsage);

                    string typeDataset = "type," + (tabularDatasets + fileDatasets) + "," + tabularDatasets + "," + fileDatasets;
                    writerComparison.WriteLine(typeDataset);

                    string cols          = "datasetColNumber," + datasetCols.Min() + "," + medianCalc(datasetCols) + "," + datasetCols.Max();
                    string rows          = "datasetRowNumber," + datasetRows.Min() + "," + medianCalc(datasetRows) + "," + datasetRows.Max();
                    string fileNums      = "";
                    string fileSizes     = "";
                    string totalFileSize = "";
                    //if (datasetFileNumber.Count > 0)
                    //{
                    fileNums = "datasetFileNumber," + datasetFileNumber.Min() + "," + medianCalc(datasetFileNumber) + "," + datasetFileNumber.Max();
                    //fileSizes = "datasetSizeFiles," + datasetSizeFiles.Min() + "," + medianCalc(datasetSizeFiles) + "," + datasetSizeFiles.Max();
                    totalFileSize = "datasetTotalSizeFiles," + datasetTotalSize.Min() + "," + medianCalc(datasetTotalSize) + "," + datasetTotalSize.Max();
                    //}
                    //else
                    //{
                    //    fileNums = "datasetFileNumber," + 0 + "," + 0 + "," + 0;
                    //    //fileSizes = "datasetSizeFiles," + 0 + "," + 0 + "," + 0;
                    //    totalFileSize = "datasetTotalSizeFiles," + 0 + "," + 0 + "," + 0;

                    //}
                    writerComparison.WriteLine(cols);
                    writerComparison.WriteLine(rows);
                    writerComparison.WriteLine(fileNums);
                    writerComparison.WriteLine(fileSizes);
                    writerComparison.WriteLine(totalFileSize);
                    #endregion


                    writerComparison.Close();
                    //writerDatasets.Close();
                    writerVariable.Close();
                    writerFiles.Close();
                    return(View());
                    //return RedirectToAction("Index", new { area = "dqm" });
                }
                catch (Exception ex)
                {
                    ViewData.ModelState.AddModelError("", $@"'{ex.Message}'");
                    return(RedirectToAction("dqError", new { area = "dqm" }));
                }
            }
        }
예제 #9
0
        /// <summary>
        /// Loads the primary data of one version of a dataset and returns it as a CSV response.
        /// </summary>
        /// <remarks>
        /// Access is granted when the dataset is public, or when <paramref name="token"/> matches a user
        /// that has <c>RightType.Read</c> on the dataset. Only datasets whose data structure is a
        /// <c>StructuredDataStructure</c> produce data; any other structure yields an empty response.
        /// NOTE(review): the three access-denied responses are created without an explicit status code and
        /// only differ by their text content - consider returning 401/403 if callers do not rely on this.
        /// </remarks>
        /// <param name="id">Id of the dataset to load.</param>
        /// <param name="version">
        /// Requested version. <c>-1</c>, or a value equal to the number of versions, selects the latest version;
        /// any other value is used as a 1-based position in the versions ordered by timestamp.
        /// </param>
        /// <param name="token">Token used to look up the requesting user; may be empty for public datasets.</param>
        /// <param name="projection">Optional comma separated list of column names; upper-cased before it is applied.</param>
        /// <param name="selection">Optional selection expression handed to <c>OutputDataManager.SelectionOnDataTable</c>.</param>
        /// <returns>
        /// A <c>text/csv</c> response carrying the data table, an empty response for non-structured datasets,
        /// a text response when access is denied, or a 412 (Precondition Failed) response when the requested
        /// version does not exist.
        /// </returns>
        private HttpResponseMessage getData(long id, int version, string token, string projection = null, string selection = null)
        {
            DatasetManager datasetManager = new DatasetManager();
            UserManager userManager = new UserManager();
            EntityPermissionManager entityPermissionManager = new EntityPermissionManager();
            EntityManager entityManager = new EntityManager();

            // No catch clause: exceptions propagate unchanged (with their original stack trace),
            // the finally block only guarantees that the managers are disposed.
            try
            {
                // If a dataset is public, the api also returns data when no user token is supplied.
                // -1 is used as entity type id when the Dataset entity is not registered.
                long entityTypeId = entityManager.FindByName(typeof(Dataset).Name)?.Id ?? -1;
                bool isPublic = entityPermissionManager.Exists(null, entityTypeId, id);

                if (!isPublic && String.IsNullOrEmpty(token))
                {
                    var missingToken = Request.CreateResponse();
                    missingToken.Content = new StringContent("Bearer token not exist.");
                    return missingToken;
                }

                User user = userManager.Users.Where(u => u.Token.Equals(token)).FirstOrDefault();

                if (!isPublic && user == null)
                {
                    var unknownUser = Request.CreateResponse();
                    unknownUser.Content = new StringContent("User is not available.");
                    return unknownUser;
                }

                // user is guaranteed to be non-null here whenever the dataset is not public
                if (!isPublic && !entityPermissionManager.HasEffectiveRight(user.Name, typeof(Dataset), id, RightType.Read))
                {
                    var noReadRight = Request.CreateResponse();
                    noReadRight.Content = new StringContent("User has no read right.");
                    return noReadRight;
                }

                Dataset dataset = datasetManager.GetDataset(id);
                int versionCount = dataset.Versions.Count;

                // If the requested version is -1 or the last version of the dataset, the data is loaded
                // in a different way than when loading the data from an older version.
                bool isLatestVersion = version == -1 || version == versionCount;

                DatasetVersion datasetVersion;
                if (isLatestVersion)
                {
                    datasetVersion = datasetManager.GetDatasetLatestVersion(id);
                }
                else
                {
                    // Reject versions beyond the last one as well as 0 and negative values (other than -1),
                    // which would otherwise produce a negative index for ElementAt below.
                    if (version < 1 || version > versionCount)
                    {
                        return Request.CreateResponse(HttpStatusCode.PreconditionFailed, String.Format("This version ({0}) is not available for the dataset", version));
                    }

                    datasetVersion = dataset.Versions.OrderBy(d => d.Timestamp).ElementAt(version - 1);
                }

                // Only structured (tabular) datasets carry tuples; everything else gets an empty response.
                if (!(datasetVersion.Dataset.DataStructure.Self is StructuredDataStructure))
                {
                    return Request.CreateResponse();
                }

                // Load all tuples of the selected version.
                DataTable dt;
                if (isLatestVersion)
                {
                    long count = datasetManager.RowCount(id);
                    dt = datasetManager.GetLatestDatasetVersionTuples(id, null, null, null, 0, (int)count);
                }
                else
                {
                    int count = datasetManager.GetDatasetVersionEffectiveTuples(datasetVersion).Count;
                    dt = datasetManager.GetDatasetVersionTuples(datasetVersion.Id, 0, count);
                }

                dt.Strip();

                if (!string.IsNullOrEmpty(selection))
                {
                    // NOTE(review): the latest-version path passes an extra 'true' argument while the
                    // older-version path uses the two-argument overload; kept as found - confirm it is intended.
                    dt = isLatestVersion
                        ? OutputDataManager.SelectionOnDataTable(dt, selection, true)
                        : OutputDataManager.SelectionOnDataTable(dt, selection);
                }

                if (!string.IsNullOrEmpty(projection))
                {
                    // make the header names upper case to make them case insensitive
                    dt = OutputDataManager.ProjectionOnDataTable(dt, projection.ToUpper().Split(','));
                }

                dt.TableName = id + "_data";

                DatasetModel model = new DatasetModel();
                model.DataTable = dt;

                var response = Request.CreateResponse();
                response.Content = new ObjectContent(typeof(DatasetModel), model, new DatasetModelCsvFormatter(model.DataTable.TableName));
                response.Content.Headers.ContentType = new MediaTypeHeaderValue("text/csv");

                return response;
            }
            finally
            {
                datasetManager.Dispose();
                userManager.Dispose();
                entityPermissionManager.Dispose();
                entityManager.Dispose();
            }
        }