C# (CSharp) Microsoft.Data.Analysis DataFrameColumn.GetGroupedOccurrences 예제들

프로그래밍 언어: C# (CSharp)

네임스페이스/패키지 이름: Microsoft.Data.Analysis

클래스/타입: DataFrameColumn

메소드/함수: GetGroupedOccurrences

hotexamples.com에서의 예제들: 2

C# (CSharp) Microsoft.Data.Analysis DataFrameColumn.GetGroupedOccurrences - 2개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 C# (CSharp)의 Microsoft.Data.Analysis.DataFrameColumn.GetGroupedOccurrences에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

Add(2)

ElementwiseNotEquals(2)

Subtract(2)

Or(2)

Multiply(2)

And(2)

GroupColumnValues(2)

GetGroupedOccurrences(2)

Modulo(2)

ElementwiseLessThanOrEqual(2)

ElementwiseGreaterThan(2)

Clone(2)

ElementwiseEquals(2)

Xor(2)

ElementwiseGreaterThanOrEqual(2)

ElementwiseLessThan(2)

ReverseOr(1)

ToArrowArray(1)

GetDataViewGetter(1)

SetName(1)

RightShift(1)

ReverseXor(1)

ReverseSubtract(1)

ReverseMultiply(1)

LeftShift(1)

ReverseModulo(1)

ReverseDivide(1)

GetArrowField(1)

ReverseAdd(1)

Divide(1)

GetMaxRecordBatchLength(1)

ReverseAnd(1)

예제 #1

파일 보기

파일: DataFrame.Join.cs 프로젝트: ScriptBox21/dotnet-ML.NET

        private static HashSet <long> Merge(DataFrame retainedDataFrame, DataFrame supplementaryDataFrame, string[] retainedJoinColumnNames, string[] supplemetaryJoinColumnNames, out PrimitiveDataFrameColumn <long> retainedRowIndices, out PrimitiveDataFrameColumn <long> supplementaryRowIndices, bool isInner = false, bool calculateIntersection = false)
        {
            if (retainedJoinColumnNames == null)
            {
                throw new ArgumentNullException(nameof(retainedJoinColumnNames));
            }

            if (supplemetaryJoinColumnNames == null)
            {
                throw new ArgumentNullException(nameof(supplemetaryJoinColumnNames));
            }

            if (retainedJoinColumnNames.Length != supplemetaryJoinColumnNames.Length)
            {
                throw new ArgumentException(Strings.MismatchedArrayLengths, nameof(retainedJoinColumnNames));
            }


            HashSet <long> intersection = calculateIntersection ? new HashSet <long>() : null;

            // Get occurrences of values in columns used for join in the retained and supplementary dataframes
            Dictionary <long, ICollection <long> > occurrences    = null;
            Dictionary <long, long> retainedIndicesReverseMapping = null;

            HashSet <long> supplementaryJoinColumnsNullIndices = new HashSet <long>();


            for (int colNameIndex = 0; colNameIndex < retainedJoinColumnNames.Length; colNameIndex++)
            {
                DataFrameColumn shrinkedRetainedColumn = retainedDataFrame.Columns[retainedJoinColumnNames[colNameIndex]];

                //shrink retained column by row occurrences from previous step
                if (occurrences != null)
                {
                    //only rows with occurences from previose step should go for futher processing
                    var shrinkedRetainedIndices = occurrences.Keys.ToArray();

                    //create reverse mapping of index of the row in the shrinked column to the index of this row in the original dataframe (new index -> original index)
                    var newRetainedIndicesReverseMapping = new Dictionary <long, long>(shrinkedRetainedIndices.Length);

                    for (int i = 0; i < shrinkedRetainedIndices.Length; i++)
                    {
                        //store reverse mapping to restore original dataframe indices from indices in shrinked row
                        var originalIndex = shrinkedRetainedIndices[i];
                        newRetainedIndicesReverseMapping.Add(i, originalIndex);
                    }

                    retainedIndicesReverseMapping = newRetainedIndicesReverseMapping;
                    shrinkedRetainedColumn        = shrinkedRetainedColumn.Clone(new Int64DataFrameColumn("Indices", shrinkedRetainedIndices));
                }

                DataFrameColumn supplementaryColumn = supplementaryDataFrame.Columns[supplemetaryJoinColumnNames[colNameIndex]];

                //Find occurrenses on current step (join column)
                var newOccurrences = shrinkedRetainedColumn.GetGroupedOccurrences(supplementaryColumn, out HashSet <long> supplementaryColumnNullIndices);

                //Convert indices from in key from local (shrinked row) to indices in original dataframe
                if (retainedIndicesReverseMapping != null)
                {
                    newOccurrences = newOccurrences.ToDictionary(kvp => retainedIndicesReverseMapping[kvp.Key], kvp => kvp.Value);
                }

                supplementaryJoinColumnsNullIndices.UnionWith(supplementaryColumnNullIndices);

                // shrink join result on current column by previous join columns (if any)
                // (we have to remove occurrences that doesn't exist in previous columns, because JOIN happens only if ALL left and right columns in JOIN are matched)
                if (occurrences != null)
                {
                    var shrinkedOccurences = new Dictionary <long, ICollection <long> >();

                    foreach (var kvp in newOccurrences)
                    {
                        var newValue = kvp.Value.Where(i => occurrences[kvp.Key].Contains(i)).ToArray();
                        if (newValue.Any())
                        {
                            shrinkedOccurences.Add(kvp.Key, newValue);
                        }
                    }
                    newOccurrences = shrinkedOccurences;
                }

                occurrences = newOccurrences;
            }

            retainedRowIndices      = new Int64DataFrameColumn("RetainedIndices");
            supplementaryRowIndices = new Int64DataFrameColumn("SupplementaryIndices");

            //Perform Merging
            var retainJoinColumns = retainedJoinColumnNames.Select(name => retainedDataFrame.Columns[name]).ToArray();

            for (long i = 0; i < retainedDataFrame.Columns.RowCount; i++)
            {
                if (!IsAnyNullValueInColumns(retainJoinColumns, i))
                {
                    //Get all row indexes from supplementary dataframe that sutisfy JOIN condition
                    if (occurrences.TryGetValue(i, out ICollection <long> rowIndices))
                    {
                        foreach (long supplementaryRowIndex in rowIndices)
                        {
                            retainedRowIndices.Append(i);
                            supplementaryRowIndices.Append(supplementaryRowIndex);

                            //store intersection if required
                            if (calculateIntersection)
                            {
                                if (!intersection.Contains(supplementaryRowIndex))
                                {
                                    intersection.Add(supplementaryRowIndex);
                                }
                            }
                        }
                    }
                    else
                    {
                        if (isInner)
                        {
                            continue;
                        }

                        retainedRowIndices.Append(i);
                        supplementaryRowIndices.Append(null);
                    }
                }
                else
                {
                    foreach (long row in supplementaryJoinColumnsNullIndices)
                    {
                        retainedRowIndices.Append(i);
                        supplementaryRowIndices.Append(row);
                    }
                }
            }

            return(intersection);
        }

예제 #2

파일 보기

        private static Dictionary <long, ICollection <long> > GetOccurences(DataFrame retainedDataFrame, DataFrame supplementaryDataFrame,
                                                                            string[] retainedJoinColumnNames, string[] supplemetaryJoinColumnNames, out HashSet <long> supplementaryJoinColumnsNullIndices)
        {
            supplementaryJoinColumnsNullIndices = new HashSet <long>();

            // Get occurrences of values in columns used for join in the retained and supplementary dataframes

            Dictionary <long, ICollection <long> > occurrences    = null;
            Dictionary <long, long> retainedIndicesReverseMapping = null;

            for (int colNameIndex = 0; colNameIndex < retainedJoinColumnNames.Length; colNameIndex++)
            {
                DataFrameColumn shrinkedRetainedColumn = retainedDataFrame.Columns[retainedJoinColumnNames[colNameIndex]];

                // Shrink retained column by row occurrences from previous step
                if (occurrences != null)
                {
                    // Only rows with occurences from previose step should go for futher processing
                    var shrinkedRetainedIndices = occurrences.Keys.ToArray();

                    // Create reverse mapping of index of the row in the shrinked column to the index of this row in the original dataframe (new index -> original index)
                    var newRetainedIndicesReverseMapping = new Dictionary <long, long>(shrinkedRetainedIndices.Length);

                    for (int i = 0; i < shrinkedRetainedIndices.Length; i++)
                    {
                        // Store reverse mapping to restore original dataframe indices from indices in shrinked row
                        var originalIndex = shrinkedRetainedIndices[i];
                        newRetainedIndicesReverseMapping.Add(i, originalIndex);
                    }

                    retainedIndicesReverseMapping = newRetainedIndicesReverseMapping;

                    var indices = new Int64DataFrameColumn("Indices", shrinkedRetainedIndices);
                    shrinkedRetainedColumn = shrinkedRetainedColumn.Clone(indices);
                }

                DataFrameColumn supplementaryColumn = supplementaryDataFrame.Columns[supplemetaryJoinColumnNames[colNameIndex]];

                // Find occurrenses on current step (join column)
                var newOccurrences = shrinkedRetainedColumn.GetGroupedOccurrences(supplementaryColumn, out HashSet <long> supplementaryColumnNullIndices);

                // Convert indices from in key from local (shrinked row) to indices in original dataframe
                if (retainedIndicesReverseMapping != null)
                {
                    newOccurrences = newOccurrences.ToDictionary(kvp => retainedIndicesReverseMapping[kvp.Key], kvp => kvp.Value);
                }

                supplementaryJoinColumnsNullIndices.UnionWith(supplementaryColumnNullIndices);

                // Shrink join result on current column by previous join columns (if any)
                // (we have to remove occurrences that doesn't exist in previous columns, because JOIN happens only if ALL left and right columns in JOIN are matched)
                if (occurrences != null)
                {
                    newOccurrences = GetShrinkedOccurences(occurrences, newOccurrences);
                }

                occurrences = newOccurrences;
            }

            return(occurrences);
        }