Exemplo n.º 1
0
 /// <summary>
 /// Registers the radix of every value stored at indexes 0 through
 /// <paramref name="highestSetIndex"/> (inclusive) with the given radix.
 /// </summary>
 /// <param name="values">Array the values are read from.</param>
 /// <param name="radix">Radix that each value is registered with.</param>
 /// <param name="highestSetIndex">Last index (inclusive) to read from <paramref name="values"/>.</param>
 private static void UpdateRadix(LongArray values, Radix radix, long highestSetIndex)
 {
     long cursor = 0;
     while (cursor <= highestSetIndex)
     {
         long current = values.Get(cursor);
         radix.RegisterRadixOf(current);
         cursor++;
     }
 }
Exemplo n.º 2
0
 /// <summary>
 /// Creates a sorter by capturing its collaborators; no sorting work is started here.
 /// </summary>
 /// <param name="radix">Source of the radix index counts and the radix calculator.</param>
 /// <param name="dataCache">Data cache stored for use by the sort.</param>
 /// <param name="highestSetIndex">Highest set index stored for use by the sort.</param>
 /// <param name="tracker">Tracker stored for use by the sort.</param>
 /// <param name="threads">Thread count stored for use by the sort.</param>
 /// <param name="progress">Progress listener stored for use by the sort.</param>
 /// <param name="comparator">Comparator stored for use by the sort.</param>
 public ParallelSort(Radix radix, LongArray dataCache, long highestSetIndex, Tracker tracker, int threads, ProgressListener progress, Comparator comparator)
 {
     // Data being sorted and its bookkeeping.
     _dataCache = dataCache;
     _highestSetIndex = highestSetIndex;
     _tracker = tracker;

     // State derived from the radix (read in the same order as before).
     _radixIndexCount = radix.RadixIndexCounts;
     _radixCalculator = radix.Calculator();

     // Execution settings.
     _threads = threads;
     _progress = progress;
     _comparator = comparator;
 }
Exemplo n.º 3
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in C#:
//ORIGINAL LINE: private void detectDuplicateInputIds(Radix radix, org.neo4j.unsafe.impl.batchimport.input.Collector collector, org.neo4j.helpers.progress.ProgressListener progress) throws InterruptedException
        /// <summary>
        /// Sorts the collision tracker cache and then scans it in sorted order, reporting every input id
        /// that occurs more than once for the same encoded id and group.
        /// </summary>
        /// <param name="radix">Radix handed to <c>ParallelSort</c> for sorting the collision entries.</param>
        /// <param name="collector">Receives each detected duplicate through <c>CollectDuplicateNode</c>.</param>
        /// <param name="progress">Progress listener; passed to the sort and then advanced once per scanned collision.</param>
        private void DetectDuplicateInputIds(Radix radix, Collector collector, ProgressListener progress)
        {
            // We do this collision sort using ParallelSort which has the data cache and the tracker cache,
            // the tracker cache gets sorted, data cache stays intact. In the collision data case we actually
            // have one more layer in here so we have tracker cache pointing to collisionNodeIdCache
            // pointing to dataCache. This can be done using the ParallelSort.Comparator abstraction.
            //
            // The Comparator below takes into account dataIndex for each eId it is comparing so that an extra
            // comparison based on dataIndex is done if it's comparing two equal eIds. We do this so that
            // stretches of multiple equal eIds are sorted by dataIndex (i.e. node id) order,
            // to be able to write an efficient duplication scanning below and to have deterministic duplication reporting.
            Comparator duplicateComparator = new ComparatorAnonymousInnerClass(this);

            (new ParallelSort(radix, As5ByteLongArray(_collisionNodeIdCache), _numberOfCollisions - 1, _collisionTrackerCache, _processorsForParallelWork, progress, duplicateComparator)).run();

            // Here we have a populated (and now sorted) collision tracker cache.
            // We want to detect duplicate input ids within it.
            long previousEid             = 0;
            int  previousGroupId         = -1;
            SameInputIdDetector detector = new SameInputIdDetector();

            progress.Started("DEDUPLICATE");
            // The counter is a long on purpose: the collision count is used as a long elsewhere, and an
            // int counter would wrap to a negative value (and never terminate) past int.MaxValue entries.
            for (long i = 0; i < _numberOfCollisions; i++)
            {
                // Each collision entry packs the node id in bytes 0-4 and the collision-value offset after it.
                long collisionIndex = _collisionTrackerCache.get(i);
                long nodeId         = _collisionNodeIdCache.get5ByteLong(collisionIndex, 0);
                long offset         = _collisionNodeIdCache.get6ByteLong(collisionIndex, 5);
                long eid            = _dataCache.get(nodeId);
                int  groupId        = GroupOf(nodeId);
                // collisions of same eId AND groupId are always together
                bool same = eid == previousEid && previousGroupId == groupId;
                if (!same)
                {
                    // A new (eId, group) stretch starts here, so forget the ids seen in the previous one.
                    detector.Clear();
                }

                // Potential duplicate
                object inputId            = _collisionValues.get(offset);
                long   nonDuplicateNodeId = detector.Add(nodeId, inputId);
                if (nonDuplicateNodeId != -1)
                {                         // Duplicate
                    collector.CollectDuplicateNode(inputId, nodeId, _groups.get(groupId).name());
                    _trackerCache.markAsDuplicate(nodeId);
                    UnmarkAsCollision(nonDuplicateNodeId);
                }

                previousEid     = eid;
                previousGroupId = groupId;
                progress.Add(1);
            }
            progress.Done();
        }
Exemplo n.º 4
0
 /// <summary>
 /// Creates the mapper: stores the supplied collaborators, clamps the worker count to at least one,
 /// and allocates the data cache, the group cache and an initial radix.
 /// </summary>
 /// <param name="cacheFactory">Factory used here to allocate the data cache and the group cache; also stored.</param>
 /// <param name="encoder">Encoder stored for later use.</param>
 /// <param name="radixFactory">Factory that supplies the initial radix; also stored.</param>
 /// <param name="monitor">Monitor stored for later use.</param>
 /// <param name="trackerFactory">Tracker factory stored for later use.</param>
 /// <param name="groups">Groups; their count sizes the group cache selection.</param>
 /// <param name="collisionValuesFactory">Factory for collision values, stored for later use.</param>
 /// <param name="chunkSize">Chunk size passed to both cache allocations.</param>
 /// <param name="processorsForParallelWork">Requested worker count; values below 1 are raised to 1.</param>
 /// <param name="comparator">Comparator stored for later use.</param>
 internal EncodingIdMapper(NumberArrayFactory cacheFactory, Encoder encoder, Factory <Radix> radixFactory, Monitor monitor, TrackerFactory trackerFactory, Groups groups, System.Func <long, CollisionValues> collisionValuesFactory, int chunkSize, int processorsForParallelWork, Comparator comparator)
 {
     // Plain collaborators, stored as given.
     _cacheFactory = cacheFactory;
     _radixFactory = radixFactory;
     _trackerFactory = trackerFactory;
     _collisionValuesFactory = collisionValuesFactory;
     _monitor = monitor;
     _comparator = comparator;
     _groups = groups;
     _encoder = encoder;

     // Never run with fewer than one worker.
     _processorsForParallelWork = max(processorsForParallelWork, 1);

     // Allocations, performed in the same order as before.
     _dataCache = cacheFactory.NewDynamicLongArray(chunkSize, GAP_VALUE);
     _groupCache = GroupCache.select(cacheFactory, chunkSize, groups.Size());
     _radix = radixFactory.NewInstance();
 }
Exemplo n.º 5
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in C#:
//ORIGINAL LINE: private void buildCollisionInfo(System.Func<long, Object> inputIdLookup, long pessimisticNumberOfCollisions, org.neo4j.unsafe.impl.batchimport.input.Collector collector, org.neo4j.helpers.progress.ProgressListener progress) throws InterruptedException
        /// <summary>
        /// Collects every node whose encoded id is flagged as a collision into the collision caches,
        /// then runs duplicate detection over them and releases the temporary collision tracker cache.
        /// </summary>
        /// <param name="inputIdLookup">Returns the original input id for a given node id.</param>
        /// <param name="pessimisticNumberOfCollisions">Capacity used to size the collision caches.</param>
        /// <param name="collector">Passed on to duplicate detection to receive detected duplicates.</param>
        /// <param name="progress">Progress listener, advanced once per scanned node id.</param>
        private void BuildCollisionInfo(System.Func <long, object> inputIdLookup, long pessimisticNumberOfCollisions, Collector collector, ProgressListener progress)
        {
            progress.Started("RESOLVE (~" + pessimisticNumberOfCollisions + " collisions)");
            Radix radix = _radixFactory.newInstance();

            _collisionNodeIdCache  = _cacheFactory.newByteArray(pessimisticNumberOfCollisions, new sbyte[COLLISION_ENTRY_SIZE]);
            _collisionTrackerCache = _trackerFactory.create(_cacheFactory, pessimisticNumberOfCollisions);
            _collisionValues       = _collisionValuesFactory.apply(pessimisticNumberOfCollisions);
            try
            {
                for (long nodeId = 0; nodeId <= _highestSetIndex; nodeId++)
                {
                    long eId = _dataCache.get(nodeId);
                    if (IsCollision(eId))
                    {
                        // Store this collision input id for matching later in get()
                        long   collisionIndex         = _numberOfCollisions++;
                        object id                     = inputIdLookup(nodeId);
                        long   eIdFromInputId         = Encode(id);
                        long   eIdWithoutCollisionBit = ClearCollision(eId);
                        Debug.Assert(eIdFromInputId == eIdWithoutCollisionBit, format("Encoding mismatch during building of " + "collision info. input id %s (a %s) marked as collision where this id was encoded into " + "%d when put, but was now encoded into %d", id, id.GetType().Name, eIdWithoutCollisionBit, eIdFromInputId));
                        long offset = _collisionValues.add(id);
                        // Entry layout: node id in bytes 0-4, collision-value offset starting at byte 5.
                        _collisionNodeIdCache.set5ByteLong(collisionIndex, 0, nodeId);
                        _collisionNodeIdCache.set6ByteLong(collisionIndex, 5, offset);

                        // Register the eId (collision bit cleared) in the radix used by the upcoming sort
                        radix.RegisterRadixOf(eIdWithoutCollisionBit);
                    }
                    progress.Add(1);
                }
                progress.Done();

                // Detect input id duplicates within the same group, with source information, line number and the works
                DetectDuplicateInputIds(radix, collector, progress);
            }
            finally
            {
                // We won't be needing the tracker cache anymore. Releasing it in a finally block makes sure
                // it is closed even when collecting or duplicate detection throws.
                _collisionTrackerCache.close();
                _collisionTrackerCache = null;
            }
        }