/// <summary>
/// Registers every value stored at indexes <c>0</c> through <paramref name="highestSetIndex"/>
/// (inclusive) with <paramref name="radix"/>.
/// </summary>
/// <param name="values">Array the values are read from, one <c>Get</c> call per index.</param>
/// <param name="radix">Receiver of one <c>RegisterRadixOf</c> call per value read.</param>
/// <param name="highestSetIndex">Last index to visit; when negative, nothing is read.</param>
private static void UpdateRadix(LongArray values, Radix radix, long highestSetIndex)
{
    long cursor = 0;
    while (cursor <= highestSetIndex)
    {
        long value = values.Get(cursor);
        radix.RegisterRadixOf(value);
        cursor++;
    }
}
/// <summary>
/// Creates a sorter by capturing its collaborators. Nothing is sorted here; the constructor
/// only stores the arguments and reads the index counts and calculator from <paramref name="radix"/>.
/// </summary>
/// <param name="radix">Source of <c>RadixIndexCounts</c> and of the calculator returned by <c>Calculator()</c>.</param>
/// <param name="dataCache">Data array kept for later use by the sort.</param>
/// <param name="highestSetIndex">Highest index considered populated in the caches.</param>
/// <param name="tracker">Tracker kept for later use by the sort.</param>
/// <param name="threads">Number of threads the sort may use.</param>
/// <param name="progress">Listener that receives progress notifications.</param>
/// <param name="comparator">Comparator kept for later use by the sort.</param>
public ParallelSort(Radix radix, LongArray dataCache, long highestSetIndex, Tracker tracker, int threads, ProgressListener progress, Comparator comparator)
{
    // Values derived from the radix; read in the same order as before.
    _radixIndexCount = radix.RadixIndexCounts;
    _radixCalculator = radix.Calculator();

    // Caches and their bounds.
    _dataCache = dataCache;
    _tracker = tracker;
    _highestSetIndex = highestSetIndex;

    // Execution settings.
    _threads = threads;
    _comparator = comparator;
    _progress = progress;
}
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in C#:
//ORIGINAL LINE: private void detectDuplicateInputIds(Radix radix, org.neo4j.unsafe.impl.batchimport.input.Collector collector, org.neo4j.helpers.progress.ProgressListener progress) throws InterruptedException
/// <summary>
/// Sorts the collision tracker cache and then scans it in sorted order, reporting every
/// input id that occurs more than once within the same group.
/// </summary>
/// <param name="radix">Radix handed to the <c>ParallelSort</c> that orders the collisions.</param>
/// <param name="collector">Receives one <c>CollectDuplicateNode</c> call per detected duplicate.</param>
/// <param name="progress">Passed to the sort, then notified once per scanned collision under the "DEDUPLICATE" label.</param>
private void DetectDuplicateInputIds(Radix radix, Collector collector, ProgressListener progress)
{
    // We do this collision sort using ParallelSort which has the data cache and the tracker cache;
    // the tracker cache gets sorted, the data cache stays intact. In the collision data case we
    // actually have one more layer in here, so we have the tracker cache pointing to
    // collisionNodeIdCache pointing to dataCache. This is done through the Comparator abstraction.
    //
    // The comparator takes the data index of each eId it is comparing into account, so that an extra
    // comparison based on data index is done when two eIds are equal. That way stretches of equal
    // eIds are sorted by data index (i.e. node id) order, which allows the linear duplicate scan
    // below and gives deterministic duplicate reporting.
    Comparator duplicateComparator = new ComparatorAnonymousInnerClass(this);
    (new ParallelSort(radix, As5ByteLongArray(_collisionNodeIdCache), _numberOfCollisions - 1, _collisionTrackerCache, _processorsForParallelWork, progress, duplicateComparator)).run();

    // The collision tracker cache is now sorted; walk it and detect duplicate input ids.
    long previousEid = 0;
    int previousGroupId = -1;
    SameInputIdDetector detector = new SameInputIdDetector();
    progress.Started("DEDUPLICATE");

    // The counter is a long: the collision count is used as a long elsewhere in this class, and an
    // int counter would wrap to a negative index once the count exceeds int.MaxValue.
    for (long i = 0; i < _numberOfCollisions; i++)
    {
        long collisionIndex = _collisionTrackerCache.get(i);

        // Each collision entry holds the node id (5 bytes at offset 0) followed by the
        // offset of the input id in the collision values (6 bytes at offset 5).
        long nodeId = _collisionNodeIdCache.get5ByteLong(collisionIndex, 0);
        long offset = _collisionNodeIdCache.get6ByteLong(collisionIndex, 5);
        long eid = _dataCache.get(nodeId);
        int groupId = GroupOf(nodeId);

        // Collisions of the same eId AND groupId are always together, so a change in either
        // one starts a new run and the detector state from the previous run is discarded.
        bool same = eid == previousEid && previousGroupId == groupId;
        if (!same)
        {
            detector.Clear();
        }

        // Potential duplicate: Add returns -1 when this input id has not been seen in the current run.
        object inputId = _collisionValues.get(offset);
        long nonDuplicateNodeId = detector.Add(nodeId, inputId);
        if (nonDuplicateNodeId != -1)
        {
            // Duplicate: report it, mark this node as duplicate and clear the collision
            // mark on the node id returned by the detector.
            collector.CollectDuplicateNode(inputId, nodeId, _groups.get(groupId).name());
            _trackerCache.markAsDuplicate(nodeId);
            UnmarkAsCollision(nonDuplicateNodeId);
        }

        previousEid = eid;
        previousGroupId = groupId;
        progress.Add(1);
    }
    progress.Done();
}
/// <summary>
/// Creates the mapper: stores the supplied collaborators, allocates the data cache and the
/// group cache from <paramref name="cacheFactory"/>, and obtains an initial radix from
/// <paramref name="radixFactory"/>.
/// </summary>
/// <param name="cacheFactory">Factory used for the dynamic data cache and passed to <c>GroupCache.select</c>.</param>
/// <param name="encoder">Encoder stored for later use.</param>
/// <param name="radixFactory">Factory stored for later use and asked for the initial radix.</param>
/// <param name="monitor">Monitor stored for later use.</param>
/// <param name="trackerFactory">Tracker factory stored for later use.</param>
/// <param name="groups">Groups stored for later use; its size is passed to <c>GroupCache.select</c>.</param>
/// <param name="collisionValuesFactory">Factory for collision values, stored for later use.</param>
/// <param name="chunkSize">Chunk size passed to both cache allocations.</param>
/// <param name="processorsForParallelWork">Requested parallelism; values below 1 are raised to 1.</param>
/// <param name="comparator">Comparator stored for later use.</param>
internal EncodingIdMapper(NumberArrayFactory cacheFactory, Encoder encoder, Factory<Radix> radixFactory, Monitor monitor, TrackerFactory trackerFactory, Groups groups, System.Func<long, CollisionValues> collisionValuesFactory, int chunkSize, int processorsForParallelWork, Comparator comparator)
{
    // Plain stores of the supplied collaborators.
    _encoder = encoder;
    _groups = groups;
    _monitor = monitor;
    _comparator = comparator;
    _cacheFactory = cacheFactory;
    _radixFactory = radixFactory;
    _trackerFactory = trackerFactory;
    _collisionValuesFactory = collisionValuesFactory;

    // Never run the parallel parts with fewer than one processor.
    _processorsForParallelWork = max(processorsForParallelWork, 1);

    // Allocations and factory calls, kept in their original relative order.
    _dataCache = cacheFactory.NewDynamicLongArray(chunkSize, GAP_VALUE);
    _groupCache = GroupCache.select(cacheFactory, chunkSize, groups.Size());
    _radix = radixFactory.NewInstance();
}
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in C#:
//ORIGINAL LINE: private void buildCollisionInfo(System.Func<long, Object> inputIdLookup, long pessimisticNumberOfCollisions, org.neo4j.unsafe.impl.batchimport.input.Collector collector, org.neo4j.helpers.progress.ProgressListener progress) throws InterruptedException
/// <summary>
/// Scans the data cache for entries marked as collisions, records the node id and input id of
/// each one in the collision caches, and then runs duplicate detection over the recorded collisions.
/// </summary>
/// <param name="inputIdLookup">Returns the input id for a given node id.</param>
/// <param name="pessimisticNumberOfCollisions">Capacity used when allocating the collision caches.</param>
/// <param name="collector">Passed on to <c>DetectDuplicateInputIds</c> for duplicate reporting.</param>
/// <param name="progress">Notified once per scanned node id, then passed on to duplicate detection.</param>
private void BuildCollisionInfo(System.Func<long, object> inputIdLookup, long pessimisticNumberOfCollisions, Collector collector, ProgressListener progress)
{
    progress.Started("RESOLVE (~" + pessimisticNumberOfCollisions + " collisions)");
    Radix radix = _radixFactory.newInstance();
    _collisionNodeIdCache = _cacheFactory.newByteArray(pessimisticNumberOfCollisions, new sbyte[COLLISION_ENTRY_SIZE]);
    _collisionTrackerCache = _trackerFactory.create(_cacheFactory, pessimisticNumberOfCollisions);
    try
    {
        _collisionValues = _collisionValuesFactory.apply(pessimisticNumberOfCollisions);
        for (long nodeId = 0; nodeId <= _highestSetIndex; nodeId++)
        {
            long eId = _dataCache.get(nodeId);
            if (IsCollision(eId))
            {
                // Store this collision input id for matching later in get()
                long collisionIndex = _numberOfCollisions++;
                object id = inputIdLookup(nodeId);
                long eIdFromInputId = Encode(id);
                long eIdWithoutCollisionBit = ClearCollision(eId);

                // Re-encoding the input id must give the same value that was stored when it was put.
                // NOTE(review): the message uses Java-style %s/%d placeholders; confirm that the
                // `format` helper in scope understands them.
                Debug.Assert(eIdFromInputId == eIdWithoutCollisionBit, format("Encoding mismatch during building of " + "collision info. input id %s (a %s) marked as collision where this id was encoded into " + "%d when put, but was now encoded into %d", id, id.GetType().Name, eIdWithoutCollisionBit, eIdFromInputId));

                // Collision entry layout: node id (5 bytes at offset 0), then the offset of the
                // input id in the collision values (6 bytes at offset 5).
                long offset = _collisionValues.add(id);
                _collisionNodeIdCache.set5ByteLong(collisionIndex, 0, nodeId);
                _collisionNodeIdCache.set6ByteLong(collisionIndex, 5, offset);

                // Register the encoded id (without its collision bit) in the radix that is
                // handed to the duplicate-detection sort.
                radix.RegisterRadixOf(eIdWithoutCollisionBit);
            }
            progress.Add(1);
        }
        progress.Done();

        // Detect input id duplicates within the same group, with source information, line number and the works
        DetectDuplicateInputIds(radix, collector, progress);
    }
    finally
    {
        // We won't be needing the tracker anymore. Releasing it here also covers the case where
        // the scan or the duplicate detection throws, so it is neither leaked nor left referenced.
        _collisionTrackerCache.close();
        _collisionTrackerCache = null;
    }
}