/// <summary>
/// Creates the iterator by storing every supplied collaborator in its backing field.
/// </summary>
/// <param name="source"> enumerator over the <see cref="DataFactory"/> instances to read from. </param>
/// <param name="headerFactory"> factory kept for producing headers. </param>
/// <param name="idType"> id type kept for later use. </param>
/// <param name="config"> CSV configuration kept for later use. </param>
/// <param name="badCollector"> collector kept for later use. </param>
/// <param name="groups"> groups kept for later use. </param>
public CsvGroupInputIterator(IEnumerator <DataFactory> source, Header.Factory headerFactory, IdType idType, Configuration config, Collector badCollector, Groups groups)
 {
     // Field names carry an underscore prefix, so no 'this.' qualification is needed.
     _source = source;
     _headerFactory = headerFactory;
     _idType = idType;
     _config = config;
     _badCollector = badCollector;
     _groups = groups;
 }
Beispiel #2
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in C#:
//ORIGINAL LINE: private long[] sample(Iterable<DataFactory> dataFactories, Header.Factory headerFactory, System.Func<org.neo4j.values.storable.Value[], int> valueSizeCalculator, System.Func<org.neo4j.unsafe.impl.batchimport.input.InputEntity, int> additionalCalculator) throws java.io.IOException
        /// <summary>
        /// Estimates totals for the given input groups by reading a bounded sample from each source
        /// and extrapolating from the sampled position to the full source length.
        /// </summary>
        /// <param name="dataFactories"> one <see cref="DataFactory"/> per input group. </param>
        /// <param name="headerFactory"> used to extract the header from the first source of each group. </param>
        /// <param name="valueSizeCalculator"> forwarded to <c>calculatePropertySize</c> for every sampled entity. </param>
        /// <param name="additionalCalculator"> invoked once per sampled entity; its results feed the fourth estimate. </param>
        /// <returns> four estimates: [entity count, property count, property size, labels (for nodes only)]. </returns>
        private long[] Sample(IEnumerable <DataFactory> dataFactories, Header.Factory headerFactory, System.Func <Value[], int> valueSizeCalculator, System.Func <InputEntity, int> additionalCalculator)
        {
            long[] estimates = new long[4];               // [entity count, property count, property size, labels (for nodes only)]
            using (CsvInputChunkProxy chunk = new CsvInputChunkProxy())
            {
                // Group ids are 1-based: the counter is incremented before each group is processed.
                int groupId = 0;
                foreach (DataFactory dataFactory in dataFactories)                           // one input group
                {
                    groupId++;
                    Header header = null;
                    Data   data   = dataFactory.Create(_config);
                    RawIterator <CharReadable, IOException> sources = data.Stream();
                    while (sources.HasNext())
                    {
                        using (CharReadable source = sources.Next())
                        {
                            if (header == null)
                            {
                                // Extract the header from the first file in this group; later files reuse it.
                                // NOTE(review): the helper is spelled 'extractHeader' here; confirm it resolves in this class.
                                header = extractHeader(source, headerFactory, _idType, _config, _groups);
                            }

                            // A single C# using statement can only declare resources of one type, so the
                            // iterator and the reusable entity get their own (stacked) using statements.
                            // Disposal still happens in reverse order: entity first, then iterator.
                            using (CsvInputIterator iterator = new CsvInputIterator(source, data.Decorator(), header, _config, _idType, EMPTY, extractors(_config), groupId))
                            using (InputEntity entity = new InputEntity())
                            {
                                int entities     = 0;
                                int properties   = 0;
                                int propertySize = 0;
                                int additional   = 0;

                                // Stop sampling once the iterator position reaches the sample size or the source runs out of chunks.
                                while (iterator.Position() < _estimateSampleSize && iterator.Next(chunk))
                                {
                                    for ( ; chunk.Next(entity); entities++)
                                    {
                                        properties   += entity.PropertyCount();
                                        propertySize += calculatePropertySize(entity, valueSizeCalculator);
                                        additional   += additionalCalculator(entity);
                                    }
                                }

                                if (entities > 0)
                                {
                                    // Scale the sampled entity count by (source length / position read so far),
                                    // then scale the per-entity averages by that estimated entity count.
                                    long entityCountInSource = ( long )((( double )source.Length() / iterator.Position()) * entities);
                                    estimates[0] += entityCountInSource;
                                    estimates[1] += ( long )((( double )properties / entities) * entityCountInSource);
                                    estimates[2] += ( long )((( double )propertySize / entities) * entityCountInSource);
                                    estimates[3] += ( long )((( double )additional / entities) * entityCountInSource);
                                }
                            }
                        }
                    }
                }
            }
            return(estimates);
        }
Beispiel #3
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in C#:
//ORIGINAL LINE: static Header extractHeader(org.neo4j.csv.reader.CharReadable stream, Header.Factory headerFactory, IdType idType, Configuration config, org.neo4j.unsafe.impl.batchimport.input.Groups groups) throws java.io.IOException
        /// <summary>
        /// Produces the <see cref="Header"/> for a stream. If the header factory reports itself as defined,
        /// the stream is not read and the factory is invoked with all-null arguments; otherwise the first
        /// line of the stream is extracted and handed to the factory through a seeker.
        /// </summary>
        /// <param name="stream"> source whose first line is read when the factory is not defined. </param>
        /// <param name="headerFactory"> factory that creates the header. </param>
        /// <param name="idType"> forwarded to the factory. </param>
        /// <param name="config"> used to build the seeker and forwarded to the factory. </param>
        /// <param name="groups"> forwarded to the factory. </param>
        /// <returns> the header created by <paramref name="headerFactory"/>. </returns>
        internal static Header ExtractHeader(CharReadable stream, Header.Factory headerFactory, IdType idType, Configuration config, Groups groups)
        {
            if (headerFactory.Defined)
            {
                // Nothing is read from the stream in this case.
                return headerFactory.Create(null, null, null, null);
            }

            char[] headerChars  = Readables.extractFirstLineFrom(stream);
            int    headerLength = headerChars.Length;

            // The backing array is one char longer than the header so the seeker does not think
            // it is reading a value bigger than its max buffer size.
            ChunkImpl headerChunk = new ChunkImpl(copyOf(headerChars, headerLength + 1));
            headerChunk.Initialize(headerLength, stream.SourceDescription());

            CharSeeker headerSeeker = Seeker(headerChunk, config);
            return headerFactory.Create(headerSeeker, config, idType, groups);
        }
Beispiel #4
0
        /// <summary>
        /// Creates the input with an explicit <see cref="Groups"/> instance. The configuration is checked
        /// first, then all arguments are stored, and finally the headers are verified and duplicate
        /// source files are warned about.
        /// </summary>
        /// <param name="nodeDataFactory"> data factories for node input. </param>
        /// <param name="nodeHeaderFactory"> header factory for node input. </param>
        /// <param name="relationshipDataFactory"> data factories for relationship input. </param>
        /// <param name="relationshipHeaderFactory"> header factory for relationship input. </param>
        /// <param name="idType"> id type stored for later use. </param>
        /// <param name="config"> CSV configuration; passed to <c>AssertSaneConfiguration</c> before anything is stored. </param>
        /// <param name="badCollector"> collector stored for later use. </param>
        /// <param name="monitor"> monitor stored for later use. </param>
        /// <param name="groups"> groups stored for later use. </param>
        internal CsvInput(IEnumerable <DataFactory> nodeDataFactory, Header.Factory nodeHeaderFactory, IEnumerable <DataFactory> relationshipDataFactory, Header.Factory relationshipHeaderFactory, IdType idType, Configuration config, Collector badCollector, Monitor monitor, Groups groups)
        {
            // Runs before any field is assigned.
            AssertSaneConfiguration(config);

            // Node side
            _nodeDataFactory = nodeDataFactory;
            _nodeHeaderFactory = nodeHeaderFactory;

            // Relationship side
            _relationshipDataFactory = relationshipDataFactory;
            _relationshipHeaderFactory = relationshipHeaderFactory;

            // Shared collaborators
            _idType = idType;
            _config = config;
            _badCollector = badCollector;
            _monitor = monitor;
            _groups = groups;

            // Both checks run only after every field above has been assigned.
            VerifyHeaders();
            WarnAboutDuplicateSourceFiles();
        }
Beispiel #5
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in C#:
//ORIGINAL LINE: private DataFactory generateData(Header.Factory factory, org.apache.commons.lang3.mutable.MutableLong start, long count, long nodeCount, String headerString, String fileName, org.neo4j.unsafe.impl.batchimport.input.Groups groups) throws java.io.IOException
        /// <summary>
        /// Writes a file consisting of the given header line followed by randomly generated entities,
        /// advances <paramref name="start"/> by <paramref name="count"/>, and returns a
        /// <see cref="DataFactory"/> over the written file.
        /// </summary>
        /// <param name="factory"> header factory used to parse <paramref name="headerString"/>. </param>
        /// <param name="start"> supplies the start value passed to the generator; incremented by <paramref name="count"/> afterwards. </param>
        /// <param name="count"> passed to the generator (also as its third argument, narrowed via <c>toIntExact</c>). </param>
        /// <param name="nodeCount"> passed to the generator as its first argument. </param>
        /// <param name="headerString"> header line; parsed into a header and written as the first line of the file. </param>
        /// <param name="fileName"> name of the file to create via <c>Directory.file</c>. </param>
        /// <param name="groups"> forwarded to the header factory. </param>
        /// <returns> a data factory reading the generated file as UTF-8 with no decorator. </returns>
        private DataFactory GenerateData(Header.Factory factory, MutableLong start, long count, long nodeCount, string headerString, string fileName, Groups groups)
        {
            File   file   = Directory.file(fileName);
            Header header = factory.Create(charSeeker(wrap(headerString), COMMAS, false), COMMAS, IdType.Integer, groups);
            Distribution <string>    distribution    = new Distribution <string>(new string[] { "Token" });
            Deserialization <string> deserialization = new StringDeserialization(COMMAS);

            // A single C# using statement can only declare resources of one type, so each resource
            // gets its own (stacked) using statement. Disposal order is the reverse of declaration:
            // entity, chunk, generator, then the writer.
            using (PrintWriter @out = new PrintWriter(new StreamWriter(file)))
            using (RandomEntityDataGenerator generator = new RandomEntityDataGenerator(nodeCount, count, toIntExact(count), Random.seed(), start.longValue(), header, distribution, distribution, 0, 0))
            using (InputChunk chunk = generator.NewChunk())
            using (InputEntity entity = new InputEntity())
            {
                @out.println(headerString);
                while (generator.Next(chunk))
                {
                    // Drain every entity of the current chunk, one output line per entity.
                    while (chunk.Next(entity))
                    {
                        @out.println(convert(entity, deserialization, header));
                    }
                }
            }

            // Advanced only after the file has been fully written and closed.
            start.add(count);
            return(DataFactories.Data(InputEntityDecorators.NO_DECORATOR, Charsets.UTF_8, file));
        }
Beispiel #6
0
//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
//ORIGINAL LINE: @Test public void shouldParseHeaderFromFirstLineOfFirstInputFile() throws Exception
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in C#:
        /// <summary>
        /// Checks that the default node-file header factory builds the expected id/name/birth_date
        /// entries from the first line of the first of two tab-separated sources.
        /// </summary>
        public virtual void ShouldParseHeaderFromFirstLineOfFirstInputFile()
        {
            // GIVEN
            CharReadable firstSource = wrap("id:ID\tname:String\tbirth_date:long");
            CharReadable secondSource = wrap("0\tThe node\t123456789");
            DataFactory  dataFactory  = DataFactories.Data(value => value, () => new MultiReadable(Readables.iterator(IOFunctions.identity(), firstSource, secondSource)));

            Header.Factory headerFactory = defaultFormatNodeFileHeader();
            Extractors     extractors    = new Extractors(';');

            // WHEN
            CharSeeker seeker = CharSeekers.charSeeker(new MultiReadable(dataFactory.Create(_tabs).stream()), _tabs, false);
            try
            {
                Header header = headerFactory.Create(seeker, _tabs, IdType.Actual, _groups);

                // THEN
                assertArrayEquals(array(Entry("id", Type.Id, extractors.Long_()), Entry("name", Type.Property, extractors.String()), Entry("birth_date", Type.Property, extractors.Long_())), header.Entries());
            }
            finally
            {
                // Release the seeker even when header creation or the assertion throws.
                seeker.Dispose();
            }
        }
Beispiel #7
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in C#:
//ORIGINAL LINE: CsvInputIterator(org.neo4j.csv.reader.CharReadable stream, Decorator decorator, Header.Factory headerFactory, IdType idType, Configuration config, org.neo4j.unsafe.impl.batchimport.input.Groups groups, org.neo4j.unsafe.impl.batchimport.input.Collector badCollector, org.neo4j.csv.reader.Extractors extractors, int groupId) throws java.io.IOException
        /// <summary>
        /// Convenience constructor that first extracts the header from <paramref name="stream"/> via
        /// <c>ExtractHeader</c> and then delegates to the overload that takes a ready-made header.
        /// </summary>
        /// <param name="stream"> source to read; also used for header extraction. </param>
        /// <param name="decorator"> forwarded unchanged to the delegated constructor. </param>
        /// <param name="headerFactory"> used only for header extraction. </param>
        /// <param name="idType"> used for header extraction and forwarded. </param>
        /// <param name="config"> used for header extraction and forwarded. </param>
        /// <param name="groups"> used only for header extraction. </param>
        /// <param name="badCollector"> forwarded unchanged to the delegated constructor. </param>
        /// <param name="extractors"> forwarded unchanged to the delegated constructor. </param>
        /// <param name="groupId"> forwarded unchanged to the delegated constructor. </param>
        internal CsvInputIterator(CharReadable stream, Decorator decorator, Header.Factory headerFactory, IdType idType, Configuration config, Groups groups, Collector badCollector, Extractors extractors, int groupId)
            : this(stream,
                   decorator,
                   ExtractHeader(stream, headerFactory, idType, config, groups),
                   config,
                   idType,
                   badCollector,
                   extractors,
                   groupId)
        {
            // All work happens in the delegated constructor.
        }
Beispiel #8
0
 /// <summary>
 /// Creates the input with a freshly constructed <see cref="Groups"/> instance by delegating to the
 /// overload that accepts one explicitly.
 /// </summary>
 /// <param name="nodeDataFactory"> multiple <see cref="DataFactory"/> instances providing data, each <see cref="DataFactory"/>
 /// specifies an input group with its own header, extracted by the <paramref name="nodeHeaderFactory"/>. From the outside
 /// it looks like one stream of nodes. </param>
 /// <param name="nodeHeaderFactory"> factory for reading node headers. </param>
 /// <param name="relationshipDataFactory"> multiple <see cref="DataFactory"/> instances providing data, each <see cref="DataFactory"/>
 /// specifies an input group with its own header, extracted by the <paramref name="relationshipHeaderFactory"/>.
 /// From the outside it looks like one stream of relationships. </param>
 /// <param name="relationshipHeaderFactory"> factory for reading relationship headers. </param>
 /// <param name="idType"> <see cref="IdType"/> to expect in id fields of node and relationship input. </param>
 /// <param name="config"> CSV configuration. </param>
 /// <param name="badCollector"> Collector getting calls about bad input data. </param>
 /// <param name="monitor"> <see cref="Monitor"/> for internal events. </param>
 public CsvInput(IEnumerable <DataFactory> nodeDataFactory, Header.Factory nodeHeaderFactory, IEnumerable <DataFactory> relationshipDataFactory, Header.Factory relationshipHeaderFactory, IdType idType, Configuration config, Collector badCollector, Monitor monitor)
     : this(nodeDataFactory,
            nodeHeaderFactory,
            relationshipDataFactory,
            relationshipHeaderFactory,
            idType,
            config,
            badCollector,
            monitor,
            new Groups())
 {
     // All work happens in the delegated constructor.
 }
Beispiel #9
0
 /// <summary>
 /// Wraps the given data factories in a <see cref="CsvGroupInputIterator"/> configured with this
 /// instance's id type, configuration, bad collector and groups.
 /// </summary>
 /// <param name="data"> data factories to iterate over. </param>
 /// <param name="headerFactory"> header factory handed to the iterator. </param>
 /// <returns> a new iterator over <paramref name="data"/>. </returns>
 private InputIterator Stream(IEnumerable <DataFactory> data, Header.Factory headerFactory)
 {
     IEnumerator <DataFactory> factories = data.GetEnumerator();
     InputIterator groupIterator = new CsvGroupInputIterator(factories, headerFactory, _idType, _config, _badCollector, _groups);
     return groupIterator;
 }