/// <summary>
/// Streams the freedb tar.bz2 archive, parses every entry and hands each parsed
/// disk to <paramref name="addToBatch"/>.
/// </summary>
/// <remarks>
/// Processing stops early once 50,000 disks have been handed over, or on the first
/// exception thrown while parsing or inside the callback (the entry name and the
/// exception are written to the console in that case).
/// </remarks>
/// <param name="addToBatch">Callback invoked once for each successfully parsed disk.</param>
/// <returns>
/// The stopwatch that was started just before the archive was opened. It is not
/// stopped by this method.
/// </returns>
private static Stopwatch ParseDisks(Action<Disk> addToBatch)
{
    int i = 0;
    var parser = new Parser();
    // 1 MiB covers the expected entry sizes; it is grown below if an entry is larger.
    var buffer = new byte[1024 * 1024];
    var sp = Stopwatch.StartNew();

    // OpenRead: the archive is only ever read, so do not ask for write access
    // or an exclusive lock on the file.
    using (var bz2 = new BZip2InputStream(File.OpenRead(@"D:\Data\freedb-complete-20120101.tar.bz2")))
    using (var tar = new TarInputStream(bz2))
    {
        TarEntry entry;
        while ((entry = tar.GetNextEntry()) != null)
        {
            if (entry.Size == 0 || entry.Name == "README" || entry.Name == "COPYING")
            {
                continue;
            }

            // Grow the scratch buffer instead of letting Read throw on an oversized entry.
            // Done before the int cast so an absurd size fails on allocation, not truncation.
            if (entry.Size > buffer.Length)
            {
                buffer = new byte[entry.Size];
            }

            var size = (int)entry.Size;
            var readSoFar = 0;
            while (readSoFar < size)
            {
                var read = tar.Read(buffer, readSoFar, size - readSoFar);
                if (read == 0)
                {
                    break; // entry ended before the declared size was reached
                }

                readSoFar += read;
            }

            // Decode through a StreamReader so it can detect a BOM / unicode encoding
            // and the values are read properly.
            string fileText;
            using (var reader = new StreamReader(new MemoryStream(buffer, 0, readSoFar)))
            {
                fileText = reader.ReadToEnd();
            }

            try
            {
                var disk = parser.Parse(fileText);
                addToBatch(disk);

                // The check uses the count before this disk, the message shows the count after it.
                if (i++ % 1000 == 0)
                {
                    Console.Write("\r{0} {1:#,#} {2} ", entry.Name, i, sp.Elapsed);
                }

                if (i % 50000 == 0)
                {
                    return sp;
                }
            }
            catch (Exception e)
            {
                Console.WriteLine();
                Console.WriteLine(entry.Name);
                Console.WriteLine(e);
                return sp;
            }
        }
    }

    return sp;
}
/// <summary>
/// Takes raw entries from <c>_entries</c>, parses each one and adds the resulting
/// disk to <c>_disks</c>, incrementing the <c>parsed</c> counter per disk.
/// </summary>
/// <remarks>
/// The loop ends when a <c>null</c> entry is taken. A <c>null</c> is then added to
/// <c>_disks</c> (presumably the end-of-stream marker for whatever consumes that
/// collection - confirm against the consumer). The marker is added from a
/// <c>finally</c> block so it is still delivered if taking, parsing or adding
/// throws; the exception itself is not swallowed.
/// </remarks>
private void ParseEntries()
{
    var parser = new Parser();
    try
    {
        while (true)
        {
            var entry = _entries.Take();
            if (entry == null)
            {
                break;
            }

            var disk = parser.Parse(entry);
            _disks.Add(disk);
            Interlocked.Increment(ref parsed);
        }
    }
    finally
    {
        // Always pass the terminating null on, otherwise a failure here leaves
        // the reader of _disks waiting for a marker that never arrives.
        _disks.Add(null);
    }
}
/// <summary>
/// Streams the freedb tar.bz2 archive, parses every entry and stores each parsed
/// disk through <paramref name="insert"/>.
/// </summary>
/// <remarks>
/// An entry whose parsing or storing throws is reported on the console (entry name
/// followed by the exception) and skipped; processing continues with the next entry.
/// </remarks>
/// <param name="insert">Bulk insert operation that receives every parsed disk via <c>Store</c>.</param>
private static void ParseDisks(BulkInsertOperation insert)
{
    var parser = new Parser();
    // 1 MiB covers the expected entry sizes; it is grown below if an entry is larger.
    var buffer = new byte[1024 * 1024];

    // OpenRead: the archive is only ever read, so do not ask for write access
    // or an exclusive lock on the file.
    using (var bz2 = new BZip2InputStream(File.OpenRead(@"D:\Scratch\freedb-complete-20150101.tar.bz2")))
    using (var tar = new TarInputStream(bz2))
    {
        TarEntry entry;
        while ((entry = tar.GetNextEntry()) != null)
        {
            if (entry.Size == 0 || entry.Name == "README" || entry.Name == "COPYING")
            {
                continue;
            }

            // Grow the scratch buffer instead of letting Read throw on an oversized entry.
            // Done before the int cast so an absurd size fails on allocation, not truncation.
            if (entry.Size > buffer.Length)
            {
                buffer = new byte[entry.Size];
            }

            var size = (int)entry.Size;
            var readSoFar = 0;
            while (readSoFar < size)
            {
                var read = tar.Read(buffer, readSoFar, size - readSoFar);
                if (read == 0)
                {
                    break; // entry ended before the declared size was reached
                }

                readSoFar += read;
            }

            // Decode through a StreamReader so it can detect a BOM / unicode encoding
            // and the values are read properly.
            string fileText;
            using (var reader = new StreamReader(new MemoryStream(buffer, 0, readSoFar)))
            {
                fileText = reader.ReadToEnd();
            }

            try
            {
                var disk = parser.Parse(fileText);
                insert.Store(disk);
            }
            catch (Exception e)
            {
                // Best effort: report the failing entry and carry on with the rest.
                Console.WriteLine();
                Console.WriteLine(entry.Name);
                Console.WriteLine(e);
            }
        }
    }
}