예제 #1
0
		output_subframe_fixed(FLACCLTask task, FlacSubframeInfo sub, int index)
		{
			// Emits the body of one subframe (a fixed-predictor subframe, going by the
			// method name): sub.best.order raw samples followed by the coded residual.
			// 'index' is passed through unchanged to output_residual.
			FlacFrame frame = task.frame;

			// warm-up samples: the first sub.best.order samples, sub.obits bits each
			for (int i = 0; i < sub.best.order; i++)
				frame.writer.writebits_signed(sub.obits, sub.samples[i]);

			// residual; the third argument is the number of bits the warm-up samples just took
			output_residual(task, sub, sub.obits * sub.best.order, index);
		}
예제 #2
0
		/// <summary>
		/// Submits the batch held in task1 to the GPU, completes and writes out the
		/// batch that was submitted earlier (held in task2), carries any samples that
		/// were not consumed over to the other task's sample buffer, and finally swaps
		/// the two tasks.
		/// </summary>
		/// <param name="nFrames">Number of frames of <c>eparams.block_size</c> samples to submit.</param>
		public unsafe void do_output_frames(int nFrames)
		{
			send_to_GPU(task1, nFrames, eparams.block_size);
			run_GPU_task(task1);

			// task2 only has work in it if an earlier call submitted frames.
			if (task2.frameCount > 0)
			{
				task2.openCLCQ.Finish();

				if (cpu_tasks == null)
				{
					// No CPU worker slots: encode and write on this thread.
					process_result(task2);
					write_result(task2);
				}
				else
				{
					wait_for_cpu_task();

					// Trade task2 for the task sitting in the oldest CPU slot.
					FLACCLTask fromSlot = cpu_tasks[oldest_cpu_task];
					cpu_tasks[oldest_cpu_task] = task2;
					task2 = fromSlot;

					start_cpu_task();

					oldest_cpu_task = (oldest_cpu_task + 1) % cpu_tasks.Length;

					// The task taken out of the slot may be empty (frameCount == 0).
					if (task2.frameCount > 0)
						write_result(task2);
				}
			}

			int consumed = eparams.block_size * nFrames;
			samplesInBuffer -= consumed;
			if (samplesInBuffer > 0)
			{
				// Move the leftover samples to the start of the buffer that becomes task1 below.
				byte* dst = (byte*)task2.clSamplesBytesPtr;
				byte* src = ((byte*)task1.clSamplesBytesPtr) + consumed * _pcm.BlockAlign;
				AudioSamples.MemCpy(dst, src, samplesInBuffer * _pcm.BlockAlign);
			}

			FLACCLTask submitted = task1;
			task1 = task2;
			task2 = submitted;
			task1.frameCount = 0;
		}
예제 #3
0
		/// <summary>
		/// Writes the residual section of a subframe: a 2-bit coding method, a 4-bit
		/// partition order, and then either the Rice data already produced on the GPU
		/// or Rice data encoded here partition by partition.
		/// </summary>
		/// <param name="task">Task that owns the frame writer and the GPU result buffers.</param>
		/// <param name="sub">Subframe whose best candidate is written.</param>
		/// <param name="offs0">Bit count that, plus the 6 header bits written here, must equal the GPU-reported header length.</param>
		/// <param name="index">Index into <c>task.BestResidualTasks</c>.</param>
		/// <exception cref="Exception">A GPU-reported size, header length or bit alignment disagrees with the host side.</exception>
		unsafe void output_residual(FLACCLTask task, FlacSubframeInfo sub, int offs0, int index)
		{
			FlacFrame frame = task.frame;

			// Residual header: coding method (2 bits) + partition order (4 bits).
			frame.writer.writebits(2, sub.best.rc.coding_method);
			int porder = sub.best.rc.porder;
			frame.writer.writebits(4, porder);

			if (task.UseGPURice)
			{
				int gpuSize = task.BestResidualTasks[index].size;
				int gpuHeaderLen = task.BestResidualTasks[index].headerLen;
				int len = gpuSize - gpuHeaderLen;
				int pos = task.BestResidualTasks[index].encodingOffset;

				// Sanity checks: the GPU output must line up with what was written so far.
				if (gpuSize != (int)sub.best.size)
					throw new Exception("Encoding offset mismatch");
				if (gpuHeaderLen != offs0 + 6)
					throw new Exception("Encoding offset mismatch");
				if (pos % 8 != frame.writer.BitLength % 8)
					throw new Exception("Encoding offset mismatch");

				frame.writer.writeints(len, pos, (byte*)task.clRiceOutputPtr);
				return;
			}

			int psize = frame.blocksize >> porder;
			int partitions = 1 << porder;
			// The first partition is shorter by the predictor order.
			int firstCount = psize - sub.best.order;
			int j = sub.best.order;

			fixed (byte* fixbuf = frame.writer.Buffer)
			{
				for (int p = 0; p < partitions; p++)
				{
					int k = sub.best.rc.rparams[p];
					frame.writer.writebits(4 + sub.best.rc.coding_method, k);

					int wanted = p == 0 ? firstCount : psize;
					int cnt = Math.Min(wanted, frame.blocksize - j);
					frame.writer.write_rice_block_signed(fixbuf, k, sub.best.residual + j, cnt);
					j += cnt;
				}
			}
		}
예제 #4
0
		/// <summary>
		/// Writes the encoded bytes of <paramref name="task"/> to the output stream,
		/// after adding the current stream position (relative to the first frame) to
		/// the offset of every seek point that falls inside this task's sample range.
		/// Also advances the sample position and total size counters.
		/// </summary>
		/// <param name="task">Task whose <c>outputBuffer</c> / <c>outputSize</c> are written.</param>
		unsafe void write_result(FLACCLTask task)
		{
			int sampleCount = task.frameSize * task.frameCount;

			if (seek_table != null && _IO.CanSeek)
			{
				var rangeStart = task.framePos;
				var rangeEnd = task.framePos + sampleCount;

				// Stop at the first seek point at or beyond the end of this task's range.
				for (int sp = 0; sp < seek_table.Length && seek_table[sp].number < rangeEnd; sp++)
				{
					if (seek_table[sp].number < rangeStart)
						continue;
					seek_table[sp].offset += _IO.Position - first_frame_offset;
				}
			}

			_IO.Write(task.outputBuffer, 0, task.outputSize);
			_position += sampleCount;
			_totalSize += task.outputSize;
		}
예제 #5
0
		/// <summary>
		/// One-time set-up of the encoder. On the first call it selects the OpenCL
		/// platform, creates the context, fixes the block size, builds the kernel
		/// defines, compiles "flac.cl", opens the output stream if none was supplied,
		/// writes the stream header and allocates the GPU (and optional CPU) tasks.
		/// Later calls do nothing because <c>inited</c> is already set.
		/// </summary>
		/// <exception cref="Exception">No OpenCL platform is available, or the platform named in the settings was not found.</exception>
		/// <exception cref="OpenCLBuildException">Compiling "flac.cl" failed; rethrown with its original stack trace.</exception>
		public unsafe void InitTasks()
		{
			bool doMidside = channels == 2 && eparams.do_midside;
			int channelCount = doMidside ? 2 * channels : channels;

			if (!inited)
			{
				if (OpenCL.NumberOfPlatforms < 1)
					throw new Exception("no opencl platforms found");

				int groupSize = _settings.DeviceType == OpenCLDeviceType.CPU ? 1 : _settings.GroupSize;
				OCLMan = new OpenCLManager();
				// Attempt to save binaries after compilation, as well as load precompiled binaries
				// to avoid compilation. Usually you'll want this to be true. 
				OCLMan.AttemptUseBinaries = true; // true;
				// Attempt to compile sources. This should probably be true for almost all projects.
				// Setting it to false means that when you attempt to compile "mysource.cl", it will
				// only scan the precompiled binary directory for a binary corresponding to a source
				// with that name. There's a further restriction that the compiled binary also has to
				// use the same Defines and BuildOptions
				OCLMan.AttemptUseSource = true;
				// Binary and source paths
				// This is where we store our sources and where compiled binaries are placed
				//OCLMan.BinaryPath = @"OpenCL\bin";
				//OCLMan.SourcePath = @"OpenCL\src";
				// If true, RequireImageSupport will filter out any devices without image support
				// In this project we don't need image support though, so we set it to false
				OCLMan.RequireImageSupport = false;
				// The BuildOptions string is passed directly to clBuild and can be used to do debug builds etc
				OCLMan.BuildOptions = "";
				OCLMan.SourcePath = System.IO.Path.GetDirectoryName(GetType().Assembly.Location);
				OCLMan.BinaryPath = System.IO.Path.Combine(System.IO.Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), "CUE Tools"), "OpenCL");

				// Default to the first platform unless the settings name one explicitly.
				int platformId = 0;
				if (_settings.Platform != null)
				{
					platformId = -1;
					// Names seen so far, collected for the error message below.
					string platforms = "";
					for (int i = 0; i < OpenCL.NumberOfPlatforms; i++)
					{
						var platform = OpenCL.GetPlatform(i);
						platforms += " \"" + platform.Name + "\"";
						if (platform.Name.Equals(_settings.Platform, StringComparison.InvariantCultureIgnoreCase))
						{
							platformId = i;
							break;
						}
					}
					if (platformId < 0)
						throw new Exception("unknown platform \"" + _settings.Platform + "\". Platforms available:" + platforms);
				}
				OCLMan.CreateDefaultContext(platformId, (DeviceType)_settings.DeviceType);

				this.framesPerTask = (int)OCLMan.Context.Devices[0].MaxComputeUnits * Math.Max(1, _settings.TaskSize / channels);

				// GPU-only mode additionally requires the extended local atomics extension.
				bool UseGPUOnly = _settings.GPUOnly && OCLMan.Context.Devices[0].Extensions.Contains("cl_khr_local_int32_extended_atomics");
				bool UseGPURice = UseGPUOnly && _settings.DoRice;

				// Keep _blocksize and eparams.block_size in agreement; an explicit _blocksize wins.
				if (_blocksize == 0)
				{
					if (eparams.block_size == 0)
						eparams.block_size = select_blocksize(sample_rate, eparams.block_time_ms);
					_blocksize = eparams.block_size;
				}
				else
					eparams.block_size = _blocksize;

				// Smallest power of two that is >= the block size.
				int maxBS = 1 << (BitReader.log2i(eparams.block_size - 1) + 1);

				// The Defines string gets prepended to any and all sources that are compiled
				// and serve as a convenient way to pass configuration information to the compilation process
				OCLMan.Defines =
					"#define MAX_ORDER " + eparams.max_prediction_order.ToString() + "\n" +
					"#define GROUP_SIZE " + groupSize.ToString() + "\n" +
					"#define FLACCL_VERSION \"" + vendor_string + "\"\n" +
					(UseGPUOnly ? "#define DO_PARTITIONS\n" : "") +
					(UseGPURice ? "#define DO_RICE\n" : "") +
					"#define BITS_PER_SAMPLE " + PCM.BitsPerSample + "\n" +
					"#define MAX_BLOCKSIZE " + maxBS + "\n" +
					"#define MAX_CHANNELS " + PCM.ChannelCount + "\n" +
#if DEBUG
					"#define DEBUG\n" +
#endif
					(_settings.DeviceType == OpenCLDeviceType.CPU ? "#define FLACCL_CPU\n" : "") +
					_settings.Defines + "\n";

				// Enable, and advertise via HAVE_<name>, every optional extension the device reports.
				var exts = new string[] { "cl_khr_local_int32_base_atomics", "cl_khr_local_int32_extended_atomics", "cl_khr_fp64", "cl_amd_fp64" };
				foreach (string extension in exts)
					if (OCLMan.Context.Devices[0].Extensions.Contains(extension))
					{
						OCLMan.Defines += "#pragma OPENCL EXTENSION " + extension + ": enable\n";
						OCLMan.Defines += "#define HAVE_" + extension + "\n";
					}

				try
				{
					openCLProgram = OCLMan.CompileFile("flac.cl");
				}
				catch (OpenCLBuildException ex)
				{
					// The log is not used further here; presumably it is kept in a local
					// so it can be inspected in a debugger.
					string buildLog = ex.BuildLogs[0];
					// Plain rethrow: "throw ex;" would discard the original stack trace.
					throw;
				}
#if TTTTKJHSKJH
				var openCLPlatform = OpenCL.GetPlatform(0);
				openCLContext = openCLPlatform.CreateDefaultContext();
				using (Stream kernel = GetType().Assembly.GetManifestResourceStream(GetType(), "flac.cl"))
				using (StreamReader sr = new StreamReader(kernel))
					openCLProgram = openCLContext.CreateProgramWithSource(sr.ReadToEnd());
				try
				{
					openCLProgram.Build();
				}
				catch (OpenCLException)
				{
					string buildLog = openCLProgram.GetBuildLog(openCLProgram.Devices[0]);
					throw;
				}
#endif

				// Open the output file only when the caller did not supply a stream.
				if (_IO == null)
					_IO = new FileStream(_path, FileMode.Create, FileAccess.Write, FileShare.Read);
				int header_size = flake_encode_init();
				_IO.Write(header, 0, header_size);
				_totalSize += header_size;
				// Remembered so seek-table offsets can be made relative to the first frame.
				if (_IO.CanSeek)
					first_frame_offset = _IO.Position;

				task1 = new FLACCLTask(openCLProgram, channelCount, channels, bits_per_sample, max_frame_size, this, groupSize, UseGPUOnly, UseGPURice);
				task2 = new FLACCLTask(openCLProgram, channelCount, channels, bits_per_sample, max_frame_size, this, groupSize, UseGPUOnly, UseGPURice);
				if (_settings.CPUThreads > 0)
				{
					cpu_tasks = new FLACCLTask[_settings.CPUThreads];
					for (int i = 0; i < cpu_tasks.Length; i++)
						cpu_tasks[i] = new FLACCLTask(openCLProgram, channelCount, channels, bits_per_sample, max_frame_size, this, groupSize, UseGPUOnly, UseGPURice);
				}
				inited = true;
			}
		}
예제 #6
0
		/// <summary>
		/// Fills <paramref name="task"/> with the list of candidate subframe tasks
		/// (LPC per window function and order, optional constant, and fixed orders)
		/// for every channel of every frame, builds the selected-task index table,
		/// and, unless mapped memory is in use, queues the uploads of the window
		/// functions, the task list and the index table.
		/// </summary>
		/// <param name="blocksize">Frame size in samples; stored as <c>task.frameSize</c>.</param>
		/// <param name="channelsCount">Number of channels to create tasks for.</param>
		/// <param name="nFrames">Number of frames in the batch.</param>
		/// <param name="task">Task whose buffers and counters are initialised.</param>
		/// <exception cref="Exception">No window function was selected, more than 32 tasks per channel are needed, or the task list exceeds <c>task.residualTasksLen</c>.</exception>
		unsafe void initializeSubframeTasks(int blocksize, int channelsCount, int nFrames, FLACCLTask task)
		{
			// Per-channel stride: the block size rounded up to a multiple of 4, times the frame count.
			task.channelSize = ((blocksize + 3) & ~3) * nFrames;
			task.frameSize = blocksize;
			task.nWindowFunctions = 0;
			// Windows are only prepared for frames longer than 4 samples.
			if (task.frameSize > 4)
			{
				// NOTE(review): each call presumably adds its window only when that
				// window function is enabled, bumping task.nWindowFunctions - confirm.
				calculate_window(task, lpc.window_welch, WindowFunction.Welch);
				calculate_window(task, lpc.window_flattop, WindowFunction.Flattop);
				calculate_window(task, lpc.window_tukey, WindowFunction.Tukey);
				calculate_window(task, lpc.window_hann, WindowFunction.Hann);
				calculate_window(task, lpc.window_bartlett, WindowFunction.Bartlett);
				if (task.nWindowFunctions == 0)
					throw new Exception("invalid windowfunction");
                if (!task.UseMappedMemory)
				    task.openCLCQ.EnqueueWriteBuffer(task.clWindowFunctions, false, 0, sizeof(float) * task.nWindowFunctions * task.frameSize, task.clWindowFunctionsPtr);
			}

			task.nResidualTasks = 0;
			task.nTasksPerWindow = Math.Min(32, eparams.orders_per_window);
			// Candidates per channel: LPC (windows x orders) + optional constant + one per fixed order.
			task.nResidualTasksPerChannel = task.nWindowFunctions * task.nTasksPerWindow + (eparams.do_constant ? 1 : 0) + Math.Max(0, 1 + eparams.max_fixed_order - eparams.min_fixed_order);
            if (task.nResidualTasksPerChannel > 32)
                throw new Exception("too many tasks");
			// With 2 input channels expanded to 4, the number of estimated tasks is capped by orders_per_channel.
			if (channels == 2 && channelsCount == 4)
				task.nEstimateTasksPerChannel = Math.Min(eparams.orders_per_channel, task.nResidualTasksPerChannel);
			else
				task.nEstimateTasksPerChannel = task.nResidualTasksPerChannel;

			//if (task.nResidualTasksPerChannel >= 4)
			//    task.nResidualTasksPerChannel = (task.nResidualTasksPerChannel + 7) & ~7;
			for (int iFrame = 0; iFrame < nFrames; iFrame++)
			{
				for (int ch = 0; ch < channelsCount; ch++)
				{
					int *selectedTasks = (int*)task.clSelectedTasksPtr;
					for (int j = 0; j < task.nEstimateTasksPerChannel; j++)
					{
						int k = j;
						// With several windows, consecutive j values cycle through the
						// windows first and only then move on to the next order.
						if (j < task.nWindowFunctions * task.nTasksPerWindow && task.nWindowFunctions > 1)
						{
							k = (j % task.nWindowFunctions) * task.nTasksPerWindow
								+ (j / task.nWindowFunctions);
						}
						selectedTasks[(iFrame * channelsCount + ch) * task.nEstimateTasksPerChannel + j] =
							(iFrame * channelsCount + ch) * task.nResidualTasksPerChannel + k;
					}

					for (int iWindow = 0; iWindow < task.nWindowFunctions; iWindow++)
					{
						// LPC tasks
						for (int order = 0; order < task.nTasksPerWindow; order++)
						{
							task.ResidualTasks[task.nResidualTasks].type = (int)SubframeType.LPC;
							task.ResidualTasks[task.nResidualTasks].channel = ch;
							// Channel 3 of an expanded stereo pair gets one extra bit per sample.
							task.ResidualTasks[task.nResidualTasks].obits = (int)bits_per_sample + (channels == 2 && ch == 3 ? 1 : 0);
							task.ResidualTasks[task.nResidualTasks].abits = task.ResidualTasks[task.nResidualTasks].obits;
							task.ResidualTasks[task.nResidualTasks].blocksize = blocksize;
							task.ResidualTasks[task.nResidualTasks].residualOrder = order + 1;
							task.ResidualTasks[task.nResidualTasks].samplesOffs = ch * task.channelSize + iFrame * blocksize;
							task.ResidualTasks[task.nResidualTasks].residualOffs = task.ResidualTasks[task.nResidualTasks].samplesOffs;
							task.ResidualTasks[task.nResidualTasks].wbits = 0;
							task.ResidualTasks[task.nResidualTasks].coding_method = PCM.BitsPerSample > 16 ? 1 : 0;
							// Initial size: every sample stored at full width.
							task.ResidualTasks[task.nResidualTasks].size = task.ResidualTasks[task.nResidualTasks].obits * blocksize;
							task.nResidualTasks++;
						}
					}
					// Constant frames
					if (eparams.do_constant)
					{
						task.ResidualTasks[task.nResidualTasks].type = (int)SubframeType.Constant;
						task.ResidualTasks[task.nResidualTasks].channel = ch;
						task.ResidualTasks[task.nResidualTasks].obits = (int)bits_per_sample + (channels == 2 && ch == 3 ? 1 : 0);
						task.ResidualTasks[task.nResidualTasks].abits = task.ResidualTasks[task.nResidualTasks].obits;
						task.ResidualTasks[task.nResidualTasks].blocksize = blocksize;
						task.ResidualTasks[task.nResidualTasks].samplesOffs = ch * task.channelSize + iFrame * blocksize;
						task.ResidualTasks[task.nResidualTasks].residualOffs = task.ResidualTasks[task.nResidualTasks].samplesOffs;
						task.ResidualTasks[task.nResidualTasks].wbits = 0;
						task.ResidualTasks[task.nResidualTasks].coding_method = PCM.BitsPerSample > 16 ? 1 : 0;
						task.ResidualTasks[task.nResidualTasks].size = task.ResidualTasks[task.nResidualTasks].obits * blocksize;
						// Set up as an order-1 predictor with a single coefficient of 1 and no shift.
						task.ResidualTasks[task.nResidualTasks].residualOrder = 1;
						task.ResidualTasks[task.nResidualTasks].shift = 0;
						task.ResidualTasks[task.nResidualTasks].coefs[0] = 1;
						task.nResidualTasks++;
					}
					// Fixed prediction
					for (int order = eparams.min_fixed_order; order <= eparams.max_fixed_order; order++)
					{
						task.ResidualTasks[task.nResidualTasks].type = (int)SubframeType.Fixed;
						task.ResidualTasks[task.nResidualTasks].channel = ch;
						task.ResidualTasks[task.nResidualTasks].obits = (int)bits_per_sample + (channels == 2 && ch == 3 ? 1 : 0);
						task.ResidualTasks[task.nResidualTasks].abits = task.ResidualTasks[task.nResidualTasks].obits;
						task.ResidualTasks[task.nResidualTasks].blocksize = blocksize;
						task.ResidualTasks[task.nResidualTasks].residualOrder = order;
						task.ResidualTasks[task.nResidualTasks].samplesOffs = ch * task.channelSize + iFrame * blocksize;
						task.ResidualTasks[task.nResidualTasks].residualOffs = task.ResidualTasks[task.nResidualTasks].samplesOffs;
						task.ResidualTasks[task.nResidualTasks].wbits = 0;
						task.ResidualTasks[task.nResidualTasks].coding_method = PCM.BitsPerSample > 16 ? 1 : 0;
						task.ResidualTasks[task.nResidualTasks].size = task.ResidualTasks[task.nResidualTasks].obits * blocksize;
						task.ResidualTasks[task.nResidualTasks].shift = 0;
						// Integer coefficients for each fixed order; orders above 4 and order 0 set none.
						switch (order)
						{
							case 0:
								break;
							case 1:
								task.ResidualTasks[task.nResidualTasks].coefs[0] = 1;
								break;
							case 2:
								task.ResidualTasks[task.nResidualTasks].coefs[1] = 2;
								task.ResidualTasks[task.nResidualTasks].coefs[0] = -1;
								break;
							case 3:
								task.ResidualTasks[task.nResidualTasks].coefs[2] = 3;
								task.ResidualTasks[task.nResidualTasks].coefs[1] = -3;
								task.ResidualTasks[task.nResidualTasks].coefs[0] = 1;
								break;
							case 4:
								task.ResidualTasks[task.nResidualTasks].coefs[3] = 4;
								task.ResidualTasks[task.nResidualTasks].coefs[2] = -6;
								task.ResidualTasks[task.nResidualTasks].coefs[1] = 4;
								task.ResidualTasks[task.nResidualTasks].coefs[0] = -1;
								break;
						}
						task.nResidualTasks++;
					}
					//// Filler
					//while ((task.nResidualTasks % task.nResidualTasksPerChannel) != 0)
					//{
					//    task.ResidualTasks[task.nResidualTasks].type = (int)SubframeType.Verbatim;
					//    task.ResidualTasks[task.nResidualTasks].channel = ch;
					//    task.ResidualTasks[task.nResidualTasks].obits = (int)bits_per_sample + (channels == 2 && ch == 3 ? 1 : 0);
					//    task.ResidualTasks[task.nResidualTasks].abits = task.ResidualTasks[task.nResidualTasks].obits;
					//    task.ResidualTasks[task.nResidualTasks].blocksize = blocksize;
					//    task.ResidualTasks[task.nResidualTasks].residualOrder = 0;
					//    task.ResidualTasks[task.nResidualTasks].samplesOffs = ch * task.channelSize + iFrame * blocksize;
					//    task.ResidualTasks[task.nResidualTasks].residualOffs = task.ResidualTasks[task.nResidualTasks].samplesOffs;
					//    task.ResidualTasks[task.nResidualTasks].shift = 0;
					//    task.nResidualTasks++;
					//}
				}
			}
			// NOTE(review): this capacity check runs only after every entry has been
			// written, so an undersized buffer would already have been overrun - confirm
			// that residualTasksLen is always sized for the worst case.
			if (sizeof(FLACCLSubframeTask) * task.nResidualTasks > task.residualTasksLen)
				throw new Exception("oops");

			// Without mapped memory the host-side tables have to be copied to the device buffers.
			if (!task.UseMappedMemory)
			{
				task.openCLCQ.EnqueueWriteBuffer(task.clResidualTasks, false, 0, sizeof(FLACCLSubframeTask) * task.nResidualTasks, task.clResidualTasksPtr);
				task.openCLCQ.EnqueueWriteBuffer(task.clSelectedTasks, false, 0, sizeof(int) * (nFrames * channelsCount * task.nEstimateTasksPerChannel), task.clSelectedTasksPtr);
			}
		}
예제 #7
0
		/// <summary>
		/// Encodes every frame of <paramref name="task"/> into <c>task.frame.writer</c>.
		/// When a verifier is attached, each frame is decoded again and compared sample
		/// by sample with the interleaved source bytes. Seek points that fall inside a
		/// frame and are still unassigned are filled in with that frame's first sample,
		/// its byte offset within this task's output, and its frame size. On return
		/// <c>task.outputSize</c> holds the total number of encoded bytes.
		/// </summary>
		/// <param name="task">Task whose GPU results are turned into encoded frames.</param>
		/// <exception cref="Exception">Verification failed, the sample width is neither 16 nor 24 bits, or the writer length disagrees with the summed frame sizes.</exception>
		unsafe void process_result(FLACCLTask task)
		{
			bool doMidside = channels == 2 && eparams.do_midside;
			int channelCount = doMidside ? 2 * channels : channels;

			long iSample = 0;
			long iByte = 0;
			task.frame.writer.Reset();
			task.frame.writer_offset = 0;
			for (int iFrame = 0; iFrame < task.frameCount; iFrame++)
			{
				// Frame number written to the stream: a sample position for variable
				// block size, otherwise a running frame index.
				int fn = task.frameNumber + (eparams.variable_block_size > 0 ? (int)iSample : iFrame);
				int fs = encode_frame(doMidside, channelCount, iFrame, task, fn);

				if (task.verify != null)
				{
					int decoded = task.verify.DecodeFrame(task.frame.writer.Buffer, task.frame.writer_offset, fs);
					if (decoded != fs || task.verify.Remaining != task.frameSize)
						throw new Exception(string.Format("validation failed! frame size mismatch, iFrame={0}, decoded=={1}, fs=={2}", fn, decoded, fs));
					fixed (int* r = task.verify.Samples)
					{
						for (int ch = 0; ch < channels; ch++)
						{
							// First source byte of this channel's first sample in this frame.
							byte* res = ((byte*)task.clSamplesBytesPtr) + PCM.BlockAlign * iFrame * task.frameSize + ch * (PCM.BlockAlign / channels);
							// Decoded samples are laid out with a stride of Flake.MAX_BLOCKSIZE per channel.
							int* smp = r + ch * Flake.MAX_BLOCKSIZE;
							int ba = PCM.BlockAlign;
							if (PCM.BitsPerSample == 16)
							{
								for (int i = task.frameSize; i > 0; i--)
								{
									int ress = *(short*)res;
									if (ress != *(smp++))
										throw new Exception(string.Format("validation failed! iFrame={0}, ch={1}", fn, ch));
									res += ba;
								}
							}
							else if (PCM.BitsPerSample == 24)
							{
								for (int i = task.frameSize; i > 0; i--)
								{
									// Assemble the three bytes in the top of an int, then shift down to sign-extend.
									int ress = (((int)res[0] << 8) + ((int)res[1] << 16) + ((int)res[2] << 24)) >> (8);
									if (ress != *(smp++))
										// Reports fn, like the 16-bit path and the size-mismatch message above.
										throw new Exception(string.Format("validation failed! iFrame={0}, ch={1}", fn, ch));
									res += ba;
								}
							}
							else
								throw new Exception("Invalid BPS");
						}
					}
				}

				if (seek_table != null && _IO.CanSeek)
				{
					for (int sp = 0; sp < seek_table.Length; sp++)
					{
						// A non-zero framesize marks a seek point that was already assigned.
						if (seek_table[sp].framesize != 0)
							continue;
						if (seek_table[sp].number >= task.framePos + iSample + task.frameSize)
							break;
						if (seek_table[sp].number >= task.framePos + iSample)
						{
							// Snap the seek point to the start of this frame.
							seek_table[sp].number = task.framePos + iSample;
							seek_table[sp].offset = iByte;
							seek_table[sp].framesize = task.frameSize;
						}
					}
				}

				iSample += task.frameSize;
				iByte += fs;
			}
			task.outputSize = (int)iByte;
			if (iByte != task.frame.writer.Length)
				throw new Exception("invalid length");
		}
예제 #8
0
		/// <summary>
		/// Encodes frame <paramref name="iFrame"/> of the task's batch and appends it
		/// to <c>task.frame.writer</c>.
		/// </summary>
		/// <param name="doMidside">Whether channel 3 carries one extra bit per sample.</param>
		/// <param name="channelCount">Number of subframes to initialise.</param>
		/// <param name="iFrame">Index of the frame within the batch.</param>
		/// <param name="task">Task holding the sample and residual buffers and the frame being built.</param>
		/// <param name="current_frame_number">Frame number stored in the frame before its header is written.</param>
		/// <returns>Number of bytes the frame added to the writer.</returns>
		/// <exception cref="Exception">The encoded frame is <c>max_frame_size</c> bytes or larger.</exception>
		unsafe int encode_frame(bool doMidside, int channelCount, int iFrame, FLACCLTask task, int current_frame_number)
		{
			FlacFrame frame = task.frame;

			frame.InitSize(task.frameSize, eparams.variable_block_size != 0);
			// Temporarily the batch index: unpack_samples reads frame_number to locate the frame's source bytes.
			frame.frame_number = iFrame;
			frame.ch_mode = ChannelMode.NotStereo;

			fixed (int* samples = task.samplesBuffer)
			{
				int* residual = (int*)task.clResidualPtr;
				for (int ch = 0; ch < channelCount; ch++)
				{
					// Same element offset into both the sample and the residual buffer.
					int offset = ch * task.channelSize + iFrame * task.frameSize;
					int bits = _pcm.BitsPerSample + (doMidside && ch == 3 ? 1 : 0);
					frame.subframes[ch].Init(samples + offset, residual + offset, bits, 0);
				}

				encode_residual(task, channelCount, iFrame);

				// From here on frame_number is the value that goes into the frame header.
				frame.frame_number = current_frame_number;
				frame.writer_offset = frame.writer.Length;

				output_frame_header(frame);
				output_subframes(task, iFrame);
				output_frame_footer(frame);

				int written = frame.writer.Length - frame.writer_offset;
				if (written >= max_frame_size)
					throw new Exception("buffer overflow");

				return written;
			}
		}
예제 #9
0
		/// <summary>
		/// Stamps <paramref name="task"/> with the frame count, frame size, frame number
		/// and sample position of the next batch, advances the encoder's running frame
		/// and sample counters, and - unless mapped memory is in use - queues a
		/// non-blocking upload of the batch's interleaved sample bytes.
		/// </summary>
		/// <param name="task">Task that receives the batch.</param>
		/// <param name="nFrames">Number of frames in the batch.</param>
		/// <param name="blocksize">Samples per frame.</param>
		unsafe void send_to_GPU(FLACCLTask task, int nFrames, int blocksize)
		{
			// A different block size resets the residual task count; presumably the
			// task list is then rebuilt for the new size before the kernels run.
			if (blocksize != task.frameSize)
				task.nResidualTasks = 0;
			task.frameCount = nFrames;
			task.frameSize = blocksize;
			// Variable block size numbers frames by sample position, fixed block size by frame index.
			task.frameNumber = eparams.variable_block_size > 0 ? frame_pos : frame_count;
			task.framePos = frame_pos;
			frame_count += nFrames;
			frame_pos += nFrames * blocksize;
			if (!task.UseMappedMemory)
				task.openCLCQ.EnqueueWriteBuffer(task.clSamplesBytes, false, 0, PCM.BlockAlign * blocksize * nFrames, task.clSamplesBytesPtr);
		}
예제 #10
0
		/// <summary>
		/// De-interleaves packed 24-bit samples (least significant byte first) into the
		/// per-subframe sample buffers, sign-extending each sample, applying the stereo
		/// transform selected by <c>task.frame.ch_mode</c> and shifting out each
		/// subframe's <c>wbits</c>. An unrecognised channel mode leaves the buffers untouched.
		/// </summary>
		/// <param name="task">Task whose frame supplies the channel mode, wbits and destination buffers.</param>
		/// <param name="srcptr">First byte of the interleaved source samples.</param>
		/// <param name="count">Number of samples to unpack per channel.</param>
		unsafe void unpack_samples_24(FLACCLTask task, byte* srcptr, int count)
		{
			switch (task.frame.ch_mode)
			{
				case ChannelMode.NotStereo:
					for (int ch = 0; ch < channels; ch++)
					{
						int* dst = task.frame.subframes[ch].samples;
						int wasted = (int)task.frame.subframes[ch].wbits;
						// Each channel starts 3 bytes after the previous one and repeats every BlockAlign bytes.
						byte* p = srcptr + ch * 3;
						for (int i = 0; i < count; i++, p += PCM.BlockAlign)
							dst[i] = ((p[0] << 8) + (p[1] << 16) + (p[2] << 24)) >> (8 + wasted);
					}
					break;
				case ChannelMode.LeftRight:
					{
						int* ch0 = task.frame.subframes[0].samples;
						int* ch1 = task.frame.subframes[1].samples;
						int shift0 = (int)task.frame.subframes[0].wbits;
						int shift1 = (int)task.frame.subframes[1].wbits;
						for (int i = 0; i < count; i++, srcptr += 6)
						{
							// Bytes go into the top three bytes of an int; the shift by 8 sign-extends.
							int l = ((srcptr[0] << 8) + (srcptr[1] << 16) + (srcptr[2] << 24)) >> 8;
							int r = ((srcptr[3] << 8) + (srcptr[4] << 16) + (srcptr[5] << 24)) >> 8;
							ch0[i] = l >> shift0;
							ch1[i] = r >> shift1;
						}
						break;
					}
				case ChannelMode.LeftSide:
					{
						int* ch0 = task.frame.subframes[0].samples;
						int* ch1 = task.frame.subframes[1].samples;
						int shift0 = (int)task.frame.subframes[0].wbits;
						int shift1 = (int)task.frame.subframes[1].wbits;
						for (int i = 0; i < count; i++, srcptr += 6)
						{
							int l = ((srcptr[0] << 8) + (srcptr[1] << 16) + (srcptr[2] << 24)) >> 8;
							int r = ((srcptr[3] << 8) + (srcptr[4] << 16) + (srcptr[5] << 24)) >> 8;
							ch0[i] = l >> shift0;
							ch1[i] = (l - r) >> shift1;
						}
						break;
					}
				case ChannelMode.RightSide:
					{
						int* ch0 = task.frame.subframes[0].samples;
						int* ch1 = task.frame.subframes[1].samples;
						int shift0 = (int)task.frame.subframes[0].wbits;
						int shift1 = (int)task.frame.subframes[1].wbits;
						for (int i = 0; i < count; i++, srcptr += 6)
						{
							int l = ((srcptr[0] << 8) + (srcptr[1] << 16) + (srcptr[2] << 24)) >> 8;
							int r = ((srcptr[3] << 8) + (srcptr[4] << 16) + (srcptr[5] << 24)) >> 8;
							ch0[i] = (l - r) >> shift0;
							ch1[i] = r >> shift1;
						}
						break;
					}
				case ChannelMode.MidSide:
					{
						int* ch0 = task.frame.subframes[0].samples;
						int* ch1 = task.frame.subframes[1].samples;
						int shift0 = (int)task.frame.subframes[0].wbits;
						int shift1 = (int)task.frame.subframes[1].wbits;
						for (int i = 0; i < count; i++, srcptr += 6)
						{
							int l = ((srcptr[0] << 8) + (srcptr[1] << 16) + (srcptr[2] << 24)) >> 8;
							int r = ((srcptr[3] << 8) + (srcptr[4] << 16) + (srcptr[5] << 24)) >> 8;
							// The extra shift by 1 halves the sum.
							ch0[i] = (l + r) >> (1 + shift0);
							ch1[i] = (l - r) >> shift1;
						}
						break;
					}
			}
		}
예제 #11
0
		/// <summary>
		/// Locates the source bytes of the frame currently being encoded
		/// (<c>task.frame.frame_number</c> is used as the frame index within the batch)
		/// and dispatches to the 16-bit or 24-bit unpacker.
		/// </summary>
		/// <param name="task">Task holding the interleaved sample bytes and the frame being built.</param>
		/// <param name="count">Number of samples to unpack per channel.</param>
		/// <exception cref="Exception">The sample width is neither 16 nor 24 bits.</exception>
		unsafe void unpack_samples(FLACCLTask task, int count)
		{
			int frameIndex = task.frame.frame_number;
			byte* source = ((byte*)task.clSamplesBytesPtr) + frameIndex * task.frameSize * PCM.BlockAlign;

			switch (PCM.BitsPerSample)
			{
				case 16:
					unpack_samples_16(task, source, count);
					break;
				case 24:
					unpack_samples_24(task, source, count);
					break;
				default:
					throw new Exception("Invalid BPS");
			}
		}
예제 #12
0
		/// <summary>
		/// De-interleaves 16-bit samples into the per-subframe sample buffers, applying
		/// the stereo transform selected by <c>task.frame.ch_mode</c> and shifting out
		/// each subframe's <c>wbits</c>. An unrecognised channel mode leaves the buffers untouched.
		/// </summary>
		/// <param name="task">Task whose frame supplies the channel mode, wbits and destination buffers.</param>
		/// <param name="srcptr">First byte of the interleaved source samples.</param>
		/// <param name="count">Number of samples to unpack per channel.</param>
		unsafe void unpack_samples_16(FLACCLTask task, byte* srcptr, int count)
		{
			short* pcm = (short*)srcptr;

			switch (task.frame.ch_mode)
			{
				case ChannelMode.NotStereo:
					for (int ch = 0; ch < channels; ch++)
					{
						int* dst = task.frame.subframes[ch].samples;
						int wasted = (int)task.frame.subframes[ch].wbits;
						// Walk this channel's samples, which sit 'channels' shorts apart.
						short* p = pcm + ch;
						for (int i = 0; i < count; i++, p += channels)
							dst[i] = *p >> wasted;
					}
					break;
				case ChannelMode.LeftRight:
					{
						int* ch0 = task.frame.subframes[0].samples;
						int* ch1 = task.frame.subframes[1].samples;
						int shift0 = (int)task.frame.subframes[0].wbits;
						int shift1 = (int)task.frame.subframes[1].wbits;
						for (int i = 0; i < count; i++)
						{
							int l = pcm[2 * i];
							int r = pcm[2 * i + 1];
							ch0[i] = l >> shift0;
							ch1[i] = r >> shift1;
						}
						break;
					}
				case ChannelMode.LeftSide:
					{
						int* ch0 = task.frame.subframes[0].samples;
						int* ch1 = task.frame.subframes[1].samples;
						int shift0 = (int)task.frame.subframes[0].wbits;
						int shift1 = (int)task.frame.subframes[1].wbits;
						for (int i = 0; i < count; i++)
						{
							int l = pcm[2 * i];
							int r = pcm[2 * i + 1];
							ch0[i] = l >> shift0;
							ch1[i] = (l - r) >> shift1;
						}
						break;
					}
				case ChannelMode.RightSide:
					{
						int* ch0 = task.frame.subframes[0].samples;
						int* ch1 = task.frame.subframes[1].samples;
						int shift0 = (int)task.frame.subframes[0].wbits;
						int shift1 = (int)task.frame.subframes[1].wbits;
						for (int i = 0; i < count; i++)
						{
							int l = pcm[2 * i];
							int r = pcm[2 * i + 1];
							ch0[i] = (l - r) >> shift0;
							ch1[i] = r >> shift1;
						}
						break;
					}
				case ChannelMode.MidSide:
					{
						int* ch0 = task.frame.subframes[0].samples;
						int* ch1 = task.frame.subframes[1].samples;
						int shift0 = (int)task.frame.subframes[0].wbits;
						int shift1 = (int)task.frame.subframes[1].wbits;
						for (int i = 0; i < count; i++)
						{
							int l = pcm[2 * i];
							int r = pcm[2 * i + 1];
							// The extra shift by 1 halves the sum.
							ch0[i] = (l + r) >> (1 + shift0);
							ch1[i] = (l - r) >> shift1;
						}
						break;
					}
			}
		}
예제 #13
0
		/// <summary>
		/// Queues the task's kernels, but only for frames longer than 4 samples.
		/// </summary>
		/// <param name="task">Task whose kernels are enqueued.</param>
		/// <param name="channelsCount">Not used by this method.</param>
		unsafe void estimate_residual(FLACCLTask task, int channelsCount)
		{
			if (task.frameSize <= 4)
				return;

			task.EnqueueKernels();
		}
예제 #14
0
		unsafe void encode_residual(FLACCLTask task, int channelsCount, int iFrame)
		{
			FlacFrame frame = task.frame;

			if (channelsCount == 4 && channels == 2 && frame.blocksize > 4)
			{
				if (task.BestResidualTasks[iFrame * 2].channel == 0 && task.BestResidualTasks[iFrame * 2 + 1].channel == 1)
					frame.ch_mode = ChannelMode.LeftRight;
				else if (task.BestResidualTasks[iFrame * 2].channel == 0 && task.BestResidualTasks[iFrame * 2 + 1].channel == 3)
					frame.ch_mode = ChannelMode.LeftSide;
				else if (task.BestResidualTasks[iFrame * 2].channel == 3 && task.BestResidualTasks[iFrame * 2 + 1].channel == 1)
					frame.ch_mode = ChannelMode.RightSide;
				else if (task.BestResidualTasks[iFrame * 2].channel == 2 && task.BestResidualTasks[iFrame * 2 + 1].channel == 3)
					frame.ch_mode = ChannelMode.MidSide;
				else
					throw new Exception("internal error: invalid stereo mode");
				frame.SwapSubframes(0, task.BestResidualTasks[iFrame * 2].channel);
				frame.SwapSubframes(1, task.BestResidualTasks[iFrame * 2 + 1].channel);
			}
			else
				frame.ch_mode = channels != 2 ? ChannelMode.NotStereo : ChannelMode.LeftRight;

			int toUnpack = Math.Min(task.frameSize, eparams.max_prediction_order);
			// calculate wbits before unpacking samples.
			for (int ch = 0; ch < channels; ch++)
			{
				int index = ch + iFrame * channels;
				frame.subframes[ch].best.residual = ((int*)task.clResidualPtr) + task.BestResidualTasks[index].residualOffs;
				frame.subframes[ch].best.type = SubframeType.Verbatim;
				frame.subframes[ch].best.size = (uint)(frame.subframes[ch].obits * frame.blocksize);
				frame.subframes[ch].wbits = 0;
				if (frame.blocksize > Math.Max(4, eparams.max_prediction_order))
				{
					if (task.BestResidualTasks[index].size < 0)
						throw new Exception("internal error");

					if (frame.subframes[ch].best.size > task.BestResidualTasks[index].size &&
						(SubframeType)task.BestResidualTasks[index].type != SubframeType.Verbatim)
					{
						frame.subframes[ch].best.type = (SubframeType)task.BestResidualTasks[index].type;
						frame.subframes[ch].best.size = (uint)task.BestResidualTasks[index].size;
						frame.subframes[ch].best.order = task.BestResidualTasks[index].residualOrder;
						frame.subframes[ch].best.cbits = task.BestResidualTasks[index].cbits;
						frame.subframes[ch].best.shift = task.BestResidualTasks[index].shift;
						frame.subframes[ch].obits -= task.BestResidualTasks[index].wbits;
						frame.subframes[ch].wbits = task.BestResidualTasks[index].wbits;
						for (int i = 0; i < task.BestResidualTasks[index].residualOrder; i++)
							frame.subframes[ch].best.coefs[i] = task.BestResidualTasks[index].coefs[task.BestResidualTasks[index].residualOrder - 1 - i];
						frame.subframes[ch].best.rc.porder = task.BestResidualTasks[index].porder;
						frame.subframes[ch].best.rc.coding_method = task.BestResidualTasks[index].coding_method;
						if (task.UseGPUOnly && !task.UseGPURice)
						{
							if (frame.subframes[ch].best.type == SubframeType.Fixed || frame.subframes[ch].best.type == SubframeType.LPC)
							{
								int* riceParams = ((int*)task.clBestRiceParamsPtr) + (index << task.max_porder);
								fixed (int* dstParams = frame.subframes[ch].best.rc.rparams)
									AudioSamples.MemCpy(dstParams, riceParams, (1 << frame.subframes[ch].best.rc.porder));
							}
							uint real_size = measure_subframe(frame, frame.subframes[ch]);
							if (real_size != task.frame.subframes[ch].best.size)
								throw new Exception("size reported incorrectly");
						}
					}
					else
					{
						if (task.UseGPURice && frame.subframes[ch].best.size != task.BestResidualTasks[index].size)
							throw new Exception("size reported incorrectly");
					}
				}
				if (task.frame.subframes[ch].best.type == SubframeType.Verbatim)
					toUnpack = task.frameSize;
				if (task.frame.subframes[ch].best.type == SubframeType.LPC && !task.UseGPUOnly)
					toUnpack = task.frameSize;
				if (task.frame.subframes[ch].best.type == SubframeType.Fixed && !task.UseGPUOnly)
					toUnpack = task.frameSize;
			}
			unpack_samples(task, toUnpack);

			for (int ch = 0; ch < channels; ch++)
			{
				int index = ch + iFrame * channels;
				switch (task.frame.subframes[ch].best.type)
				{
					case SubframeType.Constant:
						break;
					case SubframeType.Verbatim:
						break;
					case SubframeType.Fixed:
						if (!task.UseGPUOnly)
						{
							encode_residual_fixed(task.frame.subframes[ch].best.residual, task.frame.subframes[ch].samples,
								task.frame.blocksize, task.frame.subframes[ch].best.order);

							int pmin = get_max_p_order(eparams.min_partition_order, task.frame.blocksize, task.frame.subframes[ch].best.order);
							int pmax = get_max_p_order(eparams.max_partition_order, task.frame.blocksize, task.frame.subframes[ch].best.order);
							calc_rice_params(task.frame.subframes[ch].best.rc, pmin, pmax, task.frame.subframes[ch].best.residual, (uint)task.frame.blocksize, (uint)task.frame.subframes[ch].best.order, PCM.BitsPerSample > 16 ? 1 : 0);
						}
						break;
					case SubframeType.LPC:
						if (!task.UseGPUOnly)
						{
							fixed (int* coefs = task.frame.subframes[ch].best.coefs)
							{
								if (PCM.BitsPerSample > 16)
									lpc.encode_residual_long(task.frame.subframes[ch].best.residual, task.frame.subframes[ch].samples, task.frame.blocksize, task.frame.subframes[ch].best.order, coefs, task.frame.subframes[ch].best.shift);
								else
									lpc.encode_residual(task.frame.subframes[ch].best.residual, task.frame.subframes[ch].samples, task.frame.blocksize, task.frame.subframes[ch].best.order, coefs, task.frame.subframes[ch].best.shift);
							}

							int pmin = get_max_p_order(eparams.min_partition_order, task.frame.blocksize, task.frame.subframes[ch].best.order);
							int pmax = get_max_p_order(eparams.max_partition_order, task.frame.blocksize, task.frame.subframes[ch].best.order);
							calc_rice_params(task.frame.subframes[ch].best.rc, pmin, pmax, task.frame.subframes[ch].best.residual, (uint)task.frame.blocksize, (uint)task.frame.subframes[ch].best.order, PCM.BitsPerSample > 16 ? 1 : 0);
						}
						break;
				}
				if (!task.UseGPUOnly)
				{
					task.frame.subframes[ch].best.size = measure_subframe(task.frame, task.frame.subframes[ch]);
					if (task.frame.subframes[ch].best.size > task.frame.subframes[ch].obits * task.frame.blocksize)
					{
						task.frame.subframes[ch].best.type = SubframeType.Verbatim;
						task.frame.subframes[ch].best.size = (uint)(task.frame.subframes[ch].obits * task.frame.blocksize);
					}
				}
			}
		}
예제 #15
0
		output_subframe_lpc(FLACCLTask task, FlacSubframeInfo sub, int index)
		{
			// Writes the body of an LPC subframe to the task's frame writer:
			// warm-up samples, predictor header, coefficients, then the residual.
			FlacFrame target = task.frame;
			int order = sub.best.order;

			// One warm-up sample per predictor tap, each stored with sub.obits bits.
			for (int n = 0; n < order; n++)
			{
				target.writer.writebits_signed(sub.obits, sub.samples[n]);
			}

			// Predictor header: coefficient precision minus one (4 bits), then the shift (5 bits, signed).
			target.writer.writebits(4, sub.best.cbits - 1);
			target.writer.writebits_signed(5, sub.best.shift);

			// The coefficients themselves, sub.best.cbits bits each.
			for (int n = 0; n < order; n++)
			{
				target.writer.writebits_signed(sub.best.cbits, sub.best.coefs[n]);
			}

			// Bits emitted above: per tap one sample plus one coefficient, plus the 4 + 5 header bits.
			int headerBits = (sub.obits + sub.best.cbits) * order + 9;
			output_residual(task, sub, headerBits, index);
		}
예제 #16
0
		unsafe void run_GPU_task(FLACCLTask task)
		{
			// The channel count handed to the estimator is doubled when the input
			// has exactly two channels and mid/side is enabled in the encoder parameters.
			int channelsCount = channels;
			if (channels == 2 && eparams.do_midside)
			{
				channelsCount = 2 * channels;
			}

			// Subframe tasks are initialized only while the task reports none.
			if (task.nResidualTasks == 0)
			{
				initializeSubframeTasks(task.frameSize, channelsCount, framesPerTask, task);
			}

			estimate_residual(task, channelsCount);
		}
예제 #17
0
		unsafe void output_subframes(FLACCLTask task, int iFrame)
		{
			// Writes every channel's subframe (header followed by body) for frame
			// number iFrame of the task into the task's frame writer.
			FlacFrame outFrame = task.frame;
			for (int channel = 0; channel < channels; channel++)
			{
				FlacSubframeInfo info = outFrame.subframes[channel];

				// The type code starts from the enum value; Fixed subframes fold in
				// the order, LPC subframes fold in the order minus one.
				int typeCode = (int)info.best.type;
				if (info.best.type == SubframeType.Fixed)
				{
					typeCode |= info.best.order;
				}
				else if (info.best.type == SubframeType.LPC)
				{
					typeCode |= info.best.order - 1;
				}

				// Header: one zero bit, the 6-bit type code, then a 1-bit flag that is
				// set when wbits is non-zero.
				outFrame.writer.writebits(1, 0);
				outFrame.writer.writebits(6, typeCode);
				if (info.wbits != 0)
				{
					outFrame.writer.writebits(1, 1);
				}
				else
				{
					outFrame.writer.writebits(1, 0);
				}

				// A positive wbits is followed by the value 1 written in wbits bits.
				if (info.wbits > 0)
				{
					outFrame.writer.writebits((int)info.wbits, 1);
				}

				// Position of this channel's entry among the per-frame tasks.
				int taskIndex = iFrame * channels + channel;

				// Body: dispatch on the chosen subframe type; any other type writes nothing.
				SubframeType kind = info.best.type;
				if (kind == SubframeType.Constant)
				{
					output_subframe_constant(outFrame, info);
				}
				else if (kind == SubframeType.Verbatim)
				{
					output_subframe_verbatim(outFrame, info);
				}
				else if (kind == SubframeType.Fixed)
				{
					output_subframe_fixed(task, info, taskIndex);
				}
				else if (kind == SubframeType.LPC)
				{
					output_subframe_lpc(task, info, taskIndex);
				}
			}
		}
예제 #18
0
		unsafe void calculate_window(FLACCLTask task, window_function func, WindowFunction flag)
		{
			// Nothing to do when this window type is not selected in the encoder parameters.
			bool selected = (eparams.window_function & flag) != 0;
			if (!selected)
			{
				return;
			}

			// Nothing to do either once the task already holds the maximum number of windows.
			if (task.nWindowFunctions == lpc.MAX_LPC_WINDOWS)
			{
				return;
			}

			// Each window occupies frameSize floats; fill the next free slot and count it.
			float* slot = ((float*)task.clWindowFunctionsPtr) + task.nWindowFunctions * task.frameSize;
			func(slot, task.frameSize);
			task.nWindowFunctions++;
		}