/// <summary>
/// Cuts one segment out of <paramref name="mp3"/> for every entry of <paramref name="range"/>
/// (via FFMpeg.Split), records the segment files in a list file, and then joins them into
/// <paramref name="output"/> (via FFMpeg.Concat).
/// </summary>
/// <param name="mp3">Path of the source MP3 file.</param>
/// <param name="output">Path of the combined output file passed to FFMpeg.Concat.</param>
/// <param name="wav">
/// Path prefix for the intermediate files: the list file is <c>wav + ".list"</c> and the
/// i-th segment is <c>wav + i + ".mp3"</c>. These files are not removed by this method.
/// </param>
/// <param name="range">
/// Segments to keep. <c>begin</c> and <c>end</c> are divided by 10.0 before being handed to
/// FFMpeg.Split (presumably converting 0.1 s units to seconds; confirm against FFMpeg.Split).
/// </param>
private static void CutAndCombine(string mp3, string output, string wav, List<Range> range)
{
    string listFile = wav + ".list";
    using (StreamWriter sw = new StreamWriter(listFile))
    {
        for (int i = 0; i < range.Count; i++)
        {
            var r = range[i];
            var file = wav + i + ".mp3";
            FFMpeg.Split(mp3, file, r.begin / 10.0, r.end / 10.0);

            // The "file '...'" line format is the one used by ffmpeg's concat demuxer
            // (assumed to be what FFMpeg.Concat consumes). Inside single quotes an
            // apostrophe must be written as '\'' or the entry is malformed, which
            // happens easily with MP3 titles such as "Don't Stop.mp3".
            string quoted = file.Replace("'", "'\\''");
            sw.WriteLine("file '{0}'", quoted);
        }
    }

    FFMpeg.Concat(listFile, output);
}
//static int fft_len = 128;

/// <summary>
/// Command-line entry point. For every .mp3 file in the input directory it produces (or
/// loads from cache) a wav, an fft array and a hash list, compares the file against all
/// previously processed files to find matching ranges, stores those ranges in ".range"
/// files, and finally cuts and re-joins each MP3 using the ranges returned by ReverseRange.
/// </summary>
/// <param name="args">
/// <c>args[0]</c>: directory containing the input .mp3 files;
/// <c>args[1]</c>: output directory (intermediate files go into its "tmp" sub-directory).
/// </param>
static void Main(string[] args)
{
    // Fail with a usage message instead of an IndexOutOfRangeException.
    if (args == null || args.Length < 2)
    {
        Console.Error.WriteLine("Usage: <program> <mp3Dir> <outDir>");
        Environment.ExitCode = 1;
        return;
    }

    var mp3Dir = args[0];
    var outDir = args[1];
    var tmpDir = Path.Combine(outDir, "tmp");
    Directory.CreateDirectory(tmpDir);

    List<string> fileList = new List<string>();      // wav paths, also used as prefix for derived files
    List<string> mp3FileList = new List<string>();   // original mp3 paths
    List<string> outFileList = new List<string>();   // output mp3 paths
    List<double[,]> ffts = new List<double[,]>();
    List<List<int>> hashs = new List<List<int>>();
    var ranges = new List<List<Range>>();

    int size = 20;
    LSH.init(size, MyFFT.len / 2);

    foreach (var f in new DirectoryInfo(mp3Dir).GetFiles())
    {
        if (!string.Equals(f.Extension, ".mp3", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        Console.Write("{0}", f.Name);
        var wavFile = Path.Combine(tmpDir, f.Name + ".wav");
        var fftFile = wavFile + ".fft";
        TimeLogger log = new TimeLogger();

        //
        // 1. Convert to wav.
        //    The wav is only needed to generate the fft; if the fft already exists,
        //    the wav is not needed.
        //
        if (!File.Exists(wavFile) && !File.Exists(fftFile))
        {
            FFMpeg.Mp3toWav(f.FullName, wavFile);
        }
        fileList.Add(wavFile);
        mp3FileList.Add(f.FullName);
        outFileList.Add(Path.Combine(outDir, f.Name));
        log.Log(Console.Out, "\t{0:F1}");

        //
        // 2. fft (computed once, then cached next to the wav path).
        //
        if (!File.Exists(fftFile))
        {
            var a = MyFFT.ProcessWavArr(wavFile);
            WriteArrayToFile(a, fftFile);
            ffts.Add(a);
            File.Delete(wavFile); // the wav can be deleted once the fft has been generated
        }
        else
        {
            ffts.Add(ReadArrayFromFile(fftFile));
        }
        log.Log(Console.Out, "\t{0:F1}");

        //
        // 3. hash (one value per window start, cached as one integer per line).
        //
        var hashFile = wavFile + ".hash";
        List<int> hash = new List<int>();
        if (!File.Exists(hashFile))
        {
            var a = ffts.Last();
            using (StreamWriter sw = new StreamWriter(hashFile))
            {
                for (int i = 0; i + size < a.GetLength(0); i++)
                {
                    int h = LSH.hash(a, i);
                    sw.WriteLine(h);
                    hash.Add(h);
                }
            }
        }
        else
        {
            using (StreamReader sr = new StreamReader(hashFile))
            {
                while (!sr.EndOfStream)
                {
                    hash.Add(int.Parse(sr.ReadLine()));
                }
            }
        }
        hashs.Add(hash);
        log.Log(Console.Out, "\t{0:F1}");

        //
        // 4. Compare the file just added (index j) with every earlier file (index i).
        //
        ranges.Add(new List<Range>());
        int j = fileList.Count - 1;
        for (int i = 0; i < j; i++)
        {
            var ranges_i = new List<Range>();
            var ranges_j = new List<Range>();
            var lst = CheckSame(ffts[i], ffts[j], hashs[i], hashs[j], size);
            FineTune(ffts[i], ffts[j], hashs[i], hashs[j], ranges_i, ranges_j, size, lst);
            // TODO: also try the reverse direction and see whether the results change.
            ranges_i = CompresssRange(ranges_i);
            ranges_j = CompresssRange(ranges_j);
            ranges[i] = CombineToRanges(ranges[i], ranges_i);
            ranges[j] = CombineToRanges(ranges[j], ranges_j);
        }

        //for (int j = 0; j < fileList.Count; j++) {
        //    ranges[j] = CompresssRange(ranges[j]);
        //}

        // Store the ad positions. All range files are rewritten after every input file,
        // so the files on disk always reflect the comparisons done so far.
        for (int i = 0; i < fileList.Count; i++)
        {
            using (StreamWriter sw = new StreamWriter(fileList[i] + ".range"))
            {
                //sw.WriteLine(ranges[i].Count);
                foreach (var r in ranges[i])
                {
                    sw.WriteLine("{0} {1} {2}", r.begin, r.end, r.count);
                }
            }
        }
        log.Log(Console.Out, "\t{0:F1}\n");
    }

    // Read the ad positions (with their occurrence counts) back from the range files and
    // replace each file's ranges with the result of ReverseRange (presumably the non-ad
    // parts of the file; confirm against ReverseRange).
    for (int i = 0; i < fileList.Count; i++)
    {
        var range_file = fileList[i] + ".range";
        using (StreamReader sr = new StreamReader(range_file))
        {
            var range = new List<Range>();
            while (!sr.EndOfStream)
            {
                var x = sr.ReadLine().Split(' ');
                if (x.Length != 3)
                {
                    continue; // skip lines that are not "begin end count"
                }
                range.Add(new Range(int.Parse(x[0]), int.Parse(x[1]), int.Parse(x[2])));
            }
            ranges[i] = ReverseRange(range, ffts[i].GetLength(0));
        }
    }

    // 5. Extract and concatenate the remaining content of each file.
    for (int i = 0; i < fileList.Count; i++)
    {
        CutAndCombine(mp3FileList[i], outFileList[i], fileList[i], ranges[i]);
    }
    // 0.1 s precision; 22.05 kHz.
}
// Whether intermediate files (.fft / .hash) are written to the temp directory.
// Writing them makes a second run faster; not writing them saves disk space.
const bool saveTmpFile = false;

/// <summary>
/// Detects the ads (segments that occur repeatedly) contained in a series of MP3 files.
/// Each file is converted to wav, transformed to an fft array, hashed, and then compared
/// with every file processed before it.
/// </summary>
/// <param name="mp3Files">
/// Paths of the files to analyse. Entries whose extension is not ".mp3" (case-insensitive)
/// are skipped and produce no entry in the result, so the result can be shorter than this array.
/// </param>
/// <param name="notify">
/// Progress callback, invoked after every wav / fft / hash / compare step with two whole-second
/// values: the time used so far (<c>log.GetTimeUsed()</c>) and the estimate returned by
/// <c>log.EstimateTime()</c>. May be null, in which case no progress is reported.
/// </param>
/// <returns>
/// One pair per processed MP3, in processing order: the key is the list of detected ranges for
/// that file, the value is the length of the first dimension of the file's fft array.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="mp3Files"/> is null.</exception>
public static List<KeyValuePair<List<Range>, int>> DetectAD(string[] mp3Files, Action<int, int> notify)
{
    if (mp3Files == null)
    {
        throw new ArgumentNullException("mp3Files");
    }
    if (notify == null)
    {
        notify = delegate { };
    }

    var tmpDir = Path.Combine(Path.GetTempPath(), tmpdirName);
    Directory.CreateDirectory(tmpDir);

    List<string> fileList = new List<string>();
    List<double[,]> ffts = new List<double[,]>();
    List<List<int>> hashs = new List<List<int>>();
    var ranges = new List<List<Range>>();

    int size = 20;
    LSH.init(size, MyFFT.len / 2);
    List<Link> links = new List<Link>();

    TimeEstimater log = new TimeEstimater();
    log.InitType("wav", mp3Files.Length);
    log.InitType("fft", mp3Files.Length);
    log.InitType("hash", mp3Files.Length);
    // Every file is compared with every earlier file: n * (n - 1) / 2 comparisons.
    log.InitType("compare", mp3Files.Length * (mp3Files.Length - 1) / 2);

    // Single place that forwards the current timing figures to the caller.
    Action report = () => notify((int)log.GetTimeUsed().TotalSeconds, (int)log.EstimateTime().TotalSeconds);

    foreach (var fn in mp3Files)
    {
        var f = new FileInfo(fn);
        if (!string.Equals(f.Extension, ".mp3", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        Console.Write("{0}", f.Name);
        var wavFile = Path.Combine(tmpDir, f.Name + ".wav");
        var fftFile = wavFile + ".fft";

        //
        // 1. Convert to wav.
        //    The wav is only needed to generate the fft; if the fft already exists,
        //    the wav is not needed.
        //
        log.StartTimer("wav");
        if (!File.Exists(wavFile) && !File.Exists(fftFile))
        {
            FFMpeg.Mp3toWav(f.FullName, wavFile);
        }
        fileList.Add(wavFile);
        log.EndTimer("wav");
        report();

        //
        // 2. fft
        //
        log.StartTimer("fft");
        if (!File.Exists(fftFile))
        {
            var a = MyFFT.ProcessWavArr(wavFile);
            if (saveTmpFile)
            {
                WriteArrayToFile(a, fftFile);
            }
            ffts.Add(a);
            File.Delete(wavFile); // the wav can be deleted once the fft has been generated
        }
        else
        {
            ffts.Add(ReadArrayFromFile(fftFile));
        }
        log.EndTimer("fft");
        report();

        //
        // 3. hash
        //
        log.StartTimer("hash");
        var hashFile = wavFile + ".hash";
        List<int> hash = new List<int>();
        if (!File.Exists(hashFile))
        {
            var a = ffts.Last();
            for (int i = 0; i + size < a.GetLength(0); i++)
            {
                int h = LSH.hash(a, i);
                hash.Add(h);
            }
            if (saveTmpFile)
            {
                using (StreamWriter sw = new StreamWriter(hashFile))
                {
                    foreach (var h in hash)
                    {
                        sw.WriteLine(h);
                    }
                }
            }
        }
        else
        {
            using (StreamReader sr = new StreamReader(hashFile))
            {
                while (!sr.EndOfStream)
                {
                    hash.Add(int.Parse(sr.ReadLine()));
                }
            }
        }
        hashs.Add(hash);
        log.EndTimer("hash");
        report();

        //
        // 4. Compare the file just added (index j) with every earlier file (index i).
        //
        ranges.Add(new List<Range>());
        int j = fileList.Count - 1;
        for (int i = 0; i < j; i++)
        {
            log.StartTimer("compare");
            var ranges_i = new List<Range>();
            var ranges_j = new List<Range>();
            var lst = CheckSame(ffts[i], ffts[j], hashs[i], hashs[j], size);
            var tlink = FineTune(ffts[i], ffts[j], hashs[i], hashs[j], ranges_i, ranges_j, size, lst);
            // TODO: also try the reverse direction and see whether the results change.

            // Record the equivalence relations found between file i and file j.
            foreach (var link in tlink)
            {
                links.Add(new Link(i, j, link.Key, link.Value));
            }

            ranges_i = CompresssRange(ranges_i);
            ranges_j = CompresssRange(ranges_j);
            ranges[i] = CombineToRanges(ranges[i], ranges_i);
            ranges[j] = CombineToRanges(ranges[j], ranges_j);
            log.EndTimer("compare");
            report();
        }

        // Note: unlike Main, the ad positions are not persisted to ".range" files here;
        // they are kept in memory only.
    }

    for (int k = 0; k < ranges.Count; k++)
    {
        ranges[k] = CompresssRange(ranges[k]);
    }
    CalcRangeTypes(ranges, links);

    // Note: the ranges are returned as detected; the ReverseRange step that Main applies
    // after reading the ".range" files back is not performed here.
    var ret = new List<KeyValuePair<List<Range>, int>>();
    for (int i = 0; i < ranges.Count; i++)
    {
        ret.Add(new KeyValuePair<List<Range>, int>(ranges[i], ffts[i].GetLength(0)));
    }
    return ret;
}