Exemplo n.º 1
0
        //static int fft_len = 128;

        /// <summary>
        /// Command-line entry point. For every <c>.mp3</c> file in the input directory it
        /// converts the file to WAV, computes (or loads cached) FFT and hash data, compares
        /// the file with every previously processed file to collect the ranges the two have
        /// in common, and finally passes each file and its range list to <c>CutAndCombine</c>.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c>: directory that is scanned for <c>.mp3</c> files.
        /// <c>args[1]</c>: output directory; intermediate files are kept in its <c>tmp</c> sub-directory.
        /// </param>
        static void Main(string[] args)
        {
            // Fail with a readable message instead of an IndexOutOfRangeException.
            if (args == null || args.Length < 2)
            {
                Console.Error.WriteLine("Usage: <mp3 directory> <output directory>");
                Environment.ExitCode = 1;
                return;
            }

            var mp3Dir = args[0];
            var outDir = args[1];
            var tmpDir = Path.Combine(outDir, "tmp");

            Directory.CreateDirectory(tmpDir);

            // Parallel lists: index k in each list refers to the k-th processed MP3.
            List<string> fileList    = new List<string>(); // temp WAV path, also the base name of the .range file
            List<string> mp3FileList = new List<string>(); // full path of the source MP3
            List<string> outFileList = new List<string>(); // destination path in outDir

            List<double[,]>  ffts   = new List<double[,]>();
            List<List<int>>  hashs  = new List<List<int>>();
            List<List<Range>> ranges = new List<List<Range>>();

            // Window size shared by LSH.init, the hashing loop, CheckSame and FineTune
            // (presumably measured in FFT frames - confirm against LSH/MyFFT).
            int size = 20;

            LSH.init(size, MyFFT.len / 2);
            foreach (var f in new DirectoryInfo(mp3Dir).GetFiles())
            {
                // Ordinal, case-insensitive comparison: file extensions are not linguistic text.
                if (!string.Equals(f.Extension, ".mp3", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                Console.Write("{0}", f.Name);

                var wavFile = Path.Combine(tmpDir, f.Name + ".wav");
                var fftFile = wavFile + ".fft";

                TimeLogger log = new TimeLogger();

                //
                // 1. Convert to WAV.
                //    The WAV is only needed to produce the FFT; if the FFT is already cached,
                //    the conversion is skipped.
                //
                if (!File.Exists(wavFile) && !File.Exists(fftFile))
                {
                    FFMpeg.Mp3toWav(f.FullName, wavFile);
                }
                fileList.Add(wavFile);
                mp3FileList.Add(f.FullName);
                outFileList.Add(Path.Combine(outDir, f.Name));

                log.Log(Console.Out, "\t{0:F1}");

                //
                // 2. FFT (computed and cached, or loaded from the cache file).
                //
                if (!File.Exists(fftFile))
                {
                    var fft = MyFFT.ProcessWavArr(wavFile);
                    WriteArrayToFile(fft, fftFile);
                    ffts.Add(fft);
                    File.Delete(wavFile); // the WAV is no longer needed once the FFT exists
                }
                else
                {
                    ffts.Add(ReadArrayFromFile(fftFile));
                }

                log.Log(Console.Out, "\t{0:F1}");

                //
                // 3. Hash (computed and cached one value per line, or loaded from the cache file).
                //
                var       hashFile = wavFile + ".hash";
                List<int> hash     = new List<int>();
                if (!File.Exists(hashFile))
                {
                    var fft = ffts.Last();
                    using (StreamWriter sw = new StreamWriter(hashFile))
                    {
                        for (int i = 0; i + size < fft.GetLength(0); i++)
                        {
                            int h = LSH.hash(fft, i);
                            sw.WriteLine(h);
                            hash.Add(h);
                        }
                    }
                }
                else
                {
                    using (StreamReader sr = new StreamReader(hashFile))
                    {
                        while (!sr.EndOfStream)
                        {
                            hash.Add(int.Parse(sr.ReadLine()));
                        }
                    }
                }
                hashs.Add(hash);

                log.Log(Console.Out, "\t{0:F1}");

                //
                // 4. Compare the file just added (index j) with every earlier file (index i)
                //    and merge the ranges found into both files' range lists.
                //
                ranges.Add(new List<Range>());

                int j = fileList.Count - 1;
                for (int i = 0; i < j; i++)
                {
                    var ranges_i = new List<Range>();
                    var ranges_j = new List<Range>();
                    var lst      = CheckSame(ffts[i], ffts[j], hashs[i], hashs[j], size);
                    FineTune(ffts[i], ffts[j], hashs[i], hashs[j], ranges_i, ranges_j, size, lst);

                    // TODO: also run the comparison in the reverse direction and see whether the result changes.

                    ranges_i  = CompresssRange(ranges_i);
                    ranges_j  = CompresssRange(ranges_j);
                    ranges[i] = CombineToRanges(ranges[i], ranges_i);
                    ranges[j] = CombineToRanges(ranges[j], ranges_j);
                }

                log.Log(Console.Out, "\t{0:F1}\n");
            }

            // Persist the ad positions once, after all comparisons are finished (the files are
            // only consumed below, so rewriting every file after each MP3 was redundant I/O),
            // then read them back and replace each list with the result of ReverseRange
            // (presumably the complement of the ad ranges within the file - confirm).
            for (int i = 0; i < fileList.Count; i++)
            {
                var range_file = fileList[i] + ".range";

                using (StreamWriter sw = new StreamWriter(range_file))
                {
                    foreach (var r in ranges[i])
                    {
                        sw.WriteLine("{0} {1} {2}", r.begin, r.end, r.count);
                    }
                }

                using (StreamReader sr = new StreamReader(range_file))
                {
                    var range = new List<Range>();
                    while (!sr.EndOfStream)
                    {
                        var x = sr.ReadLine().Split(' ');
                        if (x.Length != 3)
                        {
                            continue; // ignore lines that are not "begin end count"
                        }
                        range.Add(new Range(int.Parse(x[0]), int.Parse(x[1]), int.Parse(x[2])));
                    }
                    ranges[i] = ReverseRange(range, ffts[i].GetLength(0));
                }
            }

            // 5. Extract and concatenate the remaining content of each file.
            for (int i = 0; i < fileList.Count; i++)
            {
                CutAndCombine(mp3FileList[i], outFileList[i], fileList[i], ranges[i]);
            }

            // Original author's note: 0.1 s precision, 22.05 kHz.
        }
Exemplo n.º 2
0
        const bool saveTmpFile  = false; // Whether temporary files are written. Writing them makes a second run faster; not writing them saves disk space.

        /// <summary>
        /// Detects the advertisements (segments that occur repeatedly) contained in a set of MP3 files.
        /// </summary>
        /// <param name="mp3Files">
        /// Paths of the files to analyse. Entries whose extension is not <c>.mp3</c>
        /// (case-insensitive) are skipped and produce no entry in the result.
        /// </param>
        /// <param name="notify">
        /// Progress callback, invoked after every processing step with two whole-second values:
        /// the time used so far (<c>TimeEstimater.GetTimeUsed()</c>) and the current estimate
        /// (<c>TimeEstimater.EstimateTime()</c>). May be <c>null</c>, in which case no progress is reported.
        /// </param>
        /// <returns>
        /// One pair per processed MP3, in input order: the key is the list of ranges found for
        /// that file, the value is the length of the first dimension of the file's FFT array.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="mp3Files"/> is <c>null</c>.</exception>
        public static List<KeyValuePair<List<Range>, int>> DetectAD(string[] mp3Files, Action<int, int> notify)
        {
            if (mp3Files == null)
            {
                throw new ArgumentNullException("mp3Files");
            }

            var tmpDir = Path.Combine(Path.GetTempPath(), tmpdirName);

            Directory.CreateDirectory(tmpDir);

            // Select the .mp3 entries up front so that the time estimator is initialised with
            // the number of files that are really processed, not with the raw input length.
            var mp3Infos = new List<FileInfo>();
            foreach (var fn in mp3Files)
            {
                var info = new FileInfo(fn);
                if (string.Equals(info.Extension, ".mp3", StringComparison.OrdinalIgnoreCase))
                {
                    mp3Infos.Add(info);
                }
            }

            // Parallel lists: index k in each list refers to the k-th processed MP3.
            var fileList = new List<string>();
            var ffts     = new List<double[,]>();
            var hashs    = new List<List<int>>();
            var ranges   = new List<List<Range>>();
            var links    = new List<Link>();

            // Window size shared by LSH.init, the hashing loop, CheckSame and FineTune
            // (presumably measured in FFT frames - confirm against LSH/MyFFT).
            int size = 20;

            LSH.init(size, MyFFT.len / 2);

            TimeEstimater log = new TimeEstimater();

            int fileCount = mp3Infos.Count;
            log.InitType("wav", fileCount);
            log.InitType("fft", fileCount);
            log.InitType("hash", fileCount);
            log.InitType("compare", fileCount * (fileCount - 1) / 2); // every unordered pair of files is compared once

            // Single place that forwards progress to the caller; tolerates a null callback.
            Action reportProgress = delegate
            {
                if (notify != null)
                {
                    notify((int)log.GetTimeUsed().TotalSeconds, (int)log.EstimateTime().TotalSeconds);
                }
            };

            foreach (var f in mp3Infos)
            {
                Console.Write("{0}", f.Name);

                var wavFile = Path.Combine(tmpDir, f.Name + ".wav");
                var fftFile = wavFile + ".fft";

                log.StartTimer("wav");

                //
                // 1. Convert to WAV.
                //    The WAV is only needed to produce the FFT; if the FFT is already cached,
                //    the conversion is skipped.
                //
                if (!File.Exists(wavFile) && !File.Exists(fftFile))
                {
                    FFMpeg.Mp3toWav(f.FullName, wavFile);
                }
                fileList.Add(wavFile);

                log.EndTimer("wav");
                reportProgress();
                log.StartTimer("fft");

                //
                // 2. FFT (computed, or loaded from a cache file left by a run with saveTmpFile enabled).
                //
                if (!File.Exists(fftFile))
                {
                    var fft = MyFFT.ProcessWavArr(wavFile);
                    if (saveTmpFile)
                    {
                        WriteArrayToFile(fft, fftFile);
                    }
                    ffts.Add(fft);
                    File.Delete(wavFile); // the WAV is no longer needed once the FFT exists
                }
                else
                {
                    ffts.Add(ReadArrayFromFile(fftFile));
                }

                log.EndTimer("fft");
                reportProgress();
                log.StartTimer("hash");

                //
                // 3. Hash (computed, or loaded one value per line from a cache file).
                //
                var       hashFile = wavFile + ".hash";
                List<int> hash     = new List<int>();
                if (!File.Exists(hashFile))
                {
                    var fft = ffts.Last();

                    for (int i = 0; i + size < fft.GetLength(0); i++)
                    {
                        hash.Add(LSH.hash(fft, i));
                    }

                    if (saveTmpFile)
                    {
                        using (StreamWriter sw = new StreamWriter(hashFile))
                        {
                            foreach (var h in hash)
                            {
                                sw.WriteLine(h);
                            }
                        }
                    }
                }
                else
                {
                    using (StreamReader sr = new StreamReader(hashFile))
                    {
                        while (!sr.EndOfStream)
                        {
                            hash.Add(int.Parse(sr.ReadLine()));
                        }
                    }
                }
                hashs.Add(hash);

                log.EndTimer("hash");
                reportProgress();

                //
                // 4. Compare the file just added (index j) with every earlier file (index i).
                //
                ranges.Add(new List<Range>());

                int j = fileList.Count - 1;
                for (int i = 0; i < j; i++)
                {
                    log.StartTimer("compare");

                    var ranges_i = new List<Range>();
                    var ranges_j = new List<Range>();
                    var lst      = CheckSame(ffts[i], ffts[j], hashs[i], hashs[j], size);
                    var tlink    = FineTune(ffts[i], ffts[j], hashs[i], hashs[j], ranges_i, ranges_j, size, lst);

                    // TODO: also run the comparison in the reverse direction and see whether the result changes.

                    // Record the equivalence relations between the two files.
                    foreach (var link in tlink)
                    {
                        links.Add(new Link(i, j, link.Key, link.Value));
                    }

                    ranges_i  = CompresssRange(ranges_i);
                    ranges_j  = CompresssRange(ranges_j);
                    ranges[i] = CombineToRanges(ranges[i], ranges_i);
                    ranges[j] = CombineToRanges(ranges[j], ranges_j);

                    log.EndTimer("compare");
                    reportProgress();
                }
            }

            // The ranges are returned to the caller directly; they are not persisted to ".range" files here.
            for (int k = 0; k < ranges.Count; k++)
            {
                ranges[k] = CompresssRange(ranges[k]);
            }
            CalcRangeTypes(ranges, links);

            var ret = new List<KeyValuePair<List<Range>, int>>();

            for (int i = 0; i < ranges.Count; i++)
            {
                ret.Add(new KeyValuePair<List<Range>, int>(ranges[i], ffts[i].GetLength(0)));
            }
            return ret;
        }