【原創(chuàng)】Swift分析音頻文件

過去半年參與的一個項(xiàng)目中,遇到并解決了一系列關(guān)于音頻文件操作的問題。相關(guān)資料在網(wǎng)上很難找到,所以先整理下以方便今後查漏補(bǔ)缺。

1.如果你需要分析的音頻文件是在服務(wù)端,那需要先將文件下載在本地,所以我們先實(shí)現(xiàn)根據(jù)URL地址下載音頻文件的功能。

import AVKit

typealias AudioDownloadedCallback = (URL?) -> ()

extension AVAsset {
    /// Downloads the audio file at `audioURL` into the app's Documents
    /// directory, reusing an already-downloaded copy when one exists.
    ///
    /// - Parameters:
    ///   - audioURL: Remote URL of the audio file to fetch.
    ///   - downloadCallback: Called with the local file URL on success, or
    ///     `nil` on failure. NOTE: for a fresh download this is invoked on a
    ///     URLSession background queue, not the main thread.
    static func downloadAudioToLocal(audioURL:URL,downloadCallback:@escaping AudioDownloadedCallback) {
        
        // Destination inside Documents, named after the remote file.
        let destinationUrl = FileManager.getDocumentPathWithFileName(sFileName: audioURL.lastPathComponent)
        
        // Reuse the cached copy instead of downloading again.
        if FileManager.default.fileExists(atPath: destinationUrl.path) {
            downloadCallback(destinationUrl)
        } else {
            // Download the audio file to a temporary location, then move it.
            URLSession.shared.downloadTask(with: audioURL) { location, _, error in
                // Bug fix: the original returned silently here, so the caller
                // was never notified when the download itself failed.
                guard let location = location, error == nil else {
                    if let error = error {
                        print("Audio download failed: \(error.localizedDescription)")
                    }
                    downloadCallback(nil)
                    return
                }
                do {
                    // Move the temporary download into the Documents folder.
                    try FileManager.default.moveItem(at: location, to: destinationUrl)
                    downloadCallback(destinationUrl)
                } catch {
                    print(error.localizedDescription)
                    downloadCallback(nil)
                }
            }.resume()
        }
    }
}

extension FileManager {
    /// Builds a file URL for `sFileName` located in the user's Documents
    /// directory.
    ///
    /// - Parameter sFileName: The file name (last path component) to append.
    /// - Returns: `<Documents>/<sFileName>` as a file URL.
    static func getDocumentPathWithFileName(sFileName:String) -> URL {
        // The Documents directory is guaranteed to exist in an app sandbox,
        // so taking the first search result is safe here.
        let documentsDirectory = FileManager.default
            .urls(for: .documentDirectory, in: .userDomainMask)
            .first!
        return documentsDirectory.appendingPathComponent(sFileName)
    }
}

文件有了后可以開始讀取文件。這里有兩種方式:
1.通過AVAssetReader進(jìn)行讀取并用NSMutableData記錄下來。
2.生成AVAudioFile音頻實(shí)例,配合AVAudioPCMBuffer進(jìn)行分段讀取。
兩者的區(qū)別在于:第一種方式是整體讀取,當(dāng)AVAssetReader開始執(zhí)行讀取后,讀取的內(nèi)容段并不受控制,每段讀取的長度由CMBlockBufferGetDataLength()計算好了返回。而第二種方式可以自己定義讀取的起始點(diǎn)和步長,你甚至可以選擇性的重復(fù)讀取。具體使用哪個看自己的業(yè)務(wù)場景。

整體讀取AVAssetReader + NSMutableData

import UIKit
import AVFoundation
import AVKit

extension AVURLAsset {
    /// Decodes the asset's first track to 16-bit signed little-endian
    /// interleaved PCM and returns all samples as one contiguous buffer.
    ///
    /// Only track 0 is read. If the file has more than one track, repeat the
    /// read for each additional track.
    /// - Returns: The raw Int16 sample bytes; empty on any failure.
    func readAudioBuffer() -> NSMutableData {
        let sampleData = NSMutableData()
        
        let reader: AVAssetReader
        do {
            reader = try AVAssetReader(asset: self)
        } catch {
            print("Create asset reader failed.\(error.localizedDescription)")
            return sampleData
        }
        
        // Bug fix: the original indexed `tracks[0]` unconditionally and
        // crashed on assets that contain no tracks at all.
        guard let songTrack = self.tracks.first else {
            print("Asset contains no tracks to read.")
            return sampleData
        }
        
        // Decode to 16-bit signed integer, little-endian, interleaved PCM.
        let readerOutputSettings: [String: Int] = [
            AVFormatIDKey: Int(kAudioFormatLinearPCM),
            AVLinearPCMBitDepthKey: 16,
            AVLinearPCMIsBigEndianKey: 0,
            AVLinearPCMIsFloatKey: 0,
            AVLinearPCMIsNonInterleaved: 0
        ]
        let output = AVAssetReaderTrackOutput(track: songTrack, outputSettings: readerOutputSettings)
        
        reader.add(output)
        reader.startReading()
        
        // Chunk sizes are chosen by the reader; we just drain until it stops.
        while reader.status == .reading {
            guard let sampleBufferRef = output.copyNextSampleBuffer(),
                  let blockBufferRef = CMSampleBufferGetDataBuffer(sampleBufferRef) else {
                continue
            }
            
            let bufferLength = CMBlockBufferGetDataLength(blockBufferRef)
            
            // Copy the chunk's bytes straight into the accumulator. The
            // original allocated an intermediate NSMutableData and then
            // appended it again, copying every chunk twice.
            sampleData.increaseLength(by: bufferLength)
            let destination = sampleData.mutableBytes.advanced(by: sampleData.length - bufferLength)
            CMBlockBufferCopyDataBytes(blockBufferRef, atOffset: 0, dataLength: bufferLength, destination: destination)
            
            CMSampleBufferInvalidate(sampleBufferRef)
        }
        
        // Bug fix: surface a failed read instead of silently returning
        // whatever partial data was accumulated.
        if reader.status == .failed {
            print("Asset reading failed: \(reader.error?.localizedDescription ?? "unknown error")")
        }
        return sampleData
    }
}

PS:代碼中注釋處標(biāo)明這段代碼只讀取第0條音軌。需要注意,立體聲通常是同一條音軌內(nèi)的兩個聲道;只有當(dāng)文件確實(shí)存在兩條或以上的音軌時,才需要針對不同的音軌分別進(jìn)行讀取。
效果:


3211552369372_.pic_hd.jpg

拿到結(jié)果后可以根據(jù)自己需求進(jìn)行再加工,比如我自己實(shí)現(xiàn)了一個根據(jù)UI要求的高寬來對音頻數(shù)據(jù)進(jìn)行偏移處理。

extension NSMutableData {
    /// Downsamples the stored Int16 PCM samples to roughly `size.width / 2`
    /// values and scales them so the loudest sample maps to `size.height`.
    ///
    /// - Parameter size: Target drawing size; the width controls how many
    ///   samples are kept, the height the amplitude of the peak sample.
    /// - Returns: The picked samples as `Float`, scaled to fit `size.height`.
    ///   Empty when the buffer holds no complete Int16 or the width is 0.
    func toInt16Sequence(size:CGSize) -> [Float] {
        var filteredSamplesMA: [Float] = []
        let bytesPerSample = MemoryLayout<Int16>.size
        let sampleCount = self.length / bytesPerSample
        guard sampleCount > 0, size.width > 0 else { return filteredSamplesMA }
        
        let binSize = Float(sampleCount) / (Float(size.width) * 0.5)
        // Bug fix: when binSize < 1, Int(binSize) is 0 and the original loop
        // never advanced (infinite loop). Always step by at least one sample.
        let step = max(1, Int(binSize))
        
        // Bug fix: the original called subdata(location: i, length: 1) — a
        // sample index used as a byte offset — and then read a full 8-byte
        // Int from that 1-byte slice (buffer overread, garbage values).
        // Read the Int16 samples directly from the backing store instead.
        let samples = self.bytes.assumingMemoryBound(to: Int16.self)
        var i = 0
        while i < sampleCount {
            filteredSamplesMA.append(Float(samples[i]))
            i += step
        }
        return NSMutableData.trackScale(size: size, source: filteredSamplesMA)
    }
    
    /// Scales `source` so its maximum value equals `size.height`.
    /// Returns `source` unchanged when it is empty or its peak is not
    /// positive (bug fix: avoids a division by zero producing inf/NaN).
    private static func trackScale(size: CGSize, source: [Float]) -> [Float] {
        guard let peak = source.max(), peak > 0 else { return source }
        let k = Float(size.height) / peak
        return source.map { $0 * k }
    }
}

最終效果:


3221552370149_.pic_hd.jpg

有了這些數(shù)據(jù)后可以去UI層進(jìn)行相應(yīng)的繪制,或者保存下來進(jìn)行其他方式的分析。

接下來介紹第二種方式 AVAudioFile + AVAudioPCMBuffer
這種方式不同于第一種整體讀取,而是可以自己指定每次讀取的起始位置和長度,相比較而言更加靈活一些。

import AVFoundation

extension AVAudioFile {
    /// Reads channel 0 of the audio file at `audioURL` as Float32 PCM,
    /// chunked so that each read corresponds to one animation frame at
    /// 30 FPS.
    ///
    /// - Parameter audioURL: Local file URL of the audio to analyze.
    /// - Returns: The raw `Float` sample bytes of channel 0; empty when the
    ///   file cannot be opened or contains no frames.
    static func readAmplitude(audioURL:URL) -> NSMutableData {
        let amplitudes = NSMutableData()
        
        let file: AVAudioFile
        do {
            file = try AVAudioFile(forReading: audioURL)
        } catch {
            // Bug fix: the original continued with a nil implicitly-unwrapped
            // file and crashed on the first access to file.length.
            print("AVAudioFile create failed \(error.localizedDescription)")
            return amplitudes
        }
        
        // Target UI frame rate: one PCM buffer per rendered frame.
        let frameCountPerSecond = 30.0
        
        // Total number of UI frames over the file's duration.
        let asset = AVURLAsset(url: audioURL)
        let frameCountTotal = asset.duration.seconds * frameCountPerSecond
        
        // Number of audio samples merged into each UI frame. Guard against a
        // zero-length or sub-sample chunk, which would allocate a 0-capacity
        // buffer and never advance the loop.
        guard frameCountTotal > 0 else { return amplitudes }
        let samplesPerFrame = Double(file.length) / frameCountTotal
        guard samplesPerFrame >= 1 else { return amplitudes }
        
        var sampleReadIndex = 0.0
        while sampleReadIndex < Double(file.length) {
            let capacity = AVAudioFrameCount(samplesPerFrame)
            guard let audioBuffer = AVAudioPCMBuffer(pcmFormat: file.processingFormat,
                                                     frameCapacity: capacity) else { break }
            
            // Position the file at the start of this chunk before reading.
            file.framePosition = AVAudioFramePosition(sampleReadIndex)
            
            do {
                try file.read(into: audioBuffer, frameCount: capacity)
                // Bug fix: the original passed the frame count as the byte
                // length, keeping only a quarter of each Float32 chunk. Use
                // the frames actually read (the final chunk may be short)
                // times the Float size.
                if let channelData = audioBuffer.floatChannelData {
                    amplitudes.append(channelData.pointee,
                                      length: Int(audioBuffer.frameLength) * MemoryLayout<Float>.size)
                }
            } catch {
                print("read to buffer failed \(error.localizedDescription)")
                break
            }
            
            sampleReadIndex += samplesPerFrame
        }
        return amplitudes
    }
}

結(jié)果打印


image.png

到此數(shù)據(jù)就都分析出來了,接下來可以隨意選用適合自己場景的第三方框架顯示出對應(yīng)的分析圖出來。

最后編輯于
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者
平臺聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡書系信息發(fā)布平臺,僅提供信息存儲服務(wù)。

推薦閱讀更多精彩內(nèi)容