Commit b77182bc authored by Fabio Frazao's avatar Fabio Frazao
Browse files

Merge branch 'new_tut' into 'master'

New tutorial

See merge request !1
parents 1a8781dc 6432598d
*.ipynb_checkpoints*
tutorials/create_a_narw_detector/narw_tmp_folder
tutorials/create_a_narw_detector/detections.csv
\ No newline at end of file
sound_file;call_time
sample_1.wav;1128.84
sample_1.wav;1153.526
sample_1.wav;1196.778
sample_1.wav;1227.642
sample_1.wav;1358.181
sample_1.wav;1437.482
sample_1.wav;1489.288
sample_1.wav;1511.67
sample_1.wav;1530.595
sample_1.wav;1536.58
sample_1.wav;1714.372
sample_1.wav;1768.251
sample_1.wav;1777.835
sample_2.wav;68.149
sample_2.wav;688.507
sample_2.wav;755.94
sample_2.wav;770.44
sample_3.wav;68.853
sample_3.wav;105.927
sample_3.wav;1057.015
sample_3.wav;1067.282
sample_3.wav;1290.563
sample_3.wav;1378.955
sample_3.wav;1428.648
sample_3.wav;1663.622
sample_3.wav;1676.682
This source diff could not be displayed because it is too large. You can view the blob instead.
# ================================================================================ #
# Authors: Fabio Frazao and Oliver Kirsebom #
# Contact: fsfrazao@dal.ca, oliver.kirsebom@dal.ca #
# Organization: MERIDIAN (https://meridian.cs.dal.ca/) #
# Team: Data Analytics #
# Project: ketos #
# Project goal: The ketos library provides functionalities for handling #
# and processing acoustic data and applying deep neural networks to sound #
# detection and classification tasks. #
# #
# License: GNU GPLv3 #
# #
# This program is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# (at your option) any later version. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program. If not, see <https://www.gnu.org/licenses/>. #
# ================================================================================ #
""" This scripts transforms a Ketos (binary) classifier model into a detector capable of
processing continuous audio data and outputting a list of detections.
The audio data is processed by sliding a window across the audio data,
using a user-specified step size. At each step, the audio segment contained
within the window is passed on to the model for classification.
A number of options are available for modifying the behaviour of the detector,
including post-processing of the classification scores reported by the model.
The script is executed in the terminal with the following command,
python detector.py --model <path_to_saved_model> --audio_folder <path_to_data_folder>
To see the full list of command line arguments, type
python detector.py --help
"""
import os
import argparse
from tqdm import tqdm
import pandas as pd
import ketos.neural_networks.dev_utils.detection as det
from ketos.audio.audio_loader import AudioFrameLoader
from ketos.neural_networks.resnet import ResNetInterface
from ketos.neural_networks.dev_utils.detection import process, save_detections, merge_overlapping_detections
parser = argparse.ArgumentParser(description="Ketos acoustic signal detection script")

# define command line arguments
parser.add_argument('--model', type=str, default=None,
                    help='path to the trained ketos classifier model')
parser.add_argument('--audio_folder', type=str, default=None,
                    help='path to the folder containing the .wav files')
parser.add_argument('--input_list', type=str, default=None,
                    help='a .txt file listing all the .wav files to be processed. If not specified, all the files in the folder will be processed.')
parser.add_argument('--output', type=str, default='detections.csv',
                    help='the .csv file where the detections will be saved. An existing file will be overwritten.')
parser.add_argument('--num_segs', type=int, default=128,
                    help='the number of segments to hold in memory at one time')
parser.add_argument('--step_size', type=float, default=None,
                    help='step size (in seconds) used for the sliding window')
parser.add_argument('--buffer', type=float, default=0.0,
                    help='Time (in seconds) to be added on either side of every detected signal')
parser.add_argument('--win_len', type=int, default=1,
                    help='Length of score averaging window (no. time steps). Must be an odd integer.')
parser.add_argument('--threshold', type=float, default=0.5,
                    help='minimum score for a detection to be accepted (ranging from 0 to 1)')

# on/off flag pairs; each pair shares a single boolean destination and is
# mutually exclusive so the user cannot pass contradictory flags
show_progress_parser = parser.add_mutually_exclusive_group(required=False)
show_progress_parser.add_argument('--show_progress', dest='progress_bar', action='store_true')
show_progress_parser.add_argument('--hide_progress', dest='progress_bar', action='store_false')

group_parser = parser.add_mutually_exclusive_group(required=False)
group_parser.add_argument('--with_group', dest='group', action='store_true')
group_parser.add_argument('--without_group', dest='group', action='store_false')

# distinct variable for the merge flag pair (previously the `group_parser`
# name was silently reused for a second, unrelated group)
merge_parser = parser.add_mutually_exclusive_group(required=False)
merge_parser.add_argument('--with_merge', dest='merge', action='store_true')
merge_parser.add_argument('--without_merge', dest='merge', action='store_false')

# defaults for the three boolean flags above
parser.set_defaults(progress_bar=True, group=False, merge=False)
# parse command line args
args = parser.parse_args()

# validate: the score averaging window needs a well-defined centre bin, so it
# must be odd. Use parser.error rather than assert (asserts vanish under -O).
if args.win_len % 2 != 1:
    parser.error('win_len must be an odd integer')

# load the classifier and the spectrogram parameters bundled with the model
model, audio_repr = ResNetInterface.load_model_file(args.model, './narw_tmp_folder', load_audio_repr=True)
spec_config = audio_repr[0]['spectrogram']

# optional list restricting which .wav files get processed; None means
# "process every .wav file found in the folder"
if args.input_list is not None:
    file_list = pd.read_csv(args.input_list, sep='\t')
    file_list = list(file_list['filename'])
else:
    file_list = None

# initialize the audio loader that slides a fixed-length window across the data
audio_loader = AudioFrameLoader(frame=spec_config['duration'], step=args.step_size, path=args.audio_folder, filename=file_list, repres=spec_config)

# process the audio data
detections = process(provider=audio_loader, model=model, batch_size=args.num_segs, buffer=args.buffer, threshold=args.threshold, group=args.group, win_len=args.win_len, progress_bar=args.progress_bar)

# optionally merge detections that overlap in time
if args.merge:
    detections = merge_overlapping_detections(detections)

# save the detections
if os.path.isfile(args.output):
    os.remove(args.output)  # remove, if already exists
save_detections(detections=detections, save_to=args.output)
# report only after the file has actually been written
print(f'{len(detections)} detections saved to {args.output}')
# Usage:
# python detector_ex1.py --audio_folder=audio --input="sample_1.wav" --output="detections_ex1.csv"
import argparse
from ketos.audio.spectrogram import MagSpectrogram
from ketos.neural_networks.resnet import ResNetInterface
from ketos.audio.audio_loader import AudioFrameLoader
from ketos.neural_networks.dev_utils.detection import process_audio_loader, merge_overlapping_detections, save_detections
parser = argparse.ArgumentParser(description="North Atlantic Right Whale detector - example 1")
parser.add_argument('--audio_folder', type=str, default=None,
                    help='path to the folder containing the audio file input')
parser.add_argument('--input', type=str, default=None,
                    help='the .wav file to be processed.')
parser.add_argument('--output', type=str, default=None,
                    help='the .csv file where the detections will be saved')
parser.add_argument('--n_segs', type=int, default=128,
                    help='the number of segments to hold in memory at one time')
parser.add_argument('--threshold', type=float, default=0.5,
                    help='minimum score value for a segment to be considered as a detection (ranging from 0 to 1)')
args = parser.parse_args()

# load the trained classifier along with its audio (spectrogram) representation
model, audio_repr = ResNetInterface.load_model_file('narw.kt', './narw_tmp_folder', load_audio_repr=True)
spec_config = audio_repr[0]['spectrogram']

# step equals the window duration, so consecutive windows do not overlap
audio_loader = AudioFrameLoader(frame=spec_config['duration'], step=spec_config['duration'], path=args.audio_folder, filename=args.input, repres=spec_config)

# classify each window and keep those scoring above the threshold
detections = process_audio_loader(audio_loader, model, batch_size=args.n_segs, step=spec_config['duration'], buffer=0, threshold=args.threshold, average_and_group=False)
save_detections(detections=detections, save_to=args.output)
#Usage:
# python detector_ex2.py --audio_folder=audio --input_list=input_list.txt --output="detections_ex2.csv"
import argparse
from tqdm import tqdm
import pandas as pd
from ketos.audio.spectrogram import MagSpectrogram
from ketos.neural_networks.resnet import ResNetInterface
from ketos.audio.audio_loader import AudioFrameLoader
from ketos.neural_networks.dev_utils.detection import process_audio_loader, merge_overlapping_detections, save_detections
# NOTE: this is example 2 (the original description said "example 1" — copy-paste slip)
parser = argparse.ArgumentParser(description="North Atlantic Right Whale detector - example 2")
parser.add_argument('--audio_folder', type=str, default=None,
                    help='path to the folder containing the audio file input')
parser.add_argument('--input_list', type=str, default=None,
                    help='a .txt file listing all the .wav files to be processed')
parser.add_argument('--output', type=str, default=None,
                    help='the .csv file where the detections will be saved')
parser.add_argument('--n_segs', type=int, default=128,
                    help='the number of segments to hold in memory at one time')
parser.add_argument('--step_size', type=float, default=0.5,
                    help='step size (in seconds) used when overlapping the spectrograms')
parser.add_argument('--buffer', type=float, default=1.0,
                    help='Time (in seconds) to be added around the detection')
parser.add_argument('--win_len', type=int, default=5,
                    help='Length of score averaging window (no. time steps). Must be an odd integer.')
parser.add_argument('--threshold', type=float, default=0.5,
                    help='minimum score value for a segment to be considered as a detection (ranging from 0 to 1)')

# progress-bar on/off flag pair (default: on)
show_progress_parser = parser.add_mutually_exclusive_group(required=False)
show_progress_parser.add_argument('--with_progress', dest='show_progress', action='store_true')
show_progress_parser.add_argument('--without_progress', dest='show_progress', action='store_false')
parser.set_defaults(show_progress=True)
args = parser.parse_args()

# validate: the score averaging window must be odd so it has a centre bin.
# Use parser.error rather than assert (asserts vanish under -O).
if args.win_len % 2 != 1:
    parser.error('win_len must be an odd integer')

# load the trained classifier along with its audio (spectrogram) representation
model, audio_repr = ResNetInterface.load_model_file('narw.kt', './narw_tmp_folder', load_audio_repr=True)
spec_config = audio_repr[0]['spectrogram']

# read the list of .wav files to process (one filename per line, 'filename' header)
file_list = pd.read_csv(args.input_list, sep='\t')
file_list = list(file_list['filename'])

# process each file: slide overlapping windows, classify, merge overlaps, save
for input_file in tqdm(file_list, disable=not args.show_progress):
    audio_loader = AudioFrameLoader(frame=spec_config['duration'], step=args.step_size, path=args.audio_folder, filename=input_file, repres=spec_config)
    detections = process_audio_loader(audio_loader, model, batch_size=args.n_segs, step=args.step_size, buffer=args.buffer, threshold=args.threshold, win_len=args.win_len)
    detections = merge_overlapping_detections(detections)
    # NOTE(review): save_detections is called once per input file with the same
    # output path; if it overwrites rather than appends, only the last file's
    # detections survive — verify against the ketos save_detections API.
    save_detections(detections=detections, save_to=args.output)
filename
sample_1.wav
sample_2.wav
sample_3.wav
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment