blob: ded25449ddfe76b7ead36dc7d300b40887367d81
1 | /* |
2 | * This file is part of FFmpeg. |
3 | * |
4 | * FFmpeg is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU Lesser General Public |
6 | * License as published by the Free Software Foundation; either |
7 | * version 2.1 of the License, or (at your option) any later version. |
8 | * |
9 | * FFmpeg is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | * Lesser General Public License for more details. |
13 | * |
14 | * You should have received a copy of the GNU Lesser General Public |
15 | * License along with FFmpeg; if not, write to the Free Software |
16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ |
18 | |
19 | #include <rubberband/rubberband-c.h> |
20 | |
21 | #include "libavutil/channel_layout.h" |
22 | #include "libavutil/common.h" |
23 | #include "libavutil/opt.h" |
24 | |
25 | #include "audio.h" |
26 | #include "avfilter.h" |
27 | #include "formats.h" |
28 | #include "internal.h" |
29 | |
30 | typedef struct RubberBandContext { |
31 | const AVClass *class; |
32 | RubberBandState rbs; |
33 | |
34 | double tempo, pitch; |
35 | int transients, detector, phase, window, |
36 | smoothing, formant, opitch, channels; |
37 | int64_t nb_samples_out; |
38 | int64_t nb_samples_in; |
39 | int flushed; |
40 | } RubberBandContext; |
41 | |
/* Byte offset of an option field inside RubberBandContext. */
#define OFFSET(x) offsetof(RubberBandContext, x)
/* Flags shared by every option of this filter. The expansion is parenthesized
 * so that it stays a single operand wherever the macro is used. */
#define A (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
44 | |
45 | static const AVOption rubberband_options[] = { |
46 | { "tempo", "set tempo scale factor", OFFSET(tempo), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0.01, 100, A }, |
47 | { "pitch", "set pitch scale factor", OFFSET(pitch), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0.01, 100, A }, |
48 | { "transients", "set transients", OFFSET(transients), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "transients" }, |
49 | { "crisp", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionTransientsCrisp}, 0, 0, A, "transients" }, |
50 | { "mixed", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionTransientsMixed}, 0, 0, A, "transients" }, |
51 | { "smooth", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionTransientsSmooth}, 0, 0, A, "transients" }, |
52 | { "detector", "set detector", OFFSET(detector), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "detector" }, |
53 | { "compound", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionDetectorCompound}, 0, 0, A, "detector" }, |
54 | { "percussive", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionDetectorPercussive}, 0, 0, A, "detector" }, |
55 | { "soft", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionDetectorSoft}, 0, 0, A, "detector" }, |
56 | { "phase", "set phase", OFFSET(phase), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "phase" }, |
57 | { "laminar", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionPhaseLaminar}, 0, 0, A, "phase" }, |
58 | { "independent", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionPhaseIndependent}, 0, 0, A, "phase" }, |
59 | { "window", "set window", OFFSET(window), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "window" }, |
60 | { "standard", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionWindowStandard}, 0, 0, A, "window" }, |
61 | { "short", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionWindowShort}, 0, 0, A, "window" }, |
62 | { "long", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionWindowLong}, 0, 0, A, "window" }, |
63 | { "smoothing", "set smoothing", OFFSET(smoothing), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "smoothing" }, |
64 | { "off", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionSmoothingOff}, 0, 0, A, "smoothing" }, |
65 | { "on", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionSmoothingOn}, 0, 0, A, "smoothing" }, |
66 | { "formant", "set formant", OFFSET(formant), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "formant" }, |
67 | { "shifted", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionFormantShifted}, 0, 0, A, "formant" }, |
68 | { "preserved", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionFormantPreserved}, 0, 0, A, "formant" }, |
69 | { "pitchq", "set pitch quality", OFFSET(opitch), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "pitch" }, |
70 | { "quality", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionPitchHighQuality}, 0, 0, A, "pitch" }, |
71 | { "speed", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionPitchHighSpeed}, 0, 0, A, "pitch" }, |
72 | { "consistency", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionPitchHighConsistency}, 0, 0, A, "pitch" }, |
73 | { "channels", "set channels", OFFSET(channels), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "channels" }, |
74 | { "apart", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionChannelsApart}, 0, 0, A, "channels" }, |
75 | { "together", 0, 0, AV_OPT_TYPE_CONST, {.i64=RubberBandOptionChannelsTogether}, 0, 0, A, "channels" }, |
76 | { NULL }, |
77 | }; |
78 | |
79 | AVFILTER_DEFINE_CLASS(rubberband); |
80 | |
81 | static av_cold void uninit(AVFilterContext *ctx) |
82 | { |
83 | RubberBandContext *s = ctx->priv; |
84 | |
85 | if (s->rbs) |
86 | rubberband_delete(s->rbs); |
87 | } |
88 | |
89 | static int query_formats(AVFilterContext *ctx) |
90 | { |
91 | AVFilterFormats *formats = NULL; |
92 | AVFilterChannelLayouts *layouts = NULL; |
93 | static const enum AVSampleFormat sample_fmts[] = { |
94 | AV_SAMPLE_FMT_FLTP, |
95 | AV_SAMPLE_FMT_NONE, |
96 | }; |
97 | int ret; |
98 | |
99 | layouts = ff_all_channel_counts(); |
100 | if (!layouts) |
101 | return AVERROR(ENOMEM); |
102 | ret = ff_set_common_channel_layouts(ctx, layouts); |
103 | if (ret < 0) |
104 | return ret; |
105 | |
106 | formats = ff_make_format_list(sample_fmts); |
107 | if (!formats) |
108 | return AVERROR(ENOMEM); |
109 | ret = ff_set_common_formats(ctx, formats); |
110 | if (ret < 0) |
111 | return ret; |
112 | |
113 | formats = ff_all_samplerates(); |
114 | if (!formats) |
115 | return AVERROR(ENOMEM); |
116 | return ff_set_common_samplerates(ctx, formats); |
117 | } |
118 | |
119 | static int filter_frame(AVFilterLink *inlink, AVFrame *in) |
120 | { |
121 | RubberBandContext *s = inlink->dst->priv; |
122 | AVFilterLink *outlink = inlink->dst->outputs[0]; |
123 | AVFrame *out; |
124 | int ret = 0, nb_samples; |
125 | |
126 | rubberband_process(s->rbs, (const float *const *)in->data, in->nb_samples, 0); |
127 | s->nb_samples_in += in->nb_samples; |
128 | |
129 | nb_samples = rubberband_available(s->rbs); |
130 | if (nb_samples > 0) { |
131 | out = ff_get_audio_buffer(inlink, nb_samples); |
132 | if (!out) { |
133 | av_frame_free(&in); |
134 | return AVERROR(ENOMEM); |
135 | } |
136 | out->pts = av_rescale_q(s->nb_samples_out, |
137 | (AVRational){ 1, outlink->sample_rate }, |
138 | outlink->time_base); |
139 | nb_samples = rubberband_retrieve(s->rbs, (float *const *)out->data, nb_samples); |
140 | out->nb_samples = nb_samples; |
141 | ret = ff_filter_frame(outlink, out); |
142 | s->nb_samples_out += nb_samples; |
143 | } |
144 | |
145 | av_frame_free(&in); |
146 | return ret; |
147 | } |
148 | |
149 | static int config_input(AVFilterLink *inlink) |
150 | { |
151 | AVFilterContext *ctx = inlink->dst; |
152 | RubberBandContext *s = ctx->priv; |
153 | int opts = s->transients|s->detector|s->phase|s->window| |
154 | s->smoothing|s->formant|s->opitch|s->channels| |
155 | RubberBandOptionProcessRealTime; |
156 | |
157 | if (s->rbs) |
158 | rubberband_delete(s->rbs); |
159 | s->rbs = rubberband_new(inlink->sample_rate, inlink->channels, opts, 1. / s->tempo, s->pitch); |
160 | |
161 | inlink->partial_buf_size = |
162 | inlink->min_samples = |
163 | inlink->max_samples = rubberband_get_samples_required(s->rbs); |
164 | |
165 | return 0; |
166 | } |
167 | |
168 | static int request_frame(AVFilterLink *outlink) |
169 | { |
170 | AVFilterContext *ctx = outlink->src; |
171 | RubberBandContext *s = ctx->priv; |
172 | AVFilterLink *inlink = ctx->inputs[0]; |
173 | int ret = 0; |
174 | |
175 | ret = ff_request_frame(ctx->inputs[0]); |
176 | |
177 | if (ret == AVERROR_EOF && !s->flushed) { |
178 | if (rubberband_available(s->rbs) > 0) { |
179 | AVFrame *out = ff_get_audio_buffer(inlink, 1); |
180 | int nb_samples; |
181 | |
182 | if (!out) |
183 | return AVERROR(ENOMEM); |
184 | |
185 | rubberband_process(s->rbs, (const float *const *)out->data, 1, 1); |
186 | av_frame_free(&out); |
187 | nb_samples = rubberband_available(s->rbs); |
188 | |
189 | if (nb_samples > 0) { |
190 | out = ff_get_audio_buffer(inlink, nb_samples); |
191 | if (!out) |
192 | return AVERROR(ENOMEM); |
193 | out->pts = av_rescale_q(s->nb_samples_out, |
194 | (AVRational){ 1, outlink->sample_rate }, |
195 | outlink->time_base); |
196 | nb_samples = rubberband_retrieve(s->rbs, (float *const *)out->data, nb_samples); |
197 | out->nb_samples = nb_samples; |
198 | ret = ff_filter_frame(outlink, out); |
199 | s->nb_samples_out += nb_samples; |
200 | } |
201 | } |
202 | s->flushed = 1; |
203 | av_log(ctx, AV_LOG_DEBUG, "nb_samples_in %"PRId64" nb_samples_out %"PRId64"\n", |
204 | s->nb_samples_in, s->nb_samples_out); |
205 | } |
206 | |
207 | return ret; |
208 | } |
209 | |
210 | static int process_command(AVFilterContext *ctx, const char *cmd, const char *args, |
211 | char *res, int res_len, int flags) |
212 | { |
213 | RubberBandContext *s = ctx->priv; |
214 | |
215 | if (!strcmp(cmd, "tempo")) { |
216 | double arg; |
217 | |
218 | sscanf(args, "%lf", &arg); |
219 | if (arg < 0.01 || arg > 100) { |
220 | av_log(ctx, AV_LOG_ERROR, |
221 | "Tempo scale factor '%f' out of range\n", arg); |
222 | return AVERROR(EINVAL); |
223 | } |
224 | rubberband_set_time_ratio(s->rbs, 1. / arg); |
225 | } |
226 | |
227 | if (!strcmp(cmd, "pitch")) { |
228 | double arg; |
229 | |
230 | sscanf(args, "%lf", &arg); |
231 | if (arg < 0.01 || arg > 100) { |
232 | av_log(ctx, AV_LOG_ERROR, |
233 | "Pitch scale factor '%f' out of range\n", arg); |
234 | return AVERROR(EINVAL); |
235 | } |
236 | rubberband_set_pitch_scale(s->rbs, arg); |
237 | } |
238 | |
239 | return 0; |
240 | } |
241 | |
242 | static const AVFilterPad rubberband_inputs[] = { |
243 | { |
244 | .name = "default", |
245 | .type = AVMEDIA_TYPE_AUDIO, |
246 | .config_props = config_input, |
247 | .filter_frame = filter_frame, |
248 | }, |
249 | { NULL } |
250 | }; |
251 | |
252 | static const AVFilterPad rubberband_outputs[] = { |
253 | { |
254 | .name = "default", |
255 | .type = AVMEDIA_TYPE_AUDIO, |
256 | .request_frame = request_frame, |
257 | }, |
258 | { NULL } |
259 | }; |
260 | |
261 | AVFilter ff_af_rubberband = { |
262 | .name = "rubberband", |
263 | .description = NULL_IF_CONFIG_SMALL("Apply time-stretching and pitch-shifting."), |
264 | .query_formats = query_formats, |
265 | .priv_size = sizeof(RubberBandContext), |
266 | .priv_class = &rubberband_class, |
267 | .uninit = uninit, |
268 | .inputs = rubberband_inputs, |
269 | .outputs = rubberband_outputs, |
270 | .process_command = process_command, |
271 | }; |
272 |