blob: e5f917d495d0bbf405b16ed7228df41517861220
/*
 * audio encoder psychoacoustic model
 * Copyright (C) 2008 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21 | |
22 | #ifndef AVCODEC_PSYMODEL_H |
23 | #define AVCODEC_PSYMODEL_H |
24 | |
25 | #include "avcodec.h" |
26 | |
/**
 * maximum possible number of bands
 *
 * Sizes the per-channel band array (FFPsyChannel.psy_bands) and the
 * per-group coupling table (FFPsyChannelGroup.coupling).
 */
#define PSY_MAX_BANDS 128
/**
 * maximum number of channels
 *
 * Sizes the channel pointer array of a channel group (FFPsyChannelGroup.ch).
 */
#define PSY_MAX_CHANS 20
31 | |
/*
 * cutoff for VBR is purposely increased, since LP filtering actually
 * hinders VBR performance rather than the opposite
 */

/**
 * Derive a lowpass cutoff from the bitrate, channel count and sample rate.
 *
 * When bit_rate is zero the result is sample_rate / 2. Otherwise the result
 * is the smallest of:
 *   - max(per-channel rate / 5, per-channel rate * 15 / 32 - 5500)
 *   - 3000  + per-channel rate / 4
 *   - 12000 + per-channel rate / 16
 *   - 22000
 *   - sample_rate / 2
 * where "per-channel rate" is bit_rate / channels.
 *
 * Every parameter is parenthesized so that compound expressions such as
 * `a + b` may be passed safely. Each argument is still evaluated more than
 * once, so arguments must not have side effects. channels must be non-zero
 * whenever bit_rate is non-zero (it is used as a divisor).
 *
 * Relies on FFMIN3() and FFMAX() being in scope at the point of use.
 */
#define AAC_CUTOFF_FROM_BITRATE(bit_rate,channels,sample_rate) ((bit_rate) ? FFMIN3(FFMIN3( \
    FFMAX((bit_rate)/(channels)/5, (bit_rate)/(channels)*15/32 - 5500), \
    3000 + (bit_rate)/(channels)/4, \
    12000 + (bit_rate)/(channels)/16), \
    22000, \
    (sample_rate) / 2) : ((sample_rate) / 2))

/**
 * Pick the cutoff for a codec context.
 *
 * @param s pointer to an object exposing flags, sample_rate, bit_rate and
 *          channels members (presumably an AVCodecContext -- confirm with
 *          callers); may be any pointer expression, it is evaluated more
 *          than once
 *
 * Yields sample_rate / 2 when AV_CODEC_FLAG_QSCALE is set in s->flags, and
 * the bitrate-derived value from AAC_CUTOFF_FROM_BITRATE() otherwise.
 */
#define AAC_CUTOFF(s) ( \
    ((s)->flags & AV_CODEC_FLAG_QSCALE) \
    ? (s)->sample_rate / 2 \
    : AAC_CUTOFF_FROM_BITRATE((s)->bit_rate, (s)->channels, (s)->sample_rate) \
)
46 | |
/**
 * single band psychoacoustic information
 */
typedef struct FFPsyBand {
    int   bits;      ///< bit count for the band (NOTE(review): exact meaning not documented here -- confirm with the model code)
    float energy;    ///< band energy
    float threshold; ///< band threshold
    float spread;    ///< energy spread over the band
} FFPsyBand;
56 | |
57 | /** |
58 | * single channel psychoacoustic information |
59 | */ |
60 | typedef struct FFPsyChannel { |
61 | FFPsyBand psy_bands[PSY_MAX_BANDS]; ///< channel bands information |
62 | float entropy; ///< total PE for this channel |
63 | } FFPsyChannel; |
64 | |
65 | /** |
66 | * psychoacoustic information for an arbitrary group of channels |
67 | */ |
68 | typedef struct FFPsyChannelGroup { |
69 | FFPsyChannel *ch[PSY_MAX_CHANS]; ///< pointers to the individual channels in the group |
70 | uint8_t num_ch; ///< number of channels in this group |
71 | uint8_t coupling[PSY_MAX_BANDS]; ///< allow coupling for this band in the group |
72 | } FFPsyChannelGroup; |
73 | |
/**
 * windowing related information
 */
typedef struct FFPsyWindowInfo {
    int   window_type[3]; ///< window type (short/long/transitional, etc.) - current, previous and next
    int   window_shape;   ///< window shape (sine/KBD/whatever)
    int   num_windows;    ///< number of windows in a frame
    int   grouping[8];    ///< window grouping (e.g. for AAC)
    float clipping[8];    ///< maximum absolute normalized intensity in the given window for clip avoidance
    int  *window_sizes;   ///< sequence of window sizes inside one frame (e.g. for WMA)
} FFPsyWindowInfo;
85 | |
86 | /** |
87 | * context used by psychoacoustic model |
88 | */ |
89 | typedef struct FFPsyContext { |
90 | AVCodecContext *avctx; ///< encoder context |
91 | const struct FFPsyModel *model; ///< encoder-specific model functions |
92 | |
93 | FFPsyChannel *ch; ///< single channel information |
94 | FFPsyChannelGroup *group; ///< channel group information |
95 | int num_groups; ///< number of channel groups |
96 | int cutoff; ///< lowpass frequency cutoff for analysis |
97 | |
98 | uint8_t **bands; ///< scalefactor band sizes for possible frame sizes |
99 | int *num_bands; ///< number of scalefactor bands for possible frame sizes |
100 | int num_lens; ///< number of scalefactor band sets |
101 | |
102 | struct { |
103 | int size; ///< size of the bitresevoir in bits |
104 | int bits; ///< number of bits used in the bitresevoir |
105 | int alloc; ///< number of bits allocated by the psy, or -1 if no allocation was done |
106 | } bitres; |
107 | |
108 | void* model_priv_data; ///< psychoacoustic model implementation private data |
109 | } FFPsyContext; |
110 | |
111 | /** |
112 | * codec-specific psychoacoustic model implementation |
113 | */ |
114 | typedef struct FFPsyModel { |
115 | const char *name; |
116 | int (*init) (FFPsyContext *apc); |
117 | |
118 | /** |
119 | * Suggest window sequence for channel. |
120 | * |
121 | * @param ctx model context |
122 | * @param audio samples for the current frame |
123 | * @param la lookahead samples (NULL when unavailable) |
124 | * @param channel number of channel element to analyze |
125 | * @param prev_type previous window type |
126 | * |
127 | * @return suggested window information in a structure |
128 | */ |
129 | FFPsyWindowInfo (*window)(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type); |
130 | |
131 | /** |
132 | * Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels. |
133 | * |
134 | * @param ctx model context |
135 | * @param channel channel number of the first channel in the group to perform analysis on |
136 | * @param coeffs array of pointers to the transformed coefficients |
137 | * @param wi window information for the channels in the group |
138 | */ |
139 | void (*analyze)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi); |
140 | |
141 | void (*end) (FFPsyContext *apc); |
142 | } FFPsyModel; |
143 | |
144 | /** |
145 | * Initialize psychoacoustic model. |
146 | * |
147 | * @param ctx model context |
148 | * @param avctx codec context |
149 | * @param num_lens number of possible frame lengths |
150 | * @param bands scalefactor band lengths for all frame lengths |
151 | * @param num_bands number of scalefactor bands for all frame lengths |
152 | * @param num_groups number of channel groups |
153 | * @param group_map array with # of channels in group - 1, for each group |
154 | * |
155 | * @return zero if successful, a negative value if not |
156 | */ |
157 | int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, |
158 | const uint8_t **bands, const int *num_bands, |
159 | int num_groups, const uint8_t *group_map); |
160 | |
161 | /** |
162 | * Determine what group a channel belongs to. |
163 | * |
164 | * @param ctx psymodel context |
165 | * @param channel channel to locate the group for |
166 | * |
167 | * @return pointer to the FFPsyChannelGroup this channel belongs to |
168 | */ |
169 | FFPsyChannelGroup *ff_psy_find_group(FFPsyContext *ctx, int channel); |
170 | |
171 | /** |
172 | * Cleanup model context at the end. |
173 | * |
174 | * @param ctx model context |
175 | */ |
176 | void ff_psy_end(FFPsyContext *ctx); |
177 | |
178 | |
/**************************************************************************
 *                        Audio preprocessing stuff.                      *
 *       This should be moved into some audio filter eventually.          *
 **************************************************************************/
183 | struct FFPsyPreprocessContext; |
184 | |
185 | /** |
186 | * psychoacoustic model audio preprocessing initialization |
187 | */ |
188 | struct FFPsyPreprocessContext *ff_psy_preprocess_init(AVCodecContext *avctx); |
189 | |
/**
 * Preprocess several channels of an audio frame in order to compress it better.
 *
 * @param ctx      preprocessing context
 * @param audio    samples to be filtered (in place)
 * @param channels number of channels to preprocess
 */
void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, float **audio, int channels);
198 | |
/**
 * Cleanup audio preprocessing module.
 *
 * @param ctx preprocessing context
 */
void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx);
203 | |
204 | #endif /* AVCODEC_PSYMODEL_H */ |
205 |