diff --git a/NAM/slimmable_wavenet.cpp b/NAM/slimmable_wavenet.cpp index 2357032..5be26e2 100644 --- a/NAM/slimmable_wavenet.cpp +++ b/NAM/slimmable_wavenet.cpp @@ -3,6 +3,7 @@ #include #include +#include #include namespace nam @@ -120,6 +121,12 @@ std::vector extract_slimmed_weights(const std::vector 1 is not " + "implemented)"); + } validate_groups(p); const int full_ch = p.channels; @@ -258,8 +265,9 @@ std::vector modify_params_for_channels( int new_head_size = (i < num_arrays - 1) ? new_channels_per_array[i + 1] : p.head_size; modified.push_back(wavenet::LayerArrayParams( - new_input_size, p.condition_size, new_head_size, new_ch, new_bottleneck, std::vector(p.kernel_sizes), - std::vector(p.dilations), std::vector(p.activation_configs), + new_input_size, p.condition_size, new_head_size, p.head_kernel_size, new_ch, new_bottleneck, + std::vector(p.kernel_sizes), std::vector(p.dilations), + std::vector(p.activation_configs), std::vector(p.gating_modes), p.head_bias, p.groups_input, p.groups_input_mixin, p.layer1x1_params, p.head1x1_params, std::vector(p.secondary_activation_configs), p.conv_pre_film_params, p.conv_post_film_params, p.input_mixin_pre_film_params, p.input_mixin_post_film_params, @@ -326,6 +334,9 @@ SlimmableWavenet::SlimmableWavenet(std::vector origin if (!any_slimmable) throw std::runtime_error("SlimmableWavenet: at least one layer array must have allowed_channels"); + if (with_head) + throw std::runtime_error("SlimmableWavenet: post-stack head is not supported"); + // Build with full channel counts as default (ratio=1.0) std::vector full_channels(_original_params.size()); for (size_t i = 0; i < _original_params.size(); i++) @@ -360,8 +371,8 @@ void SlimmableWavenet::_rebuild_model(const std::vector& target_channels) condition_dsp = get_dsp(_condition_dsp_json); double sampleRate = _current_sample_rate > 0 ? _current_sample_rate : GetExpectedSampleRate(); - _active_model = std::make_unique( - _in_channels, *params_ptr, _head_scale, _with_head, std::move(weights), std::move(condition_dsp), sampleRate); + _active_model = std::make_unique(_in_channels, *params_ptr, _head_scale, _with_head, std::nullopt, + std::move(weights), std::move(condition_dsp), sampleRate); _current_channels = target_channels; if (_current_buffer_size > 0) diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index 8ddce1a..f45b9f4 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -12,6 +12,75 @@ #include "slimmable_wavenet.h" #include "wavenet.h" +// PostStackHead (WaveNet post-stack head) ===================================== + +nam::wavenet::PostStackHead::PostStackHead(const WaveNetHeadParams& params) +: _in_channels(params.in_channels) +, _out_channels(params.out_channels) +{ + if (params.kernel_sizes.empty()) + throw std::runtime_error("PostStackHead: kernel_sizes must be non-empty"); + const size_t n = params.kernel_sizes.size(); + int cin = params.in_channels; + for (size_t i = 0; i < n; i++) + { + const int cout = (i + 1 == n) ? params.out_channels : params.channels; + const int k = params.kernel_sizes[i]; + if (k < 1) + throw std::runtime_error("PostStackHead: kernel_sizes entries must be >= 1"); + nam::activations::Activation::Ptr act = nam::activations::Activation::get_activation(params.activation_config); + if (act == nullptr) + throw std::runtime_error("PostStackHead: unsupported activation for post-stack head"); + _activations.push_back(std::move(act)); + nam::Conv1D conv; + conv.set_size_(cin, cout, k, true, 1, 1); + _convs.push_back(std::move(conv)); + cin = cout; + } +} + +void nam::wavenet::PostStackHead::set_weights_(std::vector::iterator& weights) +{ + for (size_t i = 0; i < _convs.size(); i++) + _convs[i].set_weights_(weights); +} + +void nam::wavenet::PostStackHead::SetMaxBufferSize(const int maxBufferSize) +{ + for (size_t i = 0; i < _convs.size(); i++) + _convs[i].SetMaxBufferSize(maxBufferSize); +} + +long nam::wavenet::PostStackHead::receptive_field() const +{ + long rf = 1; + for (size_t i = 0; i < _convs.size(); i++) + { + const long k = _convs[i].get_kernel_size(); + rf += k - 1; + } + return rf; +} + +void nam::wavenet::PostStackHead::process(Eigen::MatrixXf& work, const int num_frames) +{ + for (size_t i = 0; i < _convs.size(); i++) + { + const long in_ch = _convs[i].get_in_channels(); + if (i == 0) + { + _activations[i]->apply(work.data(), (long)(in_ch * num_frames)); + _convs[i].Process(work, num_frames); + } + else + { + auto& prev = _convs[i - 1].GetOutput(); + _activations[i]->apply(prev.data(), (long)(in_ch * num_frames)); + _convs[i].Process(prev, num_frames); + } + } +} + // Layer ====================================================================== void nam::wavenet::_Layer::SetMaxBufferSize(const int maxBufferSize) @@ -306,7 +375,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma nam::wavenet::_LayerArray::_LayerArray(const LayerArrayParams& params) : _rechannel(params.input_size, params.channels, false) , _head_rechannel(params.head1x1_params.active ? params.head1x1_params.out_channels : params.bottleneck, - params.head_size, params.head_bias) + params.head_size, params.head_kernel_size, params.head_bias ? 1 : 0, 1, 1) , _head_output_size(params.head1x1_params.active ? params.head1x1_params.out_channels : params.bottleneck) { const size_t num_layers = params.dilations.size(); @@ -345,6 +414,7 @@ long nam::wavenet::_LayerArray::get_receptive_field() const long result = 0; for (size_t i = 0; i < this->_layers.size(); i++) result += this->_layers[i].get_dilation() * (this->_layers[i].get_kernel_size() - 1); + result += (long)this->_head_rechannel.get_kernel_size() - 1; return result; } @@ -431,8 +501,8 @@ void nam::wavenet::_LayerArray::ProcessInner(const Eigen::MatrixXf& layer_inputs this->_layers[last_layer].GetOutputNextLayer().leftCols(num_frames); #endif - // Process head rechannel - _head_rechannel.process_(this->_head_inputs, num_frames); + // Process head rechannel (causal Conv1D) + _head_rechannel.Process(this->_head_inputs, num_frames); } @@ -460,16 +530,27 @@ long nam::wavenet::_LayerArray::_get_channels() const return this->_layers.size() > 0 ? this->_layers[0].get_channels() : 0; } +namespace +{ +int wave_net_output_channels(const std::vector& layer_array_params, + const bool with_head, const std::optional& head_params) +{ + if (layer_array_params.empty()) + throw std::runtime_error("WaveNet requires at least one layer array"); + if (with_head && head_params.has_value()) + return head_params->out_channels; + return layer_array_params.back().head_size; +} +} // namespace + // WaveNet ==================================================================== nam::wavenet::WaveNet::WaveNet(const int in_channels, const std::vector& layer_array_params, - const float head_scale, const bool with_head, std::vector weights, + const float head_scale, const bool with_head, + std::optional head_params, std::vector weights, std::unique_ptr condition_dsp, const double expected_sample_rate) -: DSP(in_channels, - layer_array_params.empty() ? throw std::runtime_error("WaveNet requires at least one layer array") - : layer_array_params.back().head_size, - expected_sample_rate) +: DSP(in_channels, wave_net_output_channels(layer_array_params, with_head, head_params), expected_sample_rate) , _condition_dsp(std::move(condition_dsp)) , _head_scale(head_scale) { @@ -484,10 +565,22 @@ nam::wavenet::WaveNet::WaveNet(const int in_channels, throw std::runtime_error(ss.str().c_str()); } } - if (layer_array_params.empty()) - throw std::runtime_error("WaveNet requires at least one layer array"); if (with_head) - throw std::runtime_error("Head not implemented!"); + { + if (!head_params.has_value()) + throw std::runtime_error("WaveNet: with_head is true but head configuration is missing"); + if (head_params->in_channels != layer_array_params.back().head_size) + { + std::stringstream ss; + ss << "WaveNet head in_channels (" << head_params->in_channels << ") must match last layer array head_size (" + << layer_array_params.back().head_size << ")"; + throw std::runtime_error(ss.str()); + } + this->_post_stack_head = std::make_unique(*head_params); + } + else if (head_params.has_value()) + throw std::runtime_error("WaveNet: head configuration provided but with_head is false"); + for (size_t i = 0; i < layer_array_params.size(); i++) { // Quick assert that the condition_dsp will output compatibly with this layer array @@ -518,6 +611,8 @@ nam::wavenet::WaveNet::WaveNet(const int in_channels, mPrewarmSamples = this->_condition_dsp != nullptr ? this->_condition_dsp->PrewarmSamples() : 1; for (size_t i = 0; i < this->_layer_arrays.size(); i++) mPrewarmSamples += this->_layer_arrays[i].get_receptive_field(); + if (this->_post_stack_head != nullptr) + mPrewarmSamples += this->_post_stack_head->receptive_field() - 1; } void nam::wavenet::WaveNet::set_weights_(std::vector& weights) @@ -527,6 +622,8 @@ void nam::wavenet::WaveNet::set_weights_(std::vector& weights) // so we don't need to set its weights here. for (size_t i = 0; i < this->_layer_arrays.size(); i++) this->_layer_arrays[i].set_weights_(it); + if (this->_post_stack_head != nullptr) + this->_post_stack_head->set_weights_(it); this->_head_scale = *(it++); // TODO `LayerArray.absorb_head_scale()` if (it != weights.end()) { @@ -579,6 +676,12 @@ void nam::wavenet::WaveNet::SetMaxBufferSize(const int maxBufferSize) for (size_t i = 0; i < this->_layer_arrays.size(); i++) this->_layer_arrays[i].SetMaxBufferSize(maxBufferSize); + + if (this->_post_stack_head != nullptr) + { + this->_post_stack_head->SetMaxBufferSize(maxBufferSize); + this->_scaled_head_scratch.resize(this->_post_stack_head->in_channels(), maxBufferSize); + } } void nam::wavenet::WaveNet::_process_condition(const int num_frames) @@ -656,9 +759,39 @@ void nam::wavenet::WaveNet::process(NAM_SAMPLE** input, NAM_SAMPLE** output, con } } - // (Head not implemented) - auto& final_head_outputs = this->_layer_arrays.back().GetHeadOutputs(); + + if (this->_post_stack_head != nullptr) + { + assert(final_head_outputs.rows() == this->_post_stack_head->in_channels()); + const int head_in = this->_post_stack_head->in_channels(); + for (int ch = 0; ch < head_in; ch++) + { + for (int s = 0; s < num_frames; s++) + this->_scaled_head_scratch(ch, s) = this->_head_scale * final_head_outputs(ch, s); + } + this->_post_stack_head->process(this->_scaled_head_scratch, num_frames); + const Eigen::MatrixXf& head_out = this->_post_stack_head->get_last_output(); + assert(head_out.rows() == out_channels); + + if (out_channels == 1) + { + const float* __restrict__ src = head_out.data(); + NAM_SAMPLE* __restrict__ dst = output[0]; + for (int s = 0; s < num_frames; s++) + dst[s] = (NAM_SAMPLE)src[s]; + } + else + { + for (int ch = 0; ch < out_channels; ch++) + { + for (int s = 0; s < num_frames; s++) + output[ch][s] = (NAM_SAMPLE)head_out(ch, s); + } + } + return; + } + assert(final_head_outputs.rows() == out_channels); // Optimized output copy with head_scale multiplication @@ -729,7 +862,41 @@ nam::wavenet::WaveNetConfig nam::wavenet::parse_config_json(const nlohmann::json const int input_size = layer_config["input_size"]; const int condition_size = layer_config["condition_size"]; - const int head_size = layer_config["head_size"]; + + int head_size = 0; + int head_kernel_size = 1; + bool head_bias = false; + + // Prefer nested "head" (matches trainer export). Legacy .nam uses head_size + head_bias (implicit kernel 1). + if (layer_config.find("head") != layer_config.end() && !layer_config["head"].is_null()) + { + const auto& head_json = layer_config["head"]; + if (!head_json.is_object()) + { + throw std::runtime_error("Layer array " + std::to_string(i) + ": 'head' must be a JSON object"); + } + head_size = head_json.at("out_channels").get(); + head_kernel_size = head_json.at("kernel_size").get(); + head_bias = head_json.at("bias").get(); + } + else if (layer_config.find("head_size") != layer_config.end()) + { + head_size = layer_config["head_size"].get(); + head_kernel_size = 1; + head_bias = layer_config.at("head_bias").get(); + } + else + { + throw std::runtime_error("Layer array " + std::to_string(i) + + ": expected 'head' object with out_channels, kernel_size, and bias, " + "or legacy 'head_size' and 'head_bias'"); + } + + if (head_kernel_size < 1) + { + throw std::runtime_error("Layer array " + std::to_string(i) + ": head.kernel_size must be >= 1"); + } + const auto dilations = layer_config["dilations"]; const size_t num_layers = dilations.size(); @@ -921,8 +1088,6 @@ nam::wavenet::WaveNetConfig nam::wavenet::parse_config_json(const nlohmann::json secondary_activation_configs.resize(num_layers, activations::ActivationConfig{}); } - const bool head_bias = layer_config["head_bias"]; - // Parse head1x1 parameters bool head1x1_active = false; int head1x1_out_channels = channels; @@ -967,7 +1132,7 @@ nam::wavenet::WaveNetConfig nam::wavenet::parse_config_json(const nlohmann::json } wc.layer_array_params.push_back(nam::wavenet::LayerArrayParams( - input_size, condition_size, head_size, channels, bottleneck, std::move(kernel_sizes), dilations, + input_size, condition_size, head_size, head_kernel_size, channels, bottleneck, std::move(kernel_sizes), dilations, std::move(activation_configs), std::move(gating_modes), head_bias, groups, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), conv_pre_film_params, conv_post_film_params, input_mixin_pre_film_params, input_mixin_post_film_params, activation_pre_film_params, @@ -981,14 +1146,44 @@ nam::wavenet::WaveNetConfig nam::wavenet::parse_config_json(const nlohmann::json if (wc.layer_array_params.empty()) throw std::runtime_error("WaveNet config requires at least one layer array"); + if (wc.with_head) + { + const nlohmann::json& hj = config["head"]; + WaveNetHeadParams hp; + const int implied_in = wc.layer_array_params.back().head_size; + // New trainer export omits in_channels (single source: last layer head_size). Legacy .nam may include it. + if (hj.find("in_channels") != hj.end() && !hj["in_channels"].is_null()) + { + const int legacy_in = hj["in_channels"].get(); + if (legacy_in != implied_in) + { + std::stringstream ss; + ss << "WaveNet config: head.in_channels (" << legacy_in << ") must equal last layer's head_size (" << implied_in + << ")"; + throw std::runtime_error(ss.str()); + } + } + hp.in_channels = implied_in; + hp.channels = hj.at("channels").get(); + hp.out_channels = hj.at("out_channels").get(); + hp.kernel_sizes = hj.at("kernel_sizes").get>(); + hp.activation_config = nam::activations::ActivationConfig::from_json(hj.at("activation")); + if (hp.kernel_sizes.empty()) + throw std::runtime_error("WaveNet config: head.kernel_sizes must be non-empty"); + wc.head_params = std::move(hp); + } + else + wc.head_params = std::nullopt; + return wc; } // WaveNetConfig::create() std::unique_ptr nam::wavenet::WaveNetConfig::create(std::vector weights, double sampleRate) { - return std::make_unique( - in_channels, layer_array_params, head_scale, with_head, std::move(weights), std::move(condition_dsp), sampleRate); + return std::make_unique(in_channels, layer_array_params, head_scale, with_head, + std::move(head_params), std::move(weights), std::move(condition_dsp), + sampleRate); } namespace diff --git a/NAM/wavenet.h b/NAM/wavenet.h index 6cb43e5..4aeafe3 100644 --- a/NAM/wavenet.h +++ b/NAM/wavenet.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -435,6 +436,7 @@ class LayerArrayParams /// \param dilations_ Vector of dilation factors, one per layer /// \param activation_configs_ Vector of primary activation configurations, one per layer /// \param gating_modes_ Vector of gating modes, one per layer + /// \param head_kernel_size_ Kernel size of the head rechannel conv (>= 1) /// \param head_bias_ Whether to use bias in the head rechannel /// \param groups_input Number of groups for input convolutions /// \param groups_input_mixin_ Number of groups for input mixin convolutions @@ -451,8 +453,9 @@ class LayerArrayParams /// \param head1x1_post_film_params_ FiLM parameters after head1x1 convolutions /// \throws std::invalid_argument If dilations, activation_configs, gating_modes, or secondary_activation_configs /// sizes don't match - LayerArrayParams(const int input_size_, const int condition_size_, const int head_size_, const int channels_, - const int bottleneck_, const std::vector&& kernel_sizes_, const std::vector&& dilations_, + LayerArrayParams(const int input_size_, const int condition_size_, const int head_size_, const int head_kernel_size_, + const int channels_, const int bottleneck_, const std::vector&& kernel_sizes_, + const std::vector&& dilations_, const std::vector&& activation_configs_, const std::vector&& gating_modes_, const bool head_bias_, const int groups_input, const int groups_input_mixin_, const Layer1x1Params& layer1x1_params_, @@ -465,6 +468,7 @@ class LayerArrayParams : input_size(input_size_) , condition_size(condition_size_) , head_size(head_size_) + , head_kernel_size(head_kernel_size_) , channels(channels_) , bottleneck(bottleneck_) , kernel_sizes(std::move(kernel_sizes_)) @@ -486,6 +490,10 @@ class LayerArrayParams , _layer1x1_post_film_params(_layer1x1_post_film_params_) , head1x1_post_film_params(head1x1_post_film_params_) { + if (head_kernel_size < 1) + { + throw std::invalid_argument("LayerArrayParams: head_kernel_size must be >= 1"); + } const size_t num_layers = dilations.size(); if (kernel_sizes.empty()) { @@ -518,6 +526,7 @@ class LayerArrayParams const int input_size; ///< Input size (number of channels) const int condition_size; ///< Size of conditioning input const int head_size; ///< Size of head output (after rechannel) + const int head_kernel_size; ///< Kernel size of head rechannel convolution (>= 1) const int channels; ///< Number of channels in each layer const int bottleneck; ///< Bottleneck size (internal channel count) std::vector kernel_sizes; ///< Per-layer kernel sizes, one per layer @@ -628,8 +637,8 @@ class _LayerArray // Size is _head_output_size (= head1x1.out_channels if head1x1 active, else bottleneck) Eigen::MatrixXf _head_inputs; - // Rechannel for the head (_head_output_size -> head_size) - Conv1x1 _head_rechannel; + // Rechannel for the head (_head_output_size -> head_size), causal Conv1D (dilation 1) + Conv1D _head_rechannel; // Head output size from each layer (head1x1.out_channels if active, else bottleneck) const int _head_output_size; @@ -639,6 +648,42 @@ class _LayerArray void ProcessInner(const Eigen::MatrixXf& layer_inputs, const Eigen::MatrixXf& condition, const int num_frames); }; +/// \brief Parameters for the optional post-stack head (matches Python ``nam.models.wavenet._head.Head``). +/// JSON export omits ``in_channels`` (implied by last layer array ``head_size``); load sets it from there. +struct WaveNetHeadParams +{ + int in_channels; + int channels; + int out_channels; + std::vector kernel_sizes; + activations::ActivationConfig activation_config; +}; + +/// \brief Post-stack head: repeated (activation → Conv1D) with dilation 1, stride 1, valid (causal streaming) conv. +class PostStackHead +{ +public: + explicit PostStackHead(const WaveNetHeadParams& params); + + void set_weights_(std::vector::iterator& weights); + void SetMaxBufferSize(int maxBufferSize); + long receptive_field() const; + int in_channels() const { return _in_channels; } + int out_channels() const { return _out_channels; } + + /// \param work Input buffer (in_channels × maxBufferSize); first in_channels×num_frames scaled by head_scale; + /// may be modified in place. + void process(Eigen::MatrixXf& work, int num_frames); + + const Eigen::MatrixXf& get_last_output() const { return _convs.back().GetOutput(); } + +private: + std::vector _convs; + std::vector _activations; + int _in_channels; + int _out_channels; +}; + /// \brief The main WaveNet model /// /// WaveNet is a dilated convolutional neural network architecture for audio processing. @@ -657,13 +702,14 @@ class WaveNet : public DSP /// \param in_channels Number of input channels /// \param layer_array_params Parameters for each layer array /// \param head_scale Scaling factor applied to the final head output - /// \param with_head Whether to use a custom "head" module that further processes the output (not currently supported) + /// \param with_head Whether to apply the optional post-stack head (Conv1D stack after layer arrays) + /// \param head_params Configuration for the post-stack head when ``with_head`` is true /// \param weights Model weights (will be consumed during construction) /// \param condition_dsp Optional DSP module for processing the conditioning input /// \param expected_sample_rate Expected sample rate in Hz (-1.0 if unknown) WaveNet(const int in_channels, const std::vector& layer_array_params, const float head_scale, - const bool with_head, std::vector weights, std::unique_ptr condition_dsp, - const double expected_sample_rate = -1.0); + const bool with_head, std::optional head_params, std::vector weights, + std::unique_ptr condition_dsp, const double expected_sample_rate = -1.0); /// \brief Destructor ~WaveNet() = default; @@ -725,6 +771,10 @@ class WaveNet : public DSP float _head_scale; + std::unique_ptr _post_stack_head; + /// Scratch (in_channels × maxBufferSize) for scaled head input when ``_post_stack_head`` is used + Eigen::MatrixXf _scaled_head_scratch; + int mPrewarmSamples = 0; // Pre-compute during initialization int PrewarmSamples() override { return mPrewarmSamples; }; }; @@ -736,6 +786,7 @@ struct WaveNetConfig : public ModelConfig std::vector layer_array_params; float head_scale; bool with_head; + std::optional head_params; std::unique_ptr condition_dsp; // Move-only due to unique_ptr diff --git a/tools/run_tests.cpp b/tools/run_tests.cpp index 38aa5b5..5908dbc 100644 --- a/tools/run_tests.cpp +++ b/tools/run_tests.cpp @@ -18,6 +18,8 @@ #include "test/test_wavenet/test_real_time_safe.cpp" #include "test/test_wavenet/test_condition_processing.cpp" #include "test/test_wavenet/test_head1x1.cpp" +#include "test/test_wavenet/test_output_head.cpp" +#include "test/test_wavenet/test_layer_head_config.cpp" #include "test/test_wavenet/test_layer1x1.cpp" #include "test/test_wavenet/test_factory.cpp" #include "test/test_gating_activations.cpp" @@ -159,6 +161,11 @@ int main() test_wavenet::test_layer_array::test_layer_array_with_head_input(); test_wavenet::test_layer_array::test_layer_array_different_activations(); test_wavenet::test_full::test_wavenet_model(); + test_wavenet::test_output_head::test_post_stack_head_receptive_field(); + test_wavenet::test_output_head::test_wavenet_with_post_stack_head_processes(); + test_wavenet::test_output_head::test_wavenet_with_two_layer_post_stack_head_applies_activation_per_layer_input(); + test_wavenet::test_layer_head_config::test_legacy_head_size_and_head_bias_implies_kernel_one(); + test_wavenet::test_layer_head_config::test_nested_head_with_kernel_size_three(); test_wavenet::test_full::test_wavenet_multiple_arrays(); test_wavenet::test_full::test_wavenet_zero_input(); test_wavenet::test_full::test_wavenet_different_buffer_sizes(); @@ -190,6 +197,7 @@ int main() test_wavenet::test_layer_post_activation_film_blended_realtime_safe(); test_wavenet::test_layer_array_process_realtime_safe(); test_wavenet::test_process_realtime_safe(); + test_wavenet::test_process_with_post_stack_head_realtime_safe(); test_wavenet::test_process_3in_2out_realtime_safe(); test_wavenet::test_condition_processing::test_with_condition_dsp(); test_wavenet::test_condition_processing::test_with_condition_dsp_multichannel(); diff --git a/tools/test/test_wavenet/test_condition_processing.cpp b/tools/test/test_wavenet/test_condition_processing.cpp index 5a7e0d0..f929483 100644 --- a/tools/test/test_wavenet/test_condition_processing.cpp +++ b/tools/test/test_wavenet/test_condition_processing.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -35,7 +36,7 @@ static nam::wavenet::LayerArrayParams make_layer_array_params( std::vector secondary_activation_configs( dilations.size(), secondary_activation_config); return nam::wavenet::LayerArrayParams( - input_size, condition_size, head_size, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), + input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); @@ -143,7 +144,7 @@ std::unique_ptr create_simple_wavenet( weights.push_back(head_scale); return std::make_unique( - in_channels, layer_array_params, head_scale, with_head, weights, std::move(condition_dsp), 48000.0); + in_channels, layer_array_params, head_scale, with_head, std::nullopt, weights, std::move(condition_dsp), 48000.0); } // Test condition processing with condition_dsp diff --git a/tools/test/test_wavenet/test_full.cpp b/tools/test/test_wavenet/test_full.cpp index 7f18b68..0c09c2e 100644 --- a/tools/test/test_wavenet/test_full.cpp +++ b/tools/test/test_wavenet/test_full.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include "NAM/wavenet.h" @@ -34,7 +35,7 @@ static nam::wavenet::LayerArrayParams make_layer_array_params( std::vector secondary_activation_configs( dilations.size(), secondary_activation_config); return nam::wavenet::LayerArrayParams( - input_size, condition_size, head_size, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), + input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); @@ -82,7 +83,7 @@ void test_wavenet_model() std::unique_ptr condition_dsp = nullptr; auto wavenet = std::make_unique( - input_size, layer_array_params, head_scale, with_head, weights, std::move(condition_dsp), 48000.0); + input_size, layer_array_params, head_scale, with_head, std::nullopt, weights, std::move(condition_dsp), 48000.0); const int numFrames = 4; const int maxBufferSize = 64; @@ -151,7 +152,7 @@ void test_wavenet_multiple_arrays() std::unique_ptr condition_dsp = nullptr; auto wavenet = std::make_unique( - input_size, layer_array_params, head_scale, with_head, weights, std::move(condition_dsp), 48000.0); + input_size, layer_array_params, head_scale, with_head, std::nullopt, weights, std::move(condition_dsp), 48000.0); const int numFrames = 4; const int maxBufferSize = 64; @@ -204,7 +205,7 @@ void test_wavenet_zero_input() std::unique_ptr condition_dsp = nullptr; auto wavenet = std::make_unique( - input_size, layer_array_params, head_scale, with_head, weights, std::move(condition_dsp), 48000.0); + input_size, layer_array_params, head_scale, with_head, std::nullopt, weights, std::move(condition_dsp), 48000.0); const int numFrames = 4; wavenet->Reset(48000.0, numFrames); @@ -256,7 +257,7 @@ void test_wavenet_different_buffer_sizes() std::unique_ptr condition_dsp = nullptr; auto wavenet = std::make_unique( - input_size, layer_array_params, head_scale, with_head, weights, std::move(condition_dsp), 48000.0); + input_size, layer_array_params, head_scale, with_head, std::nullopt, weights, std::move(condition_dsp), 48000.0); // Test with different buffer sizes wavenet->Reset(48000.0, 64); @@ -331,7 +332,7 @@ void test_wavenet_prewarm() std::unique_ptr condition_dsp = nullptr; auto wavenet = std::make_unique( - input_size, layer_array_params, head_scale, with_head, weights, std::move(condition_dsp), 48000.0); + input_size, layer_array_params, head_scale, with_head, std::nullopt, weights, std::move(condition_dsp), 48000.0); // Test that prewarm can be called without errors wavenet->Reset(48000.0, 64); diff --git a/tools/test/test_wavenet/test_layer_array.cpp b/tools/test/test_wavenet/test_layer_array.cpp index f91ff98..0dc1fde 100644 --- a/tools/test/test_wavenet/test_layer_array.cpp +++ b/tools/test/test_wavenet/test_layer_array.cpp @@ -38,7 +38,7 @@ static nam::wavenet::_LayerArray make_layer_array(const int input_size, const in std::vector dilations_copy = dilations; // Make a copy since we need to move it std::vector kernel_sizes(dilations.size(), kernel_size); nam::wavenet::LayerArrayParams params( - input_size, condition_size, head_size, channels, bottleneck, std::move(kernel_sizes), std::move(dilations_copy), + input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations_copy), std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); @@ -223,7 +223,7 @@ void test_layer_array_different_activations() auto film_params = make_default_film_params(); std::vector kernel_sizes(dilations.size(), kernel_size); nam::wavenet::LayerArrayParams params( - input_size, condition_size, head_size, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), + input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), std::move(activation_configs), std::move(gating_modes), head_bias, groups, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); @@ -306,7 +306,7 @@ void test_layer_array_different_activations() dilations_all_relu.size(), nam::activations::ActivationConfig{}); std::vector kernel_sizes_all_relu(dilations_all_relu.size(), kernel_size); nam::wavenet::LayerArrayParams params_all_relu( - input_size, condition_size, head_size, channels, bottleneck, std::move(kernel_sizes_all_relu), + input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes_all_relu), std::move(dilations_all_relu), std::move(all_relu_configs), std::move(all_none_gating_modes), head_bias, groups, groups_input_mixin, layer1x1_params, head1x1_params, std::move(all_empty_secondary_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); diff --git a/tools/test/test_wavenet/test_layer_head_config.cpp b/tools/test/test_wavenet/test_layer_head_config.cpp new file mode 100644 index 0000000..847810a --- /dev/null +++ b/tools/test/test_wavenet/test_layer_head_config.cpp @@ -0,0 +1,68 @@ +// Layer-array head JSON: legacy head_size/head_bias vs nested "head" (out_channels, kernel_size, bias) + +#include +#include + +#include "json.hpp" + +#include "NAM/wavenet.h" + +namespace test_wavenet +{ +namespace test_layer_head_config +{ + +void test_legacy_head_size_and_head_bias_implies_kernel_one() +{ + const std::string configStr = R"({ + "layers": [{ + "input_size": 1, + "condition_size": 1, + "head_size": 2, + "channels": 2, + "kernel_size": 1, + "dilations": [1], + "activation": "ReLU", + "head_bias": false + }], + "head_scale": 1.0 + })"; + + const nlohmann::json j = nlohmann::json::parse(configStr); + const auto wc = nam::wavenet::parse_config_json(j, 48000.0); + assert(wc.layer_array_params.size() == 1); + const auto& p = wc.layer_array_params[0]; + assert(p.head_size == 2); + assert(p.head_kernel_size == 1); + assert(p.head_bias == false); +} + +void test_nested_head_with_kernel_size_three() +{ + const std::string configStr = R"({ + "layers": [{ + "input_size": 1, + "condition_size": 1, + "head": {"out_channels": 1, "kernel_size": 3, "bias": true}, + "channels": 2, + "kernel_size": 1, + "dilations": [1], + "activation": "ReLU" + }], + "head_scale": 1.0 + })"; + + const nlohmann::json j = nlohmann::json::parse(configStr); + const auto wc = nam::wavenet::parse_config_json(j, 48000.0); + assert(wc.layer_array_params.size() == 1); + const auto& p = wc.layer_array_params[0]; + assert(p.head_size == 1); + assert(p.head_kernel_size == 3); + assert(p.head_bias == true); + + nam::wavenet::_LayerArray array(p); + assert(array.get_receptive_field() == 2); // one dilated layer: 0 + (3-1) head rechannel +} + +} // namespace test_layer_head_config +} // namespace test_wavenet diff --git a/tools/test/test_wavenet/test_output_head.cpp b/tools/test/test_wavenet/test_output_head.cpp new file mode 100644 index 0000000..4762df5 --- /dev/null +++ b/tools/test/test_wavenet/test_output_head.cpp @@ -0,0 +1,189 @@ +// Tests for WaveNet post-stack head (Python ``Head`` module) + +#include +#include +#include +#include +#include +#include + +#include "NAM/wavenet.h" + +namespace test_wavenet +{ +namespace test_output_head +{ + +static nam::wavenet::_FiLMParams make_inactive_film() +{ + return nam::wavenet::_FiLMParams(false, false); +} + +static nam::wavenet::LayerArrayParams make_layer_array_params( + const int input_size, const int condition_size, const int head_size, const int channels, const int bottleneck, + std::vector&& kernel_sizes, std::vector&& dilations, + const nam::activations::ActivationConfig& activation_config, const nam::wavenet::GatingMode gating_mode, + const bool head_bias, const int groups_input, const int groups_input_mixin, + const nam::wavenet::Layer1x1Params& layer1x1_params, const nam::wavenet::Head1x1Params& head1x1_params, + const nam::activations::ActivationConfig& secondary_activation_config) +{ + auto film = make_inactive_film(); + std::vector activation_configs(dilations.size(), activation_config); + std::vector gating_modes(dilations.size(), gating_mode); + std::vector secondary_activation_configs( + dilations.size(), secondary_activation_config); + return nam::wavenet::LayerArrayParams(input_size, condition_size, head_size, 1, channels, bottleneck, + std::move(kernel_sizes), std::move(dilations), std::move(activation_configs), + std::move(gating_modes), head_bias, groups_input, groups_input_mixin, + layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film, + film, film, film, film, film, film, film); +} + +void test_post_stack_head_receptive_field() +{ + nam::wavenet::WaveNetHeadParams p; + p.in_channels = 2; + p.channels = 3; + p.out_channels = 1; + p.kernel_sizes = {3, 5}; + p.activation_config = nam::activations::ActivationConfig::simple(nam::activations::ActivationType::Tanh); + nam::wavenet::PostStackHead head(p); + // Python: 1 + (3-1) + (5-1) = 7 + assert(head.receptive_field() == 7); +} + +void test_wavenet_with_post_stack_head_processes() +{ + const int input_size = 1; + const int condition_size = 1; + const int head_size = 1; + const int channels = 1; + const int bottleneck = channels; + const int kernel_size = 1; + std::vector dilations{1}; + std::vector kernel_sizes(dilations.size(), kernel_size); + const auto activation = nam::activations::ActivationConfig::simple(nam::activations::ActivationType::ReLU); + const nam::wavenet::GatingMode gating_mode = nam::wavenet::GatingMode::NONE; + const bool head_bias = false; + const float head_scale = 0.5f; + const bool with_head = true; + const int groups = 1; + const int groups_input_mixin = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); + nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); + nam::activations::ActivationConfig empty_config{}; + nam::wavenet::LayerArrayParams layer_params = make_layer_array_params( + input_size, condition_size, head_size, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), + activation, gating_mode, head_bias, groups, groups_input_mixin, layer1x1_params, head1x1_params, empty_config); + std::vector layer_array_params; + layer_array_params.push_back(std::move(layer_params)); + + nam::wavenet::WaveNetHeadParams hp; + hp.in_channels = 1; + hp.channels = 1; + hp.out_channels = 1; + hp.kernel_sizes = {1}; + hp.activation_config = nam::activations::ActivationConfig::simple(nam::activations::ActivationType::Tanh); + + std::vector weights; + weights.push_back(1.0f); // Rechannel + weights.insert(weights.end(), {1.0f, 0.0f, 1.0f, 1.0f, 0.0f}); // Layer 0 + weights.push_back(1.0f); // Head rechannel + weights.push_back(1.0f); // Post-stack conv weight (1x1) + weights.push_back(0.0f); // Post-stack conv bias + weights.push_back(head_scale); + + std::unique_ptr condition_dsp = nullptr; + auto wavenet = std::make_unique(input_size, layer_array_params, head_scale, with_head, + std::optional(std::move(hp)), + std::move(weights), std::move(condition_dsp), 48000.0); + + const int numFrames = 8; + const int maxBufferSize = 64; + wavenet->Reset(48000.0, maxBufferSize); + wavenet->prewarm(); + + std::vector input(numFrames, 0.1f); + std::vector output(numFrames, 0.0f); + NAM_SAMPLE* inputPtrs[] = {input.data()}; + NAM_SAMPLE* outputPtrs[] = {output.data()}; + + wavenet->process(inputPtrs, outputPtrs, numFrames); + + for (int i = 0; i < numFrames; i++) + assert(std::isfinite(output[i])); +} + +void test_wavenet_with_two_layer_post_stack_head_applies_activation_per_layer_input() +{ + // Regression for multi-layer post-stack head execution: + // each layer must apply its activation to that layer's input, not always the + // original head input buffer. + const int input_size = 1; + const int condition_size = 1; + const int head_size = 1; + const int channels = 1; + const int bottleneck = channels; + const int kernel_size = 1; + std::vector dilations{1}; + std::vector kernel_sizes(dilations.size(), kernel_size); + const auto activation = nam::activations::ActivationConfig::simple(nam::activations::ActivationType::ReLU); + const nam::wavenet::GatingMode gating_mode = nam::wavenet::GatingMode::NONE; + const bool head_bias = false; + const float head_scale = 1.0f; + const bool with_head = true; + const int groups = 1; + const int groups_input_mixin = 1; + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); + nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); + nam::activations::ActivationConfig empty_config{}; + nam::wavenet::LayerArrayParams layer_params = make_layer_array_params( + input_size, condition_size, head_size, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), + activation, gating_mode, head_bias, groups, groups_input_mixin, layer1x1_params, head1x1_params, empty_config); + std::vector layer_array_params; + layer_array_params.push_back(std::move(layer_params)); + + nam::wavenet::WaveNetHeadParams hp; + hp.in_channels = 1; + hp.channels = 1; + hp.out_channels = 1; + hp.kernel_sizes = {1, 1}; + hp.activation_config = nam::activations::ActivationConfig::simple(nam::activations::ActivationType::ReLU); + + std::vector weights; + // Main WaveNet (single 1x1 layer array), identity mapping to head input: + weights.push_back(1.0f); // Rechannel weight + weights.insert(weights.end(), {1.0f, 0.0f, 1.0f, 1.0f, 0.0f}); // Layer 0 weights + weights.push_back(1.0f); // Head rechannel weight + // Post-stack head (2x [ReLU -> Conv1d(k=1)]): + // First conv: y = -1*x + 0 + // Second conv: y = 2*x + 0 + // For negative input, correct chain gives 0 (ReLU before second conv on first conv output). + weights.push_back(-1.0f); // Head layer 0 conv weight + weights.push_back(0.0f); // Head layer 0 conv bias + weights.push_back(2.0f); // Head layer 1 conv weight + weights.push_back(0.0f); // Head layer 1 conv bias + weights.push_back(head_scale); + + std::unique_ptr condition_dsp = nullptr; + auto wavenet = std::make_unique(input_size, layer_array_params, head_scale, with_head, + std::optional(std::move(hp)), + std::move(weights), std::move(condition_dsp), 48000.0); + + const int numFrames = 8; + const int maxBufferSize = 64; + wavenet->Reset(48000.0, maxBufferSize); + wavenet->prewarm(); + + std::vector input(numFrames, -0.25f); + std::vector output(numFrames, 0.0f); + NAM_SAMPLE* inputPtrs[] = {input.data()}; + NAM_SAMPLE* outputPtrs[] = {output.data()}; + wavenet->process(inputPtrs, outputPtrs, numFrames); + + for (int i = 0; i < numFrames; i++) + assert(std::fabs(output[i]) < 1.0e-6f); +} + +} // namespace test_output_head +} // namespace test_wavenet diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp index 9bea8e9..b590551 100644 --- a/tools/test/test_wavenet/test_real_time_safe.cpp +++ b/tools/test/test_wavenet/test_real_time_safe.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -60,7 +61,7 @@ static nam::wavenet::_LayerArray make_layer_array(const int input_size, const in std::vector dilations_copy = dilations; // Make a copy since we need to move it std::vector kernel_sizes(dilations.size(), kernel_size); nam::wavenet::LayerArrayParams params( - input_size, condition_size, head_size, channels, bottleneck, std::move(kernel_sizes), std::move(dilations_copy), + input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations_copy), std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); @@ -83,7 +84,7 @@ static nam::wavenet::LayerArrayParams make_layer_array_params( std::vector secondary_activation_configs( dilations.size(), secondary_activation_config); return nam::wavenet::LayerArrayParams( - input_size, condition_size, head_size, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), + input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); @@ -1032,7 +1033,7 @@ void test_process_realtime_safe() std::unique_ptr condition_dsp = nullptr; auto wavenet = std::make_unique( - input_size, layer_array_params, head_scale, with_head, weights, std::move(condition_dsp), 48000.0); + input_size, layer_array_params, head_scale, with_head, std::nullopt, weights, std::move(condition_dsp), 48000.0); const int maxBufferSize = 256; wavenet->Reset(48000.0, maxBufferSize); @@ -1154,7 +1155,7 @@ void test_process_3in_2out_realtime_safe() const int in_channels = 3; std::unique_ptr condition_dsp = nullptr; auto wavenet = std::make_unique( - in_channels, layer_array_params, head_scale, with_head, weights, std::move(condition_dsp), 48000.0); + in_channels, layer_array_params, head_scale, with_head, std::nullopt, weights, std::move(condition_dsp), 48000.0); const int maxBufferSize = 256; wavenet->Reset(48000.0, maxBufferSize); @@ -1194,4 +1195,78 @@ void test_process_3in_2out_realtime_safe() } } } + +// WaveNet::process() with optional post-stack head (multi-layer PostStackHead) must not allocate or free. +void test_process_with_post_stack_head_realtime_safe() +{ + const int input_size = 1; + const int condition_size = 1; + const int head_size = 1; + const int channels = 1; + const int bottleneck = channels; + const int kernel_size = 1; + std::vector dilations{1}; + std::vector kernel_sizes(dilations.size(), kernel_size); + const auto activation = nam::activations::ActivationConfig::simple(nam::activations::ActivationType::ReLU); + const nam::wavenet::GatingMode gating_mode = nam::wavenet::GatingMode::NONE; + const bool head_bias = false; + const float head_scale = 1.0f; + const bool with_head = true; + const int groups = 1; + const int groups_input_mixin = 1; + + nam::wavenet::Layer1x1Params layer1x1_params(true, 1); + nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); + std::vector layer_array_params; + layer_array_params.push_back( + make_layer_array_params(input_size, condition_size, head_size, channels, bottleneck, std::move(kernel_sizes), + std::move(dilations), activation, gating_mode, head_bias, groups, groups_input_mixin, + layer1x1_params, head1x1_params, nam::activations::ActivationConfig{})); + + nam::wavenet::WaveNetHeadParams head_params; + head_params.in_channels = 1; + head_params.channels = 1; + head_params.out_channels = 1; + head_params.kernel_sizes = {1, 1}; + head_params.activation_config = nam::activations::ActivationConfig::simple(nam::activations::ActivationType::ReLU); + + std::vector weights; + weights.push_back(1.0f); // Rechannel + weights.insert(weights.end(), {1.0f, 0.0f, 1.0f, 1.0f, 0.0f}); // Layer 0 + weights.push_back(1.0f); // Head rechannel + weights.push_back(-1.0f); // Post-stack head layer 0 conv weight + weights.push_back(0.0f); // Post-stack head layer 0 conv bias + weights.push_back(2.0f); // Post-stack head layer 1 conv weight + weights.push_back(0.0f); // Post-stack head layer 1 conv bias + weights.push_back(head_scale); + + std::unique_ptr condition_dsp = nullptr; + auto wavenet = + std::make_unique(input_size, layer_array_params, head_scale, with_head, + std::optional(std::move(head_params)), + std::move(weights), std::move(condition_dsp), 48000.0); + + const int maxBufferSize = 256; + wavenet->Reset(48000.0, maxBufferSize); + + const std::vector buffer_sizes{1, 8, 16, 32, 64, 128, 256}; + for (const int buffer_size : buffer_sizes) + { + std::vector input(buffer_size, -0.25f); + std::vector output(buffer_size, 0.0f); + + const std::string test_name = "WaveNet process (post-stack head) - Buffer size " + std::to_string(buffer_size); + run_allocation_test_no_allocations( + nullptr, + [&]() { + NAM_SAMPLE* input_ptrs[] = {input.data()}; + NAM_SAMPLE* output_ptrs[] = {output.data()}; + wavenet->process(input_ptrs, output_ptrs, buffer_size); + }, + nullptr, test_name.c_str()); + + for (int i = 0; i < buffer_size; i++) + assert(std::isfinite(output[i])); + } +} } // namespace test_wavenet diff --git a/tools/test/test_wavenet_configurable_gating.cpp b/tools/test/test_wavenet_configurable_gating.cpp index ee07d94..62d5a17 100644 --- a/tools/test/test_wavenet_configurable_gating.cpp +++ b/tools/test/test_wavenet_configurable_gating.cpp @@ -49,7 +49,7 @@ static nam::wavenet::LayerArrayParams make_layer_array_params( std::vector secondary_activation_configs( dilations.size(), secondary_activation_config); return nam::wavenet::LayerArrayParams( - input_size, condition_size, head_size, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), + input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations), std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); @@ -75,7 +75,7 @@ static nam::wavenet::_LayerArray make_layer_array(const int input_size, const in std::vector dilations_copy = dilations; // Make a copy since we need to move it std::vector kernel_sizes(dilations.size(), kernel_size); nam::wavenet::LayerArrayParams params( - input_size, condition_size, head_size, channels, bottleneck, std::move(kernel_sizes), std::move(dilations_copy), + input_size, condition_size, head_size, 1, channels, bottleneck, std::move(kernel_sizes), std::move(dilations_copy), std::move(activation_configs), std::move(gating_modes), head_bias, groups_input, groups_input_mixin, layer1x1_params, head1x1_params, std::move(secondary_activation_configs), film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params);