// Copyright © 2023-2025 Advanced Micro Devices, Inc.
// SPDX-License-Identifier: MIT

// clang-format off
#pragma once

#include <aotriton/config.h>
#include <aotriton/_internal/triton_kernel.h>
#include <aotriton/dtypes.h>
#include <aotriton/runtime.h>
#include <aotriton/util.h>
#include <cstdint>
#include <functional>
#include <string>
#include <string_view>
#include <tuple>
#include <vector>

#if 1
// Forward declaration only: OpAttnBwdParams stays an incomplete type here, which is
// enough for the pointer and reference uses visible in this header.
// NOTE(review): the full definition presumably lives in the operator's own header -- confirm.
namespace AOTRITON_NS::v3::flash {
    struct OpAttnBwdParams;
}
#endif

namespace AOTRITON_NS::v3::flash {

#if 1
// Active branch: reuse the OpAttnBwdParams type that is forward-declared near the top
// of this header instead of defining a kernel-specific parameter struct.
using AOTRITON_NS::v3::flash::OpAttnBwdParams;
#else
// The parameter class must be defined here when there is no common operator
// for bwd_preprocess_varlen.
// This branch is compiled out while the `#if 1` above is in effect.
struct OpAttnBwdParams {
    const TensorView<4>* Out;
    const TensorView<4>* DO;
    const TensorView<2>* D;
    const TensorView<1>* cu_seqlens_q;
    int32_t              max_seqlen_q;
    int32_t              head_dim;
    int16_t              BLOCK_DMODEL;
    bool                 PADDED_HEAD;
};
#endif

// Context object for the bwd_preprocess_varlen kernel: holds a pointer to the operator
// parameters, the tuning value of the current selection and kernel bookkeeping fields.
struct BwdPreprocessVarlenContext {
    // Operator parameters; null until assigned.
    const OpAttnBwdParams *params = nullptr;
    // Performance related arguments for current selection
    // Explicitly zeroed so that a default-initialized context never exposes an
    // indeterminate value (the same value `BwdPreprocessVarlenContext{}` yields).
    int16_t BLOCK_M = 0;

    // NOTE(review): the fields below are presumably filled in by lookup_optimal()
    // through the autotune_table entries -- confirm against the implementation.
    TritonKernel* kernel_on_device = nullptr;
    int pp_args_index = -1;
    std::string_view package_path;
    std::string_view func_name;
    std::string_view arch_name;
    // Note to save ELF space, this object is constructed on the fly.
    const char* _debug_kernel_name = nullptr;
#if AOTRITON_BUILD_FOR_TUNING
    int _has_preferred_kernel = -1; // For C++ based autotune database generation
    int _total_number_of_kernels = -1;
    const char* _preferred_kernel_psels = nullptr;
    const char* _preferred_kernel_copts = nullptr;
    bool peek_kernel_image = false;
#endif

    // Returns a hipError_t status.
    // NOTE(review): presumably selects the kernel for `gpu` and updates this context -- confirm.
    hipError_t lookup_optimal(Gpu gpu);
    // Returns a hipError_t status.
    // NOTE(review): presumably enqueues the selected kernel on `stream` -- confirm.
    hipError_t launch(hipStream_t stream) const;

    // Computes a launch grid from this context.
    dim3 grid_calculator() const;
    // Optional callable that maps a context to a grid.
    // NOTE(review): presumably takes precedence over grid_calculator() when set -- confirm.
    std::function<dim3(const BwdPreprocessVarlenContext&)> custom_grid_calculator;

    // NOTE(review): presumably the column index into autotune_table derived from the
    // functional parameters, expected to be < kMaxGodelNumber -- confirm.
    int64_t godel_number() const;
    // Maps a Gpu to a pair of integers.
    // NOTE(review): presumably (arch index, mod number) -- confirm the order.
    static std::tuple<int, int> get_archmod_number(Gpu gpu);
    // Second extent of autotune_table.
    static constexpr int kMaxGodelNumber = 72;

    // Signature shared by every autotune entry stored in autotune_table.
    using AutoTuneTableEntry = void (*)(BwdPreprocessVarlenContext& context, int mod_number);
    // Declared with an unspecified first extent; the definition lives outside this header.
    static AutoTuneTableEntry autotune_table[][ kMaxGodelNumber ];
};

// Static accessors listing the available choices for each kernel argument.
// Every accessor returns a const reference to a vector; none takes arguments.
struct BwdPreprocessVarlenMetadata {
    // Note: FEAT_CHOICES here

    // Arguments whose choices are reported as strings.
    static auto get_Out_choices() -> const std::vector<std::string>&;
    static auto get_D_choices() -> const std::vector<std::string>&;
    static auto get_cu_seqlens_q_choices() -> const std::vector<std::string>&;
    static auto get_max_seqlen_q_choices() -> const std::vector<std::string>&;
    static auto get_head_dim_choices() -> const std::vector<std::string>&;

    // Arguments whose choices are reported as plain values.
    static auto get_BLOCK_DMODEL_choices() -> const std::vector<int>&;
    static auto get_PADDED_HEAD_choices() -> const std::vector<bool>&;
};

namespace autotune {

// Declaration only (array of unknown bound); the definition is not in this header.
// NOTE(review): presumably the packed string table that the std::string_view fields of
// BwdPreprocessVarlenContext refer into -- confirm against the generated source.
extern const char bwd_preprocess_varlen_packed_string[];

// Helper taking the operator parameters, a mod number and a 1x1 int8_t table; returns int.
// As a function parameter, `int8_t lut[1][1]` is adjusted to `int8_t (*)[1]`.
// NOTE(review): presumably returns the kernel index looked up in `lut` -- confirm.
extern int bwd_preprocess_varlen__lut_lambda__0(const OpAttnBwdParams& params, int mod_number, int8_t lut[1][1]);

// Autotune entries of the A0 group: one function per F index, F0..F71 (72 in total,
// equal to BwdPreprocessVarlenContext::kMaxGodelNumber). Each signature matches
// BwdPreprocessVarlenContext::AutoTuneTableEntry; note that the first parameter is the
// context even though it is named `params` here.
// NOTE(review): "A" presumably is the arch index and "F" the godel number used to index
// autotune_table -- confirm against the generator.
void Autotune_bwd_preprocess_varlen__A0__F0(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F1(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F2(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F3(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F4(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F5(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F6(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F7(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F8(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F9(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F10(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F11(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F12(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F13(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F14(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F15(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F16(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F17(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F18(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F19(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F20(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F21(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F22(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F23(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F24(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F25(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F26(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F27(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F28(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F29(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F30(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F31(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F32(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F33(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F34(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F35(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F36(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F37(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F38(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F39(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F40(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F41(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F42(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F43(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F44(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F45(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F46(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F47(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F48(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F49(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F50(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F51(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F52(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F53(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F54(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F55(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F56(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F57(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F58(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F59(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F60(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F61(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F62(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F63(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F64(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F65(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F66(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F67(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F68(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F69(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F70(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A0__F71(BwdPreprocessVarlenContext& params, int mod_number);
// Autotune entries of the A1 group: one function per F index, F0..F71 (72 in total,
// equal to BwdPreprocessVarlenContext::kMaxGodelNumber). Each signature matches
// BwdPreprocessVarlenContext::AutoTuneTableEntry.
// NOTE(review): "A" presumably is the arch index and "F" the godel number -- confirm.
void Autotune_bwd_preprocess_varlen__A1__F0(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F1(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F2(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F3(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F4(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F5(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F6(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F7(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F8(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F9(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F10(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F11(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F12(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F13(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F14(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F15(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F16(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F17(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F18(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F19(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F20(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F21(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F22(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F23(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F24(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F25(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F26(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F27(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F28(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F29(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F30(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F31(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F32(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F33(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F34(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F35(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F36(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F37(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F38(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F39(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F40(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F41(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F42(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F43(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F44(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F45(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F46(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F47(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F48(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F49(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F50(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F51(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F52(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F53(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F54(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F55(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F56(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F57(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F58(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F59(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F60(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F61(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F62(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F63(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F64(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F65(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F66(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F67(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F68(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F69(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F70(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A1__F71(BwdPreprocessVarlenContext& params, int mod_number);
// Autotune entries of the A2 group: one function per F index, F0..F71 (72 in total,
// equal to BwdPreprocessVarlenContext::kMaxGodelNumber). Each signature matches
// BwdPreprocessVarlenContext::AutoTuneTableEntry.
// NOTE(review): "A" presumably is the arch index and "F" the godel number -- confirm.
void Autotune_bwd_preprocess_varlen__A2__F0(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F1(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F2(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F3(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F4(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F5(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F6(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F7(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F8(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F9(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F10(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F11(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F12(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F13(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F14(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F15(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F16(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F17(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F18(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F19(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F20(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F21(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F22(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F23(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F24(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F25(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F26(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F27(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F28(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F29(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F30(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F31(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F32(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F33(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F34(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F35(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F36(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F37(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F38(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F39(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F40(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F41(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F42(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F43(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F44(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F45(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F46(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F47(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F48(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F49(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F50(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F51(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F52(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F53(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F54(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F55(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F56(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F57(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F58(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F59(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F60(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F61(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F62(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F63(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F64(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F65(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F66(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F67(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F68(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F69(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F70(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A2__F71(BwdPreprocessVarlenContext& params, int mod_number);
// Autotune entries of the A3 group: one function per F index, F0..F71 (72 in total,
// equal to BwdPreprocessVarlenContext::kMaxGodelNumber). Each signature matches
// BwdPreprocessVarlenContext::AutoTuneTableEntry.
// NOTE(review): "A" presumably is the arch index and "F" the godel number -- confirm.
void Autotune_bwd_preprocess_varlen__A3__F0(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F1(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F2(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F3(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F4(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F5(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F6(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F7(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F8(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F9(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F10(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F11(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F12(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F13(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F14(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F15(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F16(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F17(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F18(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F19(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F20(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F21(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F22(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F23(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F24(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F25(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F26(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F27(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F28(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F29(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F30(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F31(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F32(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F33(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F34(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F35(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F36(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F37(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F38(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F39(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F40(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F41(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F42(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F43(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F44(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F45(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F46(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F47(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F48(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F49(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F50(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F51(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F52(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F53(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F54(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F55(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F56(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F57(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F58(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F59(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F60(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F61(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F62(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F63(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F64(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F65(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F66(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F67(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F68(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F69(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F70(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A3__F71(BwdPreprocessVarlenContext& params, int mod_number);
// Autotune entry points for bwd_preprocess_varlen, "A4" group: 72 prototypes,
// F0 through F71, one per line. All share one signature: the selection context
// is passed by non-const reference, followed by an integer mod_number, and
// nothing is returned. Only the prototypes appear here; no bodies are provided.
// NOTE(review): the count of 72 matches BwdPreprocessVarlenContext::kMaxGodelNumber,
// so "F<n>" presumably indexes the value of godel_number() and "A4" presumably
// encodes an architecture index -- confirm against the code generator.
// mod_number presumably comes from get_archmod_number() -- TODO confirm.
void Autotune_bwd_preprocess_varlen__A4__F0(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F1(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F2(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F3(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F4(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F5(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F6(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F7(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F8(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F9(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F10(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F11(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F12(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F13(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F14(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F15(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F16(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F17(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F18(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F19(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F20(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F21(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F22(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F23(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F24(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F25(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F26(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F27(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F28(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F29(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F30(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F31(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F32(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F33(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F34(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F35(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F36(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F37(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F38(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F39(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F40(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F41(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F42(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F43(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F44(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F45(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F46(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F47(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F48(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F49(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F50(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F51(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F52(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F53(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F54(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F55(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F56(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F57(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F58(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F59(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F60(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F61(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F62(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F63(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F64(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F65(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F66(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F67(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F68(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F69(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F70(BwdPreprocessVarlenContext& params, int mod_number);
void Autotune_bwd_preprocess_varlen__A4__F71(BwdPreprocessVarlenContext& params, int mod_number);

}


}

// vim: set fileencoding=utf-8:

