import sympy as sp
import numpy as np
import math as mth
from typing import Literal
import re
import os
import subprocess
import ctypes
import warnings
import shutil
import json
from ..Backend import hardware as HW
from . import templates
from ..types import NodeConfig
# [docs]
class SymbolicJITCompiler:
    """
    The **SymbolicJITCompiler** is the computational heart of HeteroSymNN. It is responsible for transforming
    high-level symbolic definitions of mathematical functions (activations and losses) into highly optimized,
    hardware-specific executable kernels at runtime.

    This compiler bridges the gap between flexibility and performance by leveraging SymPy for symbolic
    differentiation and code generation, and then compiling that code into:

    * **CUDA Kernels (GPU_CUDA):** For massive parallelism on NVIDIA GPUs using CuPy.
    * **C++ Shared Libraries (CPU_JIT):** For high-performance CPU execution using OpenMP and system compilers (MSVC/GCC).
    * **Python Lambdas (CPU_PYTHON):** As a fallback for maximum compatibility.

    It handles the automatic differentiation of user-defined formulas, manages the compilation cache to avoid
    redundant work, and provides a unified interface (`forward_kernel`, `backward_kernel`) for the rest of the
    library to execute these functions without worrying about the underlying hardware implementation.

    Parameters
    ----------
    configs : list[:obj:`~HeteroSymNN.types.NodeConfig`]
        A list of configurations defining the functions to compile.
        For 'activation' mode, this is a list of (function_name_or_expression, constants_dict) for each node.
        For 'loss' mode, this is a list containing a single tuple with the loss expression and its constants.
    calculation_method : Literal["GPU_CUDA", "CPU_JIT", "CPU_PYTHON"]
        The target backend for compilation.
        NOTE(review): the constructor's dispatch matches the C++ backend against the string
        ``"CPU_CPP"``; confirm which spelling your version of ``__init__`` accepts.
    device_id : int
        The ID of the GPU device to use if compiling for CUDA.
    mode : Literal["activation", "loss"], optional
        The type of function being compiled. Determines the kernel signature and symbolic variables used
        ('num' for activations, 'y_pred'/'y_true' for losses). Defaults to "activation".

    Attributes
    ----------
    forward_kernel : Callable
        The compiled executable function for the forward pass.
    backward_kernel : Callable
        The compiled executable function for the backward pass (gradient calculation).
    calculation_method : str
        The current active calculation method.
    device_id : int
        The current GPU ID.

    Examples
    --------
    Although this class is primarily used internally, it can be instantiated for testing custom symbolic expressions.

    >>> import numpy as np
    >>> from HeteroSymNN.JIT.compiler import SymbolicJITCompiler
    >>>
    >>> configs = [("Max(0, num)", {})]
    >>> method = "CPU_PYTHON"
    >>> mode = "activation"
    >>>
    >>> py_compiler = SymbolicJITCompiler(
    ...     configs=configs,
    ...     calculation_method=method,
    ...     device_id=0,
    ...     mode=mode
    ... )
    >>>
    >>> configs_2 = [("d*y_pred-y_true", {"d": 2.0})]
    >>> method_2 = "GPU_CUDA"
    >>> mode_2 = "loss"
    >>> device_id = 0
    >>>
    >>> cuda_compiler = SymbolicJITCompiler(
    ...     configs=configs_2,
    ...     calculation_method=method_2,
    ...     device_id=device_id,
    ...     mode=mode_2
    ... )
    """
def __init__(self, configs: list[NodeConfig], calculation_method: Literal["GPU_CUDA","CPU_JIT","CPU_PYTHON"],
             device_id: int = 0, mode: Literal["activation", "loss"] = "activation"):
    """
    Initializes the JIT compiler and compiles the kernels for the requested backend.

    Parameters
    ----------
    configs : list[:obj:`~HeteroSymNN.types.NodeConfig`]
        ``(expression_or_name, constants)`` pairs to compile; stored as ``activation_funcs``
        so the kernels can be rebuilt when the backend changes.
    calculation_method : Literal["GPU_CUDA", "CPU_JIT", "CPU_PYTHON"]
        Backend to compile for. The legacy spelling ``"CPU_CPP"`` is still accepted as an
        alias of ``"CPU_JIT"``.
    device_id : int, optional
        GPU index; only validated when ``calculation_method`` is ``"GPU_CUDA"``. Defaults to 0.
    mode : Literal["activation", "loss"], optional
        ``"loss"`` uses the symbols ``y_pred``/``y_true``; any other value uses ``num``.

    Raises
    ------
    ValueError
        If ``device_id`` is not below ``HW.NUM_GPUS`` for the CUDA backend, or if
        ``calculation_method`` is not a known backend name.
    """
    self.calculation_method = calculation_method
    self.device_id = device_id
    self.func_ids_cpu = []
    self.func_ids_gpu = None
    self.forward_kernel = None
    self.backward_kernel = None
    # Until a GPU copy exists, the id table in use is the CPU list itself
    self.func_ids = self.func_ids_cpu
    self.activation_funcs = configs
    self.mode = mode

    if mode == "loss":
        self.main_vars = [sp.symbols('y_pred', real=True), sp.symbols('y_true', real=True)]
    else:
        self.main_vars = [sp.symbols('num', real=True)]
    # Derivatives are always taken with respect to the first primary variable
    self.deriv_target = self.main_vars[0]

    if calculation_method == 'GPU_CUDA':
        if device_id >= HW.NUM_GPUS:
            raise ValueError(f"ID de GPU {device_id} no es válido. GPUs disponibles: {HW.NUM_GPUS}")
        self._compile_cuda_kernels(configs)
        # The CUDA compile step may have fallen back to a CPU backend; only
        # upload the id table when the CUDA backend is really the active one
        if self.calculation_method == 'GPU_CUDA':
            self.func_ids_gpu = HW.be.array(self.func_ids_cpu, dtype=HW.be.int32)
            self.func_ids = self.func_ids_gpu
    elif calculation_method in ("CPU_JIT", "CPU_CPP"):
        self._compile_cpp_kernels(configs)
    elif calculation_method == "CPU_PYTHON":
        self._compile_py_kernels(configs)
    else:
        raise ValueError("Calculation Method no es GPU_CUDA, CPU_JIT o CPU_PYTHON")
def _get_ccode_from_config(self, func_str:str, constants:dict[str, float]):
    """
    Internal method that parses a string expression into SymPy expressions for the function and its derivative.

    Only the *names* of ``constants`` are used here: each one is replaced by a symbol
    named ``params[offset+i]``, where ``i`` is the constant's position in the sorted list
    of names, so the printed code reads its value from a parameter buffer.

    Parameters
    ----------
    func_str : str
        The mathematical expression string (e.g., "Max(0, num)"), or a key of
        ``templates.COMMON_FORMULAS``.
    constants : dict[str, float]
        Dictionary of constant values used in the expression. ``None`` is treated as empty.

    Returns
    -------
    tuple[sympy.Expr, sympy.Expr]
        A tuple containing the symbolic expression for the function and its derivative
        with respect to ``self.deriv_target``.

    Raises
    ------
    ValueError
        In strict-warnings mode, when more than one of ``num``, ``x``, ``z`` appears as a
        primary variable.
    """
    constants = constants or {}
    x_sym, z_sym = sp.symbols("x z")
    local_dict = {'e': mth.e, 'pi': mth.pi, 'tau': mth.tau, 'phi': (1 + mth.sqrt(5)) / 2}
    # In activation mode 'num' must be the very same (real) symbol as main_vars[0]
    num_sym = self.main_vars[0] if self.mode == "activation" else sp.symbols("num")
    for var in self.main_vars:
        local_dict[str(var)] = var

    # Constant name -> indexed slot in the flat parameter buffer (sorted for determinism)
    param_subs = {
        sp.symbols(name): sp.symbols(f"params[offset+{slot}]")
        for slot, name in enumerate(sorted(constants))
    }

    if func_str in templates.COMMON_FORMULAS:
        func_str = templates.COMMON_FORMULAS[func_str]
    # Make every predefined formula available by name inside user expressions
    named_formulas = {
        name: sp.parse_expr(formula, local_dict=local_dict)
        for name, formula in templates.COMMON_FORMULAS.items()
    }
    local_dict = local_dict | named_formulas

    func_expr = sp.parse_expr(func_str, local_dict=local_dict, evaluate=False)
    func_expr = func_expr.subs(param_subs)

    free_symb = func_expr.free_symbols
    main_vars_found = []
    if num_sym in free_symb:
        main_vars_found.append(num_sym)
    # 'x' / 'z' count as primary variables unless the user declared that same
    # name as a constant (the original compared each one against the other's name)
    if x_sym in free_symb and "x" not in constants:
        main_vars_found.append(x_sym)
    if z_sym in free_symb and "z" not in constants:
        main_vars_found.append(z_sym)

    if len(main_vars_found) > 1:
        found_names = ', '.join([str(x) for x in main_vars_found])
        if HW.WARNINGS_STRICT_MODE:
            raise ValueError(f"Función {func_str} contiene {found_names} como variables primarias, por favor de solo elegir una.")
        warnings.warn(f"La función {func_str} tratará {found_names} como variables primarias.")

    # 'x' and 'z' are accepted as aliases of the primary variable
    func_expr = func_expr.subs({x_sym: num_sym, z_sym: num_sym})
    if callable(func_expr):
        func_expr = func_expr(*self.main_vars)
    deriv_expr_subbed = sp.diff(func_expr, self.deriv_target)
    return (func_expr, deriv_expr_subbed)
def _generate_kernel_artifacts(self, configs: list[NodeConfig],
                               target_key: Literal["CPP","PY","GPU"],mode: Literal['string', 'lambda'],
                               user_funcs: dict = None, float_regex: re.Pattern = None):
    """
    Internal method that generates the core logic for the kernels, either as C++/CUDA code strings or Python lambdas.

    Identical ``(expression, constants)`` pairs share one function id. One id per entry of
    ``configs`` is appended to ``self.func_ids_cpu``, and ``HW.KERNEL_CACHE`` is consulted
    and filled when ``HW.USE_KERNEL_CACHE`` is set, to avoid re-parsing identical expressions.

    Parameters
    ----------
    configs : list[:obj:`~HeteroSymNN.types.NodeConfig`]
        List of function configurations. A ``None`` constants entry is treated as ``{}``.
    target_key : Literal["CPP", "PY", "GPU"]
        Key suffix for the cache to distinguish between backends.
    mode : Literal['string', 'lambda']
        Output format: 'string' for C++/CUDA code, 'lambda' for Python functions.
    user_funcs : dict, optional
        Dictionary mapping SymPy functions to target language functions (e.g., {'sin': 'sinf'}).
    float_regex : re.Pattern, optional
        Regex to enforce float literals (e.g., 1.0 -> 1.0f) for C++/CUDA.

    Returns
    -------
    tuple[str, str] or dict
        If mode is 'string', returns (forward_cases, backward_cases) strings for a switch statement.
        If mode is 'lambda', returns a dictionary mapping IDs to (forward_func, backward_func).
    """
    unique_funcs = {}
    compiled_code = {}
    for func_str, consts in configs:
        # Tolerate a missing constants dict, as _get_ccode_from_config does
        consts = consts or {}
        func_key = (func_str, frozenset(consts.items()), target_key, self.mode)
        if func_key not in unique_funcs:
            new_id = len(unique_funcs)
            unique_funcs[func_key] = new_id
            if HW.USE_KERNEL_CACHE and func_key in HW.KERNEL_CACHE:
                compiled_code[new_id] = HW.KERNEL_CACHE[func_key]
            else:
                # Build the symbolic function and its derivative
                func_expr, deriv_expr = self._get_ccode_from_config(func_str, consts)
                if mode == 'string':
                    func_expr = func_expr.rewrite(sp.Piecewise)
                    deriv_expr = deriv_expr.rewrite(sp.Piecewise)
                    ccode_fwd = sp.printing.ccode(func_expr, user_functions=user_funcs)
                    ccode_bwd = sp.printing.ccode(deriv_expr, user_functions=user_funcs)
                    if float_regex:
                        ccode_fwd = float_regex.sub(r"\1f", ccode_fwd)
                        ccode_bwd = float_regex.sub(r"\1f", ccode_bwd)
                    compiled_code[new_id] = (ccode_fwd, ccode_bwd)
                elif mode == 'lambda':
                    # The lambdas take the primary variables plus the parameter
                    # buffer and an offset into it
                    lambda_args = self.main_vars + [sp.symbols('params'), sp.symbols('offset')]
                    compiled_code[new_id] = (
                        sp.lambdify(lambda_args, func_expr, 'numpy'),
                        sp.lambdify(lambda_args, deriv_expr, 'numpy'),
                    )
                if HW.USE_KERNEL_CACHE:
                    HW.KERNEL_CACHE[func_key] = compiled_code[new_id]
        self.func_ids_cpu.append(unique_funcs[func_key])

    if mode == 'string':
        fwd_cases = "\n".join([f" case {fid}: return {code[0]};" for fid, code in compiled_code.items()])
        bwd_cases = "\n".join([f" case {fid}: return {code[1]};" for fid, code in compiled_code.items()])
        return fwd_cases, bwd_cases
    return compiled_code
def _compile_cpp_kernels(self, configs:list[NodeConfig]):
    """
    Internal method to compile the symbolic expressions into a C++ shared library (.dll/.so) and load it via ctypes.

    This method generates C++ code from the kernel templates, compiles it with the configured
    compiler (``cl.exe`` or a GCC-style command line, both with OpenMP flags), and creates
    Python wrappers for the exported functions. The library file name contains a hash of
    ``configs`` and the mode; an existing file with that name is reused without recompiling.

    Parameters
    ----------
    configs : list[:obj:`~HeteroSymNN.types.NodeConfig`]
        List of function configurations to compile.

    Raises
    ------
    Exception
        If ``HW.CPP_INSTALLED_COMPILER`` is None.
    RuntimeError
        If compilation or loading fails and strict warnings mode is enabled. When strict
        mode is off, a warning is issued and the "CPU_PYTHON" backend is used instead.
    """
    if HW.CPP_INSTALLED_COMPILER is None:
        raise Exception("CPP_JIT_ENABLED era True, pero CPP_COMPILER_NAME es None.")

    def _expose_compiler_dir():
        # Best effort before a retry: make the directory that holds the compiler
        # visible to the DLL loader / PATH lookup.
        compiler_path = shutil.which(HW.CPP_INSTALLED_COMPILER)
        if compiler_path is None:
            # Nothing to add; the retry will surface the real error
            return
        dlls_dir = os.path.dirname(compiler_path)
        try:
            os.add_dll_directory(dlls_dir)
        except Exception:
            # os.add_dll_directory is missing or refused the directory: extend PATH
            os.environ['PATH'] = dlls_dir + os.pathsep + os.environ.get('PATH', '')

    def _run_compiler(cmd):
        # Run the compiler once and turn a non-zero exit status into an exception
        compile_result = subprocess.run(cmd, check=False, capture_output=True, text=True)
        if compile_result.returncode != 0:
            raise Exception(f"Falló la compilación C++ JIT. {compile_result.stderr}")

    fwd_switch_cases, bwd_switch_cases = self._generate_kernel_artifacts(
        configs, "CPP", mode='string', user_funcs=templates.CPP_USER_FUNCS)

    if self.mode == "activation":
        # SymPy prints 'num'; the C++ template names that argument 'z_val'
        fwd_cases = fwd_switch_cases.replace("num", "z_val")
        bwd_cases = bwd_switch_cases.replace("num", "z_val")
        cpp_template = templates.CPP_KERNEL_TEMPLATE_ACTIVATION.substitute({"fwd_cases":fwd_cases,"bwd_cases":bwd_cases})
    else:
        # Loss mode: the printed code already uses 'y_pred' and 'y_true'
        cpp_template = templates.CPP_KERNEL_TEMPLATE_LOSS.substitute({"fwd_switch_cases":fwd_switch_cases,"bwd_switch_cases":bwd_switch_cases})

    try:
        # A hash of the configuration names the compiled library so it can be reused
        import hashlib
        config_hash = hashlib.md5(json.dumps(configs,sort_keys=True).encode()+self.mode.encode()).hexdigest()
        temp_dir = HW.CPU_CACHE_DIR
        os.makedirs(temp_dir, exist_ok=True)
        lib_name = f"kernel_{self.mode}_{config_hash}"
        src_path = os.path.join(temp_dir, f"{lib_name}.cpp")
        extencion = "dll" if os.name in ["nt","Windows"] else "so"
        lib_path = os.path.join(temp_dir, f"{lib_name}."+extencion)

        if HW.CPP_INSTALLED_COMPILER == "cl.exe":
            compile_cmd = [
                'cl.exe', '/O2', '/LD',    # optimise and build a DLL
                '/openmp', "/fp:fast",     # enable OpenMP, fast floating point
                '/Fe' + lib_path,          # output file
                '/EHsc',                   # exception handling model
                src_path
            ]
        else:
            compile_cmd = [
                HW.CPP_INSTALLED_COMPILER, '-O3', '-shared', '-fPIC', '-fopenmp',
                "-ffast-math", src_path, '-o', lib_path
            ]

        # Skip compilation when the library for this configuration already exists
        if not os.path.exists(lib_path):
            with open(src_path, 'w') as f:
                f.write(cpp_template)
            try:
                _run_compiler(compile_cmd)
            except Exception:
                _expose_compiler_dir()
                _run_compiler(compile_cmd)

        try:
            lib = ctypes.CDLL(lib_path)
        except Exception:
            _expose_compiler_dir()
            lib = ctypes.CDLL(lib_path)

        P_FLOAT = ctypes.POINTER(ctypes.c_float)
        P_INT = ctypes.POINTER(ctypes.c_int)
        C_INT = ctypes.c_int
        # Kept on self so the buffer behind func_ids_ptr stays alive with the wrappers
        self.func_ids_array_np = np.array(self.func_ids_cpu, dtype=np.int32)
        func_ids_ptr = self.func_ids_array_np.ctypes.data_as(P_INT)

        # Python -> C wrappers
        if self.mode == "activation":
            f_func = lib.forward_activation_kernel
            f_func.argtypes = [P_FLOAT, P_FLOAT, P_INT, P_FLOAT, P_INT,C_INT, C_INT, C_INT]
            b_func = lib.backward_delta_kernel
            b_func.argtypes = [P_FLOAT, P_FLOAT, P_FLOAT, P_INT, P_FLOAT, P_INT, C_INT, C_INT, C_INT]

            def f_wrapper(z, a, params, offset_list, n, b):
                # z and a are NumPy arrays (float32)
                f_func(
                    z.ctypes.data_as(P_FLOAT),
                    a.ctypes.data_as(P_FLOAT),
                    func_ids_ptr, params.ctypes.data_as(P_FLOAT),
                    offset_list.ctypes.data_as(P_INT), n, b, n * b
                )

            def b_wrapper(z, err, delta, params, offset_list, n, b):
                b_func(
                    z.ctypes.data_as(P_FLOAT),
                    err.ctypes.data_as(P_FLOAT),
                    delta.ctypes.data_as(P_FLOAT),
                    func_ids_ptr, params.ctypes.data_as(P_FLOAT),
                    offset_list.ctypes.data_as(P_INT), n, b, n * b
                )
        else:
            f_func = lib.loss_kernel_fwd
            f_func.argtypes = [P_FLOAT, P_FLOAT, P_FLOAT, P_INT, P_FLOAT,C_INT]
            b_func = lib.loss_kernel_bwd
            b_func.argtypes = [P_FLOAT, P_FLOAT, P_FLOAT, P_INT, P_FLOAT,C_INT]

            def f_wrapper(yp, yt, res, params):
                f_func(
                    yp.ctypes.data_as(P_FLOAT),
                    yt.ctypes.data_as(P_FLOAT),
                    res.ctypes.data_as(P_FLOAT),
                    func_ids_ptr,params.ctypes.data_as(P_FLOAT), yp.size
                )

            def b_wrapper(yp, yt, grad, params):
                b_func(
                    yp.ctypes.data_as(P_FLOAT),
                    yt.ctypes.data_as(P_FLOAT),
                    grad.ctypes.data_as(P_FLOAT),
                    func_ids_ptr, params.ctypes.data_as(P_FLOAT),yp.size
                )

        self.forward_kernel = f_wrapper
        self.backward_kernel = b_wrapper
    except Exception as e:
        if HW.WARNINGS_STRICT_MODE:
            # Must raise an exception instance; raising a bare string is a TypeError
            raise RuntimeError(f"¡ERROR FATAL DE COMPILACIÓN C++ JIT!") from e
        full_warning = f"¡ERROR FATAL DE COMPILACIÓN C++ JIT! {e} "+"Causa probable: No se encontró un compilador C++ (g++ o cl.exe) en el PATH del sistema o falló OpenMP."
        full_warning += " Usando el kernel de Python (lento) como fallback."
        warnings.warn(full_warning)
        # Fall back to the slow pure-Python backend, keeping the current GPU id
        self._change_method("CPU_PYTHON", self.device_id)
def _compile_py_kernels(self,configs:list[NodeConfig]):
    """
    Compiles the symbolic expressions into Python lambda functions using `sympy.lambdify`.

    This serves as a fallback backend built on NumPy. For activations, when every node
    shares the same function id a single whole-array call is used; otherwise each node row
    is evaluated with its own function. The resulting callables are stored in
    ``forward_kernel`` and ``backward_kernel``.

    Parameters
    ----------
    configs : list[:obj:`~HeteroSymNN.types.NodeConfig`]
        List of function configurations to compile.
    """
    compiled = self._generate_kernel_artifacts(configs, "PY_LAMBDA", mode='lambda')
    # Use the id list that _generate_kernel_artifacts has just filled; self.func_ids
    # may still refer to the previous backend's table while switching backends
    node_ids = [int(fid) for fid in self.func_ids_cpu]

    def _param_columns(params, offset_list, n):
        # Split the flat parameter buffer into one (n, 1) column per parameter so each
        # column broadcasts against the activation matrix.
        num_params = len(params)
        if len(offset_list) > 1:
            # Distance between consecutive nodes' offsets = parameters per node.
            # abs() keeps this valid whichever direction the offsets run.
            num_params = abs(int(offset_list[1] - offset_list[0]))
        matrix_params = params.reshape(n, num_params)
        return [matrix_params[:, i].reshape(-1, 1) for i in range(num_params)]

    if self.mode == "activation":
        first_id = node_ids[0]
        is_homogeneous = all(fid == first_id for fid in node_ids)
        if is_homogeneous:
            # Vectorised path: one function evaluated over the whole array
            func_fwd, func_bwd = compiled[first_id]

            def f_kernel(z, a, params, offset_list, n, b):
                a[:] = func_fwd(z, _param_columns(params, offset_list, n), 0)

            def b_kernel(z, err, d, params, offset_list, n, b):
                d[:] = err * func_bwd(z, _param_columns(params, offset_list, n), 0)
        else:
            # Mixed functions: evaluate row by row with each node's own function
            def f_kernel(z, a, params, offset_list, n, b):
                for j in range(n):
                    a[j,:] = compiled[node_ids[j]][0](z[j,:], params, offset_list[j])

            def b_kernel(z, err, d, params, offset_list, n, b):
                for j in range(n):
                    d[j,:] = err[j,:] * compiled[node_ids[j]][1](z[j,:], params, offset_list[j])
    else:
        # Loss mode uses the function of the first (single) configuration
        func_fwd, func_bwd = compiled[node_ids[0]]

        def f_kernel(y_p, y_t, res_vec, params):
            res_vec[:] = func_fwd(y_p, y_t, params, 0)

        def b_kernel(y_p, y_t, grad_vec, params):
            grad_vec[:] = func_bwd(y_p, y_t, params, 0)

    self.forward_kernel = f_kernel
    self.backward_kernel = b_kernel
def _compile_cuda_kernels(self, configs:list[NodeConfig]):
    """
    Compiles the symbolic expressions into CUDA kernels using CuPy.

    This method generates CUDA C code from the kernel templates, builds two
    ``HW.be.RawKernel`` objects under ``self.device_id``, and installs wrappers that obtain
    grid/block dimensions from ``HW._get_cuda_dims`` on every call.

    Parameters
    ----------
    configs : list[:obj:`~HeteroSymNN.types.NodeConfig`]
        List of function configurations to compile.

    Raises
    ------
    RuntimeError
        If CUDA compilation fails and strict mode is enabled. If strict warnings mode is
        false, a warning is issued and the "CPU_JIT" backend is tried, then "CPU_PYTHON"
        if the C++ backend could not be selected.
    """
    # Appends an 'f' suffix to decimal literals so the generated code uses float constants
    float_regex = re.compile(r"(\d+\.\d*([eE][+-]?\d+)?)")
    fwd_switch_cases, bwd_switch_cases = self._generate_kernel_artifacts(configs, "GPU", mode='string',
                                                                         user_funcs=templates.CUDA_USER_FUNCS,
                                                                         float_regex=float_regex)
    if self.mode == "activation":
        # SymPy prints 'num'; the CUDA template names that argument 'z_val'
        fwd_cases = fwd_switch_cases.replace("num", "z_val")
        bwd_cases = bwd_switch_cases.replace("num", "z_val")
        template = templates.CUDA_KERNEL_TEMPLATE_ACTIVATION.substitute({"fwd_cases":fwd_cases,"bwd_cases":bwd_cases})
        kernel_names = ["forward_activation_kernel", "backward_delta_kernel"]
    else:
        template = templates.CUDA_KERNEL_TEMPLATE_LOSS.substitute({"fwd_switch_cases":fwd_switch_cases,"bwd_switch_cases":bwd_switch_cases})
        kernel_names = ["loss_kernel_fwd", "loss_kernel_bwd"]

    try:
        with HW.be.cuda.Device(self.device_id):
            fwd_k = HW.be.RawKernel(template, kernel_names[0])
            bwd_k = HW.be.RawKernel(template, kernel_names[1])
    except Exception as e:
        error_message = f"¡ERROR FATAL DE COMPILACIÓN CUDA JIT! {e}"
        if HW.WARNINGS_STRICT_MODE:
            raise RuntimeError(error_message) from e
        full_warning = (error_message +
                        " Causa probable: Error en la generación del kernel de CUDA o fallo de CuPy." +
                        " Usando el kernel de CPU como fallback.")
        warnings.warn(full_warning)
        # _change_method returns the backend actually in use. If the C++ JIT could not
        # be selected it still reports GPU_CUDA, so drop to pure Python rather than
        # leaving the compiler without kernels.
        if self._change_method("CPU_JIT", self.device_id) == "GPU_CUDA":
            self._change_method("CPU_PYTHON", self.device_id)
        return

    # Launch wrappers; self.func_ids is read at call time
    if self.mode == "activation":
        def f_k_wrapper(z, a, params, offset_list, n, b):
            tot = n * b
            grid, block = HW._get_cuda_dims(tot, self.device_id)
            fwd_k(grid, block, (z, a, self.func_ids, params, offset_list, n, b, tot))

        def b_k_wrapper(z, err, d, params, offset_list, n, b):
            tot = n * b
            grid, block = HW._get_cuda_dims(tot, self.device_id)
            bwd_k(grid, block, (z, err, d, self.func_ids, params, offset_list, n, b, tot))
    else:
        def f_k_wrapper(yp, yt, res, params):
            n = yp.size
            grid, block = HW._get_cuda_dims(n, self.device_id)
            fwd_k(grid, block, (yp, yt, res, self.func_ids, params, n))

        def b_k_wrapper(yp, yt, grad, params):
            n = yp.size
            grid, block = HW._get_cuda_dims(n, self.device_id)
            bwd_k(grid, block, (yp, yt, grad, self.func_ids, params, n))

    self.forward_kernel = f_k_wrapper
    self.backward_kernel = b_k_wrapper
def _change_method(self,new_calculatuion_method:Literal["GPU_CUDA","CPU_JIT","CPU_PYTHON"],gpu_id:int):
"""
Internal method to change the calculation backend.
This triggers a recompilation of the kernels for the new backend.
Parameters
----------
new_calculatuion_method : Literal["GPU_CUDA", "CPU_JIT", "CPU_PYTHON"]
The new backend to switch to.
gpu_id : int
The GPU ID to use if switching to CUDA.
Returns
-------
Literal["GPU_CUDA", "CPU_JIT", "CPU_PYTHON"]
The actual calculation method set (might differ from requested if fallback occurs).
"""
if(new_calculatuion_method != self.calculation_method):
if ((new_calculatuion_method == "GPU_CUDA") and (HW.GPU_ENABLED)):
if (gpu_id >= HW.NUM_GPUS):
raise ValueError(f"ID de GPU {gpu_id} no es válido. GPUs disponibles: {HW.NUM_GPUS}")
self.device_id = gpu_id
self.func_ids_cpu = []
self.calculation_method = "GPU_CUDA"
self._compile_cuda_kernels(self.activation_funcs)
self.func_ids_gpu = HW.be.array(self.func_ids_cpu,dtype=HW.be.int32)
self.func_ids = self.func_ids_gpu
elif ((new_calculatuion_method == "CPU_JIT")and(HW.CPP_JIT_ENABLED)):
self.func_ids_cpu = []
self.calculation_method = "CPU_JIT"
self._compile_cpp_kernels(self.activation_funcs)
self.func_ids = self.func_ids_cpu
elif (new_calculatuion_method == "CPU_PYTHON"):
self.func_ids_cpu = []
self.calculation_method = "CPU_PYTHON"
self._compile_py_kernels(self.activation_funcs)
self.func_ids = self.func_ids_cpu
return self.calculation_method
# [docs]
def set_gpu_id(self,new_id:int):
    """
    Updates the active GPU ID and recompiles CUDA kernels if necessary.

    The new id is always stored. Kernels are rebuilt only when the id actually changed
    and the active backend is "GPU_CUDA".

    Parameters
    ----------
    new_id : int
        The new GPU device ID.
    """
    if new_id == self.device_id:
        return
    self.device_id = new_id
    if self.calculation_method != "GPU_CUDA":
        return

    # Start from an empty id list: kernel generation appends one id per
    # configuration, so reusing the old list would make it grow on every call
    self.func_ids_cpu = []
    self._compile_cuda_kernels(self.activation_funcs)
    # Skip the upload if the compile step fell back to a CPU backend
    if self.calculation_method == "GPU_CUDA":
        self.func_ids_gpu = HW.be.array(self.func_ids_cpu, dtype=HW.be.int32)
        self.func_ids = self.func_ids_gpu