Code
// demo_neuralODE_complex_382A_adaptive_main.cpp (Zorro64 C++)

#include <zorro.h>   // required for C++ strategies in Zorro64
#include <math.h>

#define MAXS 4
#define TRI_A 6

// -------------------- Dimensions --------------------
#define N         48
#define K_SIZE    192   // MUST be literal: MAXS*N = 4*48 = 192

#define U_DIM     3     // exogenous input dimension u(t)
#define NOBS      12    // number of observed channels (partial observability)

// -------------------- Base "difficulty knobs" --------------------
#define T_END     20.0
#define DT_INIT   0.20
#define DT_MIN    1e-6
#define DT_MAX    0.50
#define TOL       1e-6

// -------------------- Shedding (memory-pressure) --------------------
#define MEM_BUDGET_MB 2000
static int G_ShedTriggered = 0;

// runtime knobs (can be adjusted by shedding)
static var G_Tol   = TOL;
static var G_DtMax = DT_MAX;

// safety budgets (NOT a max-steps cap)
static int G_MaxRejects     = 2000000;
static int G_DtMinHitMax    = 500000;

// dynamics-driven termination knobs
static var G_SteadyEps      = 1e-7;
static int G_SteadyNeed     = 250;
static var G_StateLimitRMS  = 1e6;
static var G_ObsLimitRMS    = 1e6;

// -------------------- SDE / hybrid knobs --------------------
static var G_SigmaBase      = 0.02;
static var G_JumpLambdaBase = 0.20;
static var G_JumpMagBase    = 0.20;
static var G_RegRate01      = 0.15;
static var G_RegRate10      = 0.10;

// -------------------- TIGHT_MEM types --------------------
#define TIGHT_MEM 1
#ifdef TIGHT_MEM
typedef float fvar;   // 4B
#else
typedef var fvar;     // 8B
#endif

// -------------------- Global working buffers (no malloc) --------------------
// In C++ globals are not auto-zeroed unless explicitly initialized in some cases,
// so we initialize all arrays to 0 to match lite-C expectations.
static fvar G_K[K_SIZE]     = { 0 };
static fvar G_act[N]        = { 0 };

static var  G_stage[N]      = { 0 };
static var  G_dh[N]         = { 0 };

// integrator work
static var  G_ycur[N]       = { 0 };
static var  G_full[N]       = { 0 };
static var  G_half[N]       = { 0 };
static var  G_mid[N]        = { 0 };

// initial/final buffers
static var  Y0[N]           = { 0 };
static var  Yref[N]         = { 0 };
static var  Ysafe[N]        = { 0 };
static var  Ybug[N]         = { 0 };

// partial observability buffers
static var  G_xhat[NOBS]    = { 0 };
static var  G_xobs[NOBS]    = { 0 };

// exogenous input u(t)
static var  G_u[U_DIM]      = { 0 };

// -------------------- RNG (deterministic, local; no Zorro data) --------------------
static unsigned int G_Rng = 1;

static void rng_seed(unsigned int s)
{
  if(s == 0) s = 1;
  G_Rng = s;
}

static var rng_u01()
{
  G_Rng = 1664525u * G_Rng + 1013904223u;
  unsigned int x = (G_Rng >> 8) & 0x00FFFFFFu;
  return ((var)x + 1.0) / 16777217.0; // (0,1)
}

static var rng_norm()
{
  var u1 = rng_u01();
  var u2 = rng_u01();
  var r  = sqrt(-2.0 * log(u1));
  var th = 6.283185307179586 * u2;
  return r * cos(th);
}

// -------------------- Utility --------------------
static int memMB()
{
  var m = memory(0);
  if(!(m > 0)) return 0;
  return (int)(m/(1024*1024));
}

static void shed_if_needed()
{
  if(G_ShedTriggered) return;

  int mb = memMB();
  if(mb >= MEM_BUDGET_MB)
  {
    G_ShedTriggered = 1;

    G_Tol *= 10.0;

    G_DtMax *= 0.5;
    if(G_DtMax < DT_MIN) G_DtMax = DT_MIN;

    G_MaxRejects  = (int)(G_MaxRejects/2);
    if(G_MaxRejects < 10000) G_MaxRejects = 10000;

    G_DtMinHitMax = (int)(G_DtMinHitMax/2);
    if(G_DtMinHitMax < 10000) G_DtMinHitMax = 10000;
  }
}

// -------------------- Triangular indexing for explicit A (lower triangle only) --------------------
static int Aidx(int i, int j)
{
  return (i*(i-1))/2 + j;
}

static var getA(float* Atri, int i, int j)
{
  if(i <= j) return 0;
  return (var)Atri[Aidx(i,j)];
}

static void setA(float* Atri, int i, int j, var v)
{
  if(i <= j) return;
  Atri[Aidx(i,j)] = (float)v;
}

static void zeroTableau(int* s, float* Atri, float* b, float* c)
{
  int i;
  *s = 0;
  for(i=0;i<TRI_A;i++) Atri[i] = 0;
  for(i=0;i<MAXS;i++) { b[i]=0; c[i]=0; }
}

// -------------------- Activation --------------------
static var softsign(var z)
{
  return z / (1.0 + fabs(z));
}

// -------------------- Exogenous input u(t) --------------------
static int G_Regime = 0; // 0 or 1

static void eval_u(var t)
{
  var u0 = sin(0.11*t);
  u0 *= 0.8;
  if(G_Regime) u0 += 0.35;

  var u1 = cos(0.37*t);
  u1 *= 0.6;

  var u2 = sin(2.30*t);
  u2 *= 0.3;

  G_u[0] = u0;
  G_u[1] = u1;
  G_u[2] = u2;
}

// -------------------- Time-varying parameters theta(t) --------------------
static void theta_t(var t, var* dampOut, var* ampOut, var* scaleOut, var* stiffOut)
{
  var damp  = -0.40;
  var amp   =  0.35;
  var scale =  0.06;

  var drift = sin(0.03*t);
  drift *= 0.25;

  var u0 = G_u[0];
  var u1 = G_u[1];

  if(G_Regime)
  {
    amp   *= 1.15;
    scale *= 1.10;
    damp  *= 1.05;
  }

  amp   *= (1.0 + 0.30*drift + 0.15*u0);
  scale *= (1.0 + 0.20*drift + 0.10*u1);
  damp  *= (1.0 + 0.15*drift - 0.10*u0);

  var stiff = 2.8;

  *dampOut  = damp;
  *ampOut   = amp;
  *scaleOut = scale;
  *stiffOut = stiff;
}

// -------------------- Hybrid regime switching (Markov) --------------------
static void update_regime(var t, var dt)
{
  var u0 = G_u[0];
  var u1 = G_u[1];

  var stress = fabs(u0) + 0.5*fabs(u1);

  var r01 = G_RegRate01 * (1.0 + 0.8*stress);
  var r10 = G_RegRate10 * (1.0 + 0.5*stress);

  var p;
  if(G_Regime == 0)
  {
    p = 1.0 - exp(-r01*dt);
    if(rng_u01() < p) G_Regime = 1;
  }
  else
  {
    p = 1.0 - exp(-r10*dt);
    if(rng_u01() < p) G_Regime = 0;
  }
}

// -------------------- Poisson jumps (shock events) --------------------
static void apply_jump_if_needed(var t, var dt, var* y)
{
  var u0  = G_u[0];
  var lam = G_JumpLambdaBase * (1.0 + 0.7*fabs(u0));
  if(G_Regime) lam *= 1.25;

  var p = 1.0 - exp(-lam*dt);
  if(rng_u01() >= p) return;

  var uu = rng_u01() - 0.5;
  var j  = tan(3.141592653589793 * uu);

  if(j >  6.0) j =  6.0;
  if(j < -6.0) j = -6.0;

  var mag = G_JumpMagBase;
  mag *= (1.0 + 0.5*fabs(u0));
  if(G_Regime) mag *= 1.2;

  int k;
  for(k=0;k<N;k++)
  {
    if(rng_u01() < 0.25)
    {
      var delta = mag * j;
      var kk = (k+1);
      delta *= (1.0 + 0.01*kk);
      y[k] += delta;
    }
  }
}

// -------------------- Partial observability --------------------
static void observe_from_hidden(var t, var* h)
{
  int k;
  for(k=0;k<NOBS;k++)
  {
    int j  = k;
    int j2 = k + NOBS;

    var a = h[j];
    var b = h[j2];

    var z = a + 0.30*b;
    z += 0.05*sin(0.17*t + 0.3*(k+1));

    var xhat = z / (1.0 + fabs(z));
    G_xhat[k] = xhat;

    var eta = 0.01 * rng_norm();
    G_xobs[k] = xhat + eta;
  }
}

// -------------------- Drift vector field f_theta(t,y,u) --------------------
static void f_theta_vec(var t)
{
  int i,j;

  eval_u(t);

  var damp, amp, scale, stiff;
  theta_t(t, &damp, &amp, &scale, &stiff);

  for(j=0;j<N;j++)
  {
    var jj = (j+1);

    var bj = sin(0.17*jj);
    bj *= 0.03;

    var u0 = G_u[0];
    var inj = u0;
    inj *= 0.02;
    inj *= sin(0.07*jj + 0.19*t);

    var hj = G_stage[j];
    var in = hj + bj + inj;

    G_act[j] = (fvar)softsign(in);
  }

  for(i=0;i<N;i++)
  {
    var sum = 0;
    var ii = (i+1);

    for(j=0;j<N;j++)
    {
      var jj = (j+1);

      var term1 = 0.013;
      term1 *= ii;
      term1 *= jj;

      var term2 = 0.11;
      term2 *= ii;

      var term3 = 0.07;
      term3 *= jj;

      var ang = term1 + term2 - term3;

      var w  = sin(ang);
      var ww = w * scale;

      var aj = (var)G_act[j];

      sum += ww * aj;
    }

    var phi = 0.05 * ii;

    var forcing = sin(t + phi) * amp;

    var fast = sin(20.0*t + 0.10*ii);
    fast *= 0.08;

    var u1 = G_u[1];
    var u2 = G_u[2];

    var exf = u1 * 0.10;
    exf += 0.05*u2*sin(0.9*t);

    forcing += fast + exf;

    var hi = G_stage[i];

    var tmp = hi * 1.8;
    var self = softsign(tmp) * 0.15;

    var di = damp;
    if((i & 7) == 0)
      di *= stiff;

    var ghi = hi * di;

    G_dh[i] = sum + ghi + forcing + self;
  }
}

// -------------------- Vector RK step (explicit) for DRIFT ONLY --------------------
static void rk_step_vec(int s, float* Atri, float* b, float* c,
                        var t, var* y, var h, var* yout)
{
  int i,j,k;

  if(s < 1)
  {
    for(k=0;k<N;k++) yout[k] = y[k];
    return;
  }
  if(s > MAXS) s = MAXS;

  for(i=0;i<s;i++)
    for(k=0;k<N;k++)
      G_K[i*N + k] = (fvar)0;

  for(i=0;i<s;i++)
  {
    for(k=0;k<N;k++)
    {
      var acc = y[k];

      for(j=0;j<i;j++)
      {
        var kij = (var)G_K[j*N + k];
        var aij = getA(Atri,i,j);
        acc += kij * aij * h;
      }

      G_stage[k] = acc;
    }

    var dt = ((var)c[i]) * h;

    f_theta_vec(t + dt);

    for(k=0;k<N;k++)
      G_K[i*N + k] = (fvar)G_dh[k];
  }

  for(k=0;k<N;k++)
  {
    var acc = y[k];

    for(i=0;i<s;i++)
    {
      var ki = (var)G_K[i*N + k];
      var bi = (var)b[i];
      acc += ki * bi * h;
    }

    yout[k] = acc;
  }
}

// -------------------- Norms --------------------
static var err_norm(var* a, var* b)
{
  int k;
  var s = 0;
  for(k=0;k<N;k++)
  {
    var d = a[k] - b[k];
    s += d*d;
  }
  return sqrt(s / (var)N);
}

static var rms_norm(var* a, int n)
{
  int k;
  var s = 0;
  for(k=0;k<n;k++)
  {
    var x = a[k];
    s += x*x;
  }
  return sqrt(s / (var)n);
}

// -------------------- Stage permutation -> equivalent tableau --------------------
static void permute_method_into(int s, float* A, float* b, float* c,
                                int* p,
                                int* sOut, float* Aout, float* bout, float* cout)
{
  int i,j;

  *sOut = s;
  if(*sOut < 0) *sOut = 0;
  if(*sOut > MAXS) *sOut = MAXS;

  for(i=0;i<TRI_A;i++) Aout[i] = 0;
  for(i=0;i<MAXS;i++)  { bout[i]=0; cout[i]=0; }

  for(i=0;i<*sOut;i++)
  {
    int pi = p[i];
    if(pi < 0) pi = 0;
    if(pi >= s) pi = s-1;
    bout[i] = b[pi];
    cout[i] = c[pi];
  }

  for(i=0;i<*sOut;i++)
    for(j=0;j<i;j++)
    {
      int pi = p[i];
      int pj = p[j];
      if(pi < 0) pi = 0;
      if(pj < 0) pj = 0;
      if(pi >= s) pi = s-1;
      if(pj >= s) pj = s-1;
      setA(Aout, i, j, getA(A, pi, pj));
    }
}

// -------------------- A 4-stage explicit RK with swappable middle stages --------------------
static void make_swappable_rk4(int* s, float* Atri, float* b, float* c)
{
  zeroTableau(s, Atri, b, c);
  *s = 4;

  c[0] = 0.0f;
  c[1] = 0.30f;
  c[2] = 0.80f;
  c[3] = 1.00f;

  setA(Atri, 1, 0, 0.30);
  setA(Atri, 2, 0, 0.80);

  setA(Atri, 3, 1, 0.50);
  setA(Atri, 3, 2, 0.50);

  b[0] = 0.10f;
  b[1] = 0.20f;
  b[2] = 0.30f;
  b[3] = 0.40f;
}

// -------------------- Diffusion (SDE) increment --------------------
static void apply_diffusion(var t, var dt, var* y)
{
  eval_u(t);

  var u0 = G_u[0];
  var u2 = G_u[2];

  var sigma = G_SigmaBase;
  sigma *= (1.0 + 0.6*fabs(u0) + 0.3*fabs(u2));
  if(G_Regime) sigma *= 1.25;

  var sdt = sqrt(dt);

  int k;
  for(k=0;k<N;k++)
  {
    var hk = y[k];

    var gmul = 1.0 + (fabs(hk) * 0.15);

    var xi = rng_norm();

    y[k] += sigma * gmul * sdt * xi;
  }
}

// -------------------- Adaptive integrator --------------------
static void integrate_adaptive_sde(int s, float* A, float* b, float* c,
                                   var t0, var* y0, var tEnd,
                                   unsigned int seed,
                                   var* yOut,
                                   int* stepsOut, int* rejectsOut,
                                   var* dtMinOut,
                                   int* stopReasonOut)
{
  rng_seed(seed);
  G_Regime = 0;

  int k;
  var t = t0;
  var dt = DT_INIT;
  var dtMinSeen = 1e9;

  for(k=0;k<N;k++)
    G_ycur[k] = y0[k];

  int steps = 0;
  int rejects = 0;
  int iters = 0;
  int dtMinHits = 0;
  int steadyCount = 0;

  int stopReason = 0;

  while(t < tEnd)
  {
    if((iters & 255) == 0) shed_if_needed();
    iters++;

    if(dt < DT_MIN) dt = DT_MIN;
    if(dt > G_DtMax) dt = G_DtMax;
    if(t + dt > tEnd) dt = tEnd - t;

    if(dt < dtMinSeen) dtMinSeen = dt;
    if(dt <= DT_MIN) dtMinHits++;

    if(dtMinHits > G_DtMinHitMax)
    {
      stopReason = 4;
      break;
    }

    rk_step_vec(s, A, b, c, t, G_ycur, dt, G_full);

    var halfdt = dt * 0.5;

    rk_step_vec(s, A, b, c, t,        G_ycur, halfdt, G_mid);
    rk_step_vec(s, A, b, c, t+halfdt, G_mid,  halfdt, G_half);

    var e = err_norm(G_half, G_full);

    if(e <= G_Tol || dt <= DT_MIN)
    {
      for(k=0;k<N;k++)
        G_ycur[k] = G_half[k];

      eval_u(t);
      update_regime(t, dt);

      apply_diffusion(t, dt, G_ycur);
      apply_jump_if_needed(t, dt, G_ycur);

      observe_from_hidden(t + dt, G_ycur);

      var yr = rms_norm(G_ycur, N);
      var xr = rms_norm(G_xobs, NOBS);
      if(yr > G_StateLimitRMS || xr > G_ObsLimitRMS)
      {
        stopReason = 2;
        t += dt;
        steps++;
        break;
      }

      for(k=0;k<N;k++) G_stage[k] = G_ycur[k];
      f_theta_vec(t + dt);
      var dr = rms_norm(G_dh, N);

      if(dr < G_SteadyEps) steadyCount++;
      else steadyCount = 0;

      t += dt;
      steps++;

      if(steadyCount >= G_SteadyNeed)
      {
        stopReason = 1;
        break;
      }

      if(e < 1e-16)
      {
        dt *= 2.0;
      }
      else
      {
        var fac = pow(G_Tol / e, 0.2) * 0.9;
        if(fac < 0.5) fac = 0.5;
        if(fac > 2.0) fac = 2.0;
        dt *= fac;
      }
    }
    else
    {
      rejects++;
      if(rejects > G_MaxRejects)
      {
        stopReason = 3;
        break;
      }
      dt *= 0.5;
    }
  }

  for(k=0;k<N;k++)
    yOut[k] = G_ycur[k];

  *stepsOut = steps;
  *rejectsOut = rejects;
  *dtMinOut = dtMinSeen;
  *stopReasonOut = stopReason;
}

// -------------------- Entry point (runs once) --------------------
DLLFUNC int main()
{
  // reset knobs
  G_ShedTriggered = 0;
  G_Tol   = TOL;
  G_DtMax = DT_MAX;

  G_MaxRejects  = 2000000;
  G_DtMinHitMax = 500000;

  G_SteadyEps     = 1e-7;
  G_SteadyNeed    = 250;
  G_StateLimitRMS = 1e6;
  G_ObsLimitRMS   = 1e6;

  G_SigmaBase       = 0.02;
  G_JumpLambdaBase  = 0.20;
  G_JumpMagBase     = 0.20;
  G_RegRate01       = 0.15;
  G_RegRate10       = 0.10;

  int   S = 0;
  float A[TRI_A];
  float b[MAXS];
  float c[MAXS];
  make_swappable_rk4(&S, A, b, c);

  int   SB = 0;
  float AB[TRI_A];
  float bB[MAXS];
  float cB[MAXS];

  int p[MAXS];
  p[0]=0; p[1]=2; p[2]=1; p[3]=3;
  permute_method_into(S, A, b, c, p, &SB, AB, bB, cB);

  int   SW = S;
  float AW[TRI_A];
  float bW[MAXS];
  float cW[MAXS];

  int i;
  for(i=0;i<TRI_A;i++) AW[i] = A[i];
  for(i=0;i<MAXS;i++)  { bW[i]=0; cW[i]=0; }
  for(i=0;i<SW;i++)
  {
    int pi = p[i];
    bW[i] = b[pi];
    cW[i] = c[pi];
  }

  int k;
  for(k=0;k<N;k++)
    Y0[k] = 0.2*sin(0.31*(k+1)) + 0.15*cos(0.17*(k+1))
          + 0.05*sin(0.07*(k+1)*(k+1));

  var t0 = 0.0;

  // ----------- seed for custom RNG -----------
  var now = wdate(NOW);
  unsigned int MySeed = (unsigned int)utm(now);

  var frac = now - floor(now);
  var ms = frac * 86400000.0;
  MySeed = 1664525u * MySeed + (unsigned int)ms + 1013904223u;

  if(MySeed == 0) MySeed = 1;
  printf("\nMySeed=%u", MySeed);
  // ------------------------------------------

  int stepsRef=0,  rejRef=0;   var dtMinRef=0;  int stopRef=0;
  int stepsSafe=0, rejSafe=0;  var dtMinSafe=0; int stopSafe=0;
  int stepsBug=0,  rejBug=0;   var dtMinBug=0;  int stopBug=0;

  integrate_adaptive_sde(S,  A,  b,  c,  t0, Y0, T_END, MySeed, Yref,
                         &stepsRef,  &rejRef,  &dtMinRef,  &stopRef);

  integrate_adaptive_sde(SB, AB, bB, cB, t0, Y0, T_END, MySeed, Ysafe,
                         &stepsSafe, &rejSafe, &dtMinSafe, &stopSafe);

  integrate_adaptive_sde(SW, AW, bW, cW, t0, Y0, T_END, MySeed, Ybug,
                         &stepsBug,  &rejBug,  &dtMinBug,  &stopBug);

  var diffSafe = err_norm(Ysafe, Yref);
  var diffBug  = err_norm(Ybug,  Yref);

  printf("\nNeural SDE / Hybrid ODE integration (N=%d) to T=%g  baseTol=%g", N, T_END, TOL);
  printf("\nAdds: u(t), theta(t), diffusion noise, regime switching, jumps, partial observation, multiscale/stiff modes.");
  printf("\nRuntime knobs end-state: G_Tol=%g  G_DtMax=%g  shed=%d  mem=%d MB",
         G_Tol, G_DtMax, G_ShedTriggered, memMB());

  printf("\n\nA) Reference tableau:");
  printf("\n   accepted=%d rejected=%d minDt=%g stop=%d", stepsRef, rejRef, dtMinRef, stopRef);

  printf("\nB) Theorem-382A safe stage-refactor:");
  printf("\n   accepted=%d rejected=%d minDt=%g stop=%d", stepsSafe, rejSafe, dtMinSafe, stopSafe);
  printf("\n   diff to reference (RMS) = %.3e", diffSafe);

  printf("\nC) Buggy stage-refactor:");
  printf("\n   accepted=%d rejected=%d minDt=%g stop=%d", stepsBug, rejBug, dtMinBug, stopBug);
  printf("\n   diff to reference (RMS) = %.3e", diffBug);

  printf("\n\nStop codes: 0=tEnd, 1=steady-state, 2=event(blow-up), 3=rejectBudget, 4=dtMinBudget\n");

  return quit("done");
}