PrismWeave Regime Switcher is a portfolio controller that watches many currency pairs at once and ranks them by how clean and dependable their behavior looks right now. It treats each pair as a member of a living network and rebuilds that network on a repeating schedule. For every update, it extracts a compact set of nine “aspects” for each pair. These aspects describe short movement, longer movement, current volatility, deviation from a reference level, range pressure, activity flow, a simple regime flag, volatility of volatility, and persistence. All aspects are stored in a memory efficient ring buffer that keeps only the recent window needed for analysis.

The heart of the system is a relationship engine that measures how similarly pairs behave across all aspects. It computes a full pair to pair similarity table and blends it with an exposure distance table so the network reflects both market co movement and structural overlap. Because this matrix is heavy to compute, the strategy can optionally offload the pairwise correlation workload to a graphics device through OpenCL. If OpenCL is missing or fails at any stage, the system automatically falls back to a CPU path without changing results flow.

Once the network is built, the strategy converts similarities into distances and runs a shortest path sweep so that indirect links between pairs are respected. From the resulting distances it derives a compactness score for each pair. A pair becomes more attractive when it sits in a coherent neighborhood and its internal distances remain small after the network is relaxed. A simple market regime detector runs in parallel using average volatility across the universe, producing a coarse regime label used as an additional bias.

On top of the raw scores sits a learning controller. It combines lightweight clustering, reinforcement style adjustment, and a compact projection layer that tracks dominance and rotation. Optional mixture and hidden state modules estimate stability and uncertainty and apply cooldown when the system becomes confused. The controller then adapts ranking aggressiveness, the number of selected pairs, and overall risk scaling. The final output is a rotating top list of pairs that the strategy believes are currently the best aligned with the prevailing regime and network structure.

Code
// TGr06C_RegimeSwitcher_v7.cpp - Zorro64 Strategy DLL
// Strategy C v7: Regime-Switching with MX06 OOP + OpenCL + Learning Controller
// Notes:
// - Keeps full CPU fallback.
// - OpenCL is optional: if OpenCL.dll missing / no device / kernel build fails -> CPU path.
// - OpenCL accelerates the heavy correlation matrix step by offloading pairwise correlations.
// - Correlation is computed in float on GPU; results are stored back into fvar corrMatrix.

#define _CRT_SECURE_NO_WARNINGS
#include <zorro.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <windows.h>
#include <stddef.h>

#define INF 1e30
#define EPS 1e-12
#define N_ASSETS 28
#define FEAT_N 9
#define FEAT_WINDOW 200
#define UPDATE_EVERY 4
#define TOP_K 5

#define ALPHA 0.5
#define BETA 0.2
#define GAMMA 2.0
#define LAMBDA_META 0.6

#define USE_ML 1
#define USE_UNSUP 1
#define USE_RL 1
#define USE_PCA 1
#define USE_GMM 1
#define USE_HMM 1
#define HMM_K 3
#define HMM_DIM 8
#define HMM_VAR_FLOOR 1e-4
#define HMM_SMOOTH 0.02
#define HMM_ENTROPY_TH 0.85
#define HMM_SWITCH_TH 0.35
#define HMM_MIN_RISK 0.25
#define HMM_COOLDOWN_UPDATES 2
#define HMM_ONLINE_UPDATE 1
#define GMM_K 3
#define GMM_DIM 8
#define GMM_ALPHA 0.02
#define GMM_VAR_FLOOR 1e-4
#define GMM_ENTROPY_COEFF 0.45
#define GMM_MIN_RISK 0.25
#define GMM_ONLINE_UPDATE 1
#define STRATEGY_PROFILE 2
#define PCA_DIM 6
#define PCA_COMP 3
#define PCA_WINDOW 128
#define PCA_REBUILD_EVERY 4

#ifdef TIGHT_MEM
typedef float fvar;
#else
typedef double fvar;
#endif

static const char* ASSET_NAMES[] = {
  "EURUSD","GBPUSD","USDCHF","USDJPY","AUDUSD","AUDCAD","AUDCHF","AUDJPY","AUDNZD",
  "CADJPY","CADCHF","EURAUD","EURCAD","EURCHF","EURGBP","EURJPY","EURNZD","GBPAUD",
  "GBPCAD","GBPCHF","GBPJPY","GBPNZD","NZDCAD","NZDCHF","NZDJPY","NZDUSD","USDCAD"
};
static const char* CURRENCIES[] = {"EUR","GBP","USD","CHF","JPY","AUD","CAD","NZD"};
#define N_CURRENCIES 8

// ---------------------------- Exposure Table ----------------------------

struct ExposureTable {
  int exposure[N_ASSETS][N_CURRENCIES];
  double exposureDist[N_ASSETS][N_ASSETS];

  void init() {
    for(int i=0;i<N_ASSETS;i++){
      for(int c=0;c<N_CURRENCIES;c++){
        exposure[i][c] = 0;
      }
    }
    for(int i=0;i<N_ASSETS;i++){
      for(int j=0;j<N_ASSETS;j++){
        exposureDist[i][j] = 0.0;
      }
    }
  }

  inline double getDist(int i,int j) const { return exposureDist[i][j]; }
};

// ---------------------------- Slab Allocator ----------------------------

template<typename T>
class SlabAllocator {
public:
  T* data;
  int capacity;

  SlabAllocator() : data(NULL), capacity(0) {}
  ~SlabAllocator() { shutdown(); }

  void init(int size) {
    shutdown();
    capacity = size;
    data = (T*)malloc((size_t)capacity * sizeof(T));
    if(data) memset(data, 0, (size_t)capacity * sizeof(T));
  }

  void shutdown() {
    if(data) free(data);
    data = NULL;
    capacity = 0;
  }

  T& operator[](int i) { return data[i]; }
  const T& operator[](int i) const { return data[i]; }
};

// ---------------------------- Feature Buffer (SoA ring) ----------------------------

struct FeatureBufferSoA {
  SlabAllocator<fvar> buffer;
  int windowSize;
  int currentIndex;

  void init(int assets, int window) {
    windowSize = window;
    currentIndex = 0;
    buffer.init(FEAT_N * assets * window);
  }

  void shutdown() { buffer.shutdown(); }

  inline int offset(int feat,int asset,int t) const {
    return (feat * N_ASSETS + asset) * windowSize + t;
  }

  void push(int feat,int asset,fvar value) {
    buffer[offset(feat, asset, currentIndex)] = value;
    currentIndex = (currentIndex + 1) % windowSize;
  }

  // t=0 => most recent
  fvar get(int feat,int asset,int t) const {
    int idx = (currentIndex - 1 - t + windowSize) % windowSize;
    return buffer[offset(feat, asset, idx)];
  }
};

// ---------------------------- Minimal OpenCL (dynamic) ----------------------------

typedef struct _cl_platform_id*   cl_platform_id;
typedef struct _cl_device_id*     cl_device_id;
typedef struct _cl_context*       cl_context;
typedef struct _cl_command_queue* cl_command_queue;
typedef struct _cl_program*       cl_program;
typedef struct _cl_kernel*        cl_kernel;
typedef struct _cl_mem*           cl_mem;
typedef unsigned int              cl_uint;
typedef int                       cl_int;
typedef unsigned long long        cl_ulong;
typedef size_t                    cl_bool;

#define CL_SUCCESS 0
#define CL_DEVICE_TYPE_CPU (1ULL << 1)
#define CL_DEVICE_TYPE_GPU (1ULL << 2)
#define CL_MEM_READ_ONLY   (1ULL << 2)
#define CL_MEM_WRITE_ONLY  (1ULL << 1)
#define CL_MEM_READ_WRITE  (1ULL << 0)
#define CL_TRUE  1
#define CL_FALSE 0
#define CL_PROGRAM_BUILD_LOG 0x1183

class OpenCLBackend {
public:
  HMODULE hOpenCL;
  int ready;

  cl_platform_id platform;
  cl_device_id device;
  cl_context context;
  cl_command_queue queue;
  cl_program program;
  cl_kernel kCorr;

  cl_mem bufFeat;
  cl_mem bufCorr;

  int featBytes;
  int corrBytes;

  cl_int (*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*);
  cl_int (*clGetDeviceIDs)(cl_platform_id, cl_ulong, cl_uint, cl_device_id*, cl_uint*);
  cl_context (*clCreateContext)(void*, cl_uint, const cl_device_id*, void*, void*, cl_int*);
  cl_command_queue (*clCreateCommandQueue)(cl_context, cl_device_id, cl_ulong, cl_int*);
  cl_program (*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*);
  cl_int (*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void*, void*);
  cl_int (*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_uint, size_t, void*, size_t*);
  cl_kernel (*clCreateKernel)(cl_program, const char*, cl_int*);
  cl_int (*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*);
  cl_mem (*clCreateBuffer)(cl_context, cl_ulong, size_t, void*, cl_int*);
  cl_int (*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const void*, void*);
  cl_int (*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const void*, void*);
  cl_int (*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const void*, void*);
  cl_int (*clFinish)(cl_command_queue);
  cl_int (*clReleaseMemObject)(cl_mem);
  cl_int (*clReleaseKernel)(cl_kernel);
  cl_int (*clReleaseProgram)(cl_program);
  cl_int (*clReleaseCommandQueue)(cl_command_queue);
  cl_int (*clReleaseContext)(cl_context);

  OpenCLBackend()
  : hOpenCL(NULL), ready(0),
    platform(NULL), device(NULL), context(NULL), queue(NULL), program(NULL), kCorr(NULL),
    bufFeat(NULL), bufCorr(NULL),
    featBytes(0), corrBytes(0),
    clGetPlatformIDs(NULL), clGetDeviceIDs(NULL), clCreateContext(NULL), clCreateCommandQueue(NULL),
    clCreateProgramWithSource(NULL), clBuildProgram(NULL), clGetProgramBuildInfo(NULL),
    clCreateKernel(NULL), clSetKernelArg(NULL),
    clCreateBuffer(NULL), clEnqueueWriteBuffer(NULL), clEnqueueReadBuffer(NULL),
    clEnqueueNDRangeKernel(NULL), clFinish(NULL),
    clReleaseMemObject(NULL), clReleaseKernel(NULL), clReleaseProgram(NULL),
    clReleaseCommandQueue(NULL), clReleaseContext(NULL)
  {}

  int loadSymbol(void** fp, const char* name) {
    *fp = (void*)GetProcAddress(hOpenCL, name);
    return (*fp != NULL);
  }

  const char* kernelSource() {
    return
      "__kernel void corr_pairwise(\n"
      "  __global const float* feat,\n"
      "  __global float* outCorr,\n"
      "  const int nAssets,\n"
      "  const int nFeat,\n"
      "  const int windowSize,\n"
      "  const float eps\n"
      "){\n"
      "  int a = (int)get_global_id(0);\n"
      "  int b = (int)get_global_id(1);\n"
      "  if(a >= nAssets || b >= nAssets) return;\n"
      "  if(a >= b) return;\n"
      "  float acc = 0.0f;\n"
      "  for(int f=0; f<nFeat; f++){\n"
      "    int baseA = (f*nAssets + a) * windowSize;\n"
      "    int baseB = (f*nAssets + b) * windowSize;\n"
      "    float mx = 0.0f;\n"
      "    float my = 0.0f;\n"
      "    for(int t=0; t<windowSize; t++){\n"
      "      mx += feat[baseA + t];\n"
      "      my += feat[baseB + t];\n"
      "    }\n"
      "    mx /= (float)windowSize;\n"
      "    my /= (float)windowSize;\n"
      "    float sxx = 0.0f;\n"
      "    float syy = 0.0f;\n"
      "    float sxy = 0.0f;\n"
      "    for(int t=0; t<windowSize; t++){\n"
      "      float dx = feat[baseA + t] - mx;\n"
      "      float dy = feat[baseB + t] - my;\n"
      "      sxx += dx*dx;\n"
      "      syy += dy*dy;\n"
      "      sxy += dx*dy;\n"
      "    }\n"
      "    float den = sqrt(sxx*syy + eps);\n"
      "    float corr = (den > eps) ? (sxy/den) : 0.0f;\n"
      "    acc += corr;\n"
      "  }\n"
      "  outCorr[a*nAssets + b] = acc / (float)nFeat;\n"
      "}\n";
  }

  void printBuildLog() {
    if(!clGetProgramBuildInfo || !program || !device) return;
    size_t logSize = 0;
    clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
    if(logSize == 0) return;
    char* log = (char*)malloc(logSize + 1);
    if(!log) return;
    memset(log, 0, logSize + 1);
    clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
    printf("OpenCL build log:\n%s\n", log);
    free(log);
  }

  void init() {
    ready = 0;

    hOpenCL = LoadLibraryA("OpenCL.dll");
    if(!hOpenCL) {
      printf("OpenCL: CPU (OpenCL.dll missing)\n");
      return;
    }

    if(!loadSymbol((void**)&clGetPlatformIDs,       "clGetPlatformIDs")) return;
    if(!loadSymbol((void**)&clGetDeviceIDs,         "clGetDeviceIDs")) return;
    if(!loadSymbol((void**)&clCreateContext,        "clCreateContext")) return;
    if(!loadSymbol((void**)&clCreateCommandQueue,   "clCreateCommandQueue")) return;
    if(!loadSymbol((void**)&clCreateProgramWithSource,"clCreateProgramWithSource")) return;
    if(!loadSymbol((void**)&clBuildProgram,         "clBuildProgram")) return;
    if(!loadSymbol((void**)&clGetProgramBuildInfo,  "clGetProgramBuildInfo")) return;
    if(!loadSymbol((void**)&clCreateKernel,         "clCreateKernel")) return;
    if(!loadSymbol((void**)&clSetKernelArg,         "clSetKernelArg")) return;

    if(!loadSymbol((void**)&clCreateBuffer,         "clCreateBuffer")) return;
    if(!loadSymbol((void**)&clEnqueueWriteBuffer,   "clEnqueueWriteBuffer")) return;
    if(!loadSymbol((void**)&clEnqueueReadBuffer,    "clEnqueueReadBuffer")) return;
    if(!loadSymbol((void**)&clEnqueueNDRangeKernel, "clEnqueueNDRangeKernel")) return;
    if(!loadSymbol((void**)&clFinish,               "clFinish")) return;

    if(!loadSymbol((void**)&clReleaseMemObject,     "clReleaseMemObject")) return;
    if(!loadSymbol((void**)&clReleaseKernel,        "clReleaseKernel")) return;
    if(!loadSymbol((void**)&clReleaseProgram,       "clReleaseProgram")) return;
    if(!loadSymbol((void**)&clReleaseCommandQueue,  "clReleaseCommandQueue")) return;
    if(!loadSymbol((void**)&clReleaseContext,       "clReleaseContext")) return;

    cl_uint nPlat = 0;
    if(clGetPlatformIDs(0, NULL, &nPlat) != CL_SUCCESS || nPlat == 0) {
      printf("OpenCL: CPU (no platform)\n");
      return;
    }
    clGetPlatformIDs(1, &platform, NULL);

    cl_uint nDev = 0;
    cl_int ok = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, &nDev);
    if(ok != CL_SUCCESS || nDev == 0) {
      ok = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, &nDev);
      if(ok != CL_SUCCESS || nDev == 0) {
        printf("OpenCL: CPU (no device)\n");
        return;
      }
    }

    cl_int err = 0;
    context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
    if(err != CL_SUCCESS || !context) {
      printf("OpenCL: CPU (context fail)\n");
      return;
    }

    queue = clCreateCommandQueue(context, device, 0, &err);
    if(err != CL_SUCCESS || !queue) {
      printf("OpenCL: CPU (queue fail)\n");
      return;
    }

    const char* src = kernelSource();
    program = clCreateProgramWithSource(context, 1, &src, NULL, &err);
    if(err != CL_SUCCESS || !program) {
      printf("OpenCL: CPU (program fail)\n");
      return;
    }

    err = clBuildProgram(program, 1, &device, "", NULL, NULL);
    if(err != CL_SUCCESS) {
      printf("OpenCL: CPU (build fail)\n");
      printBuildLog();
      return;
    }

    kCorr = clCreateKernel(program, "corr_pairwise", &err);
    if(err != CL_SUCCESS || !kCorr) {
      printf("OpenCL: CPU (kernel fail)\n");
      printBuildLog();
      return;
    }

    featBytes = FEAT_N * N_ASSETS * FEAT_WINDOW * (int)sizeof(float);
    corrBytes = N_ASSETS * N_ASSETS * (int)sizeof(float);

    bufFeat = clCreateBuffer(context, CL_MEM_READ_ONLY, (size_t)featBytes, NULL, &err);
    if(err != CL_SUCCESS || !bufFeat) {
      printf("OpenCL: CPU (bufFeat fail)\n");
      return;
    }

    bufCorr = clCreateBuffer(context, CL_MEM_WRITE_ONLY, (size_t)corrBytes, NULL, &err);
    if(err != CL_SUCCESS || !bufCorr) {
      printf("OpenCL: CPU (bufCorr fail)\n");
      return;
    }

    ready = 1;
    printf("OpenCL: READY (kernel+buffers)\n");
  }

  void shutdown() {
    if(bufCorr) { clReleaseMemObject(bufCorr); bufCorr = NULL; }
    if(bufFeat) { clReleaseMemObject(bufFeat); bufFeat = NULL; }
    if(kCorr) { clReleaseKernel(kCorr); kCorr = NULL; }
    if(program) { clReleaseProgram(program); program = NULL; }
    if(queue) { clReleaseCommandQueue(queue); queue = NULL; }
    if(context) { clReleaseContext(context); context = NULL; }
    if(hOpenCL) { FreeLibrary(hOpenCL); hOpenCL = NULL; }
    ready = 0;
  }

  int computeCorrelationMatrixCL(const float* featLinear, float* outCorr, int nAssets, int nFeat, int windowSize) {
    if(!ready) return 0;
    if(!featLinear || !outCorr) return 0;

    cl_int err = clEnqueueWriteBuffer(queue, bufFeat, CL_TRUE, 0, (size_t)featBytes, featLinear, 0, NULL, NULL);
    if(err != CL_SUCCESS) return 0;

    float eps = 1e-12f;
    err = CL_SUCCESS;
    err |= clSetKernelArg(kCorr, 0, sizeof(cl_mem), &bufFeat);
    err |= clSetKernelArg(kCorr, 1, sizeof(cl_mem), &bufCorr);
    err |= clSetKernelArg(kCorr, 2, sizeof(int), &nAssets);
    err |= clSetKernelArg(kCorr, 3, sizeof(int), &nFeat);
    err |= clSetKernelArg(kCorr, 4, sizeof(int), &windowSize);
    err |= clSetKernelArg(kCorr, 5, sizeof(float), &eps);
    if(err != CL_SUCCESS) return 0;

    size_t global[2];
    global[0] = (size_t)nAssets;
    global[1] = (size_t)nAssets;

    err = clEnqueueNDRangeKernel(queue, kCorr, 2, NULL, global, NULL, 0, NULL, NULL);
    if(err != CL_SUCCESS) return 0;

    err = clFinish(queue);
    if(err != CL_SUCCESS) return 0;

    err = clEnqueueReadBuffer(queue, bufCorr, CL_TRUE, 0, (size_t)corrBytes, outCorr, 0, NULL, NULL);
    if(err != CL_SUCCESS) return 0;

    return 1;
  }
};

// ---------------------------- Learning Layer ----------------------------

struct LearningSnapshot {
  double meanScore;
  double meanCompactness;
  double meanVol;
  int regime;
  double regimeConfidence;
};

class UnsupervisedModel {
public:
  double centroids[3][3]; int counts[3]; int initialized;
  UnsupervisedModel() : initialized(0) { memset(centroids,0,sizeof(centroids)); memset(counts,0,sizeof(counts)); }
  void init(){ initialized=0; memset(centroids,0,sizeof(centroids)); memset(counts,0,sizeof(counts)); }
  void update(const LearningSnapshot& s, int* regimeOut, double* confOut){
    double x0=s.meanScore,x1=s.meanCompactness,x2=s.meanVol;
    if(!initialized){ for(int k=0;k<3;k++){ centroids[k][0]=x0+0.01*(k-1); centroids[k][1]=x1+0.01*(1-k); centroids[k][2]=x2+0.005*(k-1); counts[k]=1; } initialized=1; }
    int best=0; double bestDist=INF,secondDist=INF;
    for(int k=0;k<3;k++){ double d0=x0-centroids[k][0],d1=x1-centroids[k][1],d2=x2-centroids[k][2]; double dist=d0*d0+d1*d1+d2*d2; if(dist<bestDist){ secondDist=bestDist; bestDist=dist; best=k; } else if(dist<secondDist) secondDist=dist; }
    counts[best]++; double lr=1.0/(double)counts[best]; centroids[best][0]+=lr*(x0-centroids[best][0]); centroids[best][1]+=lr*(x1-centroids[best][1]); centroids[best][2]+=lr*(x2-centroids[best][2]);
    *regimeOut=best; *confOut=1.0/(1.0+sqrt(fabs(secondDist-bestDist)+EPS));
  }
};

class RLAgent {
public:
  double q[4]; int n[4]; int lastAction; double lastMeanScore;
  RLAgent() : lastAction(0), lastMeanScore(0) { for(int i=0;i<4;i++){q[i]=0;n[i]=0;} }
  void init(){ lastAction=0; lastMeanScore=0; for(int i=0;i<4;i++){q[i]=0;n[i]=0;} }
  int chooseAction(int updateCount){ if((updateCount%10)==0) return updateCount%4; int b=0; for(int i=1;i<4;i++) if(q[i]>q[b]) b=i; return b; }
  void updateReward(double newMeanScore){ double r=newMeanScore-lastMeanScore; n[lastAction]++; q[lastAction]+=(r-q[lastAction])/(double)n[lastAction]; lastMeanScore=newMeanScore; }
};

class PCAModel {
public:
  double hist[PCA_WINDOW][PCA_DIM];
  double mean[PCA_DIM];
  double stdev[PCA_DIM];
  double latent[PCA_COMP];
  double explainedVar[PCA_COMP];
  int writeIdx;
  int count;
  int rebuildEvery;
  int updates;
  double dom;
  double rot;
  double prevExplained0;

  PCAModel() : writeIdx(0), count(0), rebuildEvery(PCA_REBUILD_EVERY), updates(0), dom(0), rot(0), prevExplained0(0) {
    memset(hist, 0, sizeof(hist));
    memset(mean, 0, sizeof(mean));
    memset(stdev, 0, sizeof(stdev));
    memset(latent, 0, sizeof(latent));
    memset(explainedVar, 0, sizeof(explainedVar));
  }

  void init() {
    writeIdx = 0;
    count = 0;
    updates = 0;
    dom = 0;
    rot = 0;
    prevExplained0 = 0;
    memset(hist, 0, sizeof(hist));
    memset(mean, 0, sizeof(mean));
    memset(stdev, 0, sizeof(stdev));
    memset(latent, 0, sizeof(latent));
    memset(explainedVar, 0, sizeof(explainedVar));
  }

  void pushSnapshot(const double x[PCA_DIM]) {
    for(int d=0; d<PCA_DIM; d++) hist[writeIdx][d] = x[d];
    writeIdx = (writeIdx + 1) % PCA_WINDOW;
    if(count < PCA_WINDOW) count++;
  }

  void rebuildStats() {
    if(count <= 0) return;
    for(int d=0; d<PCA_DIM; d++) {
      double m = 0;
      for(int i=0; i<count; i++) m += hist[i][d];
      m /= (double)count;
      mean[d] = m;

      double v = 0;
      for(int i=0; i<count; i++) {
        double dd = hist[i][d] - m;
        v += dd * dd;
      }
      v /= (double)count;
      stdev[d] = sqrt(v + EPS);
    }
  }

  void update(const LearningSnapshot& snap, int regime, double conf) {
    double x[PCA_DIM];
    x[0] = snap.meanScore;
    x[1] = snap.meanCompactness;
    x[2] = snap.meanVol;
    x[3] = (double)regime / 2.0;
    x[4] = conf;
    x[5] = snap.meanScore - snap.meanCompactness;

    pushSnapshot(x);
    updates++;
    if((updates % rebuildEvery) == 0 || count < 4) rebuildStats();

    double z[PCA_DIM];
    for(int d=0; d<PCA_DIM; d++) z[d] = (x[d] - mean[d]) / (stdev[d] + EPS);

    latent[0] = 0.60*z[0] + 0.30*z[1] + 0.10*z[2];
    latent[1] = 0.25*z[0] - 0.45*z[1] + 0.20*z[2] + 0.10*z[4];
    latent[2] = 0.20*z[2] + 0.50*z[3] - 0.30*z[5];

    double a0 = fabs(latent[0]);
    double a1 = fabs(latent[1]);
    double a2 = fabs(latent[2]);
    double sumA = a0 + a1 + a2 + EPS;

    explainedVar[0] = a0 / sumA;
    explainedVar[1] = a1 / sumA;
    explainedVar[2] = a2 / sumA;

    dom = explainedVar[0];
    rot = fabs(explainedVar[0] - prevExplained0);
    prevExplained0 = explainedVar[0];
  }
};

class GMMRegimeModel {
public:
  double pi[GMM_K];
  double mu[GMM_K][GMM_DIM];
  double var[GMM_K][GMM_DIM];
  double p[GMM_K];
  double entropy;
  double conf;
  int bestRegime;
  int initialized;

  GMMRegimeModel() : entropy(0), conf(0), bestRegime(0), initialized(0) {
    memset(pi, 0, sizeof(pi));
    memset(mu, 0, sizeof(mu));
    memset(var, 0, sizeof(var));
    memset(p, 0, sizeof(p));
  }

  void init() {
    initialized = 0;
    entropy = 0;
    conf = 0;
    bestRegime = 0;
    for(int k=0;k<GMM_K;k++) {
      pi[k] = 1.0 / (double)GMM_K;
      for(int d=0; d<GMM_DIM; d++) {
        mu[k][d] = 0.02 * (k - 1);
        var[k][d] = 1.0;
      }
      p[k] = 1.0 / (double)GMM_K;
    }
    initialized = 1;
  }

  static double gaussianDiag(const double* x, const double* m, const double* v) {
    double logp = 0;
    for(int d=0; d<GMM_DIM; d++) {
      double vv = v[d];
      if(vv < GMM_VAR_FLOOR) vv = GMM_VAR_FLOOR;
      double z = x[d] - m[d];
      logp += -0.5 * (z*z / vv + log(vv + EPS));
    }
    if(logp < -80.0) logp = -80.0;
    return exp(logp);
  }

  void infer(const double x[GMM_DIM]) {
    if(!initialized) init();
    double sum = 0;
    for(int k=0;k<GMM_K;k++) {
      double g = gaussianDiag(x, mu[k], var[k]);
      p[k] = pi[k] * g;
      sum += p[k];
    }
    if(sum < EPS) {
      for(int k=0;k<GMM_K;k++) p[k] = 1.0 / (double)GMM_K;
    } else {
      for(int k=0;k<GMM_K;k++) p[k] /= sum;
    }

    bestRegime = 0;
    conf = p[0];
    for(int k=1;k<GMM_K;k++) {
      if(p[k] > conf) {
        conf = p[k];
        bestRegime = k;
      }
    }

    entropy = 0;
    for(int k=0;k<GMM_K;k++) entropy -= p[k] * log(p[k] + EPS);

#if GMM_ONLINE_UPDATE
    // lightweight incremental update (EM-like with forgetting)
    for(int k=0;k<GMM_K;k++) {
      double w = GMM_ALPHA * p[k];
      pi[k] = (1.0 - GMM_ALPHA) * pi[k] + w;
      for(int d=0; d<GMM_DIM; d++) {
        double diff = x[d] - mu[k][d];
        mu[k][d] += w * diff;
        var[k][d] = (1.0 - w) * var[k][d] + w * diff * diff;
        if(var[k][d] < GMM_VAR_FLOOR) var[k][d] = GMM_VAR_FLOOR;
      }
    }
#endif
  }
};


class HMMRegimeModel {
public:
  double A[HMM_K][HMM_K];
  double mu[HMM_K][HMM_DIM];
  double var[HMM_K][HMM_DIM];
  double posterior[HMM_K];
  double entropy;
  double conf;
  double switchProb;
  int regime;
  int initialized;

  HMMRegimeModel() : entropy(0), conf(0), switchProb(0), regime(0), initialized(0) {
    memset(A, 0, sizeof(A));
    memset(mu, 0, sizeof(mu));
    memset(var, 0, sizeof(var));
    memset(posterior, 0, sizeof(posterior));
  }

  void init() {
    for(int i=0;i<HMM_K;i++) {
      for(int j=0;j<HMM_K;j++) A[i][j] = (i==j) ? 0.90 : 0.10/(double)(HMM_K-1);
      for(int d=0; d<HMM_DIM; d++) {
        mu[i][d] = 0.03 * (i - 1);
        var[i][d] = 1.0;
      }
      posterior[i] = 1.0/(double)HMM_K;
    }
    regime = 0;
    conf = posterior[0];
    entropy = 0;
    switchProb = 0;
    initialized = 1;
  }

  static double emissionDiag(const double* x, const double* m, const double* v) {
    double logp = 0;
    for(int d=0; d<HMM_DIM; d++) {
      double vv = v[d];
      if(vv < HMM_VAR_FLOOR) vv = HMM_VAR_FLOOR;
      double z = x[d] - m[d];
      logp += -0.5 * (z*z / vv + log(vv + EPS));
    }
    if(logp < -80.0) logp = -80.0;
    return exp(logp);
  }

  void filter(const double obs[HMM_DIM]) {
    if(!initialized) init();

    double pred[HMM_K];
    for(int j=0;j<HMM_K;j++) {
      pred[j] = 0;
      for(int i=0;i<HMM_K;i++) pred[j] += posterior[i] * A[i][j];
    }

    double alpha[HMM_K];
    double sum = 0;
    for(int k=0;k<HMM_K;k++) {
      double emit = emissionDiag(obs, mu[k], var[k]);
      alpha[k] = pred[k] * emit;
      sum += alpha[k];
    }
    if(sum < EPS) {
      for(int k=0;k<HMM_K;k++) alpha[k] = 1.0/(double)HMM_K;
    } else {
      for(int k=0;k<HMM_K;k++) alpha[k] /= sum;
    }

    for(int k=0;k<HMM_K;k++) posterior[k] = alpha[k];

    regime = 0;
    conf = posterior[0];
    for(int k=1;k<HMM_K;k++) if(posterior[k] > conf) { conf = posterior[k]; regime = k; }

    entropy = 0;
    for(int k=0;k<HMM_K;k++) entropy -= posterior[k] * log(posterior[k] + EPS);

    switchProb = 1.0 - A[regime][regime];
    if(switchProb < 0) switchProb = 0;
    if(switchProb > 1) switchProb = 1;

#if HMM_ONLINE_UPDATE
    for(int k=0;k<HMM_K;k++) {
      double w = HMM_SMOOTH * posterior[k];
      for(int d=0; d<HMM_DIM; d++) {
        double diff = obs[d] - mu[k][d];
        mu[k][d] += w * diff;
        var[k][d] = (1.0 - w) * var[k][d] + w * diff * diff;
        if(var[k][d] < HMM_VAR_FLOOR) var[k][d] = HMM_VAR_FLOOR;
      }
    }
#endif
  }
};

class StrategyController {
public:
  UnsupervisedModel unsup;
  RLAgent rl;
  PCAModel pca;
  GMMRegimeModel gmm;
  HMMRegimeModel hmm;
  int dynamicTopK;
  double scoreScale;
  int regime;
  double adaptiveGamma;
  double adaptiveAlpha;
  double adaptiveBeta;
  double adaptiveLambda;
  double riskScale;
  int cooldown;

  StrategyController()
  : dynamicTopK(TOP_K), scoreScale(1.0), regime(0),
    adaptiveGamma(1.0), adaptiveAlpha(1.0), adaptiveBeta(1.0), adaptiveLambda(1.0), riskScale(1.0), cooldown(0) {}

  static double clampRange(double x, double lo, double hi) {
    if(x < lo) return lo;
    if(x > hi) return hi;
    return x;
  }

  void init() {
    unsup.init();
    rl.init();
    pca.init();
    gmm.init();
    hmm.init();
    dynamicTopK = TOP_K;
    scoreScale = 1.0;
    regime = 0;
    adaptiveGamma = 1.0;
    adaptiveAlpha = 1.0;
    adaptiveBeta = 1.0;
    adaptiveLambda = 1.0;
    riskScale = 1.0;
    cooldown = 0;
  }

  void buildGMMState(const LearningSnapshot& snap, int reg, double conf, double x[GMM_DIM]) {
    x[0] = snap.meanScore;
    x[1] = snap.meanCompactness;
    x[2] = snap.meanVol;
    x[3] = pca.dom;
    x[4] = pca.rot;
    x[5] = (double)reg / 2.0;
    x[6] = conf;
    x[7] = snap.meanScore - snap.meanCompactness;
  }

  void buildHMMObs(const LearningSnapshot& snap, int reg, double conf, double x[HMM_DIM]) {
    x[0] = pca.latent[0];
    x[1] = pca.latent[1];
    x[2] = pca.latent[2];
    x[3] = snap.meanVol;
    x[4] = snap.meanScore;
    x[5] = snap.meanCompactness;
    x[6] = (double)reg / 2.0;
    x[7] = conf;
  }

  void onUpdate(const LearningSnapshot& snap, fvar* scores, int nScores, int updateCount) {
#if USE_ML
    double unsupConf = 0;
    unsup.update(snap, &regime, &unsupConf);
#if USE_PCA
    pca.update(snap, regime, unsupConf);
#else
    pca.dom = 0.5;
    pca.rot = 0.0;
#endif

#if USE_GMM
    double gx[GMM_DIM];
    buildGMMState(snap, regime, unsupConf, gx);
    gmm.infer(gx);
#if USE_HMM
    double hx[HMM_DIM];
    buildHMMObs(snap, regime, unsupConf, hx);
    hmm.filter(hx);
#endif
    // regime presets: [gamma, alpha, beta, lambda]
    const double presets[GMM_K][4] = {
      {1.05, 1.00, 0.95, 1.00},
      {0.95, 1.05, 1.05, 0.95},
      {1.00, 0.95, 1.10, 1.05}
    };
    adaptiveGamma = 0;
    adaptiveAlpha = 0;
    adaptiveBeta  = 0;
    adaptiveLambda = 0;
    for(int k=0;k<GMM_K;k++) {
#if USE_HMM
      adaptiveGamma += hmm.posterior[k] * presets[k][0];
      adaptiveAlpha += hmm.posterior[k] * presets[k][1];
      adaptiveBeta  += hmm.posterior[k] * presets[k][2];
      adaptiveLambda += hmm.posterior[k] * presets[k][3];
#else
      adaptiveGamma += gmm.p[k] * presets[k][0];
      adaptiveAlpha += gmm.p[k] * presets[k][1];
      adaptiveBeta  += gmm.p[k] * presets[k][2];
      adaptiveLambda += gmm.p[k] * presets[k][3];
#endif
    }
#if USE_HMM
    double entNorm = hmm.entropy / log((double)HMM_K + EPS);
    riskScale = clampRange(1.0 - 0.45 * entNorm, HMM_MIN_RISK, 1.0);
    if(hmm.entropy > HMM_ENTROPY_TH || hmm.switchProb > HMM_SWITCH_TH) cooldown = HMM_COOLDOWN_UPDATES;
    else if(cooldown > 0) cooldown--;
#else
    double entNorm = gmm.entropy / log((double)GMM_K + EPS);
    riskScale = clampRange(1.0 - GMM_ENTROPY_COEFF * entNorm, GMM_MIN_RISK, 1.0);
#endif
#else
    adaptiveGamma = 1.0 + 0.35 * pca.dom - 0.25 * pca.rot;
    adaptiveAlpha = 1.0 + 0.30 * pca.dom;
    adaptiveBeta  = 1.0 + 0.25 * pca.rot;
    adaptiveLambda = 1.0 + 0.20 * pca.dom - 0.20 * pca.rot;
    riskScale = 1.0;
#endif

    adaptiveGamma = clampRange(adaptiveGamma, 0.80, 1.40);
    adaptiveAlpha = clampRange(adaptiveAlpha, 0.85, 1.35);
    adaptiveBeta  = clampRange(adaptiveBeta, 0.85, 1.35);
    adaptiveLambda = clampRange(adaptiveLambda, 0.85, 1.25);

    rl.updateReward(snap.meanScore);
    rl.lastAction = rl.chooseAction(updateCount);

    int baseTopK = TOP_K;
    if(rl.lastAction == 0) baseTopK = TOP_K - 2;
    else if(rl.lastAction == 1) baseTopK = TOP_K;
    else if(rl.lastAction == 2) baseTopK = TOP_K;
    else baseTopK = TOP_K - 1;

    double profileBias[5] = {1.00, 0.98, 0.99, 0.97, 1.02};
    scoreScale = (1.0 + 0.06 * (adaptiveGamma - 1.0) + 0.04 * (adaptiveAlpha - 1.0) - 0.04 * (adaptiveBeta - 1.0))
               * profileBias[STRATEGY_PROFILE] * riskScale;

    if(pca.dom > 0.60) baseTopK -= 1;
    if(pca.rot > 0.15) baseTopK -= 1;
#if USE_HMM
    if(hmm.regime == 2) baseTopK -= 1;
    if(cooldown > 0) baseTopK -= 1;
#elif USE_GMM
    if(gmm.bestRegime == 2) baseTopK -= 1;
#endif

    dynamicTopK = baseTopK;
    if(dynamicTopK < 1) dynamicTopK = 1;
    if(dynamicTopK > TOP_K) dynamicTopK = TOP_K;

    for(int i=0; i<nScores; i++) {
      double s = (double)scores[i] * scoreScale;
      if(s > 1.0) s = 1.0;
      if(s < 0.0) s = 0.0;
      scores[i] = (fvar)s;
    }
#else
    (void)snap; (void)scores; (void)nScores; (void)updateCount;
#endif
  }
};

// ---------------------------- Strategy ----------------------------

class RegimeSwitcherStrategy {
public:
  ExposureTable exposureTable;
  FeatureBufferSoA featSoA;
  OpenCLBackend openCL;

  SlabAllocator<fvar> corrMatrix;
  SlabAllocator<fvar> distMatrix;
  SlabAllocator<fvar> compactness;
  SlabAllocator<fvar> regime;
  SlabAllocator<fvar> scores;

  SlabAllocator<float> featLinear;
  SlabAllocator<float> corrLinear;

  int barCount;
  int updateCount;
  int currentRegime;
  StrategyController controller;

  RegimeSwitcherStrategy() : barCount(0), updateCount(0), currentRegime(0) {}

  void init() {
    printf("RegimeSwitcher_v7: Initializing...\n");

    exposureTable.init();
    featSoA.init(N_ASSETS, FEAT_WINDOW);

    corrMatrix.init(N_ASSETS * N_ASSETS);
    distMatrix.init(N_ASSETS * N_ASSETS);
    compactness.init(N_ASSETS);
    regime.init(N_ASSETS);
    scores.init(N_ASSETS);

    featLinear.init(FEAT_N * N_ASSETS * FEAT_WINDOW);
    corrLinear.init(N_ASSETS * N_ASSETS);

    openCL.init();
    printf("RegimeSwitcher_v7: Ready (OpenCL=%d)\n", openCL.ready);
    controller.init();

    barCount = 0;
    updateCount = 0;
  }

  void shutdown() {
    printf("RegimeSwitcher_v7: Shutting down...\n");

    openCL.shutdown();

    featSoA.shutdown();
    corrMatrix.shutdown();
    distMatrix.shutdown();
    compactness.shutdown();
    regime.shutdown();
    scores.shutdown();

    featLinear.shutdown();
    corrLinear.shutdown();
  }

  void computeFeatures(int assetIdx) {
    asset((char*)ASSET_NAMES[assetIdx]);

    vars C = series(priceClose(0));
    vars V = series(Volatility(C, 20));

    if(Bar < 50) return;

    fvar r1 = (fvar)log(C[0] / C[1]);
    fvar rN = (fvar)log(C[0] / C[12]);
    fvar vol = (fvar)V[0];
    fvar zscore = (fvar)((C[0] - C[50]) / (V[0] * 20.0 + EPS));
    fvar rangeP = (fvar)((C[0] - C[50]) / (C[0] + EPS));
    fvar flow = (fvar)(r1 * vol);

    fvar reg = 0;
    if(vol > 0.001) reg = (fvar)1.0;
    else reg = (fvar)0.0;

    fvar volOfVol = (fvar)(vol * vol);
    fvar persistence = (fvar)fabs(r1);

    featSoA.push(0, assetIdx, r1);
    featSoA.push(1, assetIdx, rN);
    featSoA.push(2, assetIdx, vol);
    featSoA.push(3, assetIdx, zscore);
    featSoA.push(4, assetIdx, rangeP);
    featSoA.push(5, assetIdx, flow);
    featSoA.push(6, assetIdx, reg);
    featSoA.push(7, assetIdx, volOfVol);
    featSoA.push(8, assetIdx, persistence);
  }

  fvar detectRegime() {
    fvar v = 0;
    for(int i=0;i<N_ASSETS;i++) v += featSoA.get(2, i, 0);
    v /= (fvar)N_ASSETS;

    if(v > (fvar)0.0015) currentRegime = 2;
    else if(v > (fvar)0.0008) currentRegime = 1;
    else currentRegime = 0;

    return (fvar)currentRegime;
  }

  void computeCorrelationMatrixCPU() {
    for(int i=0;i<N_ASSETS*N_ASSETS;i++) corrMatrix[i] = 0;

    for(int f=0; f<FEAT_N; f++){
      for(int a=0; a<N_ASSETS; a++){
        for(int b=a+1; b<N_ASSETS; b++){
          fvar mx = 0, my = 0;
          for(int t=0; t<FEAT_WINDOW; t++){
            mx += featSoA.get(f,a,t);
            my += featSoA.get(f,b,t);
          }
          mx /= (fvar)FEAT_WINDOW;
          my /= (fvar)FEAT_WINDOW;

          fvar sxx = 0, syy = 0, sxy = 0;
          for(int t=0; t<FEAT_WINDOW; t++){
            fvar dx = featSoA.get(f,a,t) - mx;
            fvar dy = featSoA.get(f,b,t) - my;
            sxx += dx*dx;
            syy += dy*dy;
            sxy += dx*dy;
          }

          fvar den = (fvar)sqrt((double)(sxx*syy + (fvar)EPS));
          fvar corr = 0;
          if(den > (fvar)EPS) corr = sxy / den;
          else corr = 0;

          int idx = a*N_ASSETS + b;
          corrMatrix[idx] += corr / (fvar)FEAT_N;
          corrMatrix[b*N_ASSETS + a] = corrMatrix[idx];
        }
      }
    }
  }

  void buildFeatLinear() {
    int idx = 0;
    for(int f=0; f<FEAT_N; f++){
      for(int a=0; a<N_ASSETS; a++){
        for(int t=0; t<FEAT_WINDOW; t++){
          featLinear[idx] = (float)featSoA.get(f, a, t);
          idx++;
        }
      }
    }
  }

  void computeCorrelationMatrix() {
    if(openCL.ready) {
      buildFeatLinear();

      for(int i=0;i<N_ASSETS*N_ASSETS;i++) corrLinear[i] = 0.0f;

      int ok = openCL.computeCorrelationMatrixCL(
        featLinear.data,
        corrLinear.data,
        N_ASSETS,
        FEAT_N,
        FEAT_WINDOW
      );

      if(ok) {
        for(int i=0;i<N_ASSETS*N_ASSETS;i++) corrMatrix[i] = (fvar)0;

        for(int a=0; a<N_ASSETS; a++){
          corrMatrix[a*N_ASSETS + a] = (fvar)1.0;
          for(int b=a+1; b<N_ASSETS; b++){
            float c = corrLinear[a*N_ASSETS + b];
            corrMatrix[a*N_ASSETS + b] = (fvar)c;
            corrMatrix[b*N_ASSETS + a] = (fvar)c;
          }
        }
        return;
      }

      printf("OpenCL: runtime fail -> CPU fallback\n");
      openCL.ready = 0;
    }

    computeCorrelationMatrixCPU();
  }

  void computeDistanceMatrix() {
    for(int i=0;i<N_ASSETS;i++){
      for(int j=0;j<N_ASSETS;j++){
        if(i == j) {
          distMatrix[i*N_ASSETS + j] = (fvar)0;
        } else {
          fvar corrDist = (fvar)1.0 - (fvar)fabs((double)corrMatrix[i*N_ASSETS + j]);
          fvar expDist  = (fvar)exposureTable.getDist(i, j);

          fvar blended = (fvar)LAMBDA_META * corrDist + (fvar)(1.0 - (double)LAMBDA_META) * expDist;
          distMatrix[i*N_ASSETS + j] = blended;
        }
      }
    }
  }

  void floydWarshall() {
    fvar d[28][28];

    for(int i=0;i<N_ASSETS;i++){
      for(int j=0;j<N_ASSETS;j++){
        d[i][j] = distMatrix[i*N_ASSETS + j];
        if(i == j) d[i][j] = (fvar)0;
        if(d[i][j] < (fvar)0) d[i][j] = (fvar)INF;
      }
    }

    for(int k=0;k<N_ASSETS;k++){
      for(int i=0;i<N_ASSETS;i++){
        for(int j=0;j<N_ASSETS;j++){
          if(d[i][k] < (fvar)INF && d[k][j] < (fvar)INF) {
            fvar nk = d[i][k] + d[k][j];
            if(nk < d[i][j]) d[i][j] = nk;
          }
        }
      }
    }

    for(int i=0;i<N_ASSETS;i++){
      fvar w = 0;
      for(int j=i+1;j<N_ASSETS;j++){
        if(d[i][j] < (fvar)INF) w += d[i][j];
      }
      if(w > (fvar)0) compactness[i] = (fvar)(1.0 / (1.0 + (double)w));
      else compactness[i] = (fvar)0;

      regime[i] = detectRegime();
    }
  }

  void computeScores() {
    for(int i=0;i<N_ASSETS;i++){
      fvar coupling = 0;
      int count = 0;

      for(int j=0;j<N_ASSETS;j++){
        if(i != j && distMatrix[i*N_ASSETS + j] < (fvar)INF) {
          coupling += compactness[j];
          count++;
        }
      }

      fvar pCouple = 0;
      if(count > 0) pCouple = coupling / (fvar)count;
      else pCouple = (fvar)0;

      fvar rawScore = (fvar)ALPHA * regime[i] + (fvar)GAMMA * compactness[i] - (fvar)BETA * pCouple;

      if(rawScore > (fvar)30) rawScore = (fvar)30;
      if(rawScore < (fvar)-30) rawScore = (fvar)-30;

      scores[i] = (fvar)(1.0 / (1.0 + exp(-(double)rawScore)));
    }
  }

  LearningSnapshot buildSnapshot() {
    LearningSnapshot s;
    s.meanScore = 0; s.meanCompactness = 0; s.meanVol = 0;
    for(int i=0;i<N_ASSETS;i++) {
      s.meanScore += (double)scores[i];
      s.meanCompactness += (double)compactness[i];
      s.meanVol += (double)featSoA.get(2, i, 0);
    }
    s.meanScore /= (double)N_ASSETS;
    s.meanCompactness /= (double)N_ASSETS;
    s.meanVol /= (double)N_ASSETS;
    s.regime = currentRegime;
    s.regimeConfidence = 0;
    return s;
  }

  void onBar() {
    barCount++;

    for(int i=0;i<N_ASSETS;i++) computeFeatures(i);

    if(barCount % UPDATE_EVERY == 0) {
      updateCount++;

      computeCorrelationMatrix();
      computeDistanceMatrix();
      floydWarshall();
      computeScores();
      controller.onUpdate(buildSnapshot(), scores.data, N_ASSETS, updateCount);
      printTopK();
    }
  }

  void printTopK() {
    int indices[N_ASSETS];
    for(int i=0;i<N_ASSETS;i++) indices[i] = i;

    int topN = controller.dynamicTopK;
    for(int i=0;i<topN;i++){
      for(int j=i+1;j<N_ASSETS;j++){
        if(scores[indices[j]] > scores[indices[i]]) {
          int tmp = indices[i];
          indices[i] = indices[j];
          indices[j] = tmp;
        }
      }
    }

    if(updateCount % 10 == 0) {
      printf("===RegimeSwitcher_v7 Top-K(update#%d,Reg=%d,OpenCL=%d)===\n",
        updateCount, currentRegime, openCL.ready);

      for(int i=0;i<topN;i++){
        int idx = indices[i];
        printf(" %d.%s: score=%.4f\n", i+1, ASSET_NAMES[idx], (double)scores[idx]);
      }
    }
  }
};

// ---------------------------- Zorro DLL entry ----------------------------

static RegimeSwitcherStrategy* S = NULL;

DLLFUNC void run()
{
  if(is(INITRUN)) {
    BarPeriod = 60;
    LookBack = max(LookBack, FEAT_WINDOW + 50);

    asset((char*)ASSET_NAMES[0]);

    if(!S) {
      S = new RegimeSwitcherStrategy();
      S->init();
    }
  }

  if(is(EXITRUN)) {
    if(S) {
      S->shutdown();
      delete S;
      S = NULL;
    }
    return;
  }

  if(!S || Bar < LookBack)
    return;

  S->onBar();
}