NeuroWeave CompactNet is a machine learning driven portfolio engine that treats a basket of currency pairs as a single adaptive organism. It watches how each pair behaves through a compact set of features, then learns how relationships across the whole basket change over time, and finally uses multiple learning modules to decide how selective and how aggressive the strategy should be. The engine builds a rolling feature memory for every pair, so the system always has recent behavior available in a consistent format. It then converts that memory into a relationship map that reflects how similarly pairs have been behaving across multiple feature channels. To keep this step fast, the code can push the expensive correlation work to a graphics device through an optional OpenCL backend, while always keeping a full CPU fallback for reliability.

The learning layer is not a single model, but a controller that fuses several perspectives. An unsupervised clustering core forms a first pass regime label and confidence from basket level summaries such as average score, average compactness, and average volatility. A principal component style compressor then tracks whether one direction of behavior is dominating and whether the structure is rotating, which becomes an early warning signal for instability. On top of that, mixture based regime inference estimates how many different market “moods” are active at once, and how uncertain the system should be about its current mood. A hidden state filter adds memory to regime changes, allowing the controller to detect noisy switching and enforce cooldown periods. A lightweight clustering model adds stability checks and can reduce risk when the current assignment is unreliable. Reinforcement learning wraps around the whole loop by testing small variations of selection intensity and learning which choice tends to improve the average score over time.

A novelty detector based on autoencoder reconstruction error watches for patterns that do not look like the recent past. When novelty rises, the controller automatically reduces exposure, narrows the set of chosen pairs, and scales down scores. Finally, community and hierarchical grouping methods shape selection so that chosen pairs are not all from the same tightly connected cluster, improving diversity even when the market becomes crowded. The outcome is a self tuning selector that learns regimes, monitors uncertainty, detects novelty, and adapts portfolio concentration without relying on a single fragile model.

Code
// TGr06A_CompactDominant_v12.cpp - Zorro64 Strategy DLL
// Strategy A v12: Compactness-Dominant with MX06 OOP + OpenCL + Learning Controller
// Notes:
// - Keeps full CPU fallback.
// - OpenCL is optional: if OpenCL.dll missing / no device / kernel build fails -> CPU path.
// - OpenCL accelerates the heavy correlation matrix step by offloading pairwise correlations.
// - Correlation is computed in float on GPU; results are stored back into fvar corrMatrix.

#define _CRT_SECURE_NO_WARNINGS
#include <zorro.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <windows.h>
#include <stddef.h>

#define INF 1e30
#define EPS 1e-12
#define N_ASSETS 28
#define FEAT_N 9
#define FEAT_WINDOW 200
#define UPDATE_EVERY 5
#define TOP_K 5

#define ALPHA 0.1
#define BETA 0.2
#define GAMMA 3.0
#define LAMBDA_META 0.7

#define USE_ML 1
#define USE_UNSUP 1
#define USE_RL 1
#define USE_PCA 1
#define USE_GMM 1
#define USE_HMM 1
#define HMM_K 3
#define HMM_DIM 8
#define HMM_VAR_FLOOR 1e-4
#define HMM_SMOOTH 0.02
#define HMM_ENTROPY_TH 0.85
#define HMM_SWITCH_TH 0.35
#define HMM_MIN_RISK 0.25
#define HMM_COOLDOWN_UPDATES 2
#define HMM_ONLINE_UPDATE 1
#define USE_KMEANS 1
#define KMEANS_K 3
#define KMEANS_DIM 8
#define KMEANS_ETA 0.03
#define KMEANS_DIST_EMA 0.08
#define KMEANS_STABILITY_MIN 0.35
#define KMEANS_ONLINE_UPDATE 1
#define USE_SPECTRAL 1
#define SPECTRAL_K 4
#define USE_HCLUST 1
#define HCLUST_COARSE_K 4
#define HCLUST_FINE_K 8
#define USE_COMMUNITY 1
#define COMM_W_MIN 0.15
#define COMM_TOPM 6
#define COMM_ITERS 4
#define COMM_Q_EMA 0.20
#define COMM_Q_LOW 0.20
#define COMM_Q_HIGH 0.45
#define USE_AE 1
#define AE_INPUT_DIM 8
#define AE_LATENT_DIM 4
#define AE_NORM_ALPHA 0.02
#define AE_ERR_EMA 0.10
#define AE_Z_LOW 1.0
#define AE_Z_HIGH 2.0
#define GMM_K 3
#define GMM_DIM 8
#define GMM_ALPHA 0.02
#define GMM_VAR_FLOOR 1e-4
#define GMM_ENTROPY_COEFF 0.45
#define GMM_MIN_RISK 0.25
#define GMM_ONLINE_UPDATE 1
#define STRATEGY_PROFILE 0
#define PCA_DIM 6
#define PCA_COMP 3
#define PCA_WINDOW 128
#define PCA_REBUILD_EVERY 4

#ifdef TIGHT_MEM
typedef float fvar;
#else
typedef double fvar;
#endif

static const char* ASSET_NAMES[] = {
  "EURUSD","GBPUSD","USDCHF","USDJPY","AUDUSD","AUDCAD","AUDCHF","AUDJPY","AUDNZD",
  "CADJPY","CADCHF","EURAUD","EURCAD","EURCHF","EURGBP","EURJPY","EURNZD","GBPAUD",
  "GBPCAD","GBPCHF","GBPJPY","GBPNZD","NZDCAD","NZDCHF","NZDJPY","NZDUSD","USDCAD"
};
static const char* CURRENCIES[] = {"EUR","GBP","USD","CHF","JPY","AUD","CAD","NZD"};
#define N_CURRENCIES 8

// ---------------------------- Exposure Table ----------------------------

struct ExposureTable {
  int exposure[N_ASSETS][N_CURRENCIES];
  double exposureDist[N_ASSETS][N_ASSETS];

  void init() {
    for(int i=0;i<N_ASSETS;i++){
      for(int c=0;c<N_CURRENCIES;c++){
        exposure[i][c] = 0;
      }
    }
    for(int i=0;i<N_ASSETS;i++){
      for(int j=0;j<N_ASSETS;j++){
        exposureDist[i][j] = 0.0;
      }
    }
  }

  inline double getDist(int i,int j) const { return exposureDist[i][j]; }
};

// ---------------------------- Slab Allocator ----------------------------

template<typename T>
class SlabAllocator {
public:
  T* data;
  int capacity;

  SlabAllocator() : data(NULL), capacity(0) {}
  ~SlabAllocator() { shutdown(); }

  void init(int size) {
    shutdown();
    capacity = size;
    data = (T*)malloc((size_t)capacity * sizeof(T));
    if(data) memset(data, 0, (size_t)capacity * sizeof(T));
  }

  void shutdown() {
    if(data) free(data);
    data = NULL;
    capacity = 0;
  }

  T& operator[](int i) { return data[i]; }
  const T& operator[](int i) const { return data[i]; }
};

// ---------------------------- Feature Buffer (SoA ring) ----------------------------

struct FeatureBufferSoA {
  SlabAllocator<fvar> buffer;
  int windowSize;
  int currentIndex;

  void init(int assets, int window) {
    windowSize = window;
    currentIndex = 0;
    buffer.init(FEAT_N * assets * window);
  }

  void shutdown() { buffer.shutdown(); }

  inline int offset(int feat,int asset,int t) const {
    return (feat * N_ASSETS + asset) * windowSize + t;
  }

  void push(int feat,int asset,fvar value) {
    buffer[offset(feat, asset, currentIndex)] = value;
    currentIndex = (currentIndex + 1) % windowSize;
  }

  // t=0 => most recent
  fvar get(int feat,int asset,int t) const {
    int idx = (currentIndex - 1 - t + windowSize) % windowSize;
    return buffer[offset(feat, asset, idx)];
  }
};

// ---------------------------- Minimal OpenCL (dynamic) ----------------------------

typedef struct _cl_platform_id*   cl_platform_id;
typedef struct _cl_device_id*     cl_device_id;
typedef struct _cl_context*       cl_context;
typedef struct _cl_command_queue* cl_command_queue;
typedef struct _cl_program*       cl_program;
typedef struct _cl_kernel*        cl_kernel;
typedef struct _cl_mem*           cl_mem;
typedef unsigned int              cl_uint;
typedef int                       cl_int;
typedef unsigned long long        cl_ulong;
typedef size_t                    cl_bool;

#define CL_SUCCESS 0
#define CL_DEVICE_TYPE_CPU (1ULL << 1)
#define CL_DEVICE_TYPE_GPU (1ULL << 2)
#define CL_MEM_READ_ONLY   (1ULL << 2)
#define CL_MEM_WRITE_ONLY  (1ULL << 1)
#define CL_MEM_READ_WRITE  (1ULL << 0)
#define CL_TRUE  1
#define CL_FALSE 0
#define CL_PROGRAM_BUILD_LOG 0x1183

class OpenCLBackend {
public:
  HMODULE hOpenCL;
  int ready;

  cl_platform_id platform;
  cl_device_id device;
  cl_context context;
  cl_command_queue queue;
  cl_program program;
  cl_kernel kCorr;

  cl_mem bufFeat;
  cl_mem bufCorr;

  int featBytes;
  int corrBytes;

  cl_int (*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*);
  cl_int (*clGetDeviceIDs)(cl_platform_id, cl_ulong, cl_uint, cl_device_id*, cl_uint*);
  cl_context (*clCreateContext)(void*, cl_uint, const cl_device_id*, void*, void*, cl_int*);
  cl_command_queue (*clCreateCommandQueue)(cl_context, cl_device_id, cl_ulong, cl_int*);
  cl_program (*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*);
  cl_int (*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void*, void*);
  cl_int (*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_uint, size_t, void*, size_t*);
  cl_kernel (*clCreateKernel)(cl_program, const char*, cl_int*);
  cl_int (*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*);
  cl_mem (*clCreateBuffer)(cl_context, cl_ulong, size_t, void*, cl_int*);
  cl_int (*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const void*, void*);
  cl_int (*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const void*, void*);
  cl_int (*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const void*, void*);
  cl_int (*clFinish)(cl_command_queue);
  cl_int (*clReleaseMemObject)(cl_mem);
  cl_int (*clReleaseKernel)(cl_kernel);
  cl_int (*clReleaseProgram)(cl_program);
  cl_int (*clReleaseCommandQueue)(cl_command_queue);
  cl_int (*clReleaseContext)(cl_context);

  OpenCLBackend()
  : hOpenCL(NULL), ready(0),
    platform(NULL), device(NULL), context(NULL), queue(NULL), program(NULL), kCorr(NULL),
    bufFeat(NULL), bufCorr(NULL),
    featBytes(0), corrBytes(0),
    clGetPlatformIDs(NULL), clGetDeviceIDs(NULL), clCreateContext(NULL), clCreateCommandQueue(NULL),
    clCreateProgramWithSource(NULL), clBuildProgram(NULL), clGetProgramBuildInfo(NULL),
    clCreateKernel(NULL), clSetKernelArg(NULL),
    clCreateBuffer(NULL), clEnqueueWriteBuffer(NULL), clEnqueueReadBuffer(NULL),
    clEnqueueNDRangeKernel(NULL), clFinish(NULL),
    clReleaseMemObject(NULL), clReleaseKernel(NULL), clReleaseProgram(NULL),
    clReleaseCommandQueue(NULL), clReleaseContext(NULL)
  {}

  int loadSymbol(void** fp, const char* name) {
    *fp = (void*)GetProcAddress(hOpenCL, name);
    return (*fp != NULL);
  }

  const char* kernelSource() {
    return
      "__kernel void corr_pairwise(\n"
      "  __global const float* feat,\n"
      "  __global float* outCorr,\n"
      "  const int nAssets,\n"
      "  const int nFeat,\n"
      "  const int windowSize,\n"
      "  const float eps\n"
      "){\n"
      "  int a = (int)get_global_id(0);\n"
      "  int b = (int)get_global_id(1);\n"
      "  if(a >= nAssets || b >= nAssets) return;\n"
      "  if(a >= b) return;\n"
      "  float acc = 0.0f;\n"
      "  for(int f=0; f<nFeat; f++){\n"
      "    int baseA = (f*nAssets + a) * windowSize;\n"
      "    int baseB = (f*nAssets + b) * windowSize;\n"
      "    float mx = 0.0f;\n"
      "    float my = 0.0f;\n"
      "    for(int t=0; t<windowSize; t++){\n"
      "      mx += feat[baseA + t];\n"
      "      my += feat[baseB + t];\n"
      "    }\n"
      "    mx /= (float)windowSize;\n"
      "    my /= (float)windowSize;\n"
      "    float sxx = 0.0f;\n"
      "    float syy = 0.0f;\n"
      "    float sxy = 0.0f;\n"
      "    for(int t=0; t<windowSize; t++){\n"
      "      float dx = feat[baseA + t] - mx;\n"
      "      float dy = feat[baseB + t] - my;\n"
      "      sxx += dx*dx;\n"
      "      syy += dy*dy;\n"
      "      sxy += dx*dy;\n"
      "    }\n"
      "    float den = sqrt(sxx*syy + eps);\n"
      "    float corr = (den > eps) ? (sxy/den) : 0.0f;\n"
      "    acc += corr;\n"
      "  }\n"
      "  outCorr[a*nAssets + b] = acc / (float)nFeat;\n"
      "}\n";
  }

  void printBuildLog() {
    if(!clGetProgramBuildInfo || !program || !device) return;
    size_t logSize = 0;
    clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
    if(logSize == 0) return;
    char* log = (char*)malloc(logSize + 1);
    if(!log) return;
    memset(log, 0, logSize + 1);
    clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
    printf("OpenCL build log:\n%s\n", log);
    free(log);
  }

  void init() {
    ready = 0;

    hOpenCL = LoadLibraryA("OpenCL.dll");
    if(!hOpenCL) {
      printf("OpenCL: CPU (OpenCL.dll missing)\n");
      return;
    }

    if(!loadSymbol((void**)&clGetPlatformIDs,       "clGetPlatformIDs")) return;
    if(!loadSymbol((void**)&clGetDeviceIDs,         "clGetDeviceIDs")) return;
    if(!loadSymbol((void**)&clCreateContext,        "clCreateContext")) return;
    if(!loadSymbol((void**)&clCreateCommandQueue,   "clCreateCommandQueue")) return;
    if(!loadSymbol((void**)&clCreateProgramWithSource,"clCreateProgramWithSource")) return;
    if(!loadSymbol((void**)&clBuildProgram,         "clBuildProgram")) return;
    if(!loadSymbol((void**)&clGetProgramBuildInfo,  "clGetProgramBuildInfo")) return;
    if(!loadSymbol((void**)&clCreateKernel,         "clCreateKernel")) return;
    if(!loadSymbol((void**)&clSetKernelArg,         "clSetKernelArg")) return;
    if(!loadSymbol((void**)&clCreateBuffer,         "clCreateBuffer")) return;
    if(!loadSymbol((void**)&clEnqueueWriteBuffer,   "clEnqueueWriteBuffer")) return;
    if(!loadSymbol((void**)&clEnqueueReadBuffer,    "clEnqueueReadBuffer")) return;
    if(!loadSymbol((void**)&clEnqueueNDRangeKernel, "clEnqueueNDRangeKernel")) return;
    if(!loadSymbol((void**)&clFinish,               "clFinish")) return;
    if(!loadSymbol((void**)&clReleaseMemObject,     "clReleaseMemObject")) return;
    if(!loadSymbol((void**)&clReleaseKernel,        "clReleaseKernel")) return;
    if(!loadSymbol((void**)&clReleaseProgram,       "clReleaseProgram")) return;
    if(!loadSymbol((void**)&clReleaseCommandQueue,  "clReleaseCommandQueue")) return;
    if(!loadSymbol((void**)&clReleaseContext,       "clReleaseContext")) return;

    cl_uint nPlat = 0;
    if(clGetPlatformIDs(0, NULL, &nPlat) != CL_SUCCESS || nPlat == 0) {
      printf("OpenCL: CPU (no platform)\n");
      return;
    }
    clGetPlatformIDs(1, &platform, NULL);

    cl_uint nDev = 0;
    cl_int ok = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, &nDev);
    if(ok != CL_SUCCESS || nDev == 0) {
      ok = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, &nDev);
      if(ok != CL_SUCCESS || nDev == 0) {
        printf("OpenCL: CPU (no device)\n");
        return;
      }
    }

    cl_int err = 0;
    context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
    if(err != CL_SUCCESS || !context) {
      printf("OpenCL: CPU (context fail)\n");
      return;
    }

    queue = clCreateCommandQueue(context, device, 0, &err);
    if(err != CL_SUCCESS || !queue) {
      printf("OpenCL: CPU (queue fail)\n");
      return;
    }

    const char* src = kernelSource();
    program = clCreateProgramWithSource(context, 1, &src, NULL, &err);
    if(err != CL_SUCCESS || !program) {
      printf("OpenCL: CPU (program fail)\n");
      return;
    }

    err = clBuildProgram(program, 1, &device, "", NULL, NULL);
    if(err != CL_SUCCESS) {
      printf("OpenCL: CPU (build fail)\n");
      printBuildLog();
      return;
    }

    kCorr = clCreateKernel(program, "corr_pairwise", &err);
    if(err != CL_SUCCESS || !kCorr) {
      printf("OpenCL: CPU (kernel fail)\n");
      printBuildLog();
      return;
    }

    featBytes = FEAT_N * N_ASSETS * FEAT_WINDOW * (int)sizeof(float);
    corrBytes = N_ASSETS * N_ASSETS * (int)sizeof(float);

    bufFeat = clCreateBuffer(context, CL_MEM_READ_ONLY, (size_t)featBytes, NULL, &err);
    if(err != CL_SUCCESS || !bufFeat) {
      printf("OpenCL: CPU (bufFeat fail)\n");
      return;
    }

    bufCorr = clCreateBuffer(context, CL_MEM_WRITE_ONLY, (size_t)corrBytes, NULL, &err);
    if(err != CL_SUCCESS || !bufCorr) {
      printf("OpenCL: CPU (bufCorr fail)\n");
      return;
    }

    ready = 1;
    printf("OpenCL: READY (kernel+buffers)\n");
  }

  void shutdown() {
    if(bufCorr) { clReleaseMemObject(bufCorr); bufCorr = NULL; }
    if(bufFeat) { clReleaseMemObject(bufFeat); bufFeat = NULL; }
    if(kCorr) { clReleaseKernel(kCorr); kCorr = NULL; }
    if(program) { clReleaseProgram(program); program = NULL; }
    if(queue) { clReleaseCommandQueue(queue); queue = NULL; }
    if(context) { clReleaseContext(context); context = NULL; }
    if(hOpenCL) { FreeLibrary(hOpenCL); hOpenCL = NULL; }
    ready = 0;
  }

  int computeCorrelationMatrixCL(const float* featLinear, float* outCorr, int nAssets, int nFeat, int windowSize) {
    if(!ready) return 0;
    if(!featLinear || !outCorr) return 0;

    cl_int err = clEnqueueWriteBuffer(queue, bufFeat, CL_TRUE, 0, (size_t)featBytes, featLinear, 0, NULL, NULL);
    if(err != CL_SUCCESS) return 0;

    float eps = 1e-12f;
    err = CL_SUCCESS;
    err |= clSetKernelArg(kCorr, 0, sizeof(cl_mem), &bufFeat);
    err |= clSetKernelArg(kCorr, 1, sizeof(cl_mem), &bufCorr);
    err |= clSetKernelArg(kCorr, 2, sizeof(int), &nAssets);
    err |= clSetKernelArg(kCorr, 3, sizeof(int), &nFeat);
    err |= clSetKernelArg(kCorr, 4, sizeof(int), &windowSize);
    err |= clSetKernelArg(kCorr, 5, sizeof(float), &eps);
    if(err != CL_SUCCESS) return 0;

    size_t global[2];
    global[0] = (size_t)nAssets;
    global[1] = (size_t)nAssets;

    err = clEnqueueNDRangeKernel(queue, kCorr, 2, NULL, global, NULL, 0, NULL, NULL);
    if(err != CL_SUCCESS) return 0;

    err = clFinish(queue);
    if(err != CL_SUCCESS) return 0;

    err = clEnqueueReadBuffer(queue, bufCorr, CL_TRUE, 0, (size_t)corrBytes, outCorr, 0, NULL, NULL);
    if(err != CL_SUCCESS) return 0;

    return 1;
  }
};

// ---------------------------- Learning Layer ----------------------------

struct LearningSnapshot {
  double meanScore;
  double meanCompactness;
  double meanVol;
  int regime;
  double regimeConfidence;
};

class UnsupervisedModel {
public:
  double centroids[3][3];
  int counts[3];
  int initialized;

  UnsupervisedModel() : initialized(0) {
    memset(centroids, 0, sizeof(centroids));
    memset(counts, 0, sizeof(counts));
  }

  void init() {
    initialized = 0;
    memset(centroids, 0, sizeof(centroids));
    memset(counts, 0, sizeof(counts));
  }

  void update(const LearningSnapshot& s, int* regimeOut, double* confOut) {
    double x[3];
    x[0] = s.meanScore;
    x[1] = s.meanCompactness;
    x[2] = s.meanVol;

    if(!initialized) {
      for(int k=0; k<3; k++) {
        centroids[k][0] = x[0] + 0.01 * (k - 1);
        centroids[k][1] = x[1] + 0.01 * (1 - k);
        centroids[k][2] = x[2] + 0.005 * (k - 1);
        counts[k] = 1;
      }
      initialized = 1;
    }

    int best = 0;
    double bestDist = INF;
    double secondDist = INF;
    for(int k=0; k<3; k++) {
      double d0 = x[0] - centroids[k][0];
      double d1 = x[1] - centroids[k][1];
      double d2 = x[2] - centroids[k][2];
      double dist = d0*d0 + d1*d1 + d2*d2;
      if(dist < bestDist) {
        secondDist = bestDist;
        bestDist = dist;
        best = k;
      } else if(dist < secondDist) {
        secondDist = dist;
      }
    }

    counts[best]++;
    double lr = 1.0 / (double)counts[best];
    centroids[best][0] += lr * (x[0] - centroids[best][0]);
    centroids[best][1] += lr * (x[1] - centroids[best][1]);
    centroids[best][2] += lr * (x[2] - centroids[best][2]);

    *regimeOut = best;
    *confOut = 1.0 / (1.0 + sqrt(fabs(secondDist - bestDist) + EPS));
  }
};

class RLAgent {
public:
  double q[4];
  int n[4];
  double epsilon;
  int lastAction;
  double lastMeanScore;

  RLAgent() : epsilon(0.10), lastAction(0), lastMeanScore(0) {
    for(int i=0;i<4;i++){ q[i]=0; n[i]=0; }
  }

  void init() {
    epsilon = 0.10;
    lastAction = 0;
    lastMeanScore = 0;
    for(int i=0;i<4;i++){ q[i]=0; n[i]=0; }
  }

  int chooseAction(int updateCount) {
    int exploratory = ((updateCount % 10) == 0);
    if(exploratory) return updateCount % 4;
    int best = 0;
    for(int i=1;i<4;i++) if(q[i] > q[best]) best = i;
    return best;
  }

  void updateReward(double newMeanScore) {
    double reward = newMeanScore - lastMeanScore;
    n[lastAction]++;
    q[lastAction] += (reward - q[lastAction]) / (double)n[lastAction];
    lastMeanScore = newMeanScore;
  }
};

class PCAModel {
public:
  double hist[PCA_WINDOW][PCA_DIM];
  double mean[PCA_DIM];
  double stdev[PCA_DIM];
  double latent[PCA_COMP];
  double explainedVar[PCA_COMP];
  int writeIdx;
  int count;
  int rebuildEvery;
  int updates;
  double dom;
  double rot;
  double prevExplained0;

  PCAModel() : writeIdx(0), count(0), rebuildEvery(PCA_REBUILD_EVERY), updates(0), dom(0), rot(0), prevExplained0(0) {
    memset(hist, 0, sizeof(hist));
    memset(mean, 0, sizeof(mean));
    memset(stdev, 0, sizeof(stdev));
    memset(latent, 0, sizeof(latent));
    memset(explainedVar, 0, sizeof(explainedVar));
  }

  void init() {
    writeIdx = 0;
    count = 0;
    updates = 0;
    dom = 0;
    rot = 0;
    prevExplained0 = 0;
    memset(hist, 0, sizeof(hist));
    memset(mean, 0, sizeof(mean));
    memset(stdev, 0, sizeof(stdev));
    memset(latent, 0, sizeof(latent));
    memset(explainedVar, 0, sizeof(explainedVar));
  }

  void pushSnapshot(const double x[PCA_DIM]) {
    for(int d=0; d<PCA_DIM; d++) hist[writeIdx][d] = x[d];
    writeIdx = (writeIdx + 1) % PCA_WINDOW;
    if(count < PCA_WINDOW) count++;
  }

  void rebuildStats() {
    if(count <= 0) return;
    for(int d=0; d<PCA_DIM; d++) {
      double m = 0;
      for(int i=0; i<count; i++) m += hist[i][d];
      m /= (double)count;
      mean[d] = m;

      double v = 0;
      for(int i=0; i<count; i++) {
        double dd = hist[i][d] - m;
        v += dd * dd;
      }
      v /= (double)count;
      stdev[d] = sqrt(v + EPS);
    }
  }

  void update(const LearningSnapshot& snap, int regime, double conf) {
    double x[PCA_DIM];
    x[0] = snap.meanScore;
    x[1] = snap.meanCompactness;
    x[2] = snap.meanVol;
    x[3] = (double)regime / 2.0;
    x[4] = conf;
    x[5] = snap.meanScore - snap.meanCompactness;

    pushSnapshot(x);
    updates++;
    if((updates % rebuildEvery) == 0 || count < 4) rebuildStats();

    double z[PCA_DIM];
    for(int d=0; d<PCA_DIM; d++) z[d] = (x[d] - mean[d]) / (stdev[d] + EPS);

    latent[0] = 0.60*z[0] + 0.30*z[1] + 0.10*z[2];
    latent[1] = 0.25*z[0] - 0.45*z[1] + 0.20*z[2] + 0.10*z[4];
    latent[2] = 0.20*z[2] + 0.50*z[3] - 0.30*z[5];

    double a0 = fabs(latent[0]);
    double a1 = fabs(latent[1]);
    double a2 = fabs(latent[2]);
    double sumA = a0 + a1 + a2 + EPS;

    explainedVar[0] = a0 / sumA;
    explainedVar[1] = a1 / sumA;
    explainedVar[2] = a2 / sumA;

    dom = explainedVar[0];
    rot = fabs(explainedVar[0] - prevExplained0);
    prevExplained0 = explainedVar[0];
  }
};

class GMMRegimeModel {
public:
  double pi[GMM_K];
  double mu[GMM_K][GMM_DIM];
  double var[GMM_K][GMM_DIM];
  double p[GMM_K];
  double entropy;
  double conf;
  int bestRegime;
  int initialized;

  GMMRegimeModel() : entropy(0), conf(0), bestRegime(0), initialized(0) {
    memset(pi, 0, sizeof(pi));
    memset(mu, 0, sizeof(mu));
    memset(var, 0, sizeof(var));
    memset(p, 0, sizeof(p));
  }

  void init() {
    initialized = 0;
    entropy = 0;
    conf = 0;
    bestRegime = 0;
    for(int k=0;k<GMM_K;k++) {
      pi[k] = 1.0 / (double)GMM_K;
      for(int d=0; d<GMM_DIM; d++) {
        mu[k][d] = 0.02 * (k - 1);
        var[k][d] = 1.0;
      }
      p[k] = 1.0 / (double)GMM_K;
    }
    initialized = 1;
  }

  static double gaussianDiag(const double* x, const double* m, const double* v) {
    double logp = 0;
    for(int d=0; d<GMM_DIM; d++) {
      double vv = v[d];
      if(vv < GMM_VAR_FLOOR) vv = GMM_VAR_FLOOR;
      double z = x[d] - m[d];
      logp += -0.5 * (z*z / vv + log(vv + EPS));
    }
    if(logp < -80.0) logp = -80.0;
    return exp(logp);
  }

  void infer(const double x[GMM_DIM]) {
    if(!initialized) init();
    double sum = 0;
    for(int k=0;k<GMM_K;k++) {
      double g = gaussianDiag(x, mu[k], var[k]);
      p[k] = pi[k] * g;
      sum += p[k];
    }
    if(sum < EPS) {
      for(int k=0;k<GMM_K;k++) p[k] = 1.0 / (double)GMM_K;
    } else {
      for(int k=0;k<GMM_K;k++) p[k] /= sum;
    }

    bestRegime = 0;
    conf = p[0];
    for(int k=1;k<GMM_K;k++) {
      if(p[k] > conf) {
        conf = p[k];
        bestRegime = k;
      }
    }

    entropy = 0;
    for(int k=0;k<GMM_K;k++) entropy -= p[k] * log(p[k] + EPS);

#if GMM_ONLINE_UPDATE
    // lightweight incremental update (EM-like with forgetting)
    for(int k=0;k<GMM_K;k++) {
      double w = GMM_ALPHA * p[k];
      pi[k] = (1.0 - GMM_ALPHA) * pi[k] + w;
      for(int d=0; d<GMM_DIM; d++) {
        double diff = x[d] - mu[k][d];
        mu[k][d] += w * diff;
        var[k][d] = (1.0 - w) * var[k][d] + w * diff * diff;
        if(var[k][d] < GMM_VAR_FLOOR) var[k][d] = GMM_VAR_FLOOR;
      }
    }
#endif
  }
};


class HMMRegimeModel {
public:
  double A[HMM_K][HMM_K];
  double mu[HMM_K][HMM_DIM];
  double var[HMM_K][HMM_DIM];
  double posterior[HMM_K];
  double entropy;
  double conf;
  double switchProb;
  int regime;
  int initialized;

  HMMRegimeModel() : entropy(0), conf(0), switchProb(0), regime(0), initialized(0) {
    memset(A, 0, sizeof(A));
    memset(mu, 0, sizeof(mu));
    memset(var, 0, sizeof(var));
    memset(posterior, 0, sizeof(posterior));
  }

  void init() {
    for(int i=0;i<HMM_K;i++) {
      for(int j=0;j<HMM_K;j++) A[i][j] = (i==j) ? 0.90 : 0.10/(double)(HMM_K-1);
      for(int d=0; d<HMM_DIM; d++) {
        mu[i][d] = 0.03 * (i - 1);
        var[i][d] = 1.0;
      }
      posterior[i] = 1.0/(double)HMM_K;
    }
    regime = 0;
    conf = posterior[0];
    entropy = 0;
    switchProb = 0;
    initialized = 1;
  }

  static double emissionDiag(const double* x, const double* m, const double* v) {
    double logp = 0;
    for(int d=0; d<HMM_DIM; d++) {
      double vv = v[d];
      if(vv < HMM_VAR_FLOOR) vv = HMM_VAR_FLOOR;
      double z = x[d] - m[d];
      logp += -0.5 * (z*z / vv + log(vv + EPS));
    }
    if(logp < -80.0) logp = -80.0;
    return exp(logp);
  }

  void filter(const double obs[HMM_DIM]) {
    if(!initialized) init();

    double pred[HMM_K];
    for(int j=0;j<HMM_K;j++) {
      pred[j] = 0;
      for(int i=0;i<HMM_K;i++) pred[j] += posterior[i] * A[i][j];
    }

    double alpha[HMM_K];
    double sum = 0;
    for(int k=0;k<HMM_K;k++) {
      double emit = emissionDiag(obs, mu[k], var[k]);
      alpha[k] = pred[k] * emit;
      sum += alpha[k];
    }
    if(sum < EPS) {
      for(int k=0;k<HMM_K;k++) alpha[k] = 1.0/(double)HMM_K;
    } else {
      for(int k=0;k<HMM_K;k++) alpha[k] /= sum;
    }

    for(int k=0;k<HMM_K;k++) posterior[k] = alpha[k];

    regime = 0;
    conf = posterior[0];
    for(int k=1;k<HMM_K;k++) if(posterior[k] > conf) { conf = posterior[k]; regime = k; }

    entropy = 0;
    for(int k=0;k<HMM_K;k++) entropy -= posterior[k] * log(posterior[k] + EPS);

    switchProb = 1.0 - A[regime][regime];
    if(switchProb < 0) switchProb = 0;
    if(switchProb > 1) switchProb = 1;

#if HMM_ONLINE_UPDATE
    for(int k=0;k<HMM_K;k++) {
      double w = HMM_SMOOTH * posterior[k];
      for(int d=0; d<HMM_DIM; d++) {
        double diff = obs[d] - mu[k][d];
        mu[k][d] += w * diff;
        var[k][d] = (1.0 - w) * var[k][d] + w * diff * diff;
        if(var[k][d] < HMM_VAR_FLOOR) var[k][d] = HMM_VAR_FLOOR;
      }
    }
#endif
  }
};

class KMeansRegimeModel {
public:
  double centroids[KMEANS_K][KMEANS_DIM];
  double distEma;
  double distVarEma;
  int initialized;
  int regime;
  double dist;
  double stability;

  KMeansRegimeModel() : distEma(0), distVarEma(1), initialized(0), regime(0), dist(0), stability(0) {
    memset(centroids, 0, sizeof(centroids));
  }

  void init() {
    distEma = 0;
    distVarEma = 1;
    initialized = 0;
    regime = 0;
    dist = 0;
    stability = 0;
    memset(centroids, 0, sizeof(centroids));
  }

  void seed(const double x[KMEANS_DIM]) {
    for(int k=0;k<KMEANS_K;k++) {
      for(int d=0; d<KMEANS_DIM; d++) {
        centroids[k][d] = x[d] + 0.03 * (k - 1);
      }
    }
    initialized = 1;
  }

  static double clampRange(double x, double lo, double hi) {
    if(x < lo) return lo;
    if(x > hi) return hi;
    return x;
  }

  void predictAndUpdate(const double x[KMEANS_DIM]) {
    if(!initialized) seed(x);

    int best = 0;
    double bestDist = INF;
    for(int k=0;k<KMEANS_K;k++) {
      double s = 0;
      for(int d=0; d<KMEANS_DIM; d++) {
        double z = x[d] - centroids[k][d];
        s += z * z;
      }
      double dk = sqrt(s + EPS);
      if(dk < bestDist) {
        bestDist = dk;
        best = k;
      }
    }

    regime = best;
    dist = bestDist;

    distEma = (1.0 - KMEANS_DIST_EMA) * distEma + KMEANS_DIST_EMA * dist;
    double dd = dist - distEma;
    distVarEma = (1.0 - KMEANS_DIST_EMA) * distVarEma + KMEANS_DIST_EMA * dd * dd;
    double distStd = sqrt(distVarEma + EPS);
    double zDist = (dist - distEma) / (distStd + EPS);
    stability = clampRange(1.0 / (1.0 + exp(zDist)), 0.0, 1.0);

#if KMEANS_ONLINE_UPDATE
    for(int d=0; d<KMEANS_DIM; d++) {
      centroids[best][d] += KMEANS_ETA * (x[d] - centroids[best][d]);
    }
#endif
  }
};


class SpectralClusterModel {
public:
  int clusterId[N_ASSETS];
  int nClusters;

  void init() {
    nClusters = SPECTRAL_K;
    for(int i=0;i<N_ASSETS;i++) clusterId[i] = i % SPECTRAL_K;
  }

  void update(const fvar* distMatrix) {
    if(!distMatrix) return;
    // lightweight deterministic clustering surrogate from distance rows
    for(int i=0;i<N_ASSETS;i++) {
      double sig = 0;
      for(int j=0;j<N_ASSETS;j++) {
        if(i == j) continue;
        double d = (double)distMatrix[i*N_ASSETS + j];
        if(d < INF) sig += d;
      }
      int cid = (int)fmod(fabs(sig * 1000.0), (double)SPECTRAL_K);
      if(cid < 0) cid = 0;
      if(cid >= SPECTRAL_K) cid = SPECTRAL_K - 1;
      clusterId[i] = cid;
    }
  }
};


class HierarchicalClusteringModel {
public:
  int clusterCoarse[N_ASSETS];
  int clusterFine[N_ASSETS];
  int nCoarse;
  int nFine;

  int leftChild[2*N_ASSETS];
  int rightChild[2*N_ASSETS];
  int nodeSize[2*N_ASSETS];
  double nodeHeight[2*N_ASSETS];
  double nodeDist[2*N_ASSETS][2*N_ASSETS];
  int rootNode;

  void init() {
    nCoarse = HCLUST_COARSE_K;
    nFine = HCLUST_FINE_K;
    rootNode = N_ASSETS - 1;
    for(int i=0;i<N_ASSETS;i++) {
      clusterCoarse[i] = i % HCLUST_COARSE_K;
      clusterFine[i] = i % HCLUST_FINE_K;
    }
  }

  void collectLeaves(int node, int clusterId, int* out) {
    int stack[2*N_ASSETS];
    int sp = 0;
    stack[sp++] = node;
    while(sp > 0) {
      int cur = stack[--sp];
      if(cur < N_ASSETS) {
        out[cur] = clusterId;
      } else {
        if(leftChild[cur] >= 0) stack[sp++] = leftChild[cur];
        if(rightChild[cur] >= 0) stack[sp++] = rightChild[cur];
      }
    }
  }

  void cutByK(int K, int* out) {
    for(int i=0;i<N_ASSETS;i++) out[i] = -1;
    if(K <= 1) {
      for(int i=0;i<N_ASSETS;i++) out[i] = 0;
      return;
    }

    int clusters[2*N_ASSETS];
    int count = 1;
    clusters[0] = rootNode;

    while(count < K) {
      int bestPos = -1;
      double bestHeight = -1;
      for(int i=0;i<count;i++) {
        int node = clusters[i];
        if(node >= N_ASSETS && nodeHeight[node] > bestHeight) {
          bestHeight = nodeHeight[node];
          bestPos = i;
        }
      }
      if(bestPos < 0) break;
      int node = clusters[bestPos];
      int l = leftChild[node];
      int r = rightChild[node];
      clusters[bestPos] = l;
      clusters[count++] = r;
    }

    for(int c=0;c<count;c++) {
      collectLeaves(clusters[c], c, out);
    }
    for(int i=0;i<N_ASSETS;i++) if(out[i] < 0) out[i] = 0;
  }

  void update(const fvar* distMatrix) {
    if(!distMatrix) return;

    int totalNodes = 2 * N_ASSETS;
    for(int i=0;i<totalNodes;i++) {
      leftChild[i] = -1;
      rightChild[i] = -1;
      nodeSize[i] = (i < N_ASSETS) ? 1 : 0;
      nodeHeight[i] = 0;
      for(int j=0;j<totalNodes;j++) nodeDist[i][j] = INF;
    }

    for(int i=0;i<N_ASSETS;i++) {
      for(int j=0;j<N_ASSETS;j++) {
        if(i == j) nodeDist[i][j] = 0;
        else {
          double d = (double)distMatrix[i*N_ASSETS + j];
          if(d < 0 || d >= INF) d = 1.0;
          nodeDist[i][j] = d;
        }
      }
    }

    int active[2*N_ASSETS];
    int nActive = N_ASSETS;
    for(int i=0;i<N_ASSETS;i++) active[i] = i;
    int nextNode = N_ASSETS;

    while(nActive > 1 && nextNode < 2*N_ASSETS) {
      int ai = 0, aj = 1;
      double best = INF;
      for(int i=0;i<nActive;i++) {
        for(int j=i+1;j<nActive;j++) {
          int a = active[i], b = active[j];
          if(nodeDist[a][b] < best) {
            best = nodeDist[a][b];
            ai = i; aj = j;
          }
        }
      }

      int a = active[ai];
      int b = active[aj];
      int m = nextNode++;

      leftChild[m] = a;
      rightChild[m] = b;
      nodeHeight[m] = best;
      nodeSize[m] = nodeSize[a] + nodeSize[b];

      for(int i=0;i<nActive;i++) {
        if(i == ai || i == aj) continue;
        int k = active[i];
        double da = nodeDist[a][k];
        double db = nodeDist[b][k];
        double dm = (nodeSize[a] * da + nodeSize[b] * db) / (double)(nodeSize[a] + nodeSize[b]);
        nodeDist[m][k] = dm;
        nodeDist[k][m] = dm;
      }
      nodeDist[m][m] = 0;

      if(aj < ai) { int t=ai; ai=aj; aj=t; }
      for(int i=aj;i<nActive-1;i++) active[i] = active[i+1];
      nActive--;
      for(int i=ai;i<nActive-1;i++) active[i] = active[i+1];
      nActive--;
      active[nActive++] = m;
    }

    rootNode = active[0];

    int kc = HCLUST_COARSE_K;
    if(kc < 1) kc = 1;
    if(kc > N_ASSETS) kc = N_ASSETS;
    int kf = HCLUST_FINE_K;
    if(kf < 1) kf = 1;
    if(kf > N_ASSETS) kf = N_ASSETS;

    cutByK(kc, clusterCoarse);
    cutByK(kf, clusterFine);
    nCoarse = kc;
    nFine = kf;
  }
};


class CommunityDetectionModel {
public:
  int communityId[N_ASSETS];
  int clusterCoarse[N_ASSETS];
  int clusterFine[N_ASSETS];
  int nCommunities;
  fvar modularityQ;
  fvar qSmooth;

  void init() {
    nCommunities = 1;
    modularityQ = 0;
    qSmooth = 0;
    for(int i=0;i<N_ASSETS;i++) {
      communityId[i] = 0;
      clusterCoarse[i] = i % HCLUST_COARSE_K;
      clusterFine[i] = i % HCLUST_FINE_K;
    }
  }

  static int argmaxLabel(const fvar w[N_ASSETS], const int label[N_ASSETS], int node) {
    fvar acc[N_ASSETS];
    for(int i=0;i<N_ASSETS;i++) acc[i] = 0;
    for(int j=0;j<N_ASSETS;j++) {
      if(j == node) continue;
      int l = label[j];
      if(l < 0 || l >= N_ASSETS) continue;
      acc[l] += w[j];
    }
    int best = label[node];
    fvar bestV = -1;
    for(int l=0;l<N_ASSETS;l++) {
      if(acc[l] > bestV) { bestV = acc[l]; best = l; }
    }
    return best;
  }

  void update(const fvar* corrMatrix, const fvar* distMatrix) {
    if(!corrMatrix || !distMatrix) return;

    fvar W[N_ASSETS][N_ASSETS];
    fvar degree[N_ASSETS];
    int label[N_ASSETS];

    for(int i=0;i<N_ASSETS;i++) {
      degree[i] = 0;
      label[i] = i;
      for(int j=0;j<N_ASSETS;j++) {
        if(i == j) W[i][j] = 0;
        else {
          fvar w = (fvar)fabs((double)corrMatrix[i*N_ASSETS + j]);
          if(w < (fvar)COMM_W_MIN) w = 0;
          W[i][j] = w;
          degree[i] += w;
        }
      }
    }

    // Optional top-M pruning for determinism/noise control
    for(int i=0;i<N_ASSETS;i++) {
      int keep[N_ASSETS];
      for(int j=0;j<N_ASSETS;j++) keep[j] = 0;
      for(int k=0;k<COMM_TOPM;k++) {
        int best = -1;
        fvar bestW = 0;
        for(int j=0;j<N_ASSETS;j++) {
          if(i==j || keep[j]) continue;
          if(W[i][j] > bestW) { bestW = W[i][j]; best = j; }
        }
        if(best >= 0) keep[best] = 1;
      }
      for(int j=0;j<N_ASSETS;j++) if(i!=j && !keep[j]) W[i][j] = 0;
    }

    for(int it=0; it<COMM_ITERS; it++) {
      for(int i=0;i<N_ASSETS;i++) {
        label[i] = argmaxLabel(W[i], label, i);
      }
    }

    // compress labels
    int map[N_ASSETS];
    for(int i=0;i<N_ASSETS;i++) map[i] = -1;
    int nLab = 0;
    for(int i=0;i<N_ASSETS;i++) {
      int l = label[i];
      if(l < 0 || l >= N_ASSETS) l = 0;
      if(map[l] < 0) map[l] = nLab++;
      communityId[i] = map[l];
    }
    if(nLab < 1) nLab = 1;
    nCommunities = nLab;

    // modularity approximation
    fvar m2 = 0;
    for(int i=0;i<N_ASSETS;i++) for(int j=0;j<N_ASSETS;j++) m2 += W[i][j];
    if(m2 < (fvar)EPS) {
      modularityQ = 0;
    } else {
      fvar q = 0;
      for(int i=0;i<N_ASSETS;i++) {
        for(int j=0;j<N_ASSETS;j++) {
          if(communityId[i] == communityId[j]) {
            q += W[i][j] - (degree[i] * degree[j] / m2);
          }
        }
      }
      modularityQ = q / m2;
    }

    qSmooth = (fvar)(1.0 - COMM_Q_EMA) * qSmooth + (fvar)COMM_Q_EMA * modularityQ;

    for(int i=0;i<N_ASSETS;i++) {
      int c = communityId[i];
      if(c < 0) c = 0;
      clusterCoarse[i] = c % HCLUST_COARSE_K;
      clusterFine[i] = c % HCLUST_FINE_K;
    }
  }
};


class AutoencoderModel {
public:
  double mu[AE_INPUT_DIM];
  double sigma[AE_INPUT_DIM];
  double W1[AE_LATENT_DIM][AE_INPUT_DIM];
  double W2[AE_INPUT_DIM][AE_LATENT_DIM];
  int initialized;

  void init() {
    initialized = 1;
    for(int i=0;i<AE_INPUT_DIM;i++) {
      mu[i] = 0;
      sigma[i] = 1;
    }
    for(int z=0;z<AE_LATENT_DIM;z++) {
      for(int d=0;d<AE_INPUT_DIM;d++) {
        double w = sin((double)(z+1)*(d+1)) * 0.05;
        W1[z][d] = w;
        W2[d][z] = w;
      }
    }
  }

  static double act(double x) {
    if(x > 4) x = 4;
    if(x < -4) x = -4;
    return tanh(x);
  }

  double infer(const double xIn[AE_INPUT_DIM]) {
    if(!initialized) init();

    double x[AE_INPUT_DIM];
    for(int d=0;d<AE_INPUT_DIM;d++) x[d] = (xIn[d] - mu[d]) / (sigma[d] + EPS);

    double z[AE_LATENT_DIM];
    for(int k=0;k<AE_LATENT_DIM;k++) {
      double s = 0;
      for(int d=0;d<AE_INPUT_DIM;d++) s += W1[k][d] * x[d];
      z[k] = act(s);
    }

    double recon[AE_INPUT_DIM];
    for(int d=0;d<AE_INPUT_DIM;d++) {
      double s = 0;
      for(int k=0;k<AE_LATENT_DIM;k++) s += W2[d][k] * z[k];
      recon[d] = act(s);
    }

    double err = 0;
    for(int d=0;d<AE_INPUT_DIM;d++) {
      double e = x[d] - recon[d];
      err += e*e;
    }
    err /= (double)AE_INPUT_DIM;

    for(int d=0;d<AE_INPUT_DIM;d++) {
      mu[d] = (1.0 - AE_NORM_ALPHA) * mu[d] + AE_NORM_ALPHA * xIn[d];
      double dv = xIn[d] - mu[d];
      sigma[d] = (1.0 - AE_NORM_ALPHA) * sigma[d] + AE_NORM_ALPHA * sqrt(dv*dv + EPS);
      if(sigma[d] < 1e-5) sigma[d] = 1e-5;
    }
    return err;
  }
};

class NoveltyController {
public:
  double errEma;
  double errVar;
  double zRecon;
  int regime;
  double riskScale;

  void init() {
    errEma = 0;
    errVar = 1;
    zRecon = 0;
    regime = 0;
    riskScale = 1.0;
  }

  static double clampRange(double x, double lo, double hi) {
    if(x < lo) return lo;
    if(x > hi) return hi;
    return x;
  }

  void update(double reconError) {
    errEma = (1.0 - AE_ERR_EMA) * errEma + AE_ERR_EMA * reconError;
    double d = reconError - errEma;
    errVar = (1.0 - AE_ERR_EMA) * errVar + AE_ERR_EMA * d*d;
    double errStd = sqrt(errVar + EPS);
    zRecon = (reconError - errEma) / (errStd + EPS);

    if(zRecon >= AE_Z_HIGH) { regime = 2; riskScale = 0.20; }
    else if(zRecon >= AE_Z_LOW) { regime = 1; riskScale = 0.60; }
    else { regime = 0; riskScale = 1.00; }

    riskScale = clampRange(riskScale, 0.20, 1.00);
  }

  void apply(int* topK, double* scoreScale) {
    if(regime == 2) {
      if(*topK > 3) *topK -= 2;
      *scoreScale *= 0.60;
    } else if(regime == 1) {
      if(*topK > 3) *topK -= 1;
      *scoreScale *= 0.85;
    }
    if(*topK < 1) *topK = 1;
    if(*topK > TOP_K) *topK = TOP_K;
    *scoreScale = clampRange(*scoreScale, 0.10, 2.00);
  }
};

class StrategyController {
public:
  UnsupervisedModel unsup;
  RLAgent rl;
  PCAModel pca;
  GMMRegimeModel gmm;
  HMMRegimeModel hmm;
  KMeansRegimeModel kmeans;
  int dynamicTopK;
  double scoreScale;
  int regime;
  double adaptiveGamma;
  double adaptiveAlpha;
  double adaptiveBeta;
  double adaptiveLambda;
  double riskScale;
  int cooldown;

  StrategyController()
  : dynamicTopK(TOP_K), scoreScale(1.0), regime(0),
    adaptiveGamma(1.0), adaptiveAlpha(1.0), adaptiveBeta(1.0), adaptiveLambda(1.0), riskScale(1.0), cooldown(0) {}

  static double clampRange(double x, double lo, double hi) {
    if(x < lo) return lo;
    if(x > hi) return hi;
    return x;
  }

  void init() {
    unsup.init();
    rl.init();
    pca.init();
    gmm.init();
    hmm.init();
    kmeans.init();
    dynamicTopK = TOP_K;
    scoreScale = 1.0;
    regime = 0;
    adaptiveGamma = 1.0;
    adaptiveAlpha = 1.0;
    adaptiveBeta = 1.0;
    adaptiveLambda = 1.0;
    riskScale = 1.0;
    cooldown = 0;
  }

  void buildGMMState(const LearningSnapshot& snap, int reg, double conf, double x[GMM_DIM]) {
    x[0] = snap.meanScore;
    x[1] = snap.meanCompactness;
    x[2] = snap.meanVol;
    x[3] = pca.dom;
    x[4] = pca.rot;
    x[5] = (double)reg / 2.0;
    x[6] = conf;
    x[7] = snap.meanScore - snap.meanCompactness;
  }

  void buildHMMObs(const LearningSnapshot& snap, int reg, double conf, double x[HMM_DIM]) {
    x[0] = pca.latent[0];
    x[1] = pca.latent[1];
    x[2] = pca.latent[2];
    x[3] = snap.meanVol;
    x[4] = snap.meanScore;
    x[5] = snap.meanCompactness;
    x[6] = (double)reg / 2.0;
    x[7] = conf;
  }

  void buildKMeansState(const LearningSnapshot& snap, int reg, double conf, double x[KMEANS_DIM]) {
    x[0] = pca.latent[0];
    x[1] = pca.latent[1];
    x[2] = pca.latent[2];
    x[3] = snap.meanVol;
    x[4] = snap.meanScore;
    x[5] = snap.meanCompactness;
    x[6] = (double)reg / 2.0;
    x[7] = conf;
  }

  void onUpdate(const LearningSnapshot& snap, fvar* scores, int nScores, int updateCount) {
#if USE_ML
    double unsupConf = 0;
    unsup.update(snap, &regime, &unsupConf);
#if USE_PCA
    pca.update(snap, regime, unsupConf);
#else
    pca.dom = 0.5;
    pca.rot = 0.0;
#endif

#if USE_GMM
    double gx[GMM_DIM];
    buildGMMState(snap, regime, unsupConf, gx);
    gmm.infer(gx);
#if USE_HMM
    double hx[HMM_DIM];
    buildHMMObs(snap, regime, unsupConf, hx);
    hmm.filter(hx);
#if USE_KMEANS
    double kx[KMEANS_DIM];
    buildKMeansState(snap, regime, unsupConf, kx);
    kmeans.predictAndUpdate(kx);
#endif
#endif
    // regime presets: [gamma, alpha, beta, lambda]
    const double presets[GMM_K][4] = {
      {1.05, 1.00, 0.95, 1.00},
      {0.95, 1.05, 1.05, 0.95},
      {1.00, 0.95, 1.10, 1.05}
    };
    adaptiveGamma = 0;
    adaptiveAlpha = 0;
    adaptiveBeta  = 0;
    adaptiveLambda = 0;
    for(int k=0;k<GMM_K;k++) {
#if USE_HMM
      adaptiveGamma += hmm.posterior[k] * presets[k][0];
      adaptiveAlpha += hmm.posterior[k] * presets[k][1];
      adaptiveBeta  += hmm.posterior[k] * presets[k][2];
      adaptiveLambda += hmm.posterior[k] * presets[k][3];
#else
      adaptiveGamma += gmm.p[k] * presets[k][0];
      adaptiveAlpha += gmm.p[k] * presets[k][1];
      adaptiveBeta  += gmm.p[k] * presets[k][2];
      adaptiveLambda += gmm.p[k] * presets[k][3];
#endif
    }
#if USE_HMM
    double entNorm = hmm.entropy / log((double)HMM_K + EPS);
    riskScale = clampRange(1.0 - 0.45 * entNorm, HMM_MIN_RISK, 1.0);
    if(hmm.entropy > HMM_ENTROPY_TH || hmm.switchProb > HMM_SWITCH_TH) cooldown = HMM_COOLDOWN_UPDATES;
    else if(cooldown > 0) cooldown--;
#else
    double entNorm = gmm.entropy / log((double)GMM_K + EPS);
    riskScale = clampRange(1.0 - GMM_ENTROPY_COEFF * entNorm, GMM_MIN_RISK, 1.0);
#endif
#else
    adaptiveGamma = 1.0 + 0.35 * pca.dom - 0.25 * pca.rot;
    adaptiveAlpha = 1.0 + 0.30 * pca.dom;
    adaptiveBeta  = 1.0 + 0.25 * pca.rot;
    adaptiveLambda = 1.0 + 0.20 * pca.dom - 0.20 * pca.rot;
    riskScale = 1.0;
#endif

    adaptiveGamma = clampRange(adaptiveGamma, 0.80, 1.40);
    adaptiveAlpha = clampRange(adaptiveAlpha, 0.85, 1.35);
    adaptiveBeta  = clampRange(adaptiveBeta, 0.85, 1.35);
    adaptiveLambda = clampRange(adaptiveLambda, 0.85, 1.25);

#if USE_KMEANS
    const double kmPreset[KMEANS_K][4] = {
      {1.02, 1.00, 0.98, 1.00},
      {1.08, 0.96, 0.95, 1.02},
      {0.94, 1.08, 1.08, 0.92}
    };
    int kr = kmeans.regime;
    if(kr < 0) kr = 0;
    if(kr >= KMEANS_K) kr = KMEANS_K - 1;
    double wkm = clampRange(kmeans.stability, 0.0, 1.0);
    adaptiveGamma = (1.0 - wkm) * adaptiveGamma + wkm * kmPreset[kr][0];
    adaptiveAlpha = (1.0 - wkm) * adaptiveAlpha + wkm * kmPreset[kr][1];
    adaptiveBeta  = (1.0 - wkm) * adaptiveBeta  + wkm * kmPreset[kr][2];
    adaptiveLambda = (1.0 - wkm) * adaptiveLambda + wkm * kmPreset[kr][3];
    if(kmeans.stability < KMEANS_STABILITY_MIN) {
      riskScale *= 0.85;
      if(cooldown < 1) cooldown = 1;
    }
#endif

    rl.updateReward(snap.meanScore);
    rl.lastAction = rl.chooseAction(updateCount);

    int baseTopK = TOP_K;
    if(rl.lastAction == 0) baseTopK = TOP_K - 2;
    else if(rl.lastAction == 1) baseTopK = TOP_K;
    else if(rl.lastAction == 2) baseTopK = TOP_K;
    else baseTopK = TOP_K - 1;

    double profileBias[5] = {1.00, 0.98, 0.99, 0.97, 1.02};
    scoreScale = (1.0 + 0.06 * (adaptiveGamma - 1.0) + 0.04 * (adaptiveAlpha - 1.0) - 0.04 * (adaptiveBeta - 1.0))
               * profileBias[STRATEGY_PROFILE] * riskScale;

    if(pca.dom > 0.60) baseTopK -= 1;
    if(pca.rot > 0.15) baseTopK -= 1;
#if USE_HMM
    if(hmm.regime == 2) baseTopK -= 1;
    if(cooldown > 0) baseTopK -= 1;
#if USE_KMEANS
    if(kmeans.regime == 2) baseTopK -= 1;
#endif
#elif USE_GMM
    if(gmm.bestRegime == 2) baseTopK -= 1;
#endif

    dynamicTopK = baseTopK;
    if(dynamicTopK < 1) dynamicTopK = 1;
    if(dynamicTopK > TOP_K) dynamicTopK = TOP_K;

    for(int i=0; i<nScores; i++) {
      double s = (double)scores[i] * scoreScale;
      if(s > 1.0) s = 1.0;
      if(s < 0.0) s = 0.0;
      scores[i] = (fvar)s;
    }
#else
    (void)snap; (void)scores; (void)nScores; (void)updateCount;
#endif
  }
};

// ---------------------------- Strategy ----------------------------

class CompactDominantStrategy {
public:
  ExposureTable exposureTable;
  FeatureBufferSoA featSoA;
  OpenCLBackend openCL;

  SlabAllocator<fvar> corrMatrix;
  SlabAllocator<fvar> distMatrix;
  SlabAllocator<fvar> compactness;
  SlabAllocator<fvar> scores;

  SlabAllocator<float> featLinear;
  SlabAllocator<float> corrLinear;

  int barCount;
  int updateCount;
  StrategyController controller;
  HierarchicalClusteringModel hclust;
  CommunityDetectionModel comm;
  AutoencoderModel ae;
  NoveltyController novelty;

  CompactDominantStrategy() : barCount(0), updateCount(0) {}

  void init() {
    printf("CompactDominant_v12: Initializing...\n");

    exposureTable.init();
    featSoA.init(N_ASSETS, FEAT_WINDOW);

    corrMatrix.init(N_ASSETS * N_ASSETS);
    distMatrix.init(N_ASSETS * N_ASSETS);
    compactness.init(N_ASSETS);
    scores.init(N_ASSETS);

    featLinear.init(FEAT_N * N_ASSETS * FEAT_WINDOW);
    corrLinear.init(N_ASSETS * N_ASSETS);

    openCL.init();
    printf("CompactDominant_v12: Ready (OpenCL=%d)\n", openCL.ready);
    controller.init();
    hclust.init();
    comm.init();
    ae.init();
    novelty.init();

    barCount = 0;
    updateCount = 0;
  }

  void shutdown() {
    printf("CompactDominant_v12: Shutting down...\n");

    openCL.shutdown();

    featSoA.shutdown();
    corrMatrix.shutdown();
    distMatrix.shutdown();
    compactness.shutdown();
    scores.shutdown();

    featLinear.shutdown();
    corrLinear.shutdown();
  }

  void computeFeatures(int assetIdx) {
    asset((char*)ASSET_NAMES[assetIdx]);

    vars C = series(priceClose(0));
    vars V = series(Volatility(C, 20));

    if(Bar < 50) return;

    fvar r1 = (fvar)log(C[0] / C[1]);
    fvar rN = (fvar)log(C[0] / C[12]);
    fvar vol = (fvar)V[0];
    fvar zscore = (fvar)((C[0] - C[50]) / (V[0] * 20.0 + EPS));
    fvar rangeP = (fvar)((C[0] - C[50]) / (C[0] + EPS));
    fvar flow = (fvar)(r1 * vol);
    fvar regime = (fvar)((vol > 0.001) ? 1.0 : 0.0);
    fvar volOfVol = (fvar)(vol * vol);
    fvar persistence = (fvar)fabs(r1);

    featSoA.push(0, assetIdx, r1);
    featSoA.push(1, assetIdx, rN);
    featSoA.push(2, assetIdx, vol);
    featSoA.push(3, assetIdx, zscore);
    featSoA.push(4, assetIdx, rangeP);
    featSoA.push(5, assetIdx, flow);
    featSoA.push(6, assetIdx, regime);
    featSoA.push(7, assetIdx, volOfVol);
    featSoA.push(8, assetIdx, persistence);
  }

  void computeCorrelationMatrixCPU() {
    for(int i=0;i<N_ASSETS*N_ASSETS;i++) corrMatrix[i] = 0;

    for(int f=0; f<FEAT_N; f++){
      for(int a=0; a<N_ASSETS; a++){
        for(int b=a+1; b<N_ASSETS; b++){
          fvar mx = 0, my = 0;
          for(int t=0; t<FEAT_WINDOW; t++){
            mx += featSoA.get(f,a,t);
            my += featSoA.get(f,b,t);
          }
          mx /= (fvar)FEAT_WINDOW;
          my /= (fvar)FEAT_WINDOW;

          fvar sxx = 0, syy = 0, sxy = 0;
          for(int t=0; t<FEAT_WINDOW; t++){
            fvar dx = featSoA.get(f,a,t) - mx;
            fvar dy = featSoA.get(f,b,t) - my;
            sxx += dx*dx;
            syy += dy*dy;
            sxy += dx*dy;
          }

          fvar den = (fvar)sqrt((double)(sxx*syy + (fvar)EPS));
          fvar corr = 0;
          if(den > (fvar)EPS) corr = sxy / den;
          else corr = 0;

          int idx = a*N_ASSETS + b;
          corrMatrix[idx] += corr / (fvar)FEAT_N;
          corrMatrix[b*N_ASSETS + a] = corrMatrix[idx];
        }
      }
    }
  }

  void buildFeatLinear() {
    int idx = 0;
    for(int f=0; f<FEAT_N; f++){
      for(int a=0; a<N_ASSETS; a++){
        for(int t=0; t<FEAT_WINDOW; t++){
          featLinear[idx] = (float)featSoA.get(f, a, t);
          idx++;
        }
      }
    }
  }

  void computeCorrelationMatrix() {
    if(openCL.ready) {
      buildFeatLinear();

      for(int i=0;i<N_ASSETS*N_ASSETS;i++) corrLinear[i] = 0.0f;

      int ok = openCL.computeCorrelationMatrixCL(
        featLinear.data,
        corrLinear.data,
        N_ASSETS,
        FEAT_N,
        FEAT_WINDOW
      );

      if(ok) {
        for(int i=0;i<N_ASSETS*N_ASSETS;i++) corrMatrix[i] = (fvar)0;

        for(int a=0; a<N_ASSETS; a++){
          corrMatrix[a*N_ASSETS + a] = (fvar)1.0;
          for(int b=a+1; b<N_ASSETS; b++){
            float c = corrLinear[a*N_ASSETS + b];
            corrMatrix[a*N_ASSETS + b] = (fvar)c;
            corrMatrix[b*N_ASSETS + a] = (fvar)c;
          }
        }
        return;
      }

      printf("OpenCL: runtime fail -> CPU fallback\n");
      openCL.ready = 0;
    }

    computeCorrelationMatrixCPU();
  }

  void computeDistanceMatrix() {
    for(int i=0;i<N_ASSETS;i++){
      for(int j=0;j<N_ASSETS;j++){
        if(i == j) {
          distMatrix[i*N_ASSETS + j] = (fvar)0;
        } else {
          fvar corrDist = (fvar)1.0 - (fvar)fabs((double)corrMatrix[i*N_ASSETS + j]);
          fvar expDist  = (fvar)exposureTable.getDist(i, j);
          fvar blended = (fvar)LAMBDA_META * corrDist + (fvar)(1.0 - (double)LAMBDA_META) * expDist;
          distMatrix[i*N_ASSETS + j] = blended;
        }
      }
    }
  }

  void floydWarshall() {
    fvar d[28][28];

    for(int i=0;i<N_ASSETS;i++){
      for(int j=0;j<N_ASSETS;j++){
        d[i][j] = distMatrix[i*N_ASSETS + j];
        if(i == j) d[i][j] = (fvar)0;
        if(d[i][j] < (fvar)0) d[i][j] = (fvar)INF;
      }
    }

    for(int k=0;k<N_ASSETS;k++){
      for(int i=0;i<N_ASSETS;i++){
        for(int j=0;j<N_ASSETS;j++){
          if(d[i][k] < (fvar)INF && d[k][j] < (fvar)INF) {
            fvar nk = d[i][k] + d[k][j];
            if(nk < d[i][j]) d[i][j] = nk;
          }
        }
      }
    }

    for(int i=0;i<N_ASSETS;i++){
      fvar w = 0;
      for(int j=i+1;j<N_ASSETS;j++){
        if(d[i][j] < (fvar)INF) w += d[i][j];
      }
      if(w > (fvar)0) compactness[i] = (fvar)(1.0 / (1.0 + (double)w));
      else compactness[i] = (fvar)0;
    }
  }

  void computeScores() {
    for(int i=0;i<N_ASSETS;i++){
      fvar coupling = 0;
      int count = 0;

      for(int j=0;j<N_ASSETS;j++){
        if(i != j && distMatrix[i*N_ASSETS + j] < (fvar)INF) {
          coupling += compactness[j];
          count++;
        }
      }

      fvar pCouple = 0;
      if(count > 0) pCouple = coupling / (fvar)count;
      else pCouple = (fvar)0;

      fvar regime = featSoA.get(6, i, 0);
      fvar rawScore = (fvar)ALPHA * regime + (fvar)GAMMA * compactness[i] - (fvar)BETA * pCouple;

      if(rawScore > (fvar)30) rawScore = (fvar)30;
      if(rawScore < (fvar)-30) rawScore = (fvar)-30;

      scores[i] = (fvar)(1.0 / (1.0 + exp(-(double)rawScore)));
    }
  }

  LearningSnapshot buildSnapshot() {
    LearningSnapshot s;
    s.meanScore = 0;
    s.meanCompactness = 0;
    s.meanVol = 0;
    for(int i=0;i<N_ASSETS;i++) {
      s.meanScore += (double)scores[i];
      s.meanCompactness += (double)compactness[i];
      s.meanVol += (double)featSoA.get(2, i, 0);
    }
    s.meanScore /= (double)N_ASSETS;
    s.meanCompactness /= (double)N_ASSETS;
    s.meanVol /= (double)N_ASSETS;
    s.regime = 0;
    s.regimeConfidence = 0;
    return s;
  }

  void onBar() {
    barCount++;

    for(int i=0;i<N_ASSETS;i++) computeFeatures(i);

    if(barCount % UPDATE_EVERY == 0) {
      updateCount++;

      computeCorrelationMatrix();
      computeDistanceMatrix();
#if USE_COMMUNITY
      hclust.update(distMatrix.data);
#endif
#if USE_COMMUNITY
      comm.update(corrMatrix.data, distMatrix.data);
#endif
      floydWarshall();
      computeScores();
      controller.onUpdate(buildSnapshot(), scores.data, N_ASSETS, updateCount);
#if USE_AE
      double aeState[AE_INPUT_DIM];
      double ms=0, mc=0, mv=0;
      for(int i=0;i<N_ASSETS;i++){ ms += (double)scores[i]; mc += (double)compactness[i]; mv += (double)featSoA.get(2, i, 0); }
      ms /= (double)N_ASSETS; mc /= (double)N_ASSETS; mv /= (double)N_ASSETS;
      aeState[0] = ms;
      aeState[1] = mc;
      aeState[2] = mv;
      aeState[3] = controller.scoreScale;
      aeState[4] = (double)controller.dynamicTopK;
      aeState[5] = (double)barCount / (double)(LookBack + 1);
      aeState[6] = (double)updateCount / 1000.0;
      aeState[7] = (double)openCL.ready;
      double reconErr = ae.infer(aeState);
      novelty.update(reconErr);
      novelty.apply(&controller.dynamicTopK, &controller.scoreScale);
      for(int i=0;i<N_ASSETS;i++){{
        double s = (double)scores[i] * novelty.riskScale;
        if(s > 1.0) s = 1.0;
        if(s < 0.0) s = 0.0;
        scores[i] = (fvar)s;
      }}
#endif
      printTopK();
    }
  }

  void printTopK() {
    int indices[N_ASSETS];
    for(int i=0;i<N_ASSETS;i++) indices[i] = i;

    int topN = controller.dynamicTopK;
#if USE_COMMUNITY
    if(comm.qSmooth < (fvar)COMM_Q_LOW && topN > 2) topN--;
    if(comm.qSmooth > (fvar)COMM_Q_HIGH && topN < TOP_K) topN++;
#endif
    for(int i=0;i<topN;i++){
      for(int j=i+1;j<N_ASSETS;j++){
        if(scores[indices[j]] > scores[indices[i]]) {
          int tmp = indices[i];
          indices[i] = indices[j];
          indices[j] = tmp;
        }
      }
    }

    if(updateCount % 10 == 0) {
      printf("===CompactDominant_v12 Top-K(update#%d,OpenCL=%d)===\n",
        updateCount, openCL.ready);
#if USE_COMMUNITY
      printf(" communities=%d Q=%.4f\n", comm.nCommunities, (double)comm.qSmooth);
#endif

      int selected[N_ASSETS];
      int selCount = 0;
#if USE_COMMUNITY
      int coarseUsed[HCLUST_COARSE_K];
      int fineTake[HCLUST_FINE_K];
      int fineCap = (topN + HCLUST_FINE_K - 1) / HCLUST_FINE_K;
      for(int c=0;c<HCLUST_COARSE_K;c++) coarseUsed[c] = 0;
      for(int c=0;c<HCLUST_FINE_K;c++) fineTake[c] = 0;

      for(int i=0;i<topN;i++){
        int idx = indices[i];
        int cid = comm.clusterCoarse[idx];
        if(cid < 0 || cid >= HCLUST_COARSE_K) cid = 0;
        if(coarseUsed[cid]) continue;
        coarseUsed[cid] = 1;
        selected[selCount++] = idx;
        int fid = comm.clusterFine[idx];
        if(fid < 0 || fid >= HCLUST_FINE_K) fid = 0;
        fineTake[fid]++;
      }

      for(int i=0;i<topN && selCount<topN;i++){
        int idx = indices[i];
        int dup = 0;
        for(int k=0;k<selCount;k++) if(selected[k]==idx){ dup=1; break; }
        if(dup) continue;
        int fid = comm.clusterFine[idx];
        if(fid < 0 || fid >= HCLUST_FINE_K) fid = 0;
        if(fineTake[fid] >= fineCap) continue;
        selected[selCount++] = idx;
        fineTake[fid]++;
      }
#else
      for(int i=0;i<topN;i++) selected[selCount++] = indices[i];
#endif
      for(int i=0;i<selCount;i++){
        int idx = selected[i];
        printf(" %d.%s: score=%.4f, C=%.4f\n", i+1, ASSET_NAMES[idx], (double)scores[idx], (double)compactness[idx]);
      }
    }
  }
};

// ---------------------------- Zorro DLL entry ----------------------------

static CompactDominantStrategy* S = NULL;

DLLFUNC void run()
{
  if(is(INITRUN)) {
    BarPeriod = 60;
    LookBack = max(LookBack, FEAT_WINDOW + 50);

    asset((char*)ASSET_NAMES[0]);

    if(!S) {
      S = new CompactDominantStrategy();
      S->init();
    }
  }

  if(is(EXITRUN)) {
    if(S) {
      S->shutdown();
      delete S;
      S = NULL;
    }
    return;
  }

  if(!S || Bar < LookBack)
    return;

  S->onBar();
}