|
1 registered members (clint000),
9,315
guests, and 0
spiders. |
|
Key:
Admin,
Global Mod,
Mod
|
|
|
|
Yesterday at 00:09
In case it helps, this is the Log from the Gateway the first time Zorro connects to it after a restart.
2026-03-12 22:43:01.971 [TC] INFO [JTS-SocketListener-55] - State: HEADER, IsAPI: UNKNOWN 2026-03-12 22:43:02.007 [TC] INFO [JTS-SocketListener-55] - State: STOP, IsAPI: YES 2026-03-12 22:43:02.522 [TC] INFO [JTS-SocketListener-55] - ArEServer: Adding 105670462 with id - 2026-03-12 22:43:02.599 [TC] INFO [JTS-SocketListener-55] - eServersChanged: 1 2026-03-12 22:43:02.828 [TC] INFO [JTS-EServerSocketNotifier-101] - Starting async queue thread 2026-03-12 22:43:02.864 [TC] INFO [JTS-EServerSocket-100] - [-:180:180:1:0:0:0:SYS] Starting new conversation with client on 127.0.0.1 2026-03-12 22:43:02.903 [TC] INFO [JTS-EServerSocket-100] - [-:180:180:1:0:0:0:SYS] Server version is 180 2026-03-12 22:43:02.903 [TC] INFO [JTS-EServerSocket-100] - [-:180:180:1:0:0:0:SYS] Client version is 180 2026-03-12 22:43:02.906 [TC] INFO [JTS-EServerSocket-100] - [-:180:180:1:0:0:0:SYS] is 3rdParty true 2026-03-12 22:43:06.839 [TC] INFO [JTS-EServerSocket-100] - Ignoring API request 'jextend.bk' since API is not accepted. 2026-03-12 22:43:06.846 [TC] INFO [JTS-EServerSocket-100] - ArEServer: Removing 105670462 with id - 2026-03-12 22:43:06.848 [TC] INFO [JTS-EServerSocket-100] - eServersChanged: 0 2026-03-12 22:43:06.897 [TC] INFO [JTS-EServerSocketNotifier-101] - Terminating async queue thread 2026-03-12 22:43:06.976 [TC] INFO [JTS-EServerSocket-100] - Cleaning up [serverId: -]... 2026-03-12 22:43:07.063 [TC] INFO [JTS-EServerSocket-100] - Cleaning up [serverId: -]... 2026-03-12 22:43:07.135 [TC] INFO [JTS-EServerSocket-100] - Cleaning up [serverId: -]... 2026-03-12 22:43:07.172 [TC] INFO [JTS-EServerSocket-100] - [ApiAccountAdditionMgr] Cleaning up [serverId=-]... 
2026-03-12 22:43:07.226 [TC] INFO [JTS-EServerSocket-100] - ============ EServerSocket messages stats ============ 2026-03-12 22:43:07.228 [TC] INFO [JTS-EServerSocket-100] - Number of socket messages pushed to updates queue: 0 2026-03-12 22:43:07.228 [TC] INFO [JTS-EServerSocket-100] - Number of socket messages sent directly: 0 2026-03-12 22:43:07.229 [TC] INFO [JTS-EServerSocket-100] - ======================================================= 2026-03-12 22:43:07.235 [TC] INFO [JTS-EWriter1-102] - [-:180:180:1:0:0:0:SYS] Writer thread terminated for socket client{-}. 2026-03-12 22:43:07.269 [TC] INFO [JTS-EServerSocket-100] - There are no API orders being processed. 2026-03-12 22:43:07.269 [TC] INFO [JTS-EServerSocket-100] - SecDefRequestTimeOut: There are no timed out SecDef requests. 2026-03-12 22:43:07.278 [TC] INFO [JTS-EServerSocket-100] - UnhandledOrderPresetRequest: There are no unprocessed order preset requests. 2026-03-12 22:43:07.279 [TC] INFO [JTS-EServerSocket-100] - [-:180:180:1:0:0:0:SYS] Ending conversation with client{-} at 127.0.0.1
After I click on Stop and then Trade again, it works. This is the log:
2026-03-12 22:45:44.196 [TC] INFO [JTS-SocketListener-55] - State: HEADER, IsAPI: UNKNOWN 2026-03-12 22:45:44.197 [TC] INFO [JTS-SocketListener-55] - State: STOP, IsAPI: YES 2026-03-12 22:45:44.205 [TC] INFO [JTS-SocketListener-55] - ArEServer: Adding 1513967017 with id - 2026-03-12 22:45:44.205 [TC] INFO [JTS-SocketListener-55] - eServersChanged: 1 2026-03-12 22:45:44.210 [TC] INFO [JTS-EServerSocket-105] - [-:180:180:1:0:0:0:SYS] Starting new conversation with client on 127.0.0.1 2026-03-12 22:45:44.210 [TC] INFO [JTS-EServerSocketNotifier-106] - Starting async queue thread 2026-03-12 22:45:44.210 [TC] INFO [JTS-EServerSocket-105] - [-:180:180:1:0:0:0:SYS] Server version is 180 2026-03-12 22:45:44.210 [TC] INFO [JTS-EServerSocket-105] - [-:180:180:1:0:0:0:SYS] Client version is 180 2026-03-12 22:45:44.211 [TC] INFO [JTS-EServerSocket-105] - [-:180:180:1:0:0:0:SYS] is 3rdParty true 2026-03-12 22:45:44.531 [TC] INFO [JTS-EServerSocket-105] - Start API message, ClientID=1 2026-03-12 22:45:48.732 [TC] INFO [JTS-EServerSocket-105] - FAController: set useBackEndAliases to false 2026-03-12 22:45:48.760 [TC] INFO [JTS-EMsgPacer-109] - Starting compressor 2026-03-12 22:45:48.767 [TC] INFO [JTS-EMsgPacer-109] - Shutting down compressor. FlatLen:0 2026-03-12 22:45:48.768 [TC] INFO [JTS-EMsgPacer-109] - CM setting requested API account spec to DUO493542.(API) 2026-03-12 22:45:48.783 [TC] INFO [JTS-EMsgPacer-109] - CM Sending current account image DUO493542 to API 2026-03-12 22:45:49.304 [TC] INFO [JTS-QuickMktDataWrapper-112] - Farm cashfarm/UNCONNECTED: Creating ServiceConnection for:cashfarm 2026-03-12 22:45:49.305 [TC] INFO [JTS-QuickMktDataWrapper-112] - Farm cashfarm/UNCONNECTED: Starting connection thread 2026-03-12 22:45:49.305 [TC] INFO [JTS-QuickMktDataWrapper-112] - Added message to queue [serviceName=cashfarm,state=STARTING,size=1]... 2026-03-12 22:45:49.374 [TC] INFO [JTS-cashfarmServiceConnector-114] - Farm cashfarm/STARTING: Connecting natively (cashfarm)... 
2026-03-12 22:45:49.375 [TC] INFO [JTS-cashfarmServiceConnector-114] - InnerConnection.connectOrTransfer()[endPoint=ndc1.ibllc.com:4000,serviceType=MARKET_DATA,farmName=cashfarm,usesCcpConman=false]... 2026-03-12 22:45:49.386 [TC] INFO [JTS-cashfarmServiceConnector-114] - InnerConnection.updateEndPointIfNeeded()[endPoint=ndc1.ibllc.com:4000,serviceType=MARKET_DATA,farmName=cashfarm,mainConnUseSsl=true,useSslFarmList=]. 2026-03-12 22:45:49.388 [TC] INFO [JTS-cashfarmServiceConnector-114] - Connecting ndc1.ibllc.com:4000 timeout=0... 2026-03-12 22:45:49.388 [TC] INFO [JTS-cashfarmServiceConnector-114] - CONN CALC: Last connection set as now:22:45:49:388 2026-03-12 22:45:49.489 [TC] INFO [JTS-cashfarmServiceConnector-114] - Connected to ndc1.ibllc.com:4000/64.190.197.40 remote-ip on local port 0 socket local addrs:port /38.141.14.52:60428 2026-03-12 22:45:49.490 [TC] INFO [JTS-cashfarmServiceConnector-114] - Updating machine info [connectionAddress=/38.141.14.52]... 2026-03-12 22:45:49.492 [TC] INFO [JTS-cashfarmServiceConnector-114] - Farm cashfarm/PRE_NATIVE: Connected, sending secure connect and/or auth 2026-03-12 22:45:49.492 [TC] INFO [JTS-cashfarmServiceConnector-114] - Added to valid hosts list: ndc1.ibllc.com 2026-03-12 22:45:49.495 [TC] INFO [JTS-cashfarmListenerS10-116] - Starting listener thread [sessionID=10]... 2026-03-12 22:45:49.511 [TC] INFO [JTS-cashfarmServiceConnector-114] - InnerConnection.onConnected() [sessionID=10,secureConnectEnabled=true,logonReplySigned=false]. 2026-03-12 22:45:49.520 [TC] INFO [JTS-cashfarmListenerS10-116] - Initial listener buffer size is 131072. 2026-03-12 22:45:49.520 [TC] INFO [JTS-cashfarmListenerS10-116] - Capping listener buffer size at 65536. 2026-03-12 22:45:49.531 [TC] INFO [JTS-cashfarmListenerS10-116] - Listener thread [sessionID=10] is ready to process messages. 2026-03-12 22:45:49.532 [TC] INFO [JTS-cashfarmDispatcherS10-117S10-118] - Starting dispatcher thread [sessionID=10]...
24
4,963
Read More
|
|
|
03/12/26 09:00
Hello JCL, I confirm, 3.01.6 resolves the problem, thank you, cheers !!
6
214
Read More
|
|
|
03/11/26 21:44
OK, very good, many thanks!
However when the connection is red, the first time I click on Trade it connects to the API but it doesn't receive any data (times out and Zorro shows errors 011, 046 and 047 as it doesn't receive any data from the API). Then I need to click Stop and Trade again and the second time it does connect to the API and is able to retrieve data normally.
Did you also experience this issue?
Thanks
24
4,963
Read More
|
|
|
03/11/26 13:30
Yes, with the new broker command SET_RESTART.
24
4,963
Read More
|
|
|
03/11/26 11:44
Put on the to do list. The new evaluation shell can select folders, but that happens currently by selecting a file inside that folder.
1
107
Read More
|
|
|
03/10/26 21:30
Ok, thanks for the confirmation. In order for Zorro S 3.01 to re-connect to the Gateway, does it need any user intervention?
At the moment, when the connection status shows red and I restart the gateway manually, the Zorro connection keeps on being red. It is only when I stop and restart trading in Zorro (Stop --> Trade) that it reconnects and shows as green. I can manage to restart the Gateway automatically every day with IBC and a scheduled task, but it would be great if there was a way for Zorro to re-connect automatically?
Thanks!
24
4,963
Read More
|
|
|
03/10/26 12:47
We can confirm the Gateway issue. The automated Gateway restart leaves it in an unstable state. It does not accept an API connection in that state. But if you close and restart the Gateway manually, Zorro can connect.
24
4,963
Read More
|
|
|
03/10/26 12:28
Update: The reason of the problem was found. We have uploaded a new version, 3.01.6.
6
214
Read More
|
|
|
03/10/26 09:29
Ah, ok. It did not happen here, but if it's in trade mode, it can be indeed related to the brokers history. That would be a bug. We'll look into that and will probably release a new version.
6
214
Read More
|
|
|
03/10/26 07:16
Hi, thank you for the response. I updated to 3.01.5, the release dated February 22. With the Tradier plugin, history data loaded with no problem; Z9 in Trade mode still gives the same Error 047.
6
214
Read More
|
|
|
03/10/26 00:17
Hi jcl,
I get the same error with Z13. I have found the IB plugin isn't backfilling with other strategies too.
Can confirm the issue still exists with 3.01.5.
Thanks, Adam
6
214
Read More
|
|
|
03/09/26 12:47
Please check your Zorro version. The current version is 3.01.5. AFAIK 3.01.4 was the latest release candidate. The file date of Zorro.exe should be February 22.
6
214
Read More
|
|
03/08/26 18:50
To all our community members, everyone is welcome to enjoy the new project: https://github.com/KoplaNum/StableArchetypeLibrary. StableArchetypeLibrary is a modular C++ neural visual engine for Zorro. It combines LibTorch archetype initialization, OpenCL per-pixel inference, and OpenGL real-time rendering in a 100-equation pipeline. It includes the stable Mendb11 baseline plus visual patch variants for controlled generative experiments. Everyone is welcome to contribute. The next one will be big.
210
65,908
Read More
|
|
03/06/26 07:18
This code is a hybrid visual computation system that turns an evolving internal state into animated graphics through the cooperation of several layers of software and hardware. At its core, it is not simply a graphics program and not simply a neural network demo. It is better understood as a symbolic machine for transforming synthetic randomness into structured visual behavior. The program opens a native Windows rendering window, builds an OpenGL drawing surface, attaches an OpenCL compute pipeline to that surface, initializes a small neural network through LibTorch, and then uses all of those pieces together to generate a continuously changing image. The result is an engine in which a neural system, a random process, and a graphics pipeline become different expressions of the same evolving state. The first important idea in the design is separation of roles. LibTorch is used to define and initialize the neural network. It provides the model structure, the layer weights, and the tensor-compatible machinery for generating the initial parameters of the tiny multilayer perceptron. OpenCL is used as the numerical worker that evaluates the network at image scale. Rather than asking the CPU to run the network for every pixel, the program sends the weights and contextual inputs to an OpenCL kernel, which computes the color field in parallel on the graphics device. OpenGL then serves as the presentation layer. It does not perform the neural reasoning itself. Instead, it receives the finished pixel buffer and displays it as a texture on a full-screen quad. In symbolic terms, LibTorch provides the form of thought, OpenCL provides the act of distributed evaluation, and OpenGL provides the visible body of the result. A second important idea is that the image is not based on market data or a fixed deterministic simulation. The network is conditioned by synthetic state variables that are themselves derived from evolving non-deterministic internal values. 
These variables are named regime, volatility, trend, and risk. They do not correspond to actual trading statistics in this version. Instead, they are abstract latent descriptors computed from the current random seed, the animation phase, and the changing neural parameters themselves. This gives the output a deeper structure than simple noise. Each frame is no longer only a reaction to pixel position and per-pixel jitter. It becomes the visual manifestation of a global internal condition. In that sense, the program behaves like a synthetic cognitive field, where each point in the image is influenced both by local coordinates and by a shared hidden mood. The program begins with careful include ordering because it combines many libraries that can conflict with one another. LibTorch is included first, then Zorro, and macros that might cause name collisions are cleaned up afterward. This is a practical but very important architectural detail. It reflects that the file is meant to compile inside the Zorro ecosystem while also depending on modern machine learning and graphics toolchains. A small but important note is that to compile this successfully with Zorro64, local directories containing the needed DLL files and library dependencies must be available and correctly configured. In practice, this usually means that LibTorch runtime DLLs, OpenCL support, and any required graphics-related binaries must be reachable through local compiler and runtime paths. Without those local dependency directories, the code may compile incorrectly or fail at load time even if the source itself is valid. The OpenGL initialization part constructs the visible world. A Win32 window is created, a device context is obtained, and a WGL context is attached. The code then dynamically loads the OpenGL buffer functions needed for pixel buffer object support. This is essential because the rendered image is not drawn point by point through traditional immediate graphics commands. 
Instead, a pixel buffer object is allocated as a block of GPU-visible memory, and a texture is created to receive that data. The texture becomes the canvas that OpenGL displays every frame. This means the visible image is really the endpoint of a dataflow pipeline, not a manually painted scene. The neural network is intentionally small. It has six input channels, one hidden layer, and three outputs that are later interpreted as red, green, and blue color tendencies. The six inputs include the two spatial coordinates together with the four frame-level context values. This turns the network into a context-conditioned generator rather than a simple coordinate mapper. In abstract terms, the network learns a function from place and latent state into color. Since the weights are not trained on a dataset here, their meaning emerges from initialization and subsequent internal evolution. That gives the system an experimental character. It is less like a classifier and more like a dynamic symbolic organ whose activity is made visible. LibTorch is used only on the host side to instantiate the network and extract its initial weights and biases. Once those parameters are copied into plain arrays, they are transferred into OpenCL buffers. From that point on, the network is evaluated inside the OpenCL kernel for every pixel. That division is mathematically elegant. The high-level neural definition exists in LibTorch, but the large-scale field evaluation is delegated to a massively parallel compute layer. This means the neural model has a dual existence: as a conceptual structure in LibTorch and as a numerical stencil in OpenCL. One defines the architecture, the other enacts it across space. The OpenCL kernel is where the local and global levels meet. For each pixel, the kernel computes normalized coordinates, derives a deterministic jitter from the current seed and pixel index, then constructs the six neural inputs. The first two are simply position. 
The remaining four are the shared context channels, slightly modulated by local oscillation and jitter. This is a key conceptual improvement over a purely local shader. The frame now possesses a coherent internal theme because all pixels are influenced by the same synthetic regime, volatility, trend, and risk state. At the same time, local perturbations preserve texture and detail. This balance between shared condition and local variation is what gives the output the feeling of organized complexity. After the hidden layer and output layer are evaluated, the network outputs are transformed into color components. These are then mixed with radial shading, stripes, and oscillatory modulation. This means the final picture is not a raw neural output. It is a composition between neural activation and geometric post-processing. Symbolically, the network provides the semantic field, while the handcrafted spatial operators provide the visual grammar. The result is a blend of learned structure and procedural ornament. Another major feature is that the neural parameters themselves evolve over time. The code packs all weights and biases into a single parameter vector and updates them through a recurrence that combines neighbor coupling, slow drift, bounded noise, and weak mean-reversion-like corrections. This makes the network a dynamic object rather than a frozen model. Its internal configuration changes from frame to frame, and the random context variables are partly derived from the statistical properties of those changing parameters. In effect, the network influences the context, and the context influences how the network is evaluated. This creates a feedback loop. Even though there is no training objective in the usual sense, the system still exhibits a kind of endogenous evolution. That feedback loop is the deepest symbolic aspect of the code. The neural network is not merely being used as a function approximator. 
It becomes part of a self-modulating visual dynamical system. The parameter field changes over time, the derived context summarizes aspects of that changing field, the context is fed back into the neural inputs, and the resulting image becomes the visible trace of the current internal condition. This is why the code can be described as an engine of symbolic stochastic expression rather than just a renderer. The WinMain loop keeps this whole machine alive. It pumps messages, maintains the window, allows graceful shutdown, and calls the rendering function repeatedly. Zorro is only used here as the hosting environment and lifecycle controller. The exported main function prevents repeated relaunches and ensures the visual process runs once as intended inside Zorro64. That makes the code both a standalone visual machine and a Zorro-compatible DLL-based component. In summary, this program is a layered computational artwork and experimental system. It combines machine learning initialization, GPU parallel evaluation, graphics interop, procedural modulation, and self-evolving stochastic state. Its symbolic name, The Stochastic Prism Engine, fits because the code takes hidden random structure and refracts it into visible organized color. It is a prism not for light alone, but for latent computational state. // Mendb02.cpp
// Win32 + WGL(OpenGL) display + OpenCL compute (CL/GL sharing)
// + Tiny Neural Net inference per pixel (OpenCL kernel) using weights from LibTorch
// + Random-context-conditioned rendering: x, y, regime, volatility, trend, risk
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
#define _CRT_SECURE_NO_WARNINGS
// ============================================================
// 1) Include LibTorch FIRST
// ============================================================
#if defined(__has_include)
#if __has_include(<torch/torch.h>) && __has_include(<torch/script.h>)
#include <torch/torch.h>
#include <torch/script.h>
#else
#error "LibTorch headers not found. Add LibTorch include paths to your compiler settings."
#endif
#else
#include <torch/torch.h>
#include <torch/script.h>
#endif
// Optional CUDA headers
#if defined(__has_include)
#if __has_include(<torch/cuda.h>)
#include <torch/cuda.h>
#define HAVE_TORCH_CUDA_HEADER 1
#else
#define HAVE_TORCH_CUDA_HEADER 0
#endif
#if __has_include(<cuda_runtime_api.h>)
#include <cuda_runtime_api.h>
#define HAVE_CUDA_RUNTIME_API_HEADER 1
#else
#define HAVE_CUDA_RUNTIME_API_HEADER 0
#endif
#else
#define HAVE_TORCH_CUDA_HEADER 0
#define HAVE_CUDA_RUNTIME_API_HEADER 0
#endif
#if defined(__has_include)
#if __has_include(<c10/cuda/CUDAGuard.h>) && __has_include(<c10/cuda/impl/cuda_cmake_macros.h>)
#include <c10/cuda/CUDAGuard.h>
#define HAVE_C10_CUDAGUARD 1
#else
#define HAVE_C10_CUDAGUARD 0
#endif
#else
#define HAVE_C10_CUDAGUARD 0
#endif
// ============================================================
// 2) Standard headers
// ============================================================
#include <windows.h>
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
// ============================================================
// 3) Include Zorro AFTER torch, rename Zorro's 'at'
// ============================================================
#define at zorro_at
#ifdef LOG
#undef LOG
#endif
#include <zorro.h>
#undef at
// ============================================================
// 4) Cleanup macro landmines
// ============================================================
#ifdef min
#undef min
#endif
#ifdef max
#undef max
#endif
#ifdef ref
#undef ref
#endif
#ifdef swap
#undef swap
#endif
#ifdef abs
#undef abs
#endif
#ifdef NTF
#undef NTF
#endif
#ifdef LOOKBACK
#undef LOOKBACK
#endif
#ifdef BINS
#undef BINS
#endif
// ============================================================
// OpenCL + OpenGL includes
// ============================================================
#include <CL/cl.h>
#include <CL/cl_gl.h>
#include <CL/cl_gl_ext.h>
#include <GL/gl.h>
#ifndef GL_RGBA8
#define GL_RGBA8 0x8058
#endif
// ------------------------- Globals -------------------------
static HWND gHwnd = 0;               // main render window handle
static HDC gHdc = 0;                 // device context of gHwnd
static HGLRC gHgl = 0;               // WGL OpenGL rendering context
static int gW = 640;                 // framebuffer width in pixels
static int gH = 480;                 // framebuffer height in pixels
static float gPhase = 0.0f;          // animation phase, advanced per frame
static unsigned int gNoiseSeed = 1u; // seed for the per-pixel jitter hash in the CL kernel
// Frame-level latent context fed into the NN as inputs 2..5.
// NOTE(review): per the accompanying description these are synthetic
// descriptors, not real trading statistics — do not treat them as market data.
struct RandomContext {
float regime;
float volatility;
float trend;
float risk;
};
static RandomContext gCtx = {0.0f, 0.0f, 0.0f, 0.0f};
// Read a positive integer from environment variable 'key'.
// Returns 'fallback' when the variable is unset, empty, non-numeric,
// non-positive, or out of int range.
// Fix vs. original: atoi() has undefined behavior on overflow and cannot
// report errors; strtol() is used instead with explicit range checking.
// Like atoi(), trailing non-digits are ignored ("123abc" -> 123).
static int read_env_int(const char* key, int fallback)
{
    const char* s = getenv(key);
    if(!s || !*s) return fallback;
    char* end = NULL;
    errno = 0;
    long v = strtol(s, &end, 10);
    if(end == s || errno == ERANGE || v <= 0 || v > INT_MAX)
        return fallback;
    return (int)v;
}
// ------------------------- WinProc forward -------------------------
LRESULT CALLBACK WndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
// ===========================================================
// Minimal OpenGL function loading
// ===========================================================
// GL enum values defined locally because <GL/gl.h> on Windows only
// covers OpenGL 1.1; buffer objects arrived later.
#ifndef GL_ARRAY_BUFFER
#define GL_ARRAY_BUFFER 0x8892
#endif
#ifndef GL_PIXEL_UNPACK_BUFFER
#define GL_PIXEL_UNPACK_BUFFER 0x88EC
#endif
#ifndef GL_DYNAMIC_DRAW
#define GL_DYNAMIC_DRAW 0x88E8
#endif
#ifndef APIENTRY
#define APIENTRY __stdcall
#endif
#ifndef APIENTRYP
#define APIENTRYP APIENTRY *
#endif
// Function-pointer typedefs for the buffer-object entry points loaded
// at runtime in gl_load_ext().
typedef void (APIENTRYP PFNGLGENBUFFERSPROC)(GLsizei, GLuint*);
typedef void (APIENTRYP PFNGLBINDBUFFERPROC)(GLenum, GLuint);
typedef void (APIENTRYP PFNGLBUFFERDATAPROC)(GLenum, ptrdiff_t, const void*, GLenum);
typedef void (APIENTRYP PFNGLDELETEBUFFERSPROC)(GLsizei, const GLuint*);
// Resolved at runtime; all four must be non-null for PBO streaming to work.
static PFNGLGENBUFFERSPROC p_glGenBuffers = 0;
static PFNGLBINDBUFFERPROC p_glBindBuffer = 0;
static PFNGLBUFFERDATAPROC p_glBufferData = 0;
static PFNGLDELETEBUFFERSPROC p_glDeleteBuffers = 0;
static void* gl_get_proc(const char* name)
{
void* p = (void*)wglGetProcAddress(name);
if(!p) {
HMODULE ogl = GetModuleHandleA("opengl32.dll");
if(ogl) p = (void*)GetProcAddress(ogl, name);
}
return p;
}
// Load the buffer-object entry points required for PBO streaming.
// Returns 1 when all four were resolved, 0 otherwise.
static int gl_load_ext()
{
    p_glGenBuffers    = (PFNGLGENBUFFERSPROC)gl_get_proc("glGenBuffers");
    p_glBindBuffer    = (PFNGLBINDBUFFERPROC)gl_get_proc("glBindBuffer");
    p_glBufferData    = (PFNGLBUFFERDATAPROC)gl_get_proc("glBufferData");
    p_glDeleteBuffers = (PFNGLDELETEBUFFERSPROC)gl_get_proc("glDeleteBuffers");
    int ok = p_glGenBuffers && p_glBindBuffer
          && p_glBufferData && p_glDeleteBuffers;
    return ok ? 1 : 0;
}
// ===========================================================
// OpenGL objects
// ===========================================================
static GLuint gPBO = 0; // pixel-unpack buffer the frame is streamed through
static GLuint gTex = 0; // texture that receives the PBO contents for display
// Tear down GL objects and the WGL context in reverse creation order.
// Safe to call on a partially initialized state: every handle is checked
// and zeroed so a later re-init starts clean.
static void gl_release_all()
{
    if(gTex != 0) {
        glDeleteTextures(1, &gTex);
        gTex = 0;
    }
    if(gPBO != 0) {
        if(p_glDeleteBuffers)
            p_glDeleteBuffers(1, &gPBO);
        gPBO = 0;
    }
    if(gHgl) {
        wglMakeCurrent(NULL, NULL);
        wglDeleteContext(gHgl);
        gHgl = 0;
    }
    if(gHdc && gHwnd) {
        ReleaseDC(gHwnd, gHdc);
        gHdc = 0;
    }
}
// Create a WGL OpenGL context on 'hwnd' and allocate the PBO + texture
// used as the frame target. Returns 1 on success, 0 on any failure.
// Order matters here: the pixel format must be set before the context is
// created, and the context must be current before extension pointers can
// be loaded — do not reorder these steps.
static int gl_init_wgl(HWND hwnd)
{
gHwnd = hwnd;
gHdc = GetDC(hwnd);
if(!gHdc) return 0;
// Describe a double-buffered 32-bit RGBA window surface.
PIXELFORMATDESCRIPTOR pfd;
ZeroMemory(&pfd, sizeof(pfd));
pfd.nSize = sizeof(pfd);
pfd.nVersion = 1;
pfd.dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER;
pfd.iPixelType = PFD_TYPE_RGBA;
pfd.cColorBits = 32;
pfd.cDepthBits = 16;
pfd.iLayerType = PFD_MAIN_PLANE;
int pf = ChoosePixelFormat(gHdc, &pfd);
if(pf == 0) return 0;
if(!SetPixelFormat(gHdc, pf, &pfd)) return 0;
gHgl = wglCreateContext(gHdc);
if(!gHgl) return 0;
if(!wglMakeCurrent(gHdc, gHgl)) return 0;
// Buffer-object entry points can only be resolved with a current context.
if(!gl_load_ext()) {
printf("\nOpenGL buffer functions not available (need VBO/PBO support).");
return 0;
}
glDisable(GL_DEPTH_TEST);
glViewport(0, 0, gW, gH);
// Allocate the PBO sized for one RGBA8 frame (gW * gH * 4 bytes).
p_glGenBuffers(1, &gPBO);
p_glBindBuffer(GL_PIXEL_UNPACK_BUFFER, gPBO);
p_glBufferData(GL_PIXEL_UNPACK_BUFFER, (ptrdiff_t)(gW * gH * 4), 0, GL_DYNAMIC_DRAW);
p_glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
// Texture that will display the PBO contents; NEAREST filtering since
// the texture is drawn 1:1 onto the window.
glGenTextures(1, &gTex);
glBindTexture(GL_TEXTURE_2D, gTex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, gW, gH, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0);
glBindTexture(GL_TEXTURE_2D, 0);
return 1;
}
// ===========================================================
// Tiny NN (LibTorch -> weights)
// ===========================================================
#define NN_IN 6   // inputs: x, y, regime, volatility, trend, risk
#define NN_H 16   // hidden layer width
#define NN_OUT 3  // outputs, later mapped to R,G,B color tendencies
// Total parameter count when all weights/biases are packed into one vector.
#define NN_PARAM_COUNT (NN_H*NN_IN + NN_H + NN_OUT*NN_H + NN_OUT)
// Host-side flat copies of the network parameters; filled from LibTorch
// and then uploaded into the OpenCL buffers gCL_W1/b1/W2/b2.
static float gHost_W1[NN_H*NN_IN];
static float gHost_b1[NN_H];
static float gHost_W2[NN_OUT*NN_H];
static float gHost_b2[NN_OUT];
// 6-in / 16-hidden / 3-out perceptron with tanh on both layers.
// Used only on the host to generate an initial weight set; per-pixel
// evaluation happens in the OpenCL kernel, not here.
struct TinyMLPImpl : torch::nn::Module {
    torch::nn::Linear fc1{nullptr}, fc2{nullptr};
    TinyMLPImpl() {
        fc1 = register_module("fc1", torch::nn::Linear(NN_IN, NN_H));
        fc2 = register_module("fc2", torch::nn::Linear(NN_H, NN_OUT));
    }
    torch::Tensor forward(torch::Tensor x) {
        return torch::tanh(fc2->forward(torch::tanh(fc1->forward(x))));
    }
};
TORCH_MODULE(TinyMLP);
static int build_weights_from_libtorch(float* W1, float* b1, float* W2, float* b2)
{
if(!W1 || !b1 || !W2 || !b2) return 0;
try {
torch::NoGradGuard ng;
torch::manual_seed((uint64_t)time(NULL) ^ (uint64_t)GetTickCount64());
TinyMLP m;
m->eval();
auto w1 = m->fc1->weight.detach().contiguous().to(torch::kCPU);
auto bb1 = m->fc1->bias.detach().contiguous().to(torch::kCPU);
auto w2 = m->fc2->weight.detach().contiguous().to(torch::kCPU);
auto bb2 = m->fc2->bias.detach().contiguous().to(torch::kCPU);
memcpy(W1, w1.data_ptr<float>(), sizeof(float)*NN_H*NN_IN);
memcpy(b1, bb1.data_ptr<float>(), sizeof(float)*NN_H);
memcpy(W2, w2.data_ptr<float>(), sizeof(float)*NN_OUT*NN_H);
memcpy(b2, bb2.data_ptr<float>(), sizeof(float)*NN_OUT);
return 1;
}
catch(const c10::Error& e) {
printf("\n[LibTorch] Error: %s", e.what());
return 0;
}
catch(...) {
printf("\n[LibTorch] Unknown error.");
return 0;
}
}
// ===========================================================
// OpenCL (GL sharing)
// ===========================================================
static int gCL_Ready = 0;               // 1 once the CL pipeline is fully initialized
static cl_platform_id gCL_Platform = 0;
static cl_device_id gCL_Device = 0;
static cl_context gCL_Context = 0;      // created with CL/GL sharing properties
static cl_command_queue gCL_Queue = 0;
static cl_program gCL_Program = 0;
static cl_kernel gCL_K_NN = 0;          // the nn_render kernel
static cl_mem gCL_PBO = 0;              // CL view of the GL pixel buffer object
// Device-side copies of the network parameters (see gHost_* arrays).
static cl_mem gCL_W1 = 0;
static cl_mem gCL_b1 = 0;
static cl_mem gCL_W2 = 0;
static cl_mem gCL_b2 = 0;
// Parameter-vector helpers, defined later in the file (not visible in
// this chunk): pack/unpack the weights and evolve them over time.
static void pack_params(float* theta);
static void unpack_params(const float* theta);
static void evolve_params_accumulated(float phase, unsigned int seed);
// Stringize NN_* sizes so they can be spliced into the kernel source.
#define STR2(x) #x
#define XSTR(x) STR2(x)
// OpenCL kernel source, built at compile time with the NN_* sizes spliced
// in via XSTR. For every pixel it: derives a deterministic jitter hash from
// (x, y, seed), builds the 6 NN inputs (position + jittered context),
// evaluates the 2-layer tanh MLP, then composes the final color from the
// NN outputs plus vignette/stripe/oscillation post-processing.
static const char* gCL_Source =
"__kernel void nn_render(__global uchar4* out, int width, int height, \n"
" __global const float* W1, __global const float* b1, \n"
" __global const float* W2, __global const float* b2, \n"
" float phase, uint seed, \n"
" float ctxRegime, float ctxVol, float ctxTrend, float ctxRisk) \n"
"{ \n"
" int xpix = (int)get_global_id(0); \n"
" int ypix = (int)get_global_id(1); \n"
" if(xpix >= width || ypix >= height) return; \n"
" \n"
" float x = ((float)xpix / (float)(width - 1)) * 2.0f - 1.0f; \n"
" float y = ((float)ypix / (float)(height - 1)) * 2.0f - 1.0f; \n"
" uint n = (uint)(xpix*1973u) ^ (uint)(ypix*9277u) ^ (seed*26699u + 911u); \n"
" n = (n << 13) ^ n; \n"
" uint m = (n * (n*n*15731u + 789221u) + 1376312589u); \n"
" float jitter = ((float)(m & 0x00ffffffu) / 16777215.0f) * 2.0f - 1.0f; \n"
" \n"
" float in0 = x; \n"
" float in1 = y; \n"
" float in2 = clamp(ctxRegime + 0.20f*jitter + 0.15f*sin(phase + 3.0f*x), -1.0f, 1.0f); \n"
" float in3 = clamp(ctxVol + 0.25f*fabs(jitter) + 0.10f*cos(phase + 4.0f*y), -1.0f, 1.0f); \n"
" float in4 = clamp(ctxTrend + 0.15f*sin(2.0f*x - 1.5f*y + phase), -1.0f, 1.0f); \n"
" float in5 = clamp(ctxRisk + 0.10f*cos(3.0f*(x+y) - phase), -1.0f, 1.0f); \n"
" \n"
" float h[" XSTR(NN_H) "]; \n"
" for(int j=0;j<" XSTR(NN_H) ";j++){ \n"
" float acc = b1[j]; \n"
" acc += in0 * W1[j*" XSTR(NN_IN) " + 0]; \n"
" acc += in1 * W1[j*" XSTR(NN_IN) " + 1]; \n"
" acc += in2 * W1[j*" XSTR(NN_IN) " + 2]; \n"
" acc += in3 * W1[j*" XSTR(NN_IN) " + 3]; \n"
" acc += in4 * W1[j*" XSTR(NN_IN) " + 4]; \n"
" acc += in5 * W1[j*" XSTR(NN_IN) " + 5]; \n"
" h[j] = tanh(acc); \n"
" } \n"
" \n"
" float o[" XSTR(NN_OUT) "]; \n"
" for(int k=0;k<" XSTR(NN_OUT) ";k++){ \n"
" float acc = b2[k]; \n"
" for(int j=0;j<" XSTR(NN_H) ";j++){ \n"
" acc += h[j] * W2[k*" XSTR(NN_H) " + j]; \n"
" } \n"
" float s = 0.5f + 0.5f*tanh(acc); \n"
" if(s<0) s=0; if(s>1) s=1; \n"
" o[k] = s; \n"
" } \n"
" \n"
" float radial = sqrt(x*x + y*y); \n"
" float vignette = clamp(1.15f - radial, 0.0f, 1.0f); \n"
" float stripe = 0.5f + 0.5f*sin(10.0f*(x + y) + phase + 2.0f*jitter); \n"
" float rcol = clamp(0.70f*o[0] + 0.30f*stripe, 0.0f, 1.0f) * vignette; \n"
" float gcol = clamp(0.85f*o[1] + 0.15f*(1.0f - stripe), 0.0f, 1.0f) * vignette; \n"
" float bcol = clamp(0.75f*o[2] + 0.25f*(0.5f + 0.5f*cos(8.0f*x - phase)),0.0f,1.0f);\n"
" uchar r = (uchar)(255.0f*rcol); \n"
" uchar g = (uchar)(255.0f*gcol); \n"
" uchar b = (uchar)(255.0f*bcol); \n"
" out[ypix*width + xpix] = (uchar4)(r,g,b,255); \n"
"} \n";
static void cl_release_all()
{
if(gCL_b2) { clReleaseMemObject(gCL_b2); gCL_b2 = 0; }
if(gCL_W2) { clReleaseMemObject(gCL_W2); gCL_W2 = 0; }
if(gCL_b1) { clReleaseMemObject(gCL_b1); gCL_b1 = 0; }
if(gCL_W1) { clReleaseMemObject(gCL_W1); gCL_W1 = 0; }
if(gCL_PBO) { clReleaseMemObject(gCL_PBO); gCL_PBO = 0; }
if(gCL_K_NN) { clReleaseKernel(gCL_K_NN); gCL_K_NN = 0; }
if(gCL_Program) { clReleaseProgram(gCL_Program); gCL_Program = 0; }
if(gCL_Queue) { clReleaseCommandQueue(gCL_Queue);gCL_Queue = 0; }
if(gCL_Context) { clReleaseContext(gCL_Context); gCL_Context = 0; }
gCL_Device = 0;
gCL_Platform = 0;
gCL_Ready = 0;
}
// Scan the available OpenCL platforms (capped at 8) and their GPU
// devices (capped at 8 per platform) for the first device that
// advertises the cl_khr_gl_sharing extension, which is required to
// share the GL pixel buffer with OpenCL.
// On success fills *outP / *outD and returns 1; returns 0 if no
// suitable device exists or any enumeration call fails.
static int cl_pick_device_with_glshare(cl_platform_id* outP, cl_device_id* outD)
{
cl_uint nPlatforms = 0;
if(clGetPlatformIDs(0, 0, &nPlatforms) != CL_SUCCESS || nPlatforms == 0)
return 0;
cl_platform_id platforms[8];
if(nPlatforms > 8) nPlatforms = 8; // clamp to local array capacity
if(clGetPlatformIDs(nPlatforms, platforms, &nPlatforms) != CL_SUCCESS)
return 0;
for(cl_uint p=0; p<nPlatforms; p++)
{
cl_uint nDev = 0;
// Platforms without GPU devices are skipped, not treated as errors.
if(clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_GPU, 0, 0, &nDev) != CL_SUCCESS || nDev == 0)
continue;
cl_device_id devs[8];
if(nDev > 8) nDev = 8;
if(clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_GPU, nDev, devs, &nDev) != CL_SUCCESS)
continue;
for(cl_uint d=0; d<nDev; d++)
{
// NOTE(review): assumes the device's extension string fits in 8 KB;
// a longer string makes clGetDeviceInfo fail and the device is skipped.
char ext[8192];
size_t sz = 0;
if(clGetDeviceInfo(devs[d], CL_DEVICE_EXTENSIONS, sizeof(ext), ext, &sz) != CL_SUCCESS)
continue;
if(strstr(ext, "cl_khr_gl_sharing"))
{
*outP = platforms[p];
*outD = devs[d];
return 1;
}
}
}
return 0;
}
// Create an OpenCL context shared with the current OpenGL context,
// build the "nn_render" kernel, wrap the GL PBO as a CL mem object,
// and upload the initial network weights built by LibTorch.
// Returns 1 on success (gCL_Ready set), 0 on any failure; on failure
// all partially created CL state is released via cl_release_all().
//
// Fixes vs. previous version:
// - logbuf could be printed uninitialized when clGetProgramBuildInfo
//   failed (undefined behavior); it is now always NUL-terminated.
// - each clCreateBuffer result is checked immediately, so an earlier
//   failure's error code is no longer overwritten by a later success.
static int cl_init_glshare()
{
    cl_int err = CL_SUCCESS;
    cl_platform_id P = 0;
    cl_device_id D = 0;
    if(!cl_pick_device_with_glshare(&P, &D)) {
        printf("\nOpenCL: no GPU device with cl_khr_gl_sharing found.");
        return 0;
    }
    gCL_Platform = P;
    gCL_Device = D;
    // The context must reference the live GL context/DC so that
    // clCreateFromGLBuffer and Acquire/ReleaseGLObjects work.
    cl_context_properties props[] = {
        CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
        CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(),
        CL_CONTEXT_PLATFORM, (cl_context_properties)gCL_Platform,
        0
    };
    gCL_Context = clCreateContext(props, 1, &gCL_Device, 0, 0, &err);
    if(err != CL_SUCCESS || !gCL_Context) { cl_release_all(); return 0; }
    gCL_Queue = clCreateCommandQueue(gCL_Context, gCL_Device, 0, &err);
    if(err != CL_SUCCESS || !gCL_Queue) { cl_release_all(); return 0; }
    gCL_Program = clCreateProgramWithSource(gCL_Context, 1, &gCL_Source, 0, &err);
    if(err != CL_SUCCESS || !gCL_Program) { cl_release_all(); return 0; }
    err = clBuildProgram(gCL_Program, 1, &gCL_Device, 0, 0, 0);
    if(err != CL_SUCCESS)
    {
        // Show the build log. Pre-terminate the buffer so a failing
        // clGetProgramBuildInfo cannot leave it uninitialized, and
        // force a trailing NUL in case the log was truncated.
        char logbuf[8192];
        size_t logsz = 0;
        logbuf[0] = '\0';
        clGetProgramBuildInfo(gCL_Program, gCL_Device, CL_PROGRAM_BUILD_LOG,
                              sizeof(logbuf)-1, logbuf, &logsz);
        logbuf[sizeof(logbuf)-1] = '\0';
        printf("\nOpenCL build failed:\n%s", logbuf);
        cl_release_all();
        return 0;
    }
    gCL_K_NN = clCreateKernel(gCL_Program, "nn_render", &err);
    if(err != CL_SUCCESS || !gCL_K_NN) { cl_release_all(); return 0; }
    gCL_PBO = clCreateFromGLBuffer(gCL_Context, CL_MEM_WRITE_ONLY, gPBO, &err);
    if(err != CL_SUCCESS || !gCL_PBO) { cl_release_all(); return 0; }
    size_t bytesW1 = sizeof(float)*(size_t)NN_H*(size_t)NN_IN;
    size_t bytesb1 = sizeof(float)*(size_t)NN_H;
    size_t bytesW2 = sizeof(float)*(size_t)NN_OUT*(size_t)NN_H;
    size_t bytesb2 = sizeof(float)*(size_t)NN_OUT;
    // Check each creation on its own; a later success would otherwise
    // overwrite the error code of an earlier failure.
    gCL_W1 = clCreateBuffer(gCL_Context, CL_MEM_READ_ONLY, bytesW1, 0, &err);
    if(err != CL_SUCCESS || !gCL_W1) { cl_release_all(); return 0; }
    gCL_b1 = clCreateBuffer(gCL_Context, CL_MEM_READ_ONLY, bytesb1, 0, &err);
    if(err != CL_SUCCESS || !gCL_b1) { cl_release_all(); return 0; }
    gCL_W2 = clCreateBuffer(gCL_Context, CL_MEM_READ_ONLY, bytesW2, 0, &err);
    if(err != CL_SUCCESS || !gCL_W2) { cl_release_all(); return 0; }
    gCL_b2 = clCreateBuffer(gCL_Context, CL_MEM_READ_ONLY, bytesb2, 0, &err);
    if(err != CL_SUCCESS || !gCL_b2) { cl_release_all(); return 0; }
    if(!build_weights_from_libtorch(gHost_W1, gHost_b1, gHost_W2, gHost_b2)) {
        printf("\n[LibTorch] Failed to build weights.");
        cl_release_all();
        return 0;
    }
    // Blocking writes: weights must be resident before the first kernel run.
    err = clEnqueueWriteBuffer(gCL_Queue, gCL_W1, CL_TRUE, 0, bytesW1, gHost_W1, 0, 0, 0);
    if(err != CL_SUCCESS) { cl_release_all(); return 0; }
    err = clEnqueueWriteBuffer(gCL_Queue, gCL_b1, CL_TRUE, 0, bytesb1, gHost_b1, 0, 0, 0);
    if(err != CL_SUCCESS) { cl_release_all(); return 0; }
    err = clEnqueueWriteBuffer(gCL_Queue, gCL_W2, CL_TRUE, 0, bytesW2, gHost_W2, 0, 0, 0);
    if(err != CL_SUCCESS) { cl_release_all(); return 0; }
    err = clEnqueueWriteBuffer(gCL_Queue, gCL_b2, CL_TRUE, 0, bytesb2, gHost_b2, 0, 0, 0);
    if(err != CL_SUCCESS) { cl_release_all(); return 0; }
    gCL_Ready = 1;
    return 1;
}
// ===========================================================
// Random context helpers
// ===========================================================
/* Saturate x to the closed interval [lo, hi]. */
static float clampf(float x, float lo, float hi)
{
    return (x < lo) ? lo : (x > hi) ? hi : x;
}
/* Map the low 24 bits of x onto [0,1]. 2^24-1 is exactly representable
   in a float's 24-bit mantissa, so the endpoints 0 and 1 are exact. */
static float u32_to_unit(unsigned int x)
{
    const unsigned int mask24 = 0x00FFFFFFu;
    return (float)(x & mask24) / 16777215.0f;
}
/* 32-bit integer avalanche (xorshift/multiply rounds; the constants
   match xxHash32's final mix). Each round is a bijection on uint32,
   so the whole function is one: distinct inputs give distinct outputs. */
static unsigned int mix_u32(unsigned int x)
{
    x = (x ^ (x >> 16)) * 2246822519u;
    x = (x ^ (x >> 13)) * 3266489917u;
    return x ^ (x >> 16);
}
/* Derive the four gCtx "market context" signals (regime, volatility,
   trend, risk) from summary statistics of the current host-side
   network parameters, the animation phase, and four hash-derived
   noise terms in [-1,1]. Writes gCtx only; reads gHost_* only. */
static void compute_random_context(float phase, unsigned int seed)
{
    float mW1 = 0.0f, mB1 = 0.0f, mW2 = 0.0f, mB2 = 0.0f;
    float aW1 = 0.0f, aW2 = 0.0f;
    int i;
    /* Per-array means and mean absolute magnitudes. */
    for(i = 0; i < NN_H*NN_IN; i++) {
        mW1 += gHost_W1[i];
        aW1 += fabsf(gHost_W1[i]);
    }
    for(i = 0; i < NN_H; i++)
        mB1 += gHost_b1[i];
    for(i = 0; i < NN_OUT*NN_H; i++) {
        mW2 += gHost_W2[i];
        aW2 += fabsf(gHost_W2[i]);
    }
    for(i = 0; i < NN_OUT; i++)
        mB2 += gHost_b2[i];
    mW1 /= (float)(NN_H*NN_IN);
    mB1 /= (float)NN_H;
    mW2 /= (float)(NN_OUT*NN_H);
    mB2 /= (float)NN_OUT;
    aW1 /= (float)(NN_H*NN_IN);
    aW2 /= (float)(NN_OUT*NN_H);
    /* Four decorrelated noise channels from one seed. */
    unsigned int k0 = mix_u32(seed ^ 0xA341316Cu);
    unsigned int k1 = mix_u32(seed ^ 0xC8013EA4u);
    unsigned int k2 = mix_u32(seed ^ 0xAD90777Du);
    unsigned int k3 = mix_u32(seed ^ 0x7E95761Eu);
    float r0 = u32_to_unit(k0) * 2.0f - 1.0f;
    float r1 = u32_to_unit(k1) * 2.0f - 1.0f;
    float r2 = u32_to_unit(k2) * 2.0f - 1.0f;
    float r3 = u32_to_unit(k3) * 2.0f - 1.0f;
    /* tanh squashes regime/trend/risk into (-1,1); volatility is
       clamped instead (its raw value is always >= 0.35). */
    gCtx.regime =
        tanhf(0.9f*sinf(0.31f*phase) + 0.6f*cosf(0.17f*phase) + 0.35f*mW1 + 0.20f*r0);
    gCtx.volatility =
        clampf(0.5f + 0.8f*aW1 + 0.6f*aW2 + 0.15f*r1, -1.0f, 1.0f);
    gCtx.trend =
        tanhf(1.4f*(mW2 - mW1) + 0.5f*sinf(0.12f*phase + 1.3f) + 0.20f*r2);
    gCtx.risk =
        tanhf(0.8f*gCtx.regime - 0.6f*fabsf(gCtx.volatility) + 0.5f*gCtx.trend + 0.15f*r3);
}
// ===========================================================
// Render (CL -> GL)
// ===========================================================
// Render one frame: acquire the GL PBO for OpenCL, evolve and upload
// the network weights, run the nn_render kernel into the PBO, release
// it back to GL, then blit PBO -> texture -> fullscreen quad and swap.
// The Acquire/Release pairing around every early return is essential:
// leaving the PBO acquired would deadlock the GL side.
static void RenderFrame()
{
if(!gCL_Ready) return;
size_t global[2] = { (size_t)gW, (size_t)gH };
size_t local[2] = { 16, 16 };
cl_int err = CL_SUCCESS;
err = clEnqueueAcquireGLObjects(gCL_Queue, 1, &gCL_PBO, 0, 0, 0);
if(err != CL_SUCCESS) return;
// Per-frame noise seed from the performance counter + tick count.
LARGE_INTEGER qpc;
QueryPerformanceCounter(&qpc);
gNoiseSeed = (unsigned int)(qpc.QuadPart ^ (qpc.QuadPart >> 32) ^ (LONGLONG)GetTickCount64());
evolve_params_accumulated(gPhase, gNoiseSeed);
compute_random_context(gPhase, gNoiseSeed);
size_t bytesW1 = sizeof(float)*(size_t)NN_H*(size_t)NN_IN;
size_t bytesb1 = sizeof(float)*(size_t)NN_H;
size_t bytesW2 = sizeof(float)*(size_t)NN_OUT*(size_t)NN_H;
size_t bytesb2 = sizeof(float)*(size_t)NN_OUT;
// Non-blocking uploads; ordering vs. the kernel is guaranteed by the
// in-order queue. On failure, release the GL object before bailing.
err = clEnqueueWriteBuffer(gCL_Queue, gCL_W1, CL_FALSE, 0, bytesW1, gHost_W1, 0, 0, 0);
if(err != CL_SUCCESS) { clEnqueueReleaseGLObjects(gCL_Queue, 1, &gCL_PBO, 0, 0, 0); clFinish(gCL_Queue); return; }
err = clEnqueueWriteBuffer(gCL_Queue, gCL_b1, CL_FALSE, 0, bytesb1, gHost_b1, 0, 0, 0);
if(err != CL_SUCCESS) { clEnqueueReleaseGLObjects(gCL_Queue, 1, &gCL_PBO, 0, 0, 0); clFinish(gCL_Queue); return; }
err = clEnqueueWriteBuffer(gCL_Queue, gCL_W2, CL_FALSE, 0, bytesW2, gHost_W2, 0, 0, 0);
if(err != CL_SUCCESS) { clEnqueueReleaseGLObjects(gCL_Queue, 1, &gCL_PBO, 0, 0, 0); clFinish(gCL_Queue); return; }
err = clEnqueueWriteBuffer(gCL_Queue, gCL_b2, CL_FALSE, 0, bytesb2, gHost_b2, 0, 0, 0);
if(err != CL_SUCCESS) { clEnqueueReleaseGLObjects(gCL_Queue, 1, &gCL_PBO, 0, 0, 0); clFinish(gCL_Queue); return; }
// Kernel arguments must match the nn_render signature order exactly.
int arg = 0;
clSetKernelArg(gCL_K_NN, arg++, sizeof(cl_mem), &gCL_PBO);
clSetKernelArg(gCL_K_NN, arg++, sizeof(int), &gW);
clSetKernelArg(gCL_K_NN, arg++, sizeof(int), &gH);
clSetKernelArg(gCL_K_NN, arg++, sizeof(cl_mem), &gCL_W1);
clSetKernelArg(gCL_K_NN, arg++, sizeof(cl_mem), &gCL_b1);
clSetKernelArg(gCL_K_NN, arg++, sizeof(cl_mem), &gCL_W2);
clSetKernelArg(gCL_K_NN, arg++, sizeof(cl_mem), &gCL_b2);
clSetKernelArg(gCL_K_NN, arg++, sizeof(float), &gPhase);
clSetKernelArg(gCL_K_NN, arg++, sizeof(unsigned int), &gNoiseSeed);
clSetKernelArg(gCL_K_NN, arg++, sizeof(float), &gCtx.regime);
clSetKernelArg(gCL_K_NN, arg++, sizeof(float), &gCtx.volatility);
clSetKernelArg(gCL_K_NN, arg++, sizeof(float), &gCtx.trend);
clSetKernelArg(gCL_K_NN, arg++, sizeof(float), &gCtx.risk);
// 16x16 work groups fail when gW/gH are not multiples of 16; retry
// with an implementation-chosen local size in that case.
err = clEnqueueNDRangeKernel(gCL_Queue, gCL_K_NN, 2, 0, global, local, 0, 0, 0);
if(err != CL_SUCCESS) {
err = clEnqueueNDRangeKernel(gCL_Queue, gCL_K_NN, 2, 0, global, 0, 0, 0, 0);
}
clEnqueueReleaseGLObjects(gCL_Queue, 1, &gCL_PBO, 0, 0, 0);
clFinish(gCL_Queue); // ensure CL is done before GL reads the PBO
// PBO -> texture: glTexSubImage2D sources from the bound PIXEL_UNPACK
// buffer when its data pointer argument is 0.
p_glBindBuffer(GL_PIXEL_UNPACK_BUFFER, gPBO);
glBindTexture(GL_TEXTURE_2D, gTex);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, gW, gH, GL_RGBA, GL_UNSIGNED_BYTE, 0);
p_glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glClear(GL_COLOR_BUFFER_BIT);
glEnable(GL_TEXTURE_2D);
glBindTexture(GL_TEXTURE_2D, gTex);
glBegin(GL_QUADS);
glTexCoord2f(0,0); glVertex2f(-1,-1);
glTexCoord2f(1,0); glVertex2f( 1,-1);
glTexCoord2f(1,1); glVertex2f( 1, 1);
glTexCoord2f(0,1); glVertex2f(-1, 1);
glEnd();
glBindTexture(GL_TEXTURE_2D, 0);
SwapBuffers(gHdc);
gPhase += 0.03f; // advance animation phase per frame
}
// ===========================================================
// Parameter evolution helpers
// ===========================================================
/* Flatten all network parameters into theta in the fixed order
   W1, b1, W2, b2 (NN_PARAM_COUNT floats total). Inverse of
   unpack_params(). */
static void pack_params(float* theta)
{
    float* dst = theta;
    int i;
    for(i = 0; i < NN_H*NN_IN; i++)  *dst++ = gHost_W1[i];
    for(i = 0; i < NN_H; i++)        *dst++ = gHost_b1[i];
    for(i = 0; i < NN_OUT*NN_H; i++) *dst++ = gHost_W2[i];
    for(i = 0; i < NN_OUT; i++)      *dst++ = gHost_b2[i];
}
/* Scatter theta back into the parameter arrays in the same fixed
   order used by pack_params(): W1, b1, W2, b2. */
static void unpack_params(const float* theta)
{
    const float* src = theta;
    int i;
    for(i = 0; i < NN_H*NN_IN; i++)  gHost_W1[i] = *src++;
    for(i = 0; i < NN_H; i++)        gHost_b1[i] = *src++;
    for(i = 0; i < NN_OUT*NN_H; i++) gHost_W2[i] = *src++;
    for(i = 0; i < NN_OUT; i++)      gHost_b2[i] = *src++;
}
// One in-place evolution step over all NN parameters, treated as a
// ring: each parameter decays slightly (0.982), is pulled toward a
// blend of its left/right ring neighbors, drifts sinusoidally with
// phase, gets small deterministic hash noise, and is clamped to
// [-3,3]. A second pass then exchanges small fractions of the four
// per-array means so W1/b1/W2/b2 stay loosely coupled.
static void evolve_params_accumulated(float phase, unsigned int seed)
{
float theta[NN_PARAM_COUNT];
float nextv[NN_PARAM_COUNT];
pack_params(theta);
for(int i=0;i<NN_PARAM_COUNT;i++) {
// Ring neighbors with wrap-around at both ends.
int l = (i == 0) ? (NN_PARAM_COUNT - 1) : (i - 1);
int r = (i + 1) % NN_PARAM_COUNT;
float coupled = 0.55f*theta[l] + 0.45f*theta[r];
float drift = 0.015f*sinf(0.8f*phase + 0.17f*(float)i);
// Per-index noise: LCG-style scramble of the seed, then avalanche.
unsigned int h = mix_u32(seed ^ (unsigned int)(i*747796405u + 2891336453u));
float noise = (((float)(h & 0xFFFFu) / 65535.0f) * 2.0f - 1.0f) * 0.010f;
float v = 0.982f*theta[i] + 0.022f*coupled + drift + noise;
if(v > 3.0f) v = 3.0f;
if(v < -3.0f) v = -3.0f;
nextv[i] = v;
}
unpack_params(nextv);
// Mean-exchange pass: nudge each array by the (signed) difference of
// a neighboring array's mean, keeping overall magnitudes bounded.
float mW1 = 0.0f, mb1 = 0.0f, mW2 = 0.0f, mb2 = 0.0f;
for(int i=0;i<NN_H*NN_IN;i++) mW1 += gHost_W1[i];
for(int i=0;i<NN_H;i++) mb1 += gHost_b1[i];
for(int i=0;i<NN_OUT*NN_H;i++) mW2 += gHost_W2[i];
for(int i=0;i<NN_OUT;i++) mb2 += gHost_b2[i];
mW1 /= (float)(NN_H*NN_IN);
mb1 /= (float)NN_H;
mW2 /= (float)(NN_OUT*NN_H);
mb2 /= (float)NN_OUT;
for(int i=0;i<NN_H*NN_IN;i++) gHost_W1[i] += 0.003f*(mb1 - mW1);
for(int i=0;i<NN_H;i++) gHost_b1[i] += 0.004f*(mW2 - mb1);
for(int i=0;i<NN_OUT*NN_H;i++) gHost_W2[i] += 0.003f*(mb2 - mW2);
for(int i=0;i<NN_OUT;i++) gHost_b2[i] += 0.004f*(mW1 - mb2);
}
// ===========================================================
// WinMain
// ===========================================================
// Create the render window, initialize GL then CL (CL needs the live
// GL context for sharing), and run a PeekMessage loop that renders a
// frame per iteration. The loop also honors two external stop
// conditions: Zorro's EXITRUN flag and an optional wall-clock limit
// from the MENDB02_MAX_SECONDS environment variable (0 = unlimited).
int WINAPI WinMain(HINSTANCE hInst, HINSTANCE, LPSTR, int)
{
const int maxSeconds = read_env_int("MENDB02_MAX_SECONDS", 0);
ULONGLONG startTick = GetTickCount64();
const char* szClass = "Mendb02NNCLGLClass";
// Unregister first so repeated invocations from the DLL host do not
// fail RegisterClassExA with a stale class.
UnregisterClassA(szClass, hInst);
WNDCLASSEXA wc;
ZeroMemory(&wc, sizeof(wc));
wc.cbSize = sizeof(wc);
wc.style = CS_HREDRAW | CS_VREDRAW;
wc.lpfnWndProc = WndProc;
wc.hInstance = hInst;
wc.hCursor = LoadCursor(NULL, IDC_ARROW);
wc.lpszClassName = szClass;
RegisterClassExA(&wc);
// Size the window so the client area is exactly gW x gH.
RECT r;
r.left=0; r.top=0; r.right=gW; r.bottom=gH;
AdjustWindowRect(&r, WS_OVERLAPPEDWINDOW, FALSE);
HWND hwnd = CreateWindowExA(
0, szClass, "NN Render (LibTorch weights + OpenCL + OpenGL)",
WS_OVERLAPPEDWINDOW,
100, 100, (r.right-r.left), (r.bottom-r.top),
0, 0, hInst, 0);
if(!hwnd) return 0;
ShowWindow(hwnd, SW_SHOW);
UpdateWindow(hwnd);
if(!gl_init_wgl(hwnd))
{
MessageBoxA(hwnd, "OpenGL init failed", "Error", MB_OK);
gl_release_all();
return 0;
}
if(!cl_init_glshare())
{
MessageBoxA(hwnd, "OpenCL GL-sharing init failed", "Error", MB_OK);
cl_release_all();
gl_release_all();
return 0;
}
MSG msg;
ZeroMemory(&msg, sizeof(msg));
while(msg.message != WM_QUIT)
{
// Drain pending messages, then render one frame (game-loop style).
while(PeekMessage(&msg, NULL, 0, 0, PM_REMOVE))
{
TranslateMessage(&msg);
DispatchMessage(&msg);
}
// Zorro shutdown request: close the window gracefully.
if(is(EXITRUN) && !is(FIRSTINITRUN)) {
PostMessage(hwnd, WM_CLOSE, 0, 0);
}
if(!IsWindow(hwnd))
break;
// Optional time limit (for unattended runs).
if(maxSeconds > 0 && (GetTickCount64() - startTick) >= (ULONGLONG)maxSeconds * 1000ULL) {
PostMessage(hwnd, WM_CLOSE, 0, 0);
}
RenderFrame();
}
cl_release_all();
gl_release_all();
gHwnd = 0;
return 0;
}
// ===========================================================
// Input
// ===========================================================
/* Window procedure: ESC or F12 requests close; WM_CLOSE destroys the
   window; WM_DESTROY posts WM_QUIT to end the message loop. All
   other messages go to DefWindowProc. */
LRESULT CALLBACK WndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
{
    if(msg == WM_CLOSE) {
        DestroyWindow(hWnd);
        return 0;
    }
    if(msg == WM_KEYDOWN) {
        if(wParam == VK_ESCAPE || wParam == VK_F12)
            PostMessage(hWnd, WM_CLOSE, 0, 0);
        return 0;
    }
    if(msg == WM_DESTROY) {
        PostQuitMessage(0);
        return 0;
    }
    return DefWindowProc(hWnd, msg, wParam, lParam);
}
// ===========================================================
// Zorro DLL entry
// ===========================================================
// Zorro script entry point: configure a minimal single-cycle run and
// launch the render window once. The static 'done' flag (reset on
// FIRSTINITRUN) prevents WinMain from being re-entered on subsequent
// run() cycles; WinMain blocks until its window closes.
DLLFUNC int main()
{
NumTotalCycles = 1;
NumWFOCycles = 1;
NumSampleCycles = 1;
// Disable test/optimization machinery; this script only renders.
set(TESTNOW|OFF,ALLCYCLES|OFF,PARAMETERS|OFF,FACTORS|OFF,RULES|OFF);
static int done = 0;
if(is(FIRSTINITRUN))
done = 0;
if(done)
return 0;
(void)WinMain(GetModuleHandleA(NULL), NULL, GetCommandLineA(), SW_SHOWDEFAULT);
done = 1;
return 0;
}
210
65,908
Read More
|
|
|
03/05/26 19:35
Hi, please advise if you know the reason for this problem:
When running "Trade" the Z9 script with the newest 3.01.4 Zorro version, I receive the Error 047: SPY not enough ticks. Similarly for all other assets in the Assetlist.csv for Z9. One line before the Error, Zorro reports an up to date history file and 143 ticks read.
LogFile (just one Asset, it is the same for all of them): \History\ORCL.t6 history up to date. ORCL 143 ticks read Error 047: ORCL not enough ticks
With the same assetlist and history data Zorro 2.70 runs the Z9 trade with no problems.
Many thanks for any advice or hint. Cheers Jaroslav
6
214
Read More
|
|
03/05/26 02:58
// Fill corrMatrix (N_ASSETS x N_ASSETS, symmetric, unit diagonal)
// from pairwise feature correlations. Prefers the OpenCL path when
// available; any GPU runtime failure permanently disables OpenCL for
// this session and falls back to the CPU implementation.
void computeCorrelationMatrix() {
if(openCL.ready) {
// Pack the feature window into the float staging buffer for the GPU.
buildFeatLinear();
for(int i=0;i<N_ASSETS*N_ASSETS;i++) corrLinear[i] = 0.0f;
int ok = openCL.computeCorrelationMatrixCL(
featLinear.data,
corrLinear.data,
N_ASSETS,
FEAT_N,
FEAT_WINDOW
);
if(ok) {
// Copy the packed float results into the fvar matrix, mirroring
// the upper triangle so the matrix is symmetric with 1s on the
// diagonal (the kernel only fills pair entries).
for(int i=0;i<N_ASSETS*N_ASSETS;i++) corrMatrix[i] = (fvar)0;
for(int a=0; a<N_ASSETS; a++){
corrMatrix[a*N_ASSETS + a] = (fvar)1.0;
for(int b=a+1; b<N_ASSETS; b++){
float c = corrLinear[a*N_ASSETS + b];
corrMatrix[a*N_ASSETS + b] = (fvar)c;
corrMatrix[b*N_ASSETS + a] = (fvar)c;
}
}
return;
}
// GPU failed at runtime: disable it for the rest of the session.
printf("OpenCL: runtime fail -> CPU fallback\n");
openCL.ready = 0;
}
computeCorrelationMatrixCPU();
}
// Build distMatrix by blending two notions of asset distance:
// - correlation distance: 1 - |corr|, so synchronized pairs are close
// - exposure distance: currency-overlap distance from exposureTable
// weighted by LAMBDA_META (corr) vs 1-LAMBDA_META (exposure).
// Diagonal is 0. Computation is done in double and cast to fvar.
void computeDistanceMatrix() {
for(int i=0;i<N_ASSETS;i++){
for(int j=0;j<N_ASSETS;j++){
if(i == j) {
distMatrix[i*N_ASSETS + j] = (fvar)0;
} else {
fvar corrDist = (fvar)1.0 - (fvar)fabs((double)corrMatrix[i*N_ASSETS + j]);
fvar expDist = (fvar)exposureTable.getDist(i, j);
fvar blended = (fvar)LAMBDA_META * corrDist + (fvar)(1.0 - (double)LAMBDA_META) * expDist;
distMatrix[i*N_ASSETS + j] = blended;
}
}
}
}
// All-pairs shortest paths over distMatrix (Floyd-Warshall), then
// derive per-asset compactness = 1/(1+sum of reachable shortest-path
// distances) and refresh per-asset entropy.
// Fix: the local matrix was dimensioned with a hard-coded 28; it now
// uses N_ASSETS like every loop bound, so changing the universe size
// cannot silently overflow the array.
void floydWarshall() {
fvar d[N_ASSETS][N_ASSETS];
// Seed with direct distances; zero the diagonal and treat negative
// entries as unreachable.
for(int i=0;i<N_ASSETS;i++){
for(int j=0;j<N_ASSETS;j++){
d[i][j] = distMatrix[i*N_ASSETS + j];
if(i == j) d[i][j] = (fvar)0;
if(d[i][j] < (fvar)0) d[i][j] = (fvar)INF;
}
}
// Standard relaxation: allow paths through intermediate node k.
for(int k=0;k<N_ASSETS;k++){
for(int i=0;i<N_ASSETS;i++){
for(int j=0;j<N_ASSETS;j++){
if(d[i][k] < (fvar)INF && d[k][j] < (fvar)INF) {
fvar nk = d[i][k] + d[k][j];
if(nk < d[i][j]) d[i][j] = nk;
}
}
}
}
for(int i=0;i<N_ASSETS;i++){
fvar w = 0;
// NOTE(review): only j>i is summed, so later-indexed assets sum
// fewer terms and get systematically higher compactness; if the
// intent is "distance to all others", this should loop j != i.
// Preserved as-is pending confirmation.
for(int j=i+1;j<N_ASSETS;j++){
if(d[i][j] < (fvar)INF) w += d[i][j];
}
if(w > (fvar)0) compactness[i] = (fvar)(1.0 / (1.0 + (double)w));
else compactness[i] = (fvar)0;
entropy[i] = computeEntropy(i);
}
}
// Score each asset on the "crowd-averse" objective:
// raw = ALPHA*entropy + GAMMA*own-compactness - BETA*mean-compactness
// of connected peers (the "crowding" penalty), clipped to +/-30 to
// keep exp() finite, then squashed through a sigmoid into (0,1).
void computeScores() {
for(int i=0;i<N_ASSETS;i++){
fvar coupling = 0;
int count = 0;
// Average compactness of all reachable peers (finite distance).
for(int j=0;j<N_ASSETS;j++){
if(i != j && distMatrix[i*N_ASSETS + j] < (fvar)INF) {
coupling += compactness[j];
count++;
}
}
fvar pCouple = 0;
if(count > 0) pCouple = coupling / (fvar)count;
else pCouple = (fvar)0;
fvar C_A = compactness[i];
fvar Ent = entropy[i];
fvar rawScore = (fvar)ALPHA * Ent + (fvar)GAMMA * C_A - (fvar)BETA * pCouple;
if(rawScore > (fvar)30) rawScore = (fvar)30;
if(rawScore < (fvar)-30) rawScore = (fvar)-30;
scores[i] = (fvar)(1.0 / (1.0 + exp(-(double)rawScore)));
}
}
// Summarize the current system state for the learning controller:
// cross-sectional means of score, compactness, and the volatility
// feature (feature index 2, most recent sample). Regime fields are
// zeroed here; the controller fills them in later.
LearningSnapshot buildSnapshot() {
LearningSnapshot s;
s.meanScore = 0; s.meanCompactness = 0; s.meanVol = 0;
for(int i=0;i<N_ASSETS;i++) {
s.meanScore += (double)scores[i];
s.meanCompactness += (double)compactness[i];
// featSoA.get(2, i, 0): presumably (feature, asset, lag) with
// lag 0 = latest — TODO confirm against FeatureBuffer.
s.meanVol += (double)featSoA.get(2, i, 0);
}
s.meanScore /= (double)N_ASSETS;
s.meanCompactness /= (double)N_ASSETS;
s.meanVol /= (double)N_ASSETS;
s.regime = 0;
s.regimeConfidence = 0;
return s;
}
// Per-bar driver: always refresh features; every UPDATE_EVERY bars
// run the full pipeline (correlation -> distance -> clustering ->
// shortest paths -> scores -> controller -> optional autoencoder
// novelty scaling) and print the current Top-K.
void onBar() {
barCount++;
for(int i=0;i<N_ASSETS;i++) computeFeatures(i);
if(barCount % UPDATE_EVERY == 0) {
updateCount++;
computeCorrelationMatrix();
computeDistanceMatrix();
// NOTE(review): two identical #if USE_COMMUNITY blocks back to back;
// could be merged, kept separate here to preserve the code exactly.
#if USE_COMMUNITY
hclust.update(distMatrix.data);
#endif
#if USE_COMMUNITY
comm.update(corrMatrix.data, distMatrix.data);
#endif
floydWarshall();
computeScores();
controller.onUpdate(buildSnapshot(), scores.data, N_ASSETS, updateCount);
#if USE_AE
// Build the 8-dim autoencoder state vector (means + controller
// knobs + progress indicators) and scale scores by the novelty
// detector's risk factor, clipped to [0,1].
double aeState[AE_INPUT_DIM];
double ms=0, mc=0, mv=0;
for(int i=0;i<N_ASSETS;i++){ ms += (double)scores[i]; mc += (double)compactness[i]; mv += (double)featSoA.get(2, i, 0); }
ms /= (double)N_ASSETS; mc /= (double)N_ASSETS; mv /= (double)N_ASSETS;
aeState[0] = ms;
aeState[1] = mc;
aeState[2] = mv;
aeState[3] = controller.scoreScale;
aeState[4] = (double)controller.dynamicTopK;
aeState[5] = (double)barCount / (double)(LookBack + 1);
aeState[6] = (double)updateCount / 1000.0;
aeState[7] = (double)openCL.ready;
double reconErr = ae.infer(aeState);
novelty.update(reconErr);
novelty.apply(&controller.dynamicTopK, &controller.scoreScale);
for(int i=0;i<N_ASSETS;i++){{
double s = (double)scores[i] * novelty.riskScale;
if(s > 1.0) s = 1.0;
if(s < 0.0) s = 0.0;
scores[i] = (fvar)s;
}}
#endif
printTopK();
}
}
// Rank assets by score (partial selection sort: only the first topN
// positions are finalized), optionally widen/narrow topN based on
// smoothed community modularity, then — every 10th update — print a
// diversified selection that caps picks per coarse/fine cluster.
void printTopK() {
int indices[N_ASSETS];
for(int i=0;i<N_ASSETS;i++) indices[i] = i;
int topN = controller.dynamicTopK;
#if USE_COMMUNITY
// Weak community structure -> fewer picks; strong -> more (<= TOP_K).
if(comm.qSmooth < (fvar)COMM_Q_LOW && topN > 2) topN--;
if(comm.qSmooth > (fvar)COMM_Q_HIGH && topN < TOP_K) topN++;
#endif
// Partial selection sort: places the topN highest scores in
// indices[0..topN-1]; the remainder stays unsorted.
for(int i=0;i<topN;i++){
for(int j=i+1;j<N_ASSETS;j++){
if(scores[indices[j]] > scores[indices[i]]) {
int tmp = indices[i];
indices[i] = indices[j];
indices[j] = tmp;
}
}
}
if(updateCount % 10 == 0) {
printf("===CrowdAverse_v13 Top-K(update#%d,OpenCL=%d)===\n",
updateCount, openCL.ready);
#if USE_COMMUNITY
printf(" communities=%d Q=%.4f\n", comm.nCommunities, (double)comm.qSmooth);
#endif
int selected[N_ASSETS];
int selCount = 0;
#if USE_COMMUNITY
// Diversified pick: first pass takes at most one asset per coarse
// cluster (in score order); second pass fills remaining slots with
// skipped assets, capped at fineCap per fine cluster.
int coarseUsed[HCLUST_COARSE_K];
int fineTake[HCLUST_FINE_K];
int fineCap = (topN + HCLUST_FINE_K - 1) / HCLUST_FINE_K;
for(int c=0;c<HCLUST_COARSE_K;c++) coarseUsed[c] = 0;
for(int c=0;c<HCLUST_FINE_K;c++) fineTake[c] = 0;
for(int i=0;i<topN;i++){
int idx = indices[i];
int cid = comm.clusterCoarse[idx];
if(cid < 0 || cid >= HCLUST_COARSE_K) cid = 0; // defensive clamp
if(coarseUsed[cid]) continue;
coarseUsed[cid] = 1;
selected[selCount++] = idx;
int fid = comm.clusterFine[idx];
if(fid < 0 || fid >= HCLUST_FINE_K) fid = 0;
fineTake[fid]++;
}
for(int i=0;i<topN && selCount<topN;i++){
int idx = indices[i];
int dup = 0;
for(int k=0;k<selCount;k++) if(selected[k]==idx){ dup=1; break; }
if(dup) continue;
int fid = comm.clusterFine[idx];
if(fid < 0 || fid >= HCLUST_FINE_K) fid = 0;
if(fineTake[fid] >= fineCap) continue;
selected[selCount++] = idx;
fineTake[fid]++;
}
#else
for(int i=0;i<topN;i++) selected[selCount++] = indices[i];
#endif
for(int i=0;i<selCount;i++){
int idx = selected[i];
printf(" %d.%s: score=%.4f, C=%.4f, Ent=%.6f\n", i+1, ASSET_NAMES[idx], (double)scores[idx], (double)compactness[idx], (double)entropy[idx]);
}
}
}
};
// ---------------------------- Zorro DLL entry ----------------------------
static CrowdAverseStrategy* S = NULL;
// Zorro per-bar entry point. Lifecycle:
// - INITRUN: configure bars/lookback, select an asset so history is
//   loaded, and lazily construct the strategy singleton.
// - EXITRUN: shut down and free the singleton.
// - otherwise: run the per-bar logic once enough lookback exists.
DLLFUNC void run()
{
if(is(INITRUN)) {
BarPeriod = 60;
// Lookback must cover the rolling feature window plus a margin.
LookBack = max(LookBack, FEAT_WINDOW + 50);
asset((char*)ASSET_NAMES[0]);
if(!S) {
S = new CrowdAverseStrategy();
S->init();
}
}
if(is(EXITRUN)) {
if(S) {
S->shutdown();
delete S;
S = NULL;
}
return;
}
if(!S || Bar < LookBack)
return;
S->onBar();
}
210
65,908
Read More
|
|
03/05/26 02:49
This code is a Zorro strategy DLL that tries to pick a small set of currency pairs that are “least crowded” and most structurally independent, while constantly adapting its selection aggressiveness based on an internal learning controller. The strategy runs on an hourly bar schedule and treats the market as a network: each asset is a node, and the links between nodes represent similarity, either because the assets move together or because they share currency exposure. The goal is to score each asset as attractive when it is internally “coherent” but not overly synchronized with the rest of the crowd, then select a diversified Top K list. A major theme is performance and robustness: the heavy similarity computation can be accelerated by OpenCL if available, but the code always has a complete CPU fallback so it never depends on the GPU being present. At startup, the strategy defines a fixed universe of twenty eight FX pairs and a set of currencies used for exposure logic. It also defines a large collection of feature, learning, and clustering toggles. A custom “slab allocator” is used for predictable memory allocation and fast reuse, which matters in a strategy loop. A “feature buffer” is implemented as a structure of arrays ring buffer: for each feature and each asset it stores a rolling window of feature values. That is the raw time series substrate from which correlations are built. Every bar, the code computes nine features per asset: short return, medium return, volatility, a z style deviation from older price, a range style displacement, a flow proxy that mixes return and volatility, a crude regime flag from volatility, a volatility of volatility proxy, and a persistence proxy based on absolute return. These features are pushed into the rolling buffer, so the strategy always has a recent window of history to compare assets consistently. Every few bars, controlled by the update interval, the engine performs the expensive network step. 
First it builds a correlation matrix across assets. Conceptually, for each pair of assets and for each feature, it compares the two feature histories over the rolling window, measures how aligned they are, and averages that alignment across all features. This produces a single similarity value per asset pair that reflects multi feature co movement rather than just price correlation. On CPU this is a triple nested loop over features, assets, and time, which is heavy. The OpenCL path exists specifically to accelerate this step. The OpenCL backend is written in a defensive style: it loads OpenCL dynamically at runtime, queries a platform and a device, creates a context and queue, compiles a tiny kernel at runtime, and allocates two buffers: one for the packed feature window and one for the output correlations. If any step fails, it prints why and returns to CPU mode. The kernel itself assigns one work item to each asset pair and computes the correlation contribution across features and time. The GPU uses floats for speed, returns a packed correlation matrix, and the host copies it into the strategy’s correlation matrix. If the kernel call fails at runtime, OpenCL is disabled and the CPU path takes over. Once correlations exist, the strategy converts similarity into distance. The distance matrix is built from two ingredients. The first is correlation distance, which increases as absolute correlation decreases, pushing highly synchronized pairs farther apart. The second is exposure distance from the exposure table, intended to represent how different the currency exposure is between two pairs. The code blends these two distances with a configurable meta mixing weight. This is important because in FX, pairs can be correlated simply because they share USD or JPY exposure, so an exposure aware distance discourages choosing many pairs that are effectively the same trade. The resulting distance matrix is then treated as a weighted graph. 
To derive per asset “compactness,” the code runs a shortest path algorithm over this graph. It copies the distance matrix into a local square array and performs a Floyd Warshall style relaxation to compute all pairs shortest paths. That converts direct distances into effective distances that consider indirect paths through other assets. After that, each asset’s compactness is computed from the sum of its distances to others. Assets with small total distance to the rest are considered “central” or tightly connected in the network, while those with large total distance are more isolated. This compactness becomes one part of the scoring. In parallel, the strategy computes an entropy like measure per asset from the variance of its short return feature over the window. That entropy is a crude proxy for unpredictability or dispersion in that asset’s recent behavior. The scoring step implements the “crowd averse” idea directly. For each asset, it computes a coupling term: the average compactness of the other assets it is connected to. If an asset lives in a region of the network where everything else is very compact and tightly linked, it is penalized as being “crowded.” The raw score combines three forces: reward higher compactness for the asset itself, reward higher entropy as a potential source of opportunity, and penalize high coupling to the crowd. This raw score is then squashed into a zero to one score so it can be compared and scaled cleanly. At this stage, the engine has a score for every asset at the update point. On top of these network derived scores, the strategy layers a large “controller” that tries to adapt how aggressive the selection should be and how to scale scores depending on inferred regime. The controller begins with a simple unsupervised clustering over three snapshot statistics: mean score across assets, mean compactness across assets, and mean volatility. 
It maintains three centroids and assigns the current snapshot to the closest centroid, producing a coarse regime label and a confidence estimate. Then it optionally builds a lightweight PCA like representation over a sliding window of these snapshots. It normalizes features, derives three latent coordinates by fixed linear combinations, and estimates which latent dominates and how fast the dominance changes. Those two signals, dominance and rotation, are treated like “stability versus transition” indicators and are used to adjust aggressiveness. The controller can also use probabilistic regime models. A Gaussian mixture model is implemented with diagonal variances and incremental updating. It produces regime probabilities, an entropy measure over those probabilities, and a confidence. A hidden Markov style filter is also implemented, including a transition matrix and online smoothing of emission parameters; it produces posterior probabilities, entropy, and an implied switch probability. When uncertainty is high or switching seems likely, the controller enters a cooldown where it becomes more conservative. In addition, a k means model runs with an online update and tracks stability based on how surprising the current distance to centroid is compared to its own recent distribution. When stability is low, risk is reduced further. The controller translates these regime estimates into four adaptive multipliers that act like tuning knobs for how the base scoring philosophy should behave. It uses preset profiles per regime and blends them using regime probabilities. It then derives a global risk scale from regime uncertainty, so that high uncertainty means smaller risk. A tiny reinforcement learner sits on top, tracking four discrete actions that correspond to different selection intensities, updating action values based on whether the mean score improved since last update. This adds a slow meta adjustment layer that can prefer actions that historically improved outcomes. 
A novelty detector adds another safety valve. An autoencoder like model takes a compact state vector that summarizes the system at each update, including mean score, mean compactness, mean volatility, controller scaling, current Top K, progress through the run, and whether OpenCL is active. It reconstructs the input through a small latent mapping and measures reconstruction error. If reconstruction error jumps above thresholds, the novelty controller interprets that as an unusual condition and forces risk down, reducing the number of selections and scaling scores lower. This makes the strategy more cautious precisely when the internal system state deviates from what it has recently considered normal. There are also clustering and diversification helpers that influence how Top K is assembled rather than how each asset is scored. A hierarchical clustering model can cut the asset universe into coarse and fine clusters based on distances. A community detection model builds a graph from correlation magnitudes, optionally prunes to top links for determinism, runs a label propagation style update, and computes a modularity like value. That modularity is smoothed and used as another “market structure strength” signal: if community structure is weak, Top K is reduced; if community structure is strong, Top K may be increased, because diversification across strong communities is easier. The final selection step sorts assets by score, then tries to pick from different communities first, using coarse and fine caps so the chosen set is diversified rather than all from the same cluster. Operationally, the run function integrates with Zorro’s lifecycle. On init it sets bar period and lookback, creates the strategy object, and initializes all buffers and models. On exit it shuts everything down cleanly and frees resources. 
During the run, once enough bars exist, every bar computes features and every update interval computes correlations, distances, shortest paths, scores, learns from the snapshot, applies novelty and regime scaling, and prints the current Top K list periodically. The overall behavior is therefore a looping pipeline: feature extraction produces a multi dimensional history, correlation converts history into a similarity network, distance and shortest paths convert the network into structure measures, scoring transforms structure into “crowd averse attractiveness,” and the learning controller adjusts how selective and risk averse the engine should be as conditions change. If you want, I can also summarize it as a “flow chart” of stages or point out the single most expensive parts on CPU and how to reduce them (besides OpenCL). // TGr06B_CrowdAverse_v13.cpp - Zorro64 Strategy DLL
// Strategy B v13: Crowd-Averse with MX06 OOP + OpenCL + Learning Controller
//
// Notes:
// - Keeps full CPU fallback.
// - OpenCL is optional: if OpenCL.dll missing / no device / kernel build fails -> CPU path.
// - OpenCL accelerates the heavy correlation matrix step by offloading pairwise correlations.
// - Correlation is computed in float on GPU; results are stored back into fvar corrMatrix.
#define _CRT_SECURE_NO_WARNINGS
#include <zorro.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <windows.h>
#include <stddef.h>
#define INF 1e30
#define EPS 1e-12
#define N_ASSETS 28
#define FEAT_N 9
#define FEAT_WINDOW 200
#define UPDATE_EVERY 5
#define TOP_K 5
#define ALPHA 0.1
#define BETA 0.3
#define GAMMA 2.5
#define LAMBDA_META 0.5
#define USE_ML 1
#define USE_UNSUP 1
#define USE_RL 1
#define USE_PCA 1
#define USE_GMM 1
#define USE_HMM 1
#define HMM_K 3
#define HMM_DIM 8
#define HMM_VAR_FLOOR 1e-4
#define HMM_SMOOTH 0.02
#define HMM_ENTROPY_TH 0.85
#define HMM_SWITCH_TH 0.35
#define HMM_MIN_RISK 0.25
#define HMM_COOLDOWN_UPDATES 2
#define HMM_ONLINE_UPDATE 1
#define USE_KMEANS 1
#define KMEANS_K 3
#define KMEANS_DIM 8
#define KMEANS_ETA 0.03
#define KMEANS_DIST_EMA 0.08
#define KMEANS_STABILITY_MIN 0.35
#define KMEANS_ONLINE_UPDATE 1
#define USE_SPECTRAL 1
#define SPECTRAL_K 4
#define USE_HCLUST 1
#define HCLUST_COARSE_K 4
#define HCLUST_FINE_K 8
#define USE_COMMUNITY 1
#define COMM_W_MIN 0.15
#define COMM_TOPM 6
#define COMM_ITERS 4
#define COMM_Q_EMA 0.20
#define COMM_Q_LOW 0.20
#define COMM_Q_HIGH 0.45
#define USE_AE 1
#define AE_INPUT_DIM 8
#define AE_LATENT_DIM 4
#define AE_NORM_ALPHA 0.02
#define AE_ERR_EMA 0.10
#define AE_Z_LOW 1.0
#define AE_Z_HIGH 2.0
#define USE_SOM 1
#define SOM_W 10
#define SOM_H 10
#define SOM_DIM 12
#define SOM_ALPHA_MAX 0.30
#define SOM_ALPHA_MIN 0.05
#define SOM_SIGMA_MAX 5.0
#define SOM_SIGMA_MIN 1.0
#define SOM_CONF_MIN 0.15
#define SOM_ONLINE_UPDATE 1
#define GMM_K 3
#define GMM_DIM 8
#define GMM_ALPHA 0.02
#define GMM_VAR_FLOOR 1e-4
#define GMM_ENTROPY_COEFF 0.45
#define GMM_MIN_RISK 0.25
#define GMM_ONLINE_UPDATE 1
#define STRATEGY_PROFILE 1
#define PCA_DIM 6
#define PCA_COMP 3
#define PCA_WINDOW 128
#define PCA_REBUILD_EVERY 4
#ifdef TIGHT_MEM
typedef float fvar;
#else
typedef double fvar;
#endif
// Tradable FX pairs. N_ASSETS (28) must equal the number of entries here:
// 8 currencies give C(8,2) = 28 pairs. The previous list contained only 27
// names, so any loop indexing ASSET_NAMES[0..N_ASSETS-1] read one entry
// past the end of the array (undefined behavior). "CHFJPY" was the missing
// pair and has been added.
static const char* ASSET_NAMES[] = {
"EURUSD","GBPUSD","USDCHF","USDJPY","AUDUSD","AUDCAD","AUDCHF","AUDJPY","AUDNZD",
"CADJPY","CADCHF","CHFJPY","EURAUD","EURCAD","EURCHF","EURGBP","EURJPY","EURNZD",
"GBPAUD","GBPCAD","GBPCHF","GBPJPY","GBPNZD","NZDCAD","NZDCHF","NZDJPY","NZDUSD",
"USDCAD"
};
// The 8 currencies spanned by the pairs above.
static const char* CURRENCIES[] = {"EUR","GBP","USD","CHF","JPY","AUD","CAD","NZD"};
#define N_CURRENCIES 8
// ---------------------------- Exposure Table ----------------------------
// Per-asset currency exposure flags plus a pairwise exposure-distance
// matrix. Both tables start out all-zero; other components fill them in.
struct ExposureTable {
int exposure[N_ASSETS][N_CURRENCIES];
double exposureDist[N_ASSETS][N_ASSETS];
// Reset both tables to zero (0 flags, 0.0 distances).
void init() {
memset(exposure, 0, sizeof(exposure));
memset(exposureDist, 0, sizeof(exposureDist));
}
// Exposure distance between assets i and j (no bounds checking).
inline double getDist(int i,int j) const { return exposureDist[i][j]; }
};
// ---------------------------- Slab Allocator ----------------------------
// Owns one flat, zero-initialized array of trivially-copyable elements.
// Fixes vs. the original:
// - if malloc fails, capacity stays 0 so size queries match reality
//   (before, capacity kept the requested size while data was NULL);
// - copying is disabled: the implicit copy would duplicate the raw
//   pointer and cause a double free in ~SlabAllocator().
template<typename T>
class SlabAllocator {
public:
T* data; // malloc'd storage, NULL when empty
int capacity; // number of T elements in data (0 when empty)
SlabAllocator() : data(NULL), capacity(0) {}
~SlabAllocator() { shutdown(); }
// Allocate 'size' elements, zero-filled. Frees any previous allocation.
// On allocation failure (or size <= 0) the allocator stays empty.
void init(int size) {
shutdown();
if(size <= 0) return;
data = (T*)malloc((size_t)size * sizeof(T));
if(!data) return; // allocation failed: remain empty (capacity == 0)
capacity = size;
memset(data, 0, (size_t)capacity * sizeof(T));
}
// Release storage; safe to call repeatedly.
void shutdown() {
free(data); // free(NULL) is a no-op
data = NULL;
capacity = 0;
}
// Unchecked element access (caller guarantees 0 <= i < capacity).
T& operator[](int i) { return data[i]; }
const T& operator[](int i) const { return data[i]; }
private:
SlabAllocator(const SlabAllocator&); // non-copyable (not implemented)
SlabAllocator& operator=(const SlabAllocator&); // non-assignable
};
// ---------------------------- Feature Buffer (SoA ring) ----------------------------
// Ring buffer of feature history laid out structure-of-arrays: one
// contiguous window of windowSize samples per (feature, asset) series.
struct FeatureBufferSoA {
SlabAllocator<fvar> buffer; // FEAT_N * assets * window fvars, zero-filled
int windowSize; // samples kept per (feature, asset) series
int currentIndex; // next ring slot to write (shared by all series)
// Allocate the backing slab and reset the cursor.
// NOTE(review): 'assets' sizes the allocation, but offset() below indexes
// with the global N_ASSETS constant; calling init() with assets != N_ASSETS
// would make offset() address memory outside the slab. Confirm all callers
// pass N_ASSETS.
void init(int assets, int window) {
windowSize = window;
currentIndex = 0;
buffer.init(FEAT_N * assets * window);
}
void shutdown() { buffer.shutdown(); }
// Linear index of (feature, asset, ring slot t) in the slab.
inline int offset(int feat,int asset,int t) const {
return (feat * N_ASSETS + asset) * windowSize + t;
}
// Store one sample at the shared cursor, then advance the cursor.
// NOTE(review): currentIndex advances once per push(), i.e. per write
// rather than per bar, and the same cursor is shared by every
// (feature, asset) series; get()'s time indexing is only consistent if
// the caller pushes all series in a fixed order each bar. Verify against
// the feature-update loop.
void push(int feat,int asset,fvar value) {
buffer[offset(feat, asset, currentIndex)] = value;
currentIndex = (currentIndex + 1) % windowSize;
}
// t=0 => most recent
fvar get(int feat,int asset,int t) const {
int idx = (currentIndex - 1 - t + windowSize) % windowSize;
return buffer[offset(feat, asset, idx)];
}
};
// ---------------------------- Minimal OpenCL (dynamic) ----------------------------
typedef struct _cl_platform_id* cl_platform_id;
typedef struct _cl_device_id* cl_device_id;
typedef struct _cl_context* cl_context;
typedef struct _cl_command_queue* cl_command_queue;
typedef struct _cl_program* cl_program;
typedef struct _cl_kernel* cl_kernel;
typedef struct _cl_mem* cl_mem;
typedef unsigned int cl_uint;
typedef int cl_int;
typedef unsigned long long cl_ulong;
typedef size_t cl_bool;
#define CL_SUCCESS 0
#define CL_DEVICE_TYPE_CPU (1ULL << 1)
#define CL_DEVICE_TYPE_GPU (1ULL << 2)
#define CL_MEM_READ_ONLY (1ULL << 2)
#define CL_MEM_WRITE_ONLY (1ULL << 1)
#define CL_MEM_READ_WRITE (1ULL << 0)
#define CL_TRUE 1
#define CL_FALSE 0
#define CL_PROGRAM_BUILD_LOG 0x1183
// Optional OpenCL backend, loaded dynamically from OpenCL.dll at runtime so
// the strategy DLL still works on machines without OpenCL installed.
// ready == 1 only after the platform, device, context, queue, program,
// kernel and both device buffers were all created successfully; any failure
// leaves ready == 0 and the caller stays on the CPU correlation path.
// Partially created objects are released later by shutdown().
class OpenCLBackend {
public:
HMODULE hOpenCL; // handle to OpenCL.dll (NULL when unavailable)
int ready; // 1 => kernel + buffers usable
cl_platform_id platform;
cl_device_id device;
cl_context context;
cl_command_queue queue;
cl_program program;
cl_kernel kCorr; // pairwise-correlation kernel
cl_mem bufFeat; // device input: feature history (float)
cl_mem bufCorr; // device output: correlation matrix (float)
int featBytes; // size of bufFeat in bytes
int corrBytes; // size of bufCorr in bytes
// Entry points resolved from OpenCL.dll via GetProcAddress.
cl_int (*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*);
cl_int (*clGetDeviceIDs)(cl_platform_id, cl_ulong, cl_uint, cl_device_id*, cl_uint*);
cl_context (*clCreateContext)(void*, cl_uint, const cl_device_id*, void*, void*, cl_int*);
cl_command_queue (*clCreateCommandQueue)(cl_context, cl_device_id, cl_ulong, cl_int*);
cl_program (*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*);
cl_int (*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void*, void*);
cl_int (*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_uint, size_t, void*, size_t*);
cl_kernel (*clCreateKernel)(cl_program, const char*, cl_int*);
cl_int (*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*);
cl_mem (*clCreateBuffer)(cl_context, cl_ulong, size_t, void*, cl_int*);
cl_int (*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const void*, void*);
cl_int (*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const void*, void*);
cl_int (*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const void*, void*);
cl_int (*clFinish)(cl_command_queue);
cl_int (*clReleaseMemObject)(cl_mem);
cl_int (*clReleaseKernel)(cl_kernel);
cl_int (*clReleaseProgram)(cl_program);
cl_int (*clReleaseCommandQueue)(cl_command_queue);
cl_int (*clReleaseContext)(cl_context);
OpenCLBackend()
: hOpenCL(NULL), ready(0),
platform(NULL), device(NULL), context(NULL), queue(NULL), program(NULL), kCorr(NULL),
bufFeat(NULL), bufCorr(NULL),
featBytes(0), corrBytes(0),
clGetPlatformIDs(NULL), clGetDeviceIDs(NULL), clCreateContext(NULL), clCreateCommandQueue(NULL),
clCreateProgramWithSource(NULL), clBuildProgram(NULL), clGetProgramBuildInfo(NULL),
clCreateKernel(NULL), clSetKernelArg(NULL),
clCreateBuffer(NULL), clEnqueueWriteBuffer(NULL), clEnqueueReadBuffer(NULL),
clEnqueueNDRangeKernel(NULL), clFinish(NULL),
clReleaseMemObject(NULL), clReleaseKernel(NULL), clReleaseProgram(NULL),
clReleaseCommandQueue(NULL), clReleaseContext(NULL)
{}
// Resolve one exported symbol; returns 0 when the export is missing.
int loadSymbol(void** fp, const char* name) {
*fp = (void*)GetProcAddress(hOpenCL, name);
return (*fp != NULL);
}
// OpenCL C source of the correlation kernel. For every pair (a,b) with
// a < b it averages the Pearson correlations of the nFeat feature series
// over the ring window and writes the mean to outCorr[a*nAssets+b].
// NOTE(review): only the upper triangle (a < b) of outCorr is written by
// the kernel; the diagonal and lower triangle keep whatever the device
// buffer previously held. Confirm the host mirrors/fills those entries
// before they are consumed.
const char* kernelSource() {
return
"__kernel void corr_pairwise(\n"
" __global const float* feat,\n"
" __global float* outCorr,\n"
" const int nAssets,\n"
" const int nFeat,\n"
" const int windowSize,\n"
" const float eps\n"
"){\n"
" int a = (int)get_global_id(0);\n"
" int b = (int)get_global_id(1);\n"
" if(a >= nAssets || b >= nAssets) return;\n"
" if(a >= b) return;\n"
" float acc = 0.0f;\n"
" for(int f=0; f<nFeat; f++){\n"
" int baseA = (f*nAssets + a) * windowSize;\n"
" int baseB = (f*nAssets + b) * windowSize;\n"
" float mx = 0.0f;\n"
" float my = 0.0f;\n"
" for(int t=0; t<windowSize; t++){\n"
" mx += feat[baseA + t];\n"
" my += feat[baseB + t];\n"
" }\n"
" mx /= (float)windowSize;\n"
" my /= (float)windowSize;\n"
" float sxx = 0.0f;\n"
" float syy = 0.0f;\n"
" float sxy = 0.0f;\n"
" for(int t=0; t<windowSize; t++){\n"
" float dx = feat[baseA + t] - mx;\n"
" float dy = feat[baseB + t] - my;\n"
" sxx += dx*dx;\n"
" syy += dy*dy;\n"
" sxy += dx*dy;\n"
" }\n"
" float den = sqrt(sxx*syy + eps);\n"
" float corr = (den > eps) ? (sxy/den) : 0.0f;\n"
" acc += corr;\n"
" }\n"
" outCorr[a*nAssets + b] = acc / (float)nFeat;\n"
"}\n";
}
// Fetch and print the program build log; called after build failures.
void printBuildLog() {
if(!clGetProgramBuildInfo || !program || !device) return;
size_t logSize = 0;
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
if(logSize == 0) return;
char* log = (char*)malloc(logSize + 1);
if(!log) return;
memset(log, 0, logSize + 1);
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
printf("OpenCL build log:\n%s\n", log);
free(log);
}
// Bring up the whole OpenCL stack step by step. On any failure a reason
// is printed (except for missing exports) and the method returns with
// ready == 0, leaving the strategy on the CPU path.
void init() {
ready = 0;
hOpenCL = LoadLibraryA("OpenCL.dll");
if(!hOpenCL) {
printf("OpenCL: CPU (OpenCL.dll missing)\n");
return;
}
// Resolve all required exports; a single missing one aborts silently
// (ready stays 0, so the caller falls back to the CPU).
if(!loadSymbol((void**)&clGetPlatformIDs, "clGetPlatformIDs")) return;
if(!loadSymbol((void**)&clGetDeviceIDs, "clGetDeviceIDs")) return;
if(!loadSymbol((void**)&clCreateContext, "clCreateContext")) return;
if(!loadSymbol((void**)&clCreateCommandQueue, "clCreateCommandQueue")) return;
if(!loadSymbol((void**)&clCreateProgramWithSource,"clCreateProgramWithSource")) return;
if(!loadSymbol((void**)&clBuildProgram, "clBuildProgram")) return;
if(!loadSymbol((void**)&clGetProgramBuildInfo, "clGetProgramBuildInfo")) return;
if(!loadSymbol((void**)&clCreateKernel, "clCreateKernel")) return;
if(!loadSymbol((void**)&clSetKernelArg, "clSetKernelArg")) return;
if(!loadSymbol((void**)&clCreateBuffer, "clCreateBuffer")) return;
if(!loadSymbol((void**)&clEnqueueWriteBuffer, "clEnqueueWriteBuffer")) return;
if(!loadSymbol((void**)&clEnqueueReadBuffer, "clEnqueueReadBuffer")) return;
if(!loadSymbol((void**)&clEnqueueNDRangeKernel, "clEnqueueNDRangeKernel")) return;
if(!loadSymbol((void**)&clFinish, "clFinish")) return;
if(!loadSymbol((void**)&clReleaseMemObject, "clReleaseMemObject")) return;
if(!loadSymbol((void**)&clReleaseKernel, "clReleaseKernel")) return;
if(!loadSymbol((void**)&clReleaseProgram, "clReleaseProgram")) return;
if(!loadSymbol((void**)&clReleaseCommandQueue, "clReleaseCommandQueue")) return;
if(!loadSymbol((void**)&clReleaseContext, "clReleaseContext")) return;
cl_uint nPlat = 0;
if(clGetPlatformIDs(0, NULL, &nPlat) != CL_SUCCESS || nPlat == 0) {
printf("OpenCL: CPU (no platform)\n");
return;
}
// Use the first platform; prefer a GPU device, fall back to a CPU device.
clGetPlatformIDs(1, &platform, NULL);
cl_uint nDev = 0;
cl_int ok = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, &nDev);
if(ok != CL_SUCCESS || nDev == 0) {
ok = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, &nDev);
if(ok != CL_SUCCESS || nDev == 0) {
printf("OpenCL: CPU (no device)\n");
return;
}
}
cl_int err = 0;
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
if(err != CL_SUCCESS || !context) {
printf("OpenCL: CPU (context fail)\n");
return;
}
queue = clCreateCommandQueue(context, device, 0, &err);
if(err != CL_SUCCESS || !queue) {
printf("OpenCL: CPU (queue fail)\n");
return;
}
// Compile the embedded kernel source for the chosen device.
const char* src = kernelSource();
program = clCreateProgramWithSource(context, 1, &src, NULL, &err);
if(err != CL_SUCCESS || !program) {
printf("OpenCL: CPU (program fail)\n");
return;
}
err = clBuildProgram(program, 1, &device, "", NULL, NULL);
if(err != CL_SUCCESS) {
printf("OpenCL: CPU (build fail)\n");
printBuildLog();
return;
}
kCorr = clCreateKernel(program, "corr_pairwise", &err);
if(err != CL_SUCCESS || !kCorr) {
printf("OpenCL: CPU (kernel fail)\n");
printBuildLog();
return;
}
// Persistent device buffers sized for the full feature history and the
// full correlation matrix (both in float).
featBytes = FEAT_N * N_ASSETS * FEAT_WINDOW * (int)sizeof(float);
corrBytes = N_ASSETS * N_ASSETS * (int)sizeof(float);
bufFeat = clCreateBuffer(context, CL_MEM_READ_ONLY, (size_t)featBytes, NULL, &err);
if(err != CL_SUCCESS || !bufFeat) {
printf("OpenCL: CPU (bufFeat fail)\n");
return;
}
bufCorr = clCreateBuffer(context, CL_MEM_WRITE_ONLY, (size_t)corrBytes, NULL, &err);
if(err != CL_SUCCESS || !bufCorr) {
printf("OpenCL: CPU (bufCorr fail)\n");
return;
}
ready = 1;
printf("OpenCL: READY (kernel+buffers)\n");
}
// Release all OpenCL objects and the library handle in reverse creation
// order; safe to call repeatedly or after a partial init().
void shutdown() {
if(bufCorr) { clReleaseMemObject(bufCorr); bufCorr = NULL; }
if(bufFeat) { clReleaseMemObject(bufFeat); bufFeat = NULL; }
if(kCorr) { clReleaseKernel(kCorr); kCorr = NULL; }
if(program) { clReleaseProgram(program); program = NULL; }
if(queue) { clReleaseCommandQueue(queue); queue = NULL; }
if(context) { clReleaseContext(context); context = NULL; }
if(hOpenCL) { FreeLibrary(hOpenCL); hOpenCL = NULL; }
ready = 0;
}
// Upload the linearized float features, run the kernel over an
// nAssets x nAssets global range, and read the float correlation matrix
// back (blocking transfers). Returns 1 on success, 0 on any OpenCL
// error -- the caller should then recompute on the CPU.
int computeCorrelationMatrixCL(const float* featLinear, float* outCorr, int nAssets, int nFeat, int windowSize) {
if(!ready) return 0;
if(!featLinear || !outCorr) return 0;
cl_int err = clEnqueueWriteBuffer(queue, bufFeat, CL_TRUE, 0, (size_t)featBytes, featLinear, 0, NULL, NULL);
if(err != CL_SUCCESS) return 0;
float eps = 1e-12f;
err = CL_SUCCESS;
err |= clSetKernelArg(kCorr, 0, sizeof(cl_mem), &bufFeat);
err |= clSetKernelArg(kCorr, 1, sizeof(cl_mem), &bufCorr);
err |= clSetKernelArg(kCorr, 2, sizeof(int), &nAssets);
err |= clSetKernelArg(kCorr, 3, sizeof(int), &nFeat);
err |= clSetKernelArg(kCorr, 4, sizeof(int), &windowSize);
err |= clSetKernelArg(kCorr, 5, sizeof(float), &eps);
if(err != CL_SUCCESS) return 0;
size_t global[2];
global[0] = (size_t)nAssets;
global[1] = (size_t)nAssets;
err = clEnqueueNDRangeKernel(queue, kCorr, 2, NULL, global, NULL, 0, NULL, NULL);
if(err != CL_SUCCESS) return 0;
err = clFinish(queue);
if(err != CL_SUCCESS) return 0;
err = clEnqueueReadBuffer(queue, bufCorr, CL_TRUE, 0, (size_t)corrBytes, outCorr, 0, NULL, NULL);
if(err != CL_SUCCESS) return 0;
return 1;
}
};
// ---------------------------- Learning Layer ----------------------------
// Compact per-update summary of system state consumed by the learning and
// regime models below.
struct LearningSnapshot {
double meanScore; // mean asset score this update
double meanCompactness; // mean network-compactness measure
double meanVol; // mean volatility feature
int regime; // regime label assigned by a regime model
double regimeConfidence; // confidence attached to that label
};
class UnsupervisedModel {
public:
double centroids[3][3];
int counts[3];
int initialized;
UnsupervisedModel() : initialized(0) { memset(centroids, 0, sizeof(centroids)); memset(counts, 0, sizeof(counts)); }
void init() { initialized = 0; memset(centroids, 0, sizeof(centroids)); memset(counts, 0, sizeof(counts)); }
void update(const LearningSnapshot& s, int* regimeOut, double* confOut) {
double x0=s.meanScore,x1=s.meanCompactness,x2=s.meanVol;
if(!initialized) {
for(int k=0;k<3;k++){ centroids[k][0]=x0+0.01*(k-1); centroids[k][1]=x1+0.01*(1-k); centroids[k][2]=x2+0.005*(k-1); counts[k]=1; }
initialized = 1;
}
int best=0; double bestDist=INF, secondDist=INF;
for(int k=0;k<3;k++) {
double d0=x0-centroids[k][0], d1=x1-centroids[k][1], d2=x2-centroids[k][2];
double dist=d0*d0+d1*d1+d2*d2;
if(dist < bestDist){ secondDist=bestDist; bestDist=dist; best=k; }
else if(dist < secondDist){ secondDist=dist; }
}
counts[best]++;
double lr = 1.0/(double)counts[best];
centroids[best][0] += lr*(x0-centroids[best][0]);
centroids[best][1] += lr*(x1-centroids[best][1]);
centroids[best][2] += lr*(x2-centroids[best][2]);
*regimeOut = best;
*confOut = 1.0/(1.0 + sqrt(fabs(secondDist-bestDist)+EPS));
}
};
// Four-armed bandit over controller actions. Exploration is schedule-based
// (every 10th update cycles through the arms); otherwise the greedy arm is
// chosen. Reward is the change in mean score since the previous update,
// credited to the last chosen action via an incremental average.
class RLAgent {
public:
double q[4]; // incremental-mean action values
int n[4]; // pull counts per action
int lastAction; // action whose reward is still pending
double lastMeanScore; // baseline for the next reward
RLAgent() : lastAction(0), lastMeanScore(0) { for(int i=0;i<4;i++){q[i]=0;n[i]=0;} }
void init() {
lastAction = 0;
lastMeanScore = 0;
for(int i=0;i<4;i++) {
q[i] = 0;
n[i] = 0;
}
}
// Forced exploration on every 10th update, otherwise argmax of q.
int chooseAction(int updateCount) {
if((updateCount % 10) == 0)
return updateCount % 4;
int best = 0;
for(int i=1;i<4;i++)
if(q[i] > q[best])
best = i;
return best;
}
// Credit lastAction with the score delta (incremental average update).
void updateReward(double newMeanScore) {
double reward = newMeanScore - lastMeanScore;
n[lastAction]++;
q[lastAction] += (reward - q[lastAction]) / (double)n[lastAction];
lastMeanScore = newMeanScore;
}
};
// Rolling "PCA-like" projector over snapshot statistics.
// Despite the name it does not compute eigenvectors: the three latent
// components are fixed, hand-chosen linear combinations of the z-scored
// input. explainedVar is the normalized absolute latent magnitude, dom is
// its first component's share, and rot is how much dom moved since the
// previous update.
class PCAModel {
public:
double hist[PCA_WINDOW][PCA_DIM]; // ring of recent snapshot vectors
double mean[PCA_DIM]; // per-dimension mean over hist
double stdev[PCA_DIM]; // per-dimension stdev over hist
double latent[PCA_COMP]; // current projection
double explainedVar[PCA_COMP]; // share of |latent| per component
int writeIdx; // ring write position
int count; // filled entries (<= PCA_WINDOW)
int rebuildEvery; // stats refresh cadence (in updates)
int updates; // total update() calls
double dom; // dominance of component 0
double rot; // |change of dom| since last update
double prevExplained0; // previous dom, for computing rot
PCAModel() : writeIdx(0), count(0), rebuildEvery(PCA_REBUILD_EVERY), updates(0), dom(0), rot(0), prevExplained0(0) {
memset(hist, 0, sizeof(hist));
memset(mean, 0, sizeof(mean));
memset(stdev, 0, sizeof(stdev));
memset(latent, 0, sizeof(latent));
memset(explainedVar, 0, sizeof(explainedVar));
}
// Reset all state and history to zero.
void init() {
writeIdx = 0;
count = 0;
updates = 0;
dom = 0;
rot = 0;
prevExplained0 = 0;
memset(hist, 0, sizeof(hist));
memset(mean, 0, sizeof(mean));
memset(stdev, 0, sizeof(stdev));
memset(latent, 0, sizeof(latent));
memset(explainedVar, 0, sizeof(explainedVar));
}
// Append one PCA_DIM vector to the ring history.
void pushSnapshot(const double x[PCA_DIM]) {
for(int d=0; d<PCA_DIM; d++) hist[writeIdx][d] = x[d];
writeIdx = (writeIdx + 1) % PCA_WINDOW;
if(count < PCA_WINDOW) count++;
}
// Recompute per-dimension mean and stdev over the filled history.
// Iterates hist[0..count-1] regardless of ring order, which is fine for
// these order-independent statistics.
void rebuildStats() {
if(count <= 0) return;
for(int d=0; d<PCA_DIM; d++) {
double m = 0;
for(int i=0; i<count; i++) m += hist[i][d];
m /= (double)count;
mean[d] = m;
double v = 0;
for(int i=0; i<count; i++) {
double dd = hist[i][d] - m;
v += dd * dd;
}
v /= (double)count;
stdev[d] = sqrt(v + EPS);
}
}
// Fold one snapshot (plus regime info) into the model: push to history,
// periodically refresh the normalizing stats, project the z-scored input
// through the fixed loadings and update dom/rot.
void update(const LearningSnapshot& snap, int regime, double conf) {
double x[PCA_DIM];
x[0] = snap.meanScore;
x[1] = snap.meanCompactness;
x[2] = snap.meanVol;
x[3] = (double)regime / 2.0; // maps regime 0..2 into 0..1
x[4] = conf;
x[5] = snap.meanScore - snap.meanCompactness; // spread term
pushSnapshot(x);
updates++;
// Refresh stats on cadence, and always while history is still tiny so
// the z-scores below never divide by unseeded stdev values.
if((updates % rebuildEvery) == 0 || count < 4) rebuildStats();
double z[PCA_DIM];
for(int d=0; d<PCA_DIM; d++) z[d] = (x[d] - mean[d]) / (stdev[d] + EPS);
// Fixed loading vectors (hand-chosen, not eigenvectors).
latent[0] = 0.60*z[0] + 0.30*z[1] + 0.10*z[2];
latent[1] = 0.25*z[0] - 0.45*z[1] + 0.20*z[2] + 0.10*z[4];
latent[2] = 0.20*z[2] + 0.50*z[3] - 0.30*z[5];
double a0 = fabs(latent[0]);
double a1 = fabs(latent[1]);
double a2 = fabs(latent[2]);
double sumA = a0 + a1 + a2 + EPS;
explainedVar[0] = a0 / sumA;
explainedVar[1] = a1 / sumA;
explainedVar[2] = a2 / sumA;
dom = explainedVar[0];
rot = fabs(explainedVar[0] - prevExplained0);
prevExplained0 = explainedVar[0];
}
};
// Diagonal-covariance Gaussian mixture regime classifier.
// infer() computes posterior responsibilities p[k] proportional to
// pi[k] * N(x; mu_k, var_k), then the most likely regime, its confidence
// and the posterior entropy. With GMM_ONLINE_UPDATE it also adapts the
// mixture with a lightweight EM-like step using forgetting factor
// GMM_ALPHA.
class GMMRegimeModel {
public:
double pi[GMM_K]; // mixture weights (sum stays 1 under the update rule)
double mu[GMM_K][GMM_DIM]; // component means
double var[GMM_K][GMM_DIM]; // diagonal variances, floored at GMM_VAR_FLOOR
double p[GMM_K]; // last posterior responsibilities
double entropy; // posterior entropy (regime uncertainty)
double conf; // posterior probability of the best regime
int bestRegime; // argmax of p[]
int initialized;
GMMRegimeModel() : entropy(0), conf(0), bestRegime(0), initialized(0) {
memset(pi, 0, sizeof(pi));
memset(mu, 0, sizeof(mu));
memset(var, 0, sizeof(var));
memset(p, 0, sizeof(p));
}
// Reset to a uniform mixture with slightly separated component means.
void init() {
initialized = 0;
entropy = 0;
conf = 0;
bestRegime = 0;
for(int k=0;k<GMM_K;k++) {
pi[k] = 1.0 / (double)GMM_K;
for(int d=0; d<GMM_DIM; d++) {
mu[k][d] = 0.02 * (k - 1);
var[k][d] = 1.0;
}
p[k] = 1.0 / (double)GMM_K;
}
initialized = 1;
}
// Unnormalized diagonal-Gaussian density. The (2*pi)^(-D/2) constant is
// omitted: it is identical for every component and cancels when the
// posteriors are normalized in infer(). The log-likelihood is clamped at
// -80 so exp() cannot underflow to zero for all components at once.
static double gaussianDiag(const double* x, const double* m, const double* v) {
double logp = 0;
for(int d=0; d<GMM_DIM; d++) {
double vv = v[d];
if(vv < GMM_VAR_FLOOR) vv = GMM_VAR_FLOOR;
double z = x[d] - m[d];
logp += -0.5 * (z*z / vv + log(vv + EPS));
}
if(logp < -80.0) logp = -80.0;
return exp(logp);
}
// Posterior inference (and optional online adaptation) for one sample.
void infer(const double x[GMM_DIM]) {
if(!initialized) init();
double sum = 0;
for(int k=0;k<GMM_K;k++) {
double g = gaussianDiag(x, mu[k], var[k]);
p[k] = pi[k] * g;
sum += p[k];
}
// If the total mass underflowed, fall back to a uniform posterior.
if(sum < EPS) {
for(int k=0;k<GMM_K;k++) p[k] = 1.0 / (double)GMM_K;
} else {
for(int k=0;k<GMM_K;k++) p[k] /= sum;
}
bestRegime = 0;
conf = p[0];
for(int k=1;k<GMM_K;k++) {
if(p[k] > conf) {
conf = p[k];
bestRegime = k;
}
}
entropy = 0;
for(int k=0;k<GMM_K;k++) entropy -= p[k] * log(p[k] + EPS);
#if GMM_ONLINE_UPDATE
// lightweight incremental update (EM-like with forgetting)
for(int k=0;k<GMM_K;k++) {
double w = GMM_ALPHA * p[k];
pi[k] = (1.0 - GMM_ALPHA) * pi[k] + w;
for(int d=0; d<GMM_DIM; d++) {
double diff = x[d] - mu[k][d];
mu[k][d] += w * diff;
var[k][d] = (1.0 - w) * var[k][d] + w * diff * diff;
if(var[k][d] < GMM_VAR_FLOOR) var[k][d] = GMM_VAR_FLOOR;
}
}
#endif
}
};
// Hidden-Markov regime filter with diagonal-Gaussian emissions and a
// fixed, sticky transition matrix (0.90 self-transition probability).
// filter() performs one forward predict/update step per observation.
// With HMM_ONLINE_UPDATE the emission parameters (mu, var) adapt slowly;
// the transition matrix A is never adapted.
class HMMRegimeModel {
public:
double A[HMM_K][HMM_K]; // transition matrix (fixed after init)
double mu[HMM_K][HMM_DIM]; // emission means
double var[HMM_K][HMM_DIM]; // emission variances, floored at HMM_VAR_FLOOR
double posterior[HMM_K]; // filtered state distribution
double entropy; // posterior entropy (regime uncertainty)
double conf; // posterior probability of the current regime
double switchProb; // 1 - A[regime][regime]
int regime; // argmax of posterior
int initialized;
HMMRegimeModel() : entropy(0), conf(0), switchProb(0), regime(0), initialized(0) {
memset(A, 0, sizeof(A));
memset(mu, 0, sizeof(mu));
memset(var, 0, sizeof(var));
memset(posterior, 0, sizeof(posterior));
}
// Sticky transitions, slightly separated means, uniform posterior.
void init() {
for(int i=0;i<HMM_K;i++) {
for(int j=0;j<HMM_K;j++) A[i][j] = (i==j) ? 0.90 : 0.10/(double)(HMM_K-1);
for(int d=0; d<HMM_DIM; d++) {
mu[i][d] = 0.03 * (i - 1);
var[i][d] = 1.0;
}
posterior[i] = 1.0/(double)HMM_K;
}
regime = 0;
conf = posterior[0];
entropy = 0;
switchProb = 0;
initialized = 1;
}
// Unnormalized diagonal-Gaussian emission density; the constant factor
// is omitted (it cancels in the posterior normalization) and the
// log-likelihood is clamped at -80 to avoid exp() underflow.
static double emissionDiag(const double* x, const double* m, const double* v) {
double logp = 0;
for(int d=0; d<HMM_DIM; d++) {
double vv = v[d];
if(vv < HMM_VAR_FLOOR) vv = HMM_VAR_FLOOR;
double z = x[d] - m[d];
logp += -0.5 * (z*z / vv + log(vv + EPS));
}
if(logp < -80.0) logp = -80.0;
return exp(logp);
}
// One forward-filter step: predict with A, weight by the emission
// likelihoods, renormalize, then read off regime/conf/entropy/switch
// risk and (optionally) adapt the emissions.
void filter(const double obs[HMM_DIM]) {
if(!initialized) init();
// Prediction: propagate the previous posterior through the transitions.
double pred[HMM_K];
for(int j=0;j<HMM_K;j++) {
pred[j] = 0;
for(int i=0;i<HMM_K;i++) pred[j] += posterior[i] * A[i][j];
}
// Update: weight by emission likelihood and normalize.
double alpha[HMM_K];
double sum = 0;
for(int k=0;k<HMM_K;k++) {
double emit = emissionDiag(obs, mu[k], var[k]);
alpha[k] = pred[k] * emit;
sum += alpha[k];
}
// If the total mass underflowed, fall back to a uniform posterior.
if(sum < EPS) {
for(int k=0;k<HMM_K;k++) alpha[k] = 1.0/(double)HMM_K;
} else {
for(int k=0;k<HMM_K;k++) alpha[k] /= sum;
}
for(int k=0;k<HMM_K;k++) posterior[k] = alpha[k];
regime = 0;
conf = posterior[0];
for(int k=1;k<HMM_K;k++) if(posterior[k] > conf) { conf = posterior[k]; regime = k; }
entropy = 0;
for(int k=0;k<HMM_K;k++) entropy -= posterior[k] * log(posterior[k] + EPS);
// Chance of leaving the current regime on the next step; constant per
// regime because A is never adapted.
switchProb = 1.0 - A[regime][regime];
if(switchProb < 0) switchProb = 0;
if(switchProb > 1) switchProb = 1;
#if HMM_ONLINE_UPDATE
// Responsibility-weighted EMA of the emission parameters only.
for(int k=0;k<HMM_K;k++) {
double w = HMM_SMOOTH * posterior[k];
for(int d=0; d<HMM_DIM; d++) {
double diff = obs[d] - mu[k][d];
mu[k][d] += w * diff;
var[k][d] = (1.0 - w) * var[k][d] + w * diff * diff;
if(var[k][d] < HMM_VAR_FLOOR) var[k][d] = HMM_VAR_FLOOR;
}
}
#endif
}
};
// Online k-means regime detector over a KMEANS_DIM summary vector.
// Tracks an EMA of the winning-centroid distance and of its squared
// deviation; "stability" is a logistic squash of the z-scored current
// distance, so it is high when the sample sits below its recent trend.
class KMeansRegimeModel {
public:
double centroids[KMEANS_K][KMEANS_DIM];
double distEma; // EMA of the winning distance
double distVarEma; // EMA of its squared deviation
int initialized;
int regime; // index of the nearest centroid
double dist; // distance to that centroid
double stability; // in [0,1]; high => distance below trend
KMeansRegimeModel() : distEma(0), distVarEma(1), initialized(0), regime(0), dist(0), stability(0) {
memset(centroids, 0, sizeof(centroids));
}
void init() {
distEma = 0;
distVarEma = 1;
initialized = 0;
regime = 0;
dist = 0;
stability = 0;
memset(centroids, 0, sizeof(centroids));
}
// First sample: spread the centroids symmetrically around it.
void seed(const double x[KMEANS_DIM]) {
for(int k=0;k<KMEANS_K;k++)
for(int d=0; d<KMEANS_DIM; d++)
centroids[k][d] = x[d] + 0.03 * (k - 1);
initialized = 1;
}
static double clampRange(double x, double lo, double hi) {
return (x < lo) ? lo : ((x > hi) ? hi : x);
}
// Assign the sample, refresh the distance trackers and stability, and
// (optionally) nudge the winning centroid toward the sample.
void predictAndUpdate(const double x[KMEANS_DIM]) {
if(!initialized) seed(x);
int nearest = 0;
double nearestDist = INF;
for(int k=0;k<KMEANS_K;k++) {
double acc = 0;
for(int d=0; d<KMEANS_DIM; d++) {
double diff = x[d] - centroids[k][d];
acc += diff * diff;
}
double dk = sqrt(acc + EPS);
if(dk < nearestDist) {
nearestDist = dk;
nearest = k;
}
}
regime = nearest;
dist = nearestDist;
// Mean/variance of the winning distance via an EMA pair.
distEma = (1.0 - KMEANS_DIST_EMA) * distEma + KMEANS_DIST_EMA * dist;
double dev = dist - distEma;
distVarEma = (1.0 - KMEANS_DIST_EMA) * distVarEma + KMEANS_DIST_EMA * dev * dev;
double sd = sqrt(distVarEma + EPS);
double z = (dist - distEma) / (sd + EPS);
stability = clampRange(1.0 / (1.0 + exp(z)), 0.0, 1.0);
#if KMEANS_ONLINE_UPDATE
// Move only the winning centroid toward the sample.
for(int d=0; d<KMEANS_DIM; d++)
centroids[nearest][d] += KMEANS_ETA * (x[d] - centroids[nearest][d]);
#endif
}
};
// Cheap deterministic stand-in for spectral clustering: each asset's total
// finite distance to all other assets is hashed into one of SPECTRAL_K
// buckets via fmod.
class SpectralClusterModel {
public:
int clusterId[N_ASSETS];
int nClusters;
// Default round-robin assignment until the first update().
void init() {
nClusters = SPECTRAL_K;
for(int i=0;i<N_ASSETS;i++)
clusterId[i] = i % SPECTRAL_K;
}
// Re-bucket every asset from its distance-matrix row sum.
void update(const fvar* distMatrix) {
if(!distMatrix) return;
for(int a=0;a<N_ASSETS;a++) {
double rowSum = 0;
for(int b=0;b<N_ASSETS;b++) {
if(a == b) continue;
double d = (double)distMatrix[a*N_ASSETS + b];
if(d < INF) rowSum += d; // skip unreachable pairs
}
int bucket = (int)fmod(fabs(rowSum * 1000.0), (double)SPECTRAL_K);
if(bucket < 0) bucket = 0;
else if(bucket >= SPECTRAL_K) bucket = SPECTRAL_K - 1;
clusterId[a] = bucket;
}
}
};
// Agglomerative clustering (size-weighted average linkage) over the asset
// distance matrix. update() builds a binary merge tree -- leaves are
// assets 0..N_ASSETS-1, internal nodes are numbered from N_ASSETS upward
// -- and then cuts it into nCoarse and nFine flat labelings.
// NOTE: nodeDist holds (2*N_ASSETS)^2 doubles (~25 KB at 28 assets).
class HierarchicalClusteringModel {
public:
int clusterCoarse[N_ASSETS]; // coarse flat labeling (HCLUST_COARSE_K cuts)
int clusterFine[N_ASSETS]; // fine flat labeling (HCLUST_FINE_K cuts)
int nCoarse;
int nFine;
int leftChild[2*N_ASSETS]; // merge tree children (-1 for leaves)
int rightChild[2*N_ASSETS];
int nodeSize[2*N_ASSETS]; // number of leaves under each node
double nodeHeight[2*N_ASSETS]; // linkage distance at which node was merged
double nodeDist[2*N_ASSETS][2*N_ASSETS]; // working inter-node distances
int rootNode;
// Round-robin fallback labels until the first update() builds the tree.
void init() {
nCoarse = HCLUST_COARSE_K;
nFine = HCLUST_FINE_K;
rootNode = N_ASSETS - 1;
for(int i=0;i<N_ASSETS;i++) {
clusterCoarse[i] = i % HCLUST_COARSE_K;
clusterFine[i] = i % HCLUST_FINE_K;
}
}
// Iterative DFS: label every leaf asset under 'node' with clusterId.
void collectLeaves(int node, int clusterId, int* out) {
int stack[2*N_ASSETS];
int sp = 0;
stack[sp++] = node;
while(sp > 0) {
int cur = stack[--sp];
if(cur < N_ASSETS) {
out[cur] = clusterId;
} else {
if(leftChild[cur] >= 0) stack[sp++] = leftChild[cur];
if(rightChild[cur] >= 0) stack[sp++] = rightChild[cur];
}
}
}
// Cut the tree into K flat clusters: starting from the root, repeatedly
// split the current cluster whose merge height is greatest until K
// clusters exist (or only leaves remain). Unreached leaves default to 0.
void cutByK(int K, int* out) {
for(int i=0;i<N_ASSETS;i++) out[i] = -1;
if(K <= 1) {
for(int i=0;i<N_ASSETS;i++) out[i] = 0;
return;
}
int clusters[2*N_ASSETS];
int count = 1;
clusters[0] = rootNode;
while(count < K) {
// Pick the internal cluster root with the largest merge height.
int bestPos = -1;
double bestHeight = -1;
for(int i=0;i<count;i++) {
int node = clusters[i];
if(node >= N_ASSETS && nodeHeight[node] > bestHeight) {
bestHeight = nodeHeight[node];
bestPos = i;
}
}
if(bestPos < 0) break; // only leaves left: cannot split further
int node = clusters[bestPos];
int l = leftChild[node];
int r = rightChild[node];
clusters[bestPos] = l;
clusters[count++] = r;
}
for(int c=0;c<count;c++) {
collectLeaves(clusters[c], c, out);
}
for(int i=0;i<N_ASSETS;i++) if(out[i] < 0) out[i] = 0;
}
// Build the merge tree from the pairwise distance matrix, then derive
// the coarse and fine flat labelings.
void update(const fvar* distMatrix) {
if(!distMatrix) return;
int totalNodes = 2 * N_ASSETS;
// Reset node tables; all inter-node distances start unreachable.
for(int i=0;i<totalNodes;i++) {
leftChild[i] = -1;
rightChild[i] = -1;
nodeSize[i] = (i < N_ASSETS) ? 1 : 0;
nodeHeight[i] = 0;
for(int j=0;j<totalNodes;j++) nodeDist[i][j] = INF;
}
// Seed leaf-to-leaf distances; invalid or unreachable entries become 1.0.
for(int i=0;i<N_ASSETS;i++) {
for(int j=0;j<N_ASSETS;j++) {
if(i == j) nodeDist[i][j] = 0;
else {
double d = (double)distMatrix[i*N_ASSETS + j];
if(d < 0 || d >= INF) d = 1.0;
nodeDist[i][j] = d;
}
}
}
int active[2*N_ASSETS];
int nActive = N_ASSETS;
for(int i=0;i<N_ASSETS;i++) active[i] = i;
int nextNode = N_ASSETS;
// Greedy agglomeration: repeatedly merge the two closest active nodes.
while(nActive > 1 && nextNode < 2*N_ASSETS) {
int ai = 0, aj = 1;
double best = INF;
for(int i=0;i<nActive;i++) {
for(int j=i+1;j<nActive;j++) {
int a = active[i], b = active[j];
if(nodeDist[a][b] < best) {
best = nodeDist[a][b];
ai = i; aj = j;
}
}
}
int a = active[ai];
int b = active[aj];
int m = nextNode++;
leftChild[m] = a;
rightChild[m] = b;
nodeHeight[m] = best;
nodeSize[m] = nodeSize[a] + nodeSize[b];
// Distance from the merged node to every other active node is the
// size-weighted average of the children's distances.
for(int i=0;i<nActive;i++) {
if(i == ai || i == aj) continue;
int k = active[i];
double da = nodeDist[a][k];
double db = nodeDist[b][k];
double dm = (nodeSize[a] * da + nodeSize[b] * db) / (double)(nodeSize[a] + nodeSize[b]);
nodeDist[m][k] = dm;
nodeDist[k][m] = dm;
}
nodeDist[m][m] = 0;
// Remove the two merged entries (larger index first, so the first
// removal does not shift the second), then append the merged node.
if(aj < ai) { int t=ai; ai=aj; aj=t; }
for(int i=aj;i<nActive-1;i++) active[i] = active[i+1];
nActive--;
for(int i=ai;i<nActive-1;i++) active[i] = active[i+1];
nActive--;
active[nActive++] = m;
}
rootNode = active[0];
// Clamp the requested cluster counts and cut the tree twice.
int kc = HCLUST_COARSE_K;
if(kc < 1) kc = 1;
if(kc > N_ASSETS) kc = N_ASSETS;
int kf = HCLUST_FINE_K;
if(kf < 1) kf = 1;
if(kf > N_ASSETS) kf = N_ASSETS;
cutByK(kc, clusterCoarse);
cutByK(kf, clusterFine);
nCoarse = kc;
nFine = kf;
}
};
// Label-propagation community detection over a graph weighted by |corr|.
// Edges below COMM_W_MIN are dropped and each node keeps only its
// COMM_TOPM strongest outgoing edges (deterministic pruning); labels then
// propagate for COMM_ITERS synchronous-in-place sweeps. A Newman-style
// modularity value is computed from the pruned graph and smoothed into
// qSmooth as a "market structure strength" signal; community ids are also
// folded (modulo) into coarse/fine cluster labels for the selector.
class CommunityDetectionModel {
public:
int communityId[N_ASSETS]; // compressed community label per asset
int clusterCoarse[N_ASSETS]; // communityId % HCLUST_COARSE_K
int clusterFine[N_ASSETS]; // communityId % HCLUST_FINE_K
int nCommunities; // number of distinct communities found
fvar modularityQ; // last raw modularity estimate
fvar qSmooth; // EMA-smoothed modularity (COMM_Q_EMA)
// Single-community defaults until the first update().
void init() {
nCommunities = 1;
modularityQ = 0;
qSmooth = 0;
for(int i=0;i<N_ASSETS;i++) {
communityId[i] = 0;
clusterCoarse[i] = i % HCLUST_COARSE_K;
clusterFine[i] = i % HCLUST_FINE_K;
}
}
// Label with the largest total incident edge weight among neighbors.
// NOTE(review): bestV starts at -1, so a node with no positive-weight
// neighbor ends up with label 0 (the first zero accumulator wins) rather
// than keeping its own label -- confirm this is the intended behavior
// for isolated nodes.
static int argmaxLabel(const fvar w[N_ASSETS], const int label[N_ASSETS], int node) {
fvar acc[N_ASSETS];
for(int i=0;i<N_ASSETS;i++) acc[i] = 0;
for(int j=0;j<N_ASSETS;j++) {
if(j == node) continue;
int l = label[j];
if(l < 0 || l >= N_ASSETS) continue;
acc[l] += w[j];
}
int best = label[node];
fvar bestV = -1;
for(int l=0;l<N_ASSETS;l++) {
if(acc[l] > bestV) { bestV = acc[l]; best = l; }
}
return best;
}
// Rebuild the weighted graph, propagate labels, compress them, estimate
// modularity and refresh the coarse/fine cluster views.
void update(const fvar* corrMatrix, const fvar* distMatrix) {
if(!corrMatrix || !distMatrix) return;
fvar W[N_ASSETS][N_ASSETS];
fvar degree[N_ASSETS];
int label[N_ASSETS];
// Edge weights = |correlation|, thresholded at COMM_W_MIN.
// NOTE(review): degree[] is accumulated here, before the top-M pruning
// below zeroes some W entries, so the modularity formula mixes
// pre-pruning degrees with the post-pruning adjacency -- verify this
// is intended.
for(int i=0;i<N_ASSETS;i++) {
degree[i] = 0;
label[i] = i;
for(int j=0;j<N_ASSETS;j++) {
if(i == j) W[i][j] = 0;
else {
fvar w = (fvar)fabs((double)corrMatrix[i*N_ASSETS + j]);
if(w < (fvar)COMM_W_MIN) w = 0;
W[i][j] = w;
degree[i] += w;
}
}
}
// Optional top-M pruning for determinism/noise control
for(int i=0;i<N_ASSETS;i++) {
int keep[N_ASSETS];
for(int j=0;j<N_ASSETS;j++) keep[j] = 0;
for(int k=0;k<COMM_TOPM;k++) {
int best = -1;
fvar bestW = 0;
for(int j=0;j<N_ASSETS;j++) {
if(i==j || keep[j]) continue;
if(W[i][j] > bestW) { bestW = W[i][j]; best = j; }
}
if(best >= 0) keep[best] = 1;
}
for(int j=0;j<N_ASSETS;j++) if(i!=j && !keep[j]) W[i][j] = 0;
}
// Label propagation: each node adopts the heaviest neighboring label.
for(int it=0; it<COMM_ITERS; it++) {
for(int i=0;i<N_ASSETS;i++) {
label[i] = argmaxLabel(W[i], label, i);
}
}
// compress labels
int map[N_ASSETS];
for(int i=0;i<N_ASSETS;i++) map[i] = -1;
int nLab = 0;
for(int i=0;i<N_ASSETS;i++) {
int l = label[i];
if(l < 0 || l >= N_ASSETS) l = 0;
if(map[l] < 0) map[l] = nLab++;
communityId[i] = map[l];
}
if(nLab < 1) nLab = 1;
nCommunities = nLab;
// modularity approximation
fvar m2 = 0;
for(int i=0;i<N_ASSETS;i++) for(int j=0;j<N_ASSETS;j++) m2 += W[i][j];
if(m2 < (fvar)EPS) {
modularityQ = 0;
} else {
// Q = (1/m2) * sum over same-community pairs of (W_ij - k_i*k_j/m2)
fvar q = 0;
for(int i=0;i<N_ASSETS;i++) {
for(int j=0;j<N_ASSETS;j++) {
if(communityId[i] == communityId[j]) {
q += W[i][j] - (degree[i] * degree[j] / m2);
}
}
}
modularityQ = q / m2;
}
qSmooth = (fvar)(1.0 - COMM_Q_EMA) * qSmooth + (fvar)COMM_Q_EMA * modularityQ;
// Fold community ids into the coarse/fine cluster views.
for(int i=0;i<N_ASSETS;i++) {
int c = communityId[i];
if(c < 0) c = 0;
clusterCoarse[i] = c % HCLUST_COARSE_K;
clusterFine[i] = c % HCLUST_FINE_K;
}
}
};
class AutoencoderModel {
public:
double mu[AE_INPUT_DIM];
double sigma[AE_INPUT_DIM];
double W1[AE_LATENT_DIM][AE_INPUT_DIM];
double W2[AE_INPUT_DIM][AE_LATENT_DIM];
int initialized;
void init() {
initialized = 1;
for(int i=0;i<AE_INPUT_DIM;i++) {
mu[i] = 0;
sigma[i] = 1;
}
for(int z=0;z<AE_LATENT_DIM;z++) {
for(int d=0;d<AE_INPUT_DIM;d++) {
double w = sin((double)(z+1)*(d+1)) * 0.05;
W1[z][d] = w;
W2[d][z] = w;
}
}
}
static double act(double x) {
if(x > 4) x = 4;
if(x < -4) x = -4;
return tanh(x);
}
double infer(const double xIn[AE_INPUT_DIM]) {
if(!initialized) init();
double x[AE_INPUT_DIM];
for(int d=0;d<AE_INPUT_DIM;d++) x[d] = (xIn[d] - mu[d]) / (sigma[d] + EPS);
double z[AE_LATENT_DIM];
for(int k=0;k<AE_LATENT_DIM;k++) {
double s = 0;
for(int d=0;d<AE_INPUT_DIM;d++) s += W1[k][d] * x[d];
z[k] = act(s);
}
double recon[AE_INPUT_DIM];
for(int d=0;d<AE_INPUT_DIM;d++) {
double s = 0;
for(int k=0;k<AE_LATENT_DIM;k++) s += W2[d][k] * z[k];
recon[d] = act(s);
}
double err = 0;
for(int d=0;d<AE_INPUT_DIM;d++) {
double e = x[d] - recon[d];
err += e*e;
}
err /= (double)AE_INPUT_DIM;
for(int d=0;d<AE_INPUT_DIM;d++) {
mu[d] = (1.0 - AE_NORM_ALPHA) * mu[d] + AE_NORM_ALPHA * xIn[d];
double dv = xIn[d] - mu[d];
sigma[d] = (1.0 - AE_NORM_ALPHA) * sigma[d] + AE_NORM_ALPHA * sqrt(dv*dv + EPS);
if(sigma[d] < 1e-5) sigma[d] = 1e-5;
}
return err;
}
};
// Turns autoencoder reconstruction error into a 3-level novelty regime and
// a risk scale. Keeps an EMA of the error and of its squared deviation,
// z-scores the current error, and trips to cautious (1) or defensive (2)
// above the AE_Z_LOW / AE_Z_HIGH thresholds.
class NoveltyController {
public:
double errEma; // EMA of reconstruction error
double errVar; // EMA of its squared deviation
double zRecon; // z-score of the latest error
int regime; // 0 normal, 1 elevated, 2 novel
double riskScale; // multiplicative risk factor in [0.2, 1.0]
void init() {
errEma = 0;
errVar = 1;
zRecon = 0;
regime = 0;
riskScale = 1.0;
}
static double clampRange(double x, double lo, double hi) {
return (x < lo) ? lo : ((x > hi) ? hi : x);
}
// Fold one reconstruction error into the trackers and classify it.
void update(double reconError) {
errEma = (1.0 - AE_ERR_EMA) * errEma + AE_ERR_EMA * reconError;
double dev = reconError - errEma;
errVar = (1.0 - AE_ERR_EMA) * errVar + AE_ERR_EMA * dev*dev;
double errStd = sqrt(errVar + EPS);
zRecon = (reconError - errEma) / (errStd + EPS);
if(zRecon >= AE_Z_HIGH) {
regime = 2;
riskScale = 0.20;
} else if(zRecon >= AE_Z_LOW) {
regime = 1;
riskScale = 0.60;
} else {
regime = 0;
riskScale = 1.00;
}
riskScale = clampRange(riskScale, 0.20, 1.00);
}
// Shrink Top-K and the score scale according to the current regime.
void apply(int* topK, double* scoreScale) {
switch(regime) {
case 2:
if(*topK > 3) *topK -= 2;
*scoreScale *= 0.60;
break;
case 1:
if(*topK > 3) *topK -= 1;
*scoreScale *= 0.85;
break;
default:
break;
}
if(*topK < 1) *topK = 1;
if(*topK > TOP_K) *topK = TOP_K;
*scoreScale = clampRange(*scoreScale, 0.10, 2.00);
}
};
// SOM_W x SOM_H self-organizing map over SOM_DIM state vectors.
// inferOrUpdate() finds the best-matching unit (BMU) and the runner-up,
// derives a confidence from their distance margin, and (with
// SOM_ONLINE_UPDATE) pulls the Gaussian neighborhood of the BMU toward
// the sample, with learning rate and radius decaying exponentially in
// 'step'.
class SOMModel {
public:
double W[SOM_H][SOM_W][SOM_DIM]; // per-cell prototype vectors
int hitCount[SOM_H][SOM_W]; // BMU visit counts per cell
int bmuX; // column of the last BMU
int bmuY; // row of the last BMU
double conf; // margin-based BMU confidence in [0,1]
int initialized;
// Deterministic sinusoidal initialization of the prototypes.
void init() {
initialized = 1;
bmuX = 0; bmuY = 0; conf = 0;
for(int y=0;y<SOM_H;y++) {
for(int x=0;x<SOM_W;x++) {
hitCount[y][x] = 0;
for(int d=0;d<SOM_DIM;d++) {
W[y][x][d] = 0.02 * sin((double)(y+1)*(x+1)*(d+1));
}
}
}
}
static double clampRange(double x,double lo,double hi){ if(x<lo) return lo; if(x>hi) return hi; return x; }
// Locate the BMU for sample s and optionally adapt the map.
void inferOrUpdate(const double s[SOM_DIM], int step) {
if(!initialized) init();
// Scan all cells for the best and second-best squared distance.
int bx=0, by=0;
double best=INF, second=INF;
for(int y=0;y<SOM_H;y++) {
for(int x=0;x<SOM_W;x++) {
double d2=0;
for(int k=0;k<SOM_DIM;k++) {
double z = s[k] - W[y][x][k];
d2 += z*z;
}
if(d2 < best) { second = best; best=d2; bx=x; by=y; }
else if(d2 < second) second = d2;
}
}
bmuX = bx; bmuY = by;
// Confidence = relative margin between winner and runner-up.
double d1 = sqrt(best + EPS);
double d2 = sqrt(second + EPS);
conf = clampRange((d2 - d1) / (d2 + EPS), 0.0, 1.0);
hitCount[bmuY][bmuX]++;
#if SOM_ONLINE_UPDATE
// Learning rate and neighborhood radius decay exponentially in step.
double alpha = SOM_ALPHA_MIN + (SOM_ALPHA_MAX - SOM_ALPHA_MIN) * exp(-0.005 * step);
double sigma = SOM_SIGMA_MIN + (SOM_SIGMA_MAX - SOM_SIGMA_MIN) * exp(-0.005 * step);
// Pull every cell toward the sample, weighted by a Gaussian of its
// grid distance to the BMU.
for(int y=0;y<SOM_H;y++) {
for(int x=0;x<SOM_W;x++) {
double gd2 = (double)((x-bmuX)*(x-bmuX) + (y-bmuY)*(y-bmuY));
double h = exp(-gd2 / (2.0*sigma*sigma + EPS));
for(int k=0;k<SOM_DIM;k++) {
W[y][x][k] += alpha * h * (s[k] - W[y][x][k]);
}
}
}
#endif
}
// Flattened BMU coordinate, usable as a fine-grained regime id.
int regimeId() const { return bmuY * SOM_W + bmuX; }
};
// Maps the SOM BMU position (coarse map quadrant) plus BMU confidence onto
// top-K and score-scale adjustments: the further "down-right" on the map,
// the more conservative the selection.
class SOMPlaybook {
public:
    int region;        // map quadrant of the BMU (0..3)
    double riskScale;  // risk multiplier derived from the quadrant
    void init() { region = 0; riskScale = 1.0; }
    // Apply the quadrant playbook to topK / scoreScale.
    void apply(const SOMModel& som, int* topK, double* scoreScale) {
        int col = (som.bmuX >= SOM_W/2) ? 1 : 0;
        int row = (som.bmuY >= SOM_H/2) ? 1 : 0;
        region = row * 2 + col;
        switch(region) {
        case 0: // calm quadrant: slight boost
            *scoreScale *= 1.02;
            riskScale = 1.00;
            break;
        case 1:
            *scoreScale *= 0.95;
            riskScale = 0.85;
            if(*topK > 3) --(*topK);
            break;
        case 2:
            *scoreScale *= 0.90;
            riskScale = 0.70;
            if(*topK > 3) --(*topK);
            break;
        default: // most adverse quadrant: strongest cut
            *scoreScale *= 0.80;
            riskScale = 0.50;
            if(*topK > 2) *topK -= 2;
            break;
        }
        // Ambiguous BMU (low margin): de-risk a bit further.
        if(som.conf < SOM_CONF_MIN) {
            riskScale *= 0.8;
            if(*topK > 2) --(*topK);
        }
        if(*topK < 1) *topK = 1;
        if(*topK > TOP_K) *topK = TOP_K;
        if(*scoreScale < 0.10) *scoreScale = 0.10;
        if(*scoreScale > 2.00) *scoreScale = 2.00;
    }
};
// Meta-controller: fuses the regime models (unsupervised centroids, PCA
// diagnostics, GMM/HMM posteriors, k-means stability) and the RL agent into
// adaptive weight multipliers, a risk scale, a global score multiplier and a
// dynamic top-K for the selector. It governs concentration/risk, it does not
// forecast prices.
class StrategyController {
public:
    UnsupervisedModel unsup;   // centroid regime model (regime + confidence)
    RLAgent rl;                // small action-value agent steering base top-K
    PCAModel pca;              // rolling latent factors + dom/rot diagnostics
    GMMRegimeModel gmm;        // diagonal-GMM regime posterior
    HMMRegimeModel hmm;        // HMM posterior / entropy / switch probability
    KMeansRegimeModel kmeans;  // online k-means regime + stability
    int dynamicTopK;           // current selection breadth
    double scoreScale;         // multiplier applied to all scores
    int regime;                // regime id from the unsupervised model
    double adaptiveGamma;      // regime-blended multipliers for the
    double adaptiveAlpha;      //   scoring recipe (gamma/alpha/beta) and
    double adaptiveBeta;       //   the meta blend factor (lambda)
    double adaptiveLambda;
    double riskScale;          // uncertainty-driven risk multiplier
    int cooldown;              // updates to stay conservative after a switch
    StrategyController()
        : dynamicTopK(TOP_K), scoreScale(1.0), regime(0),
          adaptiveGamma(1.0), adaptiveAlpha(1.0), adaptiveBeta(1.0), adaptiveLambda(1.0), riskScale(1.0), cooldown(0) {}
    // Clamp x into [lo, hi].
    static double clampRange(double x, double lo, double hi) {
        if(x < lo) return lo;
        if(x > hi) return hi;
        return x;
    }
    void init() {
        unsup.init();
        rl.init();
        pca.init();
        gmm.init();
        hmm.init();
        kmeans.init();
        dynamicTopK = TOP_K;
        scoreScale = 1.0;
        regime = 0;
        adaptiveGamma = 1.0;
        adaptiveAlpha = 1.0;
        adaptiveBeta = 1.0;
        adaptiveLambda = 1.0;
        riskScale = 1.0;
        cooldown = 0;
    }
    // Assemble the GMM observation vector from the snapshot + PCA diagnostics.
    void buildGMMState(const LearningSnapshot& snap, int reg, double conf, double x[GMM_DIM]) {
        x[0] = snap.meanScore;
        x[1] = snap.meanCompactness;
        x[2] = snap.meanVol;
        x[3] = pca.dom;
        x[4] = pca.rot;
        x[5] = (double)reg / 2.0;
        x[6] = conf;
        x[7] = snap.meanScore - snap.meanCompactness;
    }
    // Assemble the HMM observation vector (PCA latents + snapshot stats).
    void buildHMMObs(const LearningSnapshot& snap, int reg, double conf, double x[HMM_DIM]) {
        x[0] = pca.latent[0];
        x[1] = pca.latent[1];
        x[2] = pca.latent[2];
        x[3] = snap.meanVol;
        x[4] = snap.meanScore;
        x[5] = snap.meanCompactness;
        x[6] = (double)reg / 2.0;
        x[7] = conf;
    }
    // Assemble the k-means state vector (same layout as the HMM observation).
    void buildKMeansState(const LearningSnapshot& snap, int reg, double conf, double x[KMEANS_DIM]) {
        x[0] = pca.latent[0];
        x[1] = pca.latent[1];
        x[2] = pca.latent[2];
        x[3] = snap.meanVol;
        x[4] = snap.meanScore;
        x[5] = snap.meanCompactness;
        x[6] = (double)reg / 2.0;
        x[7] = conf;
    }
    // One controller update: run all regime models on the snapshot, blend
    // per-regime parameter presets by posterior, derive riskScale/cooldown,
    // pick the top-K via the RL action + regime nudges, and rescale scores
    // in place (clipped to [0,1]).
    void onUpdate(const LearningSnapshot& snap, fvar* scores, int nScores, int updateCount) {
#if USE_ML
        double unsupConf = 0;
        // BUGFIX: was 'unsup.update(snap, ®ime, &unsupConf);' — an HTML
        // entity corruption ('&reg;') of '&regime' that did not compile.
        unsup.update(snap, &regime, &unsupConf);
#if USE_PCA
        pca.update(snap, regime, unsupConf);
#else
        pca.dom = 0.5;
        pca.rot = 0.0;
#endif
#if USE_GMM
        double gx[GMM_DIM];
        buildGMMState(snap, regime, unsupConf, gx);
        gmm.infer(gx);
#if USE_HMM
        double hx[HMM_DIM];
        buildHMMObs(snap, regime, unsupConf, hx);
        hmm.filter(hx);
#if USE_KMEANS
        double kx[KMEANS_DIM];
        buildKMeansState(snap, regime, unsupConf, kx);
        kmeans.predictAndUpdate(kx);
#endif
#endif
        // Regime presets: [gamma, alpha, beta, lambda] — blended below by
        // the regime posterior (HMM if available, else GMM).
        const double presets[GMM_K][4] = {
            {1.05, 1.00, 0.95, 1.00},
            {0.95, 1.05, 1.05, 0.95},
            {1.00, 0.95, 1.10, 1.05}
        };
        adaptiveGamma = 0;
        adaptiveAlpha = 0;
        adaptiveBeta = 0;
        adaptiveLambda = 0;
        for(int k=0;k<GMM_K;k++) {
#if USE_HMM
            adaptiveGamma += hmm.posterior[k] * presets[k][0];
            adaptiveAlpha += hmm.posterior[k] * presets[k][1];
            adaptiveBeta += hmm.posterior[k] * presets[k][2];
            adaptiveLambda += hmm.posterior[k] * presets[k][3];
#else
            adaptiveGamma += gmm.p[k] * presets[k][0];
            adaptiveAlpha += gmm.p[k] * presets[k][1];
            adaptiveBeta += gmm.p[k] * presets[k][2];
            adaptiveLambda += gmm.p[k] * presets[k][3];
#endif
        }
#if USE_HMM
        // High regime entropy or switch probability => cut risk and arm the
        // cooldown so the selector stays conservative for a few updates.
        double entNorm = hmm.entropy / log((double)HMM_K + EPS);
        riskScale = clampRange(1.0 - 0.45 * entNorm, HMM_MIN_RISK, 1.0);
        if(hmm.entropy > HMM_ENTROPY_TH || hmm.switchProb > HMM_SWITCH_TH) cooldown = HMM_COOLDOWN_UPDATES;
        else if(cooldown > 0) cooldown--;
#else
        double entNorm = gmm.entropy / log((double)GMM_K + EPS);
        riskScale = clampRange(1.0 - GMM_ENTROPY_COEFF * entNorm, GMM_MIN_RISK, 1.0);
#endif
#else
        // No mixture models: derive multipliers from PCA diagnostics only.
        adaptiveGamma = 1.0 + 0.35 * pca.dom - 0.25 * pca.rot;
        adaptiveAlpha = 1.0 + 0.30 * pca.dom;
        adaptiveBeta = 1.0 + 0.25 * pca.rot;
        adaptiveLambda = 1.0 + 0.20 * pca.dom - 0.20 * pca.rot;
        riskScale = 1.0;
#endif
        adaptiveGamma = clampRange(adaptiveGamma, 0.80, 1.40);
        adaptiveAlpha = clampRange(adaptiveAlpha, 0.85, 1.35);
        adaptiveBeta = clampRange(adaptiveBeta, 0.85, 1.35);
        adaptiveLambda = clampRange(adaptiveLambda, 0.85, 1.25);
#if USE_KMEANS
        // Blend in k-means per-regime presets, weighted by cluster stability.
        const double kmPreset[KMEANS_K][4] = {
            {1.02, 1.00, 0.98, 1.00},
            {1.08, 0.96, 0.95, 1.02},
            {0.94, 1.08, 1.08, 0.92}
        };
        int kr = kmeans.regime;
        if(kr < 0) kr = 0;
        if(kr >= KMEANS_K) kr = KMEANS_K - 1;
        double wkm = clampRange(kmeans.stability, 0.0, 1.0);
        adaptiveGamma = (1.0 - wkm) * adaptiveGamma + wkm * kmPreset[kr][0];
        adaptiveAlpha = (1.0 - wkm) * adaptiveAlpha + wkm * kmPreset[kr][1];
        adaptiveBeta = (1.0 - wkm) * adaptiveBeta + wkm * kmPreset[kr][2];
        adaptiveLambda = (1.0 - wkm) * adaptiveLambda + wkm * kmPreset[kr][3];
        if(kmeans.stability < KMEANS_STABILITY_MIN) {
            riskScale *= 0.85;
            if(cooldown < 1) cooldown = 1;
        }
#endif
        // RL action -> base concentration (how many assets to consider).
        rl.updateReward(snap.meanScore);
        rl.lastAction = rl.chooseAction(updateCount);
        int baseTopK = TOP_K;
        if(rl.lastAction == 0) baseTopK = TOP_K - 2;
        else if(rl.lastAction == 1) baseTopK = TOP_K;
        else if(rl.lastAction == 2) baseTopK = TOP_K;
        else baseTopK = TOP_K - 1;
        double profileBias[5] = {1.00, 0.98, 0.99, 0.97, 1.02};
        scoreScale = (1.0 + 0.06 * (adaptiveGamma - 1.0) + 0.04 * (adaptiveAlpha - 1.0) - 0.04 * (adaptiveBeta - 1.0))
            * profileBias[STRATEGY_PROFILE] * riskScale;
        // Concentration nudges from PCA diagnostics and active regimes.
        if(pca.dom > 0.60) baseTopK -= 1;
        if(pca.rot > 0.15) baseTopK -= 1;
#if USE_HMM
        if(hmm.regime == 2) baseTopK -= 1;
        if(cooldown > 0) baseTopK -= 1;
#if USE_KMEANS
        if(kmeans.regime == 2) baseTopK -= 1;
#endif
#elif USE_GMM
        if(gmm.bestRegime == 2) baseTopK -= 1;
#endif
        dynamicTopK = baseTopK;
        if(dynamicTopK < 1) dynamicTopK = 1;
        if(dynamicTopK > TOP_K) dynamicTopK = TOP_K;
        // Rescale scores in place, clipped to [0,1].
        for(int i=0; i<nScores; i++) {
            double s = (double)scores[i] * scoreScale;
            if(s > 1.0) s = 1.0;
            if(s < 0.0) s = 0.0;
            scores[i] = (fvar)s;
        }
#else
        (void)snap; (void)scores; (void)nScores; (void)updateCount;
#endif
    }
};
// ---------------------------- Strategy ----------------------------
// Main strategy object: holds feature history, correlation/distance slabs,
// the OpenCL backend (with CPU fallback), and all learning/regime modules.
// One instance is created on INITRUN and driven once per bar via onBar().
class CrowdAverseStrategy {
public:
ExposureTable exposureTable;       // structural currency-overlap distances
FeatureBufferSoA featSoA;          // SoA ring buffer of per-asset features
OpenCLBackend openCL;              // optional GPU path for correlations
SlabAllocator<fvar> corrMatrix;    // N_ASSETS x N_ASSETS averaged correlations
SlabAllocator<fvar> distMatrix;    // blended correlation/exposure distances
SlabAllocator<fvar> compactness;   // per-asset centrality score
SlabAllocator<fvar> entropy;       // per-asset dispersion (allocated; not filled in this chunk)
SlabAllocator<fvar> scores;        // final per-asset selection scores
SlabAllocator<float> featLinear;   // flattened features for the OpenCL kernel
SlabAllocator<float> corrLinear;   // OpenCL output buffer (upper triangle)
int barCount;                      // bars seen since init
int updateCount;                   // heavy update cycles performed
StrategyController controller;     // adaptive weights / top-K / risk governor
HierarchicalClusteringModel hclust;// coarse/fine dendrogram clusters
CommunityDetectionModel comm;      // label-propagation communities + modularity
AutoencoderModel ae;               // novelty autoencoder over strategy state
NoveltyController novelty;         // anomaly brake on top of ae
SOMModel som;                      // self-organizing map regime model
SOMPlaybook somPlaybook;           // quadrant playbook for the SOM
CrowdAverseStrategy() : barCount(0), updateCount(0) {}
// Allocate all slabs, bring up the OpenCL backend (optional), and reset
// every learning module plus the bar/update counters.
void init() {
printf("CrowdAverse_v13: Initializing...\n");
exposureTable.init();
featSoA.init(N_ASSETS, FEAT_WINDOW);
corrMatrix.init(N_ASSETS * N_ASSETS);
distMatrix.init(N_ASSETS * N_ASSETS);
compactness.init(N_ASSETS);
entropy.init(N_ASSETS);
scores.init(N_ASSETS);
featLinear.init(FEAT_N * N_ASSETS * FEAT_WINDOW);
corrLinear.init(N_ASSETS * N_ASSETS);
// openCL.init() sets openCL.ready; failure is non-fatal (CPU fallback).
openCL.init();
printf("CrowdAverse_v13: Ready (OpenCL=%d)\n", openCL.ready);
controller.init();
hclust.init();
comm.init();
ae.init();
novelty.init();
som.init();
somPlaybook.init();
barCount = 0;
updateCount = 0;
}
// Release the OpenCL backend and all memory slabs.
// NOTE(review): slab destructors also free on destruction, and shutdown()
// is idempotent, so the explicit calls here are safe either way.
void shutdown() {
printf("CrowdAverse_v13: Shutting down...\n");
openCL.shutdown();
featSoA.shutdown();
corrMatrix.shutdown();
distMatrix.shutdown();
compactness.shutdown();
entropy.shutdown();
scores.shutdown();
featLinear.shutdown();
corrLinear.shutdown();
}
// Compute the 9 per-bar features for one asset from Zorro's price series
// and push them into the SoA ring buffer. Skips the first 50 bars since
// C[50] and the volatility series need that much history.
void computeFeatures(int assetIdx) {
asset((char*)ASSET_NAMES[assetIdx]);
vars C = series(priceClose(0));
vars V = series(Volatility(C, 20));
if(Bar < 50) return;
fvar r1 = (fvar)log(C[0] / C[1]);                              // 1-bar log return
fvar rN = (fvar)log(C[0] / C[12]);                             // 12-bar log return
fvar vol = (fvar)V[0];                                         // 20-bar volatility
fvar zscore = (fvar)((C[0] - C[50]) / (V[0] * 20.0 + EPS));    // standardized 50-bar move
fvar rangeP = (fvar)((C[0] - C[50]) / (C[0] + EPS));           // 50-bar move as fraction of price
fvar flow = (fvar)(r1 * vol);                                  // return/volatility coupling
fvar regime = (fvar)((vol > 0.001) ? 1.0 : 0.0);               // binary volatility regime flag
fvar volOfVol = (fvar)(vol * vol);                             // volatility of volatility proxy
fvar persistence = (fvar)fabs(r1);                             // return magnitude
featSoA.push(0, assetIdx, r1);
featSoA.push(1, assetIdx, rN);
featSoA.push(2, assetIdx, vol);
featSoA.push(3, assetIdx, zscore);
featSoA.push(4, assetIdx, rangeP);
featSoA.push(5, assetIdx, flow);
featSoA.push(6, assetIdx, regime);
featSoA.push(7, assetIdx, volOfVol);
featSoA.push(8, assetIdx, persistence);
}
// Dispersion proxy for one asset: returns the VARIANCE of feature 0
// (the 1-bar log return) over the feature window.
// NOTE(review): despite the name this is not Shannon entropy, and no caller
// is visible in this chunk (the 'entropy' slab is never filled here).
fvar computeEntropy(int assetIdx) {
fvar mean = 0;
for(int t=0; t<FEAT_WINDOW; t++) mean += featSoA.get(0, assetIdx, t);
mean /= FEAT_WINDOW;
fvar var = 0;
for(int t=0; t<FEAT_WINDOW; t++) { fvar d = featSoA.get(0, assetIdx, t) - mean; var += d*d; }
return (fvar)(var / FEAT_WINDOW);
}
// CPU fallback: Pearson correlation per feature for each asset pair,
// averaged over all FEAT_N features into a symmetric corrMatrix.
// FIX: the diagonal is now set to 1.0, matching the OpenCL path (which
// wrote corrMatrix[a*N+a] = 1.0); previously the CPU path left it at 0,
// so the two paths produced inconsistent matrices.
void computeCorrelationMatrixCPU() {
    for(int i=0;i<N_ASSETS*N_ASSETS;i++) corrMatrix[i] = 0;
    for(int a=0; a<N_ASSETS; a++) corrMatrix[a*N_ASSETS + a] = (fvar)1.0;
    for(int f=0; f<FEAT_N; f++){
        for(int a=0; a<N_ASSETS; a++){
            for(int b=a+1; b<N_ASSETS; b++){
                // Window means of feature f for both assets.
                fvar mx = 0, my = 0;
                for(int t=0; t<FEAT_WINDOW; t++){
                    mx += featSoA.get(f,a,t);
                    my += featSoA.get(f,b,t);
                }
                mx /= (fvar)FEAT_WINDOW;
                my /= (fvar)FEAT_WINDOW;
                // Variance and covariance accumulators.
                fvar sxx = 0, syy = 0, sxy = 0;
                for(int t=0; t<FEAT_WINDOW; t++){
                    fvar dx = featSoA.get(f,a,t) - mx;
                    fvar dy = featSoA.get(f,b,t) - my;
                    sxx += dx*dx;
                    syy += dy*dy;
                    sxy += dx*dy;
                }
                fvar den = (fvar)sqrt((double)(sxx*syy + (fvar)EPS));
                fvar corr = 0;
                if(den > (fvar)EPS) corr = sxy / den;
                else corr = 0;
                // Accumulate the per-feature contribution; mirror to (b,a).
                int idx = a*N_ASSETS + b;
                corrMatrix[idx] += corr / (fvar)FEAT_N;
                corrMatrix[b*N_ASSETS + a] = corrMatrix[idx];
            }
        }
    }
}
// Flatten the SoA feature ring buffer into a contiguous float array laid
// out as [feature][asset][time], the layout expected by the OpenCL kernel.
// FIX: an accidental duplicate of this method plus a truncated copy of
// computeCorrelationMatrix() (cut off at "FEAT_WINDO") and pasted-in forum
// pagination text were removed here; the complete computeCorrelationMatrix()
// follows immediately below.
void buildFeatLinear() {
    int idx = 0;
    for(int f=0; f<FEAT_N; f++){
        for(int a=0; a<N_ASSETS; a++){
            for(int t=0; t<FEAT_WINDOW; t++){
                featLinear[idx] = (float)featSoA.get(f, a, t);
                idx++;
            }
        }
    }
}
// Compute the averaged correlation matrix, preferring the OpenCL kernel and
// falling back to the CPU implementation on any runtime failure.
void computeCorrelationMatrix() {
if(openCL.ready) {
buildFeatLinear();
for(int i=0;i<N_ASSETS*N_ASSETS;i++) corrLinear[i] = 0.0f;
// Kernel writes the upper triangle of averaged correlations as float.
int ok = openCL.computeCorrelationMatrixCL(
featLinear.data,
corrLinear.data,
N_ASSETS,
FEAT_N,
FEAT_WINDOW
);
if(ok) {
// Expand the upper triangle into a symmetric fvar matrix, diagonal = 1.
for(int i=0;i<N_ASSETS*N_ASSETS;i++) corrMatrix[i] = (fvar)0;
for(int a=0; a<N_ASSETS; a++){
corrMatrix[a*N_ASSETS + a] = (fvar)1.0;
for(int b=a+1; b<N_ASSETS; b++){
float c = corrLinear[a*N_ASSETS + b];
corrMatrix[a*N_ASSETS + b] = (fvar)c;
corrMatrix[b*N_ASSETS + a] = (fvar)c;
}
}
return;
}
// Runtime failure (write/enqueue/read): disable OpenCL permanently and
// fall through to the CPU path for this and all later updates.
printf("OpenCL: runtime fail -> CPU fallback\n");
openCL.ready = 0;
}
computeCorrelationMatrixCPU();
}
void computeDistanceMatrix() {
for(int i=0;i<N_ASSETS;i++){
for(int j=0;j<N_ASSETS;j++){
if(i == j) {
distMatrix[i*N_ASSETS + j] = (fvar)0;
} else {
fvar corrDist = (fvar)1.0 - (fvar)fabs((double)corrMatrix[i*N_ASSETS + j]);
fvar expDist = (fvar)exposureTable.getDist(i, j);
fvar blended = (fvar)LAMBDA_META * corrDist + (fvar)(1.0 - (double)LAMBDA_META) * expDist;
distMatrix[i*N_ASSETS + j] = blended;
}
}
}
}
// All-pairs shortest paths over the blended distance graph, then derive a
// per-asset "compactness" (centrality): smaller total distance to the rest
// of the universe => higher compactness in (0,1].
// FIX: the scratch matrix was hard-coded as d[28][28]; it is now sized by
// N_ASSETS so the constant cannot silently diverge from the universe size.
void floydWarshall() {
    fvar d[N_ASSETS][N_ASSETS];
    // Seed with direct distances; negative entries are treated as missing.
    for(int i=0;i<N_ASSETS;i++){
        for(int j=0;j<N_ASSETS;j++){
            d[i][j] = distMatrix[i*N_ASSETS + j];
            if(i == j) d[i][j] = (fvar)0;
            if(d[i][j] < (fvar)0) d[i][j] = (fvar)INF;
        }
    }
    // Classic Floyd-Warshall relaxation (skip paths through INF edges).
    for(int k=0;k<N_ASSETS;k++){
        for(int i=0;i<N_ASSETS;i++){
            for(int j=0;j<N_ASSETS;j++){
                if(d[i][k] < (fvar)INF && d[k][j] < (fvar)INF) {
                    fvar nk = d[i][k] + d[k][j];
                    if(nk < d[i][j]) d[i][j] = nk;
                }
            }
        }
    }
    // Compactness: bounded transform of the summed reachable distances.
    // NOTE(review): only j>i is summed, so compactness[i] reflects distances
    // to higher-indexed assets only — kept as-is to preserve behavior.
    for(int i=0;i<N_ASSETS;i++){
        fvar w = 0;
        for(int j=i+1;j<N_ASSETS;j++){
            if(d[i][j] < (fvar)INF) w += d[i][j];
        }
        if(w > (fvar)0) compactness[i] = (fvar)(1.0 / (1.0 + (double)w));
        else compactness[i] = (fvar)0;
    }
}
void computeScores() {
for(int i=0;i<N_ASSETS;i++){
fvar coupling = 0;
int count = 0;
for(int j=0;j<N_ASSETS;j++){
if(i != j && distMatrix[i*N_ASSETS + j] < (fvar)INF) {
coupling += compactness[j];
count++;
}
}
fvar pCouple = 0;
if(count > 0) pCouple = coupling / (fvar)count;
else pCouple = (fvar)0;
fvar regime = featSoA.get(6, i, 0);
fvar rawScore = (fvar)ALPHA * regime + (fvar)GAMMA * compactness[i] - (fvar)BETA * pCouple;
if(rawScore > (fvar)30) rawScore = (fvar)30;
if(rawScore < (fvar)-30) rawScore = (fvar)-30;
scores[i] = (fvar)(1.0 / (1.0 + exp(-(double)rawScore)));
}
}
// Aggregate universe-wide means (score, compactness, volatility) into a
// LearningSnapshot for the controller; regime fields start neutral.
LearningSnapshot buildSnapshot() {
    LearningSnapshot snap;
    double sumScore = 0.0, sumCompact = 0.0, sumVol = 0.0;
    for(int a=0; a<N_ASSETS; a++) {
        sumScore += (double)scores[a];
        sumCompact += (double)compactness[a];
        sumVol += (double)featSoA.get(2, a, 0); // feature 2 = 20-bar volatility
    }
    snap.meanScore = sumScore / (double)N_ASSETS;
    snap.meanCompactness = sumCompact / (double)N_ASSETS;
    snap.meanVol = sumVol / (double)N_ASSETS;
    snap.regime = 0;
    snap.regimeConfidence = 0;
    return snap;
}
// Per-bar driver: push features every bar; every UPDATE_EVERY bars run the
// heavy cycle (correlations -> distances -> clustering -> shortest paths ->
// scores -> controller -> novelty brake) and print the selection.
void onBar() {
barCount++;
for(int i=0;i<N_ASSETS;i++) computeFeatures(i);
if(barCount % UPDATE_EVERY == 0) {
updateCount++;
computeCorrelationMatrix();
computeDistanceMatrix();
// NOTE(review): the next two #if USE_COMMUNITY guards are identical and
// could be merged into one.
#if USE_COMMUNITY
hclust.update(distMatrix.data);
#endif
#if USE_COMMUNITY
comm.update(corrMatrix.data, distMatrix.data);
#endif
floydWarshall();
computeScores();
controller.onUpdate(buildSnapshot(), scores.data, N_ASSETS, updateCount);
#if USE_AE
// Build the 8-dim strategy-state vector for the novelty autoencoder.
double aeState[AE_INPUT_DIM];
double ms=0, mc=0, mv=0;
for(int i=0;i<N_ASSETS;i++){ ms += (double)scores[i]; mc += (double)compactness[i]; mv += (double)featSoA.get(2, i, 0); }
ms /= (double)N_ASSETS; mc /= (double)N_ASSETS; mv /= (double)N_ASSETS;
aeState[0] = ms;
aeState[1] = mc;
aeState[2] = mv;
aeState[3] = controller.scoreScale;
aeState[4] = (double)controller.dynamicTopK;
aeState[5] = (double)barCount / (double)(LookBack + 1);
aeState[6] = (double)updateCount / 1000.0;
aeState[7] = (double)openCL.ready;
// Reconstruction error = novelty; high surprise shrinks top-K and risk.
double reconErr = ae.infer(aeState);
novelty.update(reconErr);
novelty.apply(&controller.dynamicTopK, &controller.scoreScale);
// Apply the novelty risk scale directly to the scores, clipped to [0,1].
for(int i=0;i<N_ASSETS;i++){{
double s = (double)scores[i] * novelty.riskScale;
if(s > 1.0) s = 1.0;
if(s < 0.0) s = 0.0;
scores[i] = (fvar)s;
}}
#endif
printTopK();
}
}
// Rank assets by score (partial selection sort of the first topN slots),
// optionally adjust topN by community modularity, and every 10th update
// print a diversified selection: at most one pick per coarse cluster first,
// then fill remaining slots capped per fine cluster.
void printTopK() {
int indices[N_ASSETS];
for(int i=0;i<N_ASSETS;i++) indices[i] = i;
int topN = controller.dynamicTopK;
#if USE_COMMUNITY
// Weak community structure -> narrower selection; strong -> slightly wider.
if(comm.qSmooth < (fvar)COMM_Q_LOW && topN > 2) topN--;
if(comm.qSmooth > (fvar)COMM_Q_HIGH && topN < TOP_K) topN++;
#endif
// Partial selection sort: only the first topN positions need to be ordered.
for(int i=0;i<topN;i++){
for(int j=i+1;j<N_ASSETS;j++){
if(scores[indices[j]] > scores[indices[i]]) {
int tmp = indices[i];
indices[i] = indices[j];
indices[j] = tmp;
}
}
}
if(updateCount % 10 == 0) {
printf("===CompactDominant_v13 Top-K(update#%d,OpenCL=%d)===\n",
updateCount, openCL.ready);
#if USE_COMMUNITY
printf(" communities=%d Q=%.4f\n", comm.nCommunities, (double)comm.qSmooth);
#endif
int selected[N_ASSETS];
int selCount = 0;
#if USE_COMMUNITY
// Pass 1: take the best asset of each coarse cluster (one per cluster).
int coarseUsed[HCLUST_COARSE_K];
int fineTake[HCLUST_FINE_K];
int fineCap = (topN + HCLUST_FINE_K - 1) / HCLUST_FINE_K;
for(int c=0;c<HCLUST_COARSE_K;c++) coarseUsed[c] = 0;
for(int c=0;c<HCLUST_FINE_K;c++) fineTake[c] = 0;
for(int i=0;i<topN;i++){
int idx = indices[i];
int cid = comm.clusterCoarse[idx];
if(cid < 0 || cid >= HCLUST_COARSE_K) cid = 0;
if(coarseUsed[cid]) continue;
coarseUsed[cid] = 1;
selected[selCount++] = idx;
int fid = comm.clusterFine[idx];
if(fid < 0 || fid >= HCLUST_FINE_K) fid = 0;
fineTake[fid]++;
}
// Pass 2: fill remaining slots by rank, capped per fine cluster.
for(int i=0;i<topN && selCount<topN;i++){
int idx = indices[i];
int dup = 0;
for(int k=0;k<selCount;k++) if(selected[k]==idx){ dup=1; break; }
if(dup) continue;
int fid = comm.clusterFine[idx];
if(fid < 0 || fid >= HCLUST_FINE_K) fid = 0;
if(fineTake[fid] >= fineCap) continue;
selected[selCount++] = idx;
fineTake[fid]++;
}
#else
for(int i=0;i<topN;i++) selected[selCount++] = indices[i];
#endif
for(int i=0;i<selCount;i++){
int idx = selected[i];
printf(" %d.%s: score=%.4f, C=%.4f\n", i+1, ASSET_NAMES[idx], (double)scores[idx], (double)compactness[idx]);
}
}
}
};
// ---------------------------- Zorro DLL entry ----------------------------
static CompactDominantStrategy* S = NULL;
DLLFUNC void run()
{
if(is(INITRUN)) {
BarPeriod = 60;
LookBack = max(LookBack, FEAT_WINDOW + 50);
asset((char*)ASSET_NAMES[0]);
if(!S) {
S = new CompactDominantStrategy();
S->init();
}
}
if(is(EXITRUN)) {
if(S) {
S->shutdown();
delete S;
S = NULL;
}
return;
}
if(!S || Bar < LookBack)
return;
S->onBar();
}
210
65,908
Read More
|
|
03/05/26 01:56
This code is a Zorro strategy DLL that builds a dynamic map of a multi currency FX basket and then repeatedly picks a small set of “best” assets based on how structurally central and non redundant they are inside that map. The core idea is that each asset becomes a node in a graph, edges represent similarity and shared exposure, and the strategy scores each node by how “compact” its connections are while penalizing nodes that are too coupled to other already compact nodes. It runs on an hourly bar schedule and updates the heavy graph computations every few bars to avoid noise and reduce cost. A key engineering goal is speed and robustness: OpenCL is used to accelerate the most expensive step, but everything has a full CPU fallback so the strategy still runs if OpenCL is missing or fails. At startup the strategy prepares a fixed universe of twenty eight FX pairs and defines nine rolling features per asset. It stores feature history in a compact structure of arrays ring buffer so each feature for each asset is written into a contiguous slab of memory and retrieved by “most recent back in time” indexing. The feature set is designed to represent short term and medium term change, volatility, standardized deviation, range behavior, simple “flow” coupling return and volatility, a binary volatility regime flag, volatility of volatility, and return persistence. Every bar, for every asset, the strategy computes these values from Zorro’s price series and pushes them into the ring buffer. This creates a synchronized multi asset feature tape that the later graph logic can compare across assets. Every few bars the strategy performs an “update cycle.” In that cycle it first computes a correlation matrix across assets, but not using just one signal: it computes a correlation per feature and then averages across the nine features to get a single similarity number for each asset pair. 
This is the heaviest computation because it is pairwise across assets and also loops over the entire feature window. The code therefore offers two implementations: a CPU triple loop that computes means and deviations for each pair and feature, and a GPU accelerated OpenCL kernel that does the same in parallel for many pairs at once. The OpenCL part is deliberately minimal and loaded dynamically: it tries to load OpenCL.dll at runtime, resolves all needed function pointers, selects a GPU device when possible (otherwise a CPU device), builds a kernel from embedded source, allocates buffers, and marks itself “ready.” If any step fails, it prints a reason and the strategy falls back to CPU without crashing. The OpenCL kernel is focused on one job: “corr_pairwise” takes a flattened feature history array and writes out an upper triangle of averaged correlations. Each work item corresponds to one asset pair. For each feature it computes the mean of the window for asset A and asset B, then computes the variance terms and covariance term, forms a normalized correlation value, and adds it into an accumulator. At the end it divides by number of features and stores one correlation value for that pair. After kernel completion, the host reads the result back and fills a symmetric correlation matrix, setting diagonals to one. If the OpenCL runtime fails during execution (write, kernel enqueue, read), the code disables OpenCL and uses the CPU implementation thereafter. Once a correlation matrix exists, the strategy converts similarity into distance. It treats high absolute correlation as “close,” low correlation as “far,” and then blends that correlation distance with an exposure based distance coming from an exposure table. The exposure table is structured to encode currency overlaps between pairs (for example two pairs sharing USD exposure), and it includes a precomputed distance between assets based on that overlap. 
In the snippet shown, the table is initialized but the fill logic is not included, so in practice the exposure distance may be all zeros unless filled elsewhere. Conceptually, though, the blend is important: correlation captures observed co movement, exposure captures structural currency linkage, and the lambda parameter controls how much the map follows behavior versus structure. With a distance matrix in hand, the strategy runs a graph shortest path routine (Floyd Warshall) to compute indirect distances through the network. This is a crucial conceptual step: it makes the “shape” of the asset space matter, not just direct pairwise relationships. Two assets might not be directly close, but if there is a chain of moderately close links between them, the shortest path will be relatively small. After computing all pairs shortest paths, the strategy defines compactness for each asset as a function of how small its distances are to others: it sums distances from the asset to the rest and transforms that sum into a bounded compactness score where smaller total distance means higher compactness. This is effectively a “centrality” measure: an asset is compact if it sits in a region where everything is reachable with low distance. The scoring stage combines three forces. First, it includes a regime signal derived from the asset’s volatility flag. Second, it strongly rewards the asset’s own compactness with a dominant multiplier, reflecting the idea that central nodes represent stable, broadly connected opportunities. Third, it subtracts a coupling penalty based on the average compactness of its neighbors. The coupling penalty discourages picking assets that sit in a cluster of already compact assets because that would increase redundancy. The raw score is then squashed into a bounded probability like score between zero and one. The result is a per asset score array that can be ranked. 
On top of this graph based score engine, the code layers a “learning controller” whose job is not to predict price directly but to control how aggressively the strategy trusts and concentrates its selection. It builds a compact snapshot each update: mean score, mean compactness, and mean volatility across the universe. That snapshot feeds multiple lightweight models that estimate “regimes” and “confidence.” There is an unsupervised centroid model that assigns the snapshot to one of a few clusters and produces a confidence based on how separated the best and second best cluster distances are. There is a PCA style monitor that maintains rolling normalization statistics and produces latent factors, plus two diagnostics called dominance and rotation that measure how concentrated the latent explanation is and how fast it is changing. Then come optional regime detectors: a diagonal Gaussian mixture model and a hidden Markov model with diagonal emissions. Both produce a posterior distribution over regimes, an entropy measure, and a confidence measure; the HMM also produces a switching probability based on transition self probability. There is also an online K means regime model that tracks centroid distance stability. These models are “online” in the sense that they slowly update their internal parameters with each new observation, using small smoothing rates to avoid overreacting. The point is not perfect statistical modeling but a robust sense of whether the environment is stable, ambiguous, or changing quickly. The controller converts these regime posteriors into adaptive multipliers for the main scoring recipe. It uses preset parameter profiles per regime and blends them by posterior weights, producing adaptive versions of the score weights and the meta blend factor. It also produces a risk scale derived from regime uncertainty: higher entropy or higher switching probability reduces risk, triggers cooldown periods, and lowers the number of assets selected. 
A small reinforcement style agent is also included: it keeps four action values, chooses an action periodically, and updates action value based on improvements in mean score. The action influences the base “top K” concentration, so sometimes the strategy intentionally becomes more selective or less selective. Beyond regime control, there is a novelty detection layer using a small handcrafted autoencoder. It normalizes eight inputs describing the strategy state and reconstructs them through a low dimensional latent mapping; reconstruction error is treated as novelty. The novelty controller keeps an exponential moving average and variance of reconstruction error, converts current error into a standardized surprise measure, and when surprise is high it reduces risk scale and shrinks top K. This acts as an anomaly brake: if the strategy’s own internal summary suddenly looks unfamiliar relative to recent history, it automatically de risks even if the graph score looks attractive. The code also includes additional structure discovery modules: spectral clustering surrogate, hierarchical clustering, and community detection. The hierarchical clustering builds a dendrogram using a simple agglomerative merge and then cuts it into coarse and fine clusters. The community model builds a sparse graph from absolute correlations, optionally prunes to the strongest neighbors for stability, runs a label propagation like update, compresses labels into community IDs, and computes an approximate modularity score with smoothing. This modularity score is then used to nudge top K: low modularity suggests weak community structure so the strategy becomes more conservative, while high modularity allows slightly broader selection. When printing the final selection, the code tries to diversify picks by coarse clusters first, then fill remaining slots subject to a cap per fine cluster, so the chosen basket is not all from one tightly connected group. 
Finally, the strategy prints the top candidates periodically with their score and compactness and shows whether OpenCL is active. In Zorro's run loop, initialization sets bar period and lookback, builds the strategy object once, and on each bar calls onBar after enough history is available. On exit it shuts down OpenCL and frees all slabs. Overall, the design is a hybrid of graph topology, multi feature similarity, optional GPU acceleration, and a stacked set of online "meta learners" that mostly act as risk and concentration governors rather than direct forecasters.
// TGr06A_CompactDominant_v13.cpp - Zorro64 Strategy DLL
// Strategy A v13: Compactness-Dominant with MX06 OOP + OpenCL + Learning Controller
// Notes:
// - Keeps full CPU fallback.
// - OpenCL is optional: if OpenCL.dll missing / no device / kernel build fails -> CPU path.
// - OpenCL accelerates the heavy correlation matrix step by offloading pairwise correlations.
// - Correlation is computed in float on GPU; results are stored back into fvar corrMatrix.
#define _CRT_SECURE_NO_WARNINGS
#include <zorro.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <windows.h>
#include <stddef.h>
// ---- Core numeric guards and universe/scoring parameters ----
#define INF 1e30
#define EPS 1e-12
#define N_ASSETS 28
#define FEAT_N 9
#define FEAT_WINDOW 200
#define UPDATE_EVERY 5
#define TOP_K 5
// Scoring weights: ALPHA = regime flag, BETA = coupling penalty,
// GAMMA = own compactness (dominant term), LAMBDA_META = correlation-vs-
// exposure blend for the distance matrix.
#define ALPHA 0.1
#define BETA 0.2
#define GAMMA 3.0
#define LAMBDA_META 0.7
// ---- Feature switches for the learning controller stack ----
#define USE_ML 1
#define USE_UNSUP 1
#define USE_RL 1
#define USE_PCA 1
#define USE_GMM 1
#define USE_HMM 1
// ---- Hidden Markov regime model ----
#define HMM_K 3
#define HMM_DIM 8
#define HMM_VAR_FLOOR 1e-4
#define HMM_SMOOTH 0.02
#define HMM_ENTROPY_TH 0.85
#define HMM_SWITCH_TH 0.35
#define HMM_MIN_RISK 0.25
#define HMM_COOLDOWN_UPDATES 2
#define HMM_ONLINE_UPDATE 1
// ---- Online k-means regime model ----
#define USE_KMEANS 1
#define KMEANS_K 3
#define KMEANS_DIM 8
#define KMEANS_ETA 0.03
#define KMEANS_DIST_EMA 0.08
#define KMEANS_STABILITY_MIN 0.35
#define KMEANS_ONLINE_UPDATE 1
// ---- Structure discovery: spectral / hierarchical / community ----
#define USE_SPECTRAL 1
#define SPECTRAL_K 4
#define USE_HCLUST 1
#define HCLUST_COARSE_K 4
#define HCLUST_FINE_K 8
#define USE_COMMUNITY 1
#define COMM_W_MIN 0.15
#define COMM_TOPM 6
#define COMM_ITERS 4
#define COMM_Q_EMA 0.20
#define COMM_Q_LOW 0.20
#define COMM_Q_HIGH 0.45
// ---- Autoencoder novelty brake ----
#define USE_AE 1
#define AE_INPUT_DIM 8
#define AE_LATENT_DIM 4
#define AE_NORM_ALPHA 0.02
#define AE_ERR_EMA 0.10
#define AE_Z_LOW 1.0
#define AE_Z_HIGH 2.0
// ---- Self-organizing map ----
#define USE_SOM 1
#define SOM_W 10
#define SOM_H 10
#define SOM_DIM 12
#define SOM_ALPHA_MAX 0.30
#define SOM_ALPHA_MIN 0.05
#define SOM_SIGMA_MAX 5.0
#define SOM_SIGMA_MIN 1.0
#define SOM_CONF_MIN 0.15
#define SOM_ONLINE_UPDATE 1
// ---- Gaussian mixture regime model ----
#define GMM_K 3
#define GMM_DIM 8
#define GMM_ALPHA 0.02
#define GMM_VAR_FLOOR 1e-4
#define GMM_ENTROPY_COEFF 0.45
#define GMM_MIN_RISK 0.25
#define GMM_ONLINE_UPDATE 1
// ---- Misc: selection profile and PCA monitor ----
#define STRATEGY_PROFILE 0
#define PCA_DIM 6
#define PCA_COMP 3
#define PCA_WINDOW 128
#define PCA_REBUILD_EVERY 4
// fvar: working precision for matrices/scores; float under TIGHT_MEM.
#ifdef TIGHT_MEM
typedef float fvar;
#else
typedef double fvar;
#endif
// The 28 FX crosses spanned by 8 currencies (C(8,2) = 28).
// FIX: the list previously held only 27 names while N_ASSETS is 28, so any
// access to ASSET_NAMES[27] read out of bounds; the missing pair was CHFJPY.
static const char* ASSET_NAMES[] = {
"EURUSD","GBPUSD","USDCHF","USDJPY","AUDUSD","AUDCAD","AUDCHF","AUDJPY","AUDNZD",
"CADJPY","CADCHF","CHFJPY","EURAUD","EURCAD","EURCHF","EURGBP","EURJPY","EURNZD",
"GBPAUD","GBPCAD","GBPCHF","GBPJPY","GBPNZD","NZDCAD","NZDCHF","NZDJPY","NZDUSD","USDCAD"
};
// The 8 currencies spanned by the basket (used by the exposure-overlap logic).
static const char* CURRENCIES[] = {"EUR","GBP","USD","CHF","JPY","AUD","CAD","NZD"};
#define N_CURRENCIES 8
// ---------------------------- Exposure Table ----------------------------
// Structural currency-linkage data: per-asset currency exposure flags and a
// precomputed asset-to-asset exposure distance.
// NOTE(review): init() only zeroes both tables; no fill logic is visible in
// this chunk, so getDist() returns 0 unless populated elsewhere — confirm.
struct ExposureTable {
    int exposure[N_ASSETS][N_CURRENCIES];
    double exposureDist[N_ASSETS][N_ASSETS];
    // Zero both tables.
    void init() {
        for(int a=0; a<N_ASSETS; a++){
            for(int c=0; c<N_CURRENCIES; c++) exposure[a][c] = 0;
            for(int b=0; b<N_ASSETS; b++) exposureDist[a][b] = 0.0;
        }
    }
    // Structural distance between assets i and j.
    inline double getDist(int i,int j) const { return exposureDist[i][j]; }
};
// ---------------------------- Slab Allocator ----------------------------
// Owning, zero-initialized flat buffer of trivially-copyable elements
// (fvar/float here; calloc does not run constructors, same as the previous
// malloc+memset implementation).
// FIX: malloc(size * sizeof(T)) + memset replaced with calloc, which checks
// the count*size multiplication for overflow and delivers zeroed memory;
// on allocation failure the slab is now left in a safe empty state
// (capacity 0) instead of reporting a capacity it does not have.
template<typename T>
class SlabAllocator {
public:
    T* data;       // owned buffer; NULL when not initialized
    int capacity;  // number of T elements in data
    SlabAllocator() : data(NULL), capacity(0) {}
    ~SlabAllocator() { shutdown(); }
    // Allocate a zeroed slab of 'size' elements, releasing any previous one.
    void init(int size) {
        shutdown();
        capacity = size;
        data = (T*)calloc((size_t)capacity, sizeof(T));
        if(!data) capacity = 0; // allocation failed: safe empty state
    }
    // Release the buffer; safe to call repeatedly (free(NULL) is a no-op).
    void shutdown() {
        free(data);
        data = NULL;
        capacity = 0;
    }
    T& operator[](int i) { return data[i]; }
    const T& operator[](int i) const { return data[i]; }
};
// ---------------------------- Feature Buffer (SoA ring) ----------------------------
// ---------------------------- Feature Buffer (SoA ring) ----------------------------
// Ring buffer of feature history in structure-of-arrays layout: each
// (feature, asset) stream occupies a contiguous window of 'windowSize'
// slots, and all streams share one write index so that slot t holds the
// same bar for every stream.
// BUGFIX: the shared write index previously advanced on EVERY push, i.e.
// FEAT_N * N_ASSETS times per bar, which scattered each stream's samples
// across the ring and made get() return temporally misaligned values. The
// index now advances only once per full bar's worth of pushes (one value
// per feature per asset, as computeFeatures() produces), keeping every
// stream aligned at the same t. Assumes all FEAT_N*N_ASSETS values are
// pushed each bar — true for the onBar()/computeFeatures() call pattern
// once past the warmup.
struct FeatureBufferSoA {
    SlabAllocator<fvar> buffer;
    int windowSize;    // ring length per (feature, asset) stream
    int currentIndex;  // shared write slot for the bar being filled
    int pushCount;     // pushes accumulated toward the current bar
    void init(int assets, int window) {
        windowSize = window;
        currentIndex = 0;
        pushCount = 0;
        buffer.init(FEAT_N * assets * window);
    }
    void shutdown() { buffer.shutdown(); }
    // Linear index of slot t for the given (feature, asset) stream.
    inline int offset(int feat,int asset,int t) const {
        return (feat * N_ASSETS + asset) * windowSize + t;
    }
    // Store one feature value for the current bar; the shared slot advances
    // after the last value of the bar has been written.
    void push(int feat,int asset,fvar value) {
        buffer[offset(feat, asset, currentIndex)] = value;
        pushCount++;
        if(pushCount >= FEAT_N * N_ASSETS) {
            pushCount = 0;
            currentIndex = (currentIndex + 1) % windowSize;
        }
    }
    // t=0 => most recently completed bar
    fvar get(int feat,int asset,int t) const {
        int idx = (currentIndex - 1 - t + windowSize) % windowSize;
        return buffer[offset(feat, asset, idx)];
    }
};
// ---------------------------- Minimal OpenCL (dynamic) ----------------------------
// Minimal OpenCL type/constant subset (values match Khronos CL/cl.h) so
// neither the OpenCL headers nor an import library are required at build time.
typedef struct _cl_platform_id* cl_platform_id;
typedef struct _cl_device_id* cl_device_id;
typedef struct _cl_context* cl_context;
typedef struct _cl_command_queue* cl_command_queue;
typedef struct _cl_program* cl_program;
typedef struct _cl_kernel* cl_kernel;
typedef struct _cl_mem* cl_mem;
typedef unsigned int cl_uint;
typedef int cl_int;
typedef unsigned long long cl_ulong;
// Fix: CL/cl.h defines cl_bool as cl_uint (4 bytes). It was typedef'd as
// size_t here, which has a different width on 64-bit builds and would
// produce the wrong stacked-argument size for the blocking-flag parameter
// of clEnqueueRead/WriteBuffer on 32-bit calling conventions.
typedef unsigned int cl_bool;
#define CL_SUCCESS 0
#define CL_DEVICE_TYPE_CPU (1ULL << 1)
#define CL_DEVICE_TYPE_GPU (1ULL << 2)
#define CL_MEM_READ_ONLY (1ULL << 2)
#define CL_MEM_WRITE_ONLY (1ULL << 1)
#define CL_MEM_READ_WRITE (1ULL << 0)
#define CL_TRUE 1
#define CL_FALSE 0
#define CL_PROGRAM_BUILD_LOG 0x1183
// Optional GPU acceleration: binds OpenCL.dll at runtime via
// LoadLibrary/GetProcAddress (no import library needed) and runs a
// pairwise feature-correlation kernel. Any failure during init() leaves
// ready == 0; callers are expected to fall back to the CPU path.
class OpenCLBackend {
public:
HMODULE hOpenCL; // handle to OpenCL.dll (NULL when unavailable)
int ready; // 1 only after kernel and both buffers were created
cl_platform_id platform;
cl_device_id device;
cl_context context;
cl_command_queue queue;
cl_program program;
cl_kernel kCorr; // the "corr_pairwise" kernel
cl_mem bufFeat; // device input: linearized feature tensor
cl_mem bufCorr; // device output: nAssets x nAssets correlation matrix
int featBytes; // byte size of bufFeat
int corrBytes; // byte size of bufCorr
// OpenCL entry points resolved at runtime (subset of the CL C API).
cl_int (*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*);
cl_int (*clGetDeviceIDs)(cl_platform_id, cl_ulong, cl_uint, cl_device_id*, cl_uint*);
cl_context (*clCreateContext)(void*, cl_uint, const cl_device_id*, void*, void*, cl_int*);
cl_command_queue (*clCreateCommandQueue)(cl_context, cl_device_id, cl_ulong, cl_int*);
cl_program (*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*);
cl_int (*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void*, void*);
cl_int (*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_uint, size_t, void*, size_t*);
cl_kernel (*clCreateKernel)(cl_program, const char*, cl_int*);
cl_int (*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*);
cl_mem (*clCreateBuffer)(cl_context, cl_ulong, size_t, void*, cl_int*);
cl_int (*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const void*, void*);
cl_int (*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const void*, void*);
cl_int (*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const void*, void*);
cl_int (*clFinish)(cl_command_queue);
cl_int (*clReleaseMemObject)(cl_mem);
cl_int (*clReleaseKernel)(cl_kernel);
cl_int (*clReleaseProgram)(cl_program);
cl_int (*clReleaseCommandQueue)(cl_command_queue);
cl_int (*clReleaseContext)(cl_context);
// All members start NULL/0; nothing is acquired until init().
OpenCLBackend()
: hOpenCL(NULL), ready(0),
platform(NULL), device(NULL), context(NULL), queue(NULL), program(NULL), kCorr(NULL),
bufFeat(NULL), bufCorr(NULL),
featBytes(0), corrBytes(0),
clGetPlatformIDs(NULL), clGetDeviceIDs(NULL), clCreateContext(NULL), clCreateCommandQueue(NULL),
clCreateProgramWithSource(NULL), clBuildProgram(NULL), clGetProgramBuildInfo(NULL),
clCreateKernel(NULL), clSetKernelArg(NULL),
clCreateBuffer(NULL), clEnqueueWriteBuffer(NULL), clEnqueueReadBuffer(NULL),
clEnqueueNDRangeKernel(NULL), clFinish(NULL),
clReleaseMemObject(NULL), clReleaseKernel(NULL), clReleaseProgram(NULL),
clReleaseCommandQueue(NULL), clReleaseContext(NULL)
{}
// Resolve one export from OpenCL.dll; returns 0 if the symbol is missing.
int loadSymbol(void** fp, const char* name) {
*fp = (void*)GetProcAddress(hOpenCL, name);
return (*fp != NULL);
}
// OpenCL C source: for each asset pair (a < b) computes the Pearson
// correlation per feature over the window and averages across features.
// Only the upper triangle of outCorr (a < b) is written.
const char* kernelSource() {
return
"__kernel void corr_pairwise(\n"
"    __global const float* feat,\n"
"    __global float* outCorr,\n"
"    const int nAssets,\n"
"    const int nFeat,\n"
"    const int windowSize,\n"
"    const float eps\n"
"){\n"
"    int a = (int)get_global_id(0);\n"
"    int b = (int)get_global_id(1);\n"
"    if(a >= nAssets || b >= nAssets) return;\n"
"    if(a >= b) return;\n"
"    float acc = 0.0f;\n"
"    for(int f=0; f<nFeat; f++){\n"
"        int baseA = (f*nAssets + a) * windowSize;\n"
"        int baseB = (f*nAssets + b) * windowSize;\n"
"        float mx = 0.0f;\n"
"        float my = 0.0f;\n"
"        for(int t=0; t<windowSize; t++){\n"
"            mx += feat[baseA + t];\n"
"            my += feat[baseB + t];\n"
"        }\n"
"        mx /= (float)windowSize;\n"
"        my /= (float)windowSize;\n"
"        float sxx = 0.0f;\n"
"        float syy = 0.0f;\n"
"        float sxy = 0.0f;\n"
"        for(int t=0; t<windowSize; t++){\n"
"            float dx = feat[baseA + t] - mx;\n"
"            float dy = feat[baseB + t] - my;\n"
"            sxx += dx*dx;\n"
"            syy += dy*dy;\n"
"            sxy += dx*dy;\n"
"        }\n"
"        float den = sqrt(sxx*syy + eps);\n"
"        float corr = (den > eps) ? (sxy/den) : 0.0f;\n"
"        acc += corr;\n"
"    }\n"
"    outCorr[a*nAssets + b] = acc / (float)nFeat;\n"
"}\n";
}
// Fetch and print the program build log (called after a failed build).
void printBuildLog() {
if(!clGetProgramBuildInfo || !program || !device) return;
size_t logSize = 0;
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
if(logSize == 0) return;
char* log = (char*)malloc(logSize + 1);
if(!log) return;
memset(log, 0, logSize + 1);
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
printf("OpenCL build log:\n%s\n", log);
free(log);
}
// Full backend bring-up. Prints a one-line status and returns early on the
// first failure, leaving ready == 0 (CPU fallback); shutdown() can still
// be called safely after a partial init.
void init() {
ready = 0;
hOpenCL = LoadLibraryA("OpenCL.dll");
if(!hOpenCL) {
printf("OpenCL: CPU (OpenCL.dll missing)\n");
return;
}
// Resolve the required API subset; any missing export aborts silently.
if(!loadSymbol((void**)&clGetPlatformIDs, "clGetPlatformIDs")) return;
if(!loadSymbol((void**)&clGetDeviceIDs, "clGetDeviceIDs")) return;
if(!loadSymbol((void**)&clCreateContext, "clCreateContext")) return;
if(!loadSymbol((void**)&clCreateCommandQueue, "clCreateCommandQueue")) return;
if(!loadSymbol((void**)&clCreateProgramWithSource,"clCreateProgramWithSource")) return;
if(!loadSymbol((void**)&clBuildProgram, "clBuildProgram")) return;
if(!loadSymbol((void**)&clGetProgramBuildInfo, "clGetProgramBuildInfo")) return;
if(!loadSymbol((void**)&clCreateKernel, "clCreateKernel")) return;
if(!loadSymbol((void**)&clSetKernelArg, "clSetKernelArg")) return;
if(!loadSymbol((void**)&clCreateBuffer, "clCreateBuffer")) return;
if(!loadSymbol((void**)&clEnqueueWriteBuffer, "clEnqueueWriteBuffer")) return;
if(!loadSymbol((void**)&clEnqueueReadBuffer, "clEnqueueReadBuffer")) return;
if(!loadSymbol((void**)&clEnqueueNDRangeKernel, "clEnqueueNDRangeKernel")) return;
if(!loadSymbol((void**)&clFinish, "clFinish")) return;
if(!loadSymbol((void**)&clReleaseMemObject, "clReleaseMemObject")) return;
if(!loadSymbol((void**)&clReleaseKernel, "clReleaseKernel")) return;
if(!loadSymbol((void**)&clReleaseProgram, "clReleaseProgram")) return;
if(!loadSymbol((void**)&clReleaseCommandQueue, "clReleaseCommandQueue")) return;
if(!loadSymbol((void**)&clReleaseContext, "clReleaseContext")) return;
cl_uint nPlat = 0;
if(clGetPlatformIDs(0, NULL, &nPlat) != CL_SUCCESS || nPlat == 0) {
printf("OpenCL: CPU (no platform)\n");
return;
}
// Use the first platform only.
clGetPlatformIDs(1, &platform, NULL);
cl_uint nDev = 0;
// Prefer a GPU device; fall back to a CPU OpenCL device.
cl_int ok = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, &nDev);
if(ok != CL_SUCCESS || nDev == 0) {
ok = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, &nDev);
if(ok != CL_SUCCESS || nDev == 0) {
printf("OpenCL: CPU (no device)\n");
return;
}
}
cl_int err = 0;
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
if(err != CL_SUCCESS || !context) {
printf("OpenCL: CPU (context fail)\n");
return;
}
queue = clCreateCommandQueue(context, device, 0, &err);
if(err != CL_SUCCESS || !queue) {
printf("OpenCL: CPU (queue fail)\n");
return;
}
// Compile the embedded kernel source.
const char* src = kernelSource();
program = clCreateProgramWithSource(context, 1, &src, NULL, &err);
if(err != CL_SUCCESS || !program) {
printf("OpenCL: CPU (program fail)\n");
return;
}
err = clBuildProgram(program, 1, &device, "", NULL, NULL);
if(err != CL_SUCCESS) {
printf("OpenCL: CPU (build fail)\n");
printBuildLog();
return;
}
kCorr = clCreateKernel(program, "corr_pairwise", &err);
if(err != CL_SUCCESS || !kCorr) {
printf("OpenCL: CPU (kernel fail)\n");
printBuildLog();
return;
}
// Fixed-size device buffers reused across all correlation calls.
featBytes = FEAT_N * N_ASSETS * FEAT_WINDOW * (int)sizeof(float);
corrBytes = N_ASSETS * N_ASSETS * (int)sizeof(float);
bufFeat = clCreateBuffer(context, CL_MEM_READ_ONLY, (size_t)featBytes, NULL, &err);
if(err != CL_SUCCESS || !bufFeat) {
printf("OpenCL: CPU (bufFeat fail)\n");
return;
}
bufCorr = clCreateBuffer(context, CL_MEM_WRITE_ONLY, (size_t)corrBytes, NULL, &err);
if(err != CL_SUCCESS || !bufCorr) {
printf("OpenCL: CPU (bufCorr fail)\n");
return;
}
ready = 1;
printf("OpenCL: READY (kernel+buffers)\n");
}
// Release all CL objects and the DLL in reverse order of acquisition.
// Safe after a partial init(); each handle is checked before release.
void shutdown() {
if(bufCorr) { clReleaseMemObject(bufCorr); bufCorr = NULL; }
if(bufFeat) { clReleaseMemObject(bufFeat); bufFeat = NULL; }
if(kCorr) { clReleaseKernel(kCorr); kCorr = NULL; }
if(program) { clReleaseProgram(program); program = NULL; }
if(queue) { clReleaseCommandQueue(queue); queue = NULL; }
if(context) { clReleaseContext(context); context = NULL; }
if(hOpenCL) { FreeLibrary(hOpenCL); hOpenCL = NULL; }
ready = 0;
}
// Upload features (blocking), run the kernel over an nAssets x nAssets
// grid, and read back the correlations (blocking). Returns 1 on success,
// 0 on any failure. Note: the kernel only writes entries with a < b; the
// caller must handle the diagonal and the lower triangle of outCorr.
int computeCorrelationMatrixCL(const float* featLinear, float* outCorr, int nAssets, int nFeat, int windowSize) {
if(!ready) return 0;
if(!featLinear || !outCorr) return 0;
cl_int err = clEnqueueWriteBuffer(queue, bufFeat, CL_TRUE, 0, (size_t)featBytes, featLinear, 0, NULL, NULL);
if(err != CL_SUCCESS) return 0;
float eps = 1e-12f;
err = CL_SUCCESS;
err |= clSetKernelArg(kCorr, 0, sizeof(cl_mem), &bufFeat);
err |= clSetKernelArg(kCorr, 1, sizeof(cl_mem), &bufCorr);
err |= clSetKernelArg(kCorr, 2, sizeof(int), &nAssets);
err |= clSetKernelArg(kCorr, 3, sizeof(int), &nFeat);
err |= clSetKernelArg(kCorr, 4, sizeof(int), &windowSize);
err |= clSetKernelArg(kCorr, 5, sizeof(float), &eps);
if(err != CL_SUCCESS) return 0;
size_t global[2];
global[0] = (size_t)nAssets;
global[1] = (size_t)nAssets;
err = clEnqueueNDRangeKernel(queue, kCorr, 2, NULL, global, NULL, 0, NULL, NULL);
if(err != CL_SUCCESS) return 0;
err = clFinish(queue);
if(err != CL_SUCCESS) return 0;
err = clEnqueueReadBuffer(queue, bufCorr, CL_TRUE, 0, (size_t)corrBytes, outCorr, 0, NULL, NULL);
if(err != CL_SUCCESS) return 0;
return 1;
}
};
// ---------------------------- Learning Layer ----------------------------
// Aggregated per-update statistics handed to the learning models.
// Field semantics follow the producer (not visible in this chunk):
// presumably means are taken across the tracked assets — confirm there.
struct LearningSnapshot {
double meanScore;        // mean asset score
double meanCompactness;  // mean cluster compactness
double meanVol;          // mean volatility measure
int regime;              // regime label supplied by the caller
double regimeConfidence; // confidence of that label
};
// Online 3-means classifier over (meanScore, meanCompactness, meanVol).
// The first sample seeds three slightly separated centroids; afterwards
// each update assigns the snapshot to its nearest centroid and moves that
// centroid toward it with a 1/n learning rate.
class UnsupervisedModel {
public:
    double centroids[3][3]; // cluster centers
    int counts[3];          // samples assigned per cluster
    int initialized;
    UnsupervisedModel() : initialized(0) {
        memset(centroids, 0, sizeof(centroids));
        memset(counts, 0, sizeof(counts));
    }
    void init() {
        initialized = 0;
        memset(centroids, 0, sizeof(centroids));
        memset(counts, 0, sizeof(counts));
    }
    // Classify the snapshot; reports the winning cluster and a confidence
    // that grows with the margin between best and runner-up distances.
    void update(const LearningSnapshot& s, int* regimeOut, double* confOut) {
        double v[3] = { s.meanScore, s.meanCompactness, s.meanVol };
        if(!initialized) {
            // Seed centroids as small offsets around the first sample.
            for(int k = 0; k < 3; k++) {
                centroids[k][0] = v[0] + 0.01 * (k - 1);
                centroids[k][1] = v[1] + 0.01 * (1 - k);
                centroids[k][2] = v[2] + 0.005 * (k - 1);
                counts[k] = 1;
            }
            initialized = 1;
        }
        int winner = 0;
        double d1 = INF; // best squared distance
        double d2 = INF; // runner-up squared distance
        for(int k = 0; k < 3; k++) {
            double dx = v[0] - centroids[k][0];
            double dy = v[1] - centroids[k][1];
            double dz = v[2] - centroids[k][2];
            double dd = dx*dx + dy*dy + dz*dz;
            if(dd < d1) {
                d2 = d1;
                d1 = dd;
                winner = k;
            } else if(dd < d2) {
                d2 = dd;
            }
        }
        counts[winner]++;
        double step = 1.0 / (double)counts[winner];
        centroids[winner][0] += step * (v[0] - centroids[winner][0]);
        centroids[winner][1] += step * (v[1] - centroids[winner][1]);
        centroids[winner][2] += step * (v[2] - centroids[winner][2]);
        *regimeOut = winner;
        *confOut = 1.0 / (1.0 + sqrt(fabs(d2 - d1) + EPS));
    }
};
// Tiny 4-armed bandit with incremental-mean Q estimates. Exploration is
// deterministic: every 10th update tries arm (updateCount % 4) instead of
// the greedy choice. The epsilon field is kept as state but is not used
// by the current action rule.
class RLAgent {
public:
    double q[4];          // running mean reward per action
    int n[4];             // pulls per action
    double epsilon;       // nominal exploration rate (unused by chooseAction)
    int lastAction;       // action whose reward is pending
    double lastMeanScore; // baseline for the next reward
    RLAgent() : epsilon(0.10), lastAction(0), lastMeanScore(0) {
        for(int a = 0; a < 4; a++) {
            q[a] = 0;
            n[a] = 0;
        }
    }
    void init() {
        epsilon = 0.10;
        lastAction = 0;
        lastMeanScore = 0;
        for(int a = 0; a < 4; a++) {
            q[a] = 0;
            n[a] = 0;
        }
    }
    // Forced exploration on every 10th call; otherwise greedy arg-max over
    // q (ties resolve to the lowest index).
    int chooseAction(int updateCount) {
        if(updateCount % 10 == 0)
            return updateCount % 4;
        int pick = 0;
        for(int a = 1; a < 4; a++)
            if(q[a] > q[pick])
                pick = a;
        return pick;
    }
    // Credit lastAction with the score improvement since the previous call
    // (incremental sample mean).
    void updateReward(double newMeanScore) {
        double r = newMeanScore - lastMeanScore;
        n[lastAction]++;
        q[lastAction] += (r - q[lastAction]) / (double)n[lastAction];
        lastMeanScore = newMeanScore;
    }
};
// Fixed-projection latent factor model over snapshot history. Despite the
// name this is not a data-driven PCA: the three "components" are
// hard-coded linear combinations of the z-scored inputs; only the
// normalization statistics (mean/stdev) are learned from the window.
class PCAModel {
public:
double hist[PCA_WINDOW][PCA_DIM]; // ring buffer of snapshot vectors
double mean[PCA_DIM];             // per-dim mean over the stored window
double stdev[PCA_DIM];            // per-dim stdev over the stored window
double latent[PCA_COMP];          // current latent coordinates
double explainedVar[PCA_COMP];    // normalized |latent| shares (proxy, not true variance)
int writeIdx;                     // next ring slot
int count;                        // filled slots (<= PCA_WINDOW)
int rebuildEvery;                 // stats refresh period in updates
int updates;                      // total update() calls
double dom;                       // share of component 0 ("dominance")
double rot;                       // |change| in dominance vs previous update
double prevExplained0;
PCAModel() : writeIdx(0), count(0), rebuildEvery(PCA_REBUILD_EVERY), updates(0), dom(0), rot(0), prevExplained0(0) {
memset(hist, 0, sizeof(hist));
memset(mean, 0, sizeof(mean));
memset(stdev, 0, sizeof(stdev));
memset(latent, 0, sizeof(latent));
memset(explainedVar, 0, sizeof(explainedVar));
}
// Reset to the empty state (stats and latent all zero).
void init() {
writeIdx = 0;
count = 0;
updates = 0;
dom = 0;
rot = 0;
prevExplained0 = 0;
memset(hist, 0, sizeof(hist));
memset(mean, 0, sizeof(mean));
memset(stdev, 0, sizeof(stdev));
memset(latent, 0, sizeof(latent));
memset(explainedVar, 0, sizeof(explainedVar));
}
// Append one snapshot vector to the ring buffer.
void pushSnapshot(const double x[PCA_DIM]) {
for(int d=0; d<PCA_DIM; d++) hist[writeIdx][d] = x[d];
writeIdx = (writeIdx + 1) % PCA_WINDOW;
if(count < PCA_WINDOW) count++;
}
// Recompute per-dimension mean and (biased) stdev over the first 'count'
// rows. Once the ring is full these rows are the whole window in storage
// order, which is fine for moment estimates.
void rebuildStats() {
if(count <= 0) return;
for(int d=0; d<PCA_DIM; d++) {
double m = 0;
for(int i=0; i<count; i++) m += hist[i][d];
m /= (double)count;
mean[d] = m;
double v = 0;
for(int i=0; i<count; i++) {
double dd = hist[i][d] - m;
v += dd * dd;
}
v /= (double)count;
stdev[d] = sqrt(v + EPS);
}
}
// Build the 6-dim snapshot vector, refresh stats periodically (and on
// every call during warm-up, count < 4), z-score, project onto the three
// fixed component weights, and derive dominance/rotation measures.
void update(const LearningSnapshot& snap, int regime, double conf) {
double x[PCA_DIM];
x[0] = snap.meanScore;
x[1] = snap.meanCompactness;
x[2] = snap.meanVol;
x[3] = (double)regime / 2.0; // regime index scaled to [0,1] for 3 regimes
x[4] = conf;
x[5] = snap.meanScore - snap.meanCompactness;
pushSnapshot(x);
updates++;
if((updates % rebuildEvery) == 0 || count < 4) rebuildStats();
double z[PCA_DIM];
for(int d=0; d<PCA_DIM; d++) z[d] = (x[d] - mean[d]) / (stdev[d] + EPS);
// Fixed projection weights (hand-chosen, not fitted).
latent[0] = 0.60*z[0] + 0.30*z[1] + 0.10*z[2];
latent[1] = 0.25*z[0] - 0.45*z[1] + 0.20*z[2] + 0.10*z[4];
latent[2] = 0.20*z[2] + 0.50*z[3] - 0.30*z[5];
double a0 = fabs(latent[0]);
double a1 = fabs(latent[1]);
double a2 = fabs(latent[2]);
double sumA = a0 + a1 + a2 + EPS;
explainedVar[0] = a0 / sumA;
explainedVar[1] = a1 / sumA;
explainedVar[2] = a2 / sumA;
dom = explainedVar[0];
rot = fabs(explainedVar[0] - prevExplained0);
prevExplained0 = explainedVar[0];
}
};
// Diagonal-covariance Gaussian mixture over GMM_DIM-dim state vectors,
// with an optional forgetting-factor online EM-style update.
class GMMRegimeModel {
public:
    double pi[GMM_K];           // mixture weights
    double mu[GMM_K][GMM_DIM];  // component means
    double var[GMM_K][GMM_DIM]; // per-dim variances
    double p[GMM_K];            // most recent posterior
    double entropy;             // posterior entropy (uncertainty measure)
    double conf;                // max posterior probability
    int bestRegime;             // arg-max component index
    int initialized;
    GMMRegimeModel() : entropy(0), conf(0), bestRegime(0), initialized(0) {
        memset(pi, 0, sizeof(pi));
        memset(mu, 0, sizeof(mu));
        memset(var, 0, sizeof(var));
        memset(p, 0, sizeof(p));
    }
    // Reset to a uniform mixture with slightly offset means and unit variance.
    void init() {
        initialized = 0;
        entropy = 0;
        conf = 0;
        bestRegime = 0;
        double uniform = 1.0 / (double)GMM_K;
        for(int c = 0; c < GMM_K; c++) {
            pi[c] = uniform;
            p[c] = uniform;
            for(int d = 0; d < GMM_DIM; d++) {
                mu[c][d] = 0.02 * (c - 1);
                var[c][d] = 1.0;
            }
        }
        initialized = 1;
    }
    // Unnormalized diagonal-Gaussian density, clamped in log space to
    // avoid underflow to exact zero everywhere.
    static double gaussianDiag(const double* x, const double* m, const double* v) {
        double logp = 0;
        for(int d = 0; d < GMM_DIM; d++) {
            double vd = v[d];
            if(vd < GMM_VAR_FLOOR) vd = GMM_VAR_FLOOR;
            double dz = x[d] - m[d];
            logp += -0.5 * (dz*dz / vd + log(vd + EPS));
        }
        if(logp < -80.0) logp = -80.0;
        return exp(logp);
    }
    // Compute posteriors for x (uniform fallback on total underflow),
    // track best regime / confidence / entropy, then optionally nudge the
    // mixture toward x with responsibility-weighted forgetting.
    void infer(const double x[GMM_DIM]) {
        if(!initialized) init();
        double total = 0;
        for(int c = 0; c < GMM_K; c++) {
            p[c] = pi[c] * gaussianDiag(x, mu[c], var[c]);
            total += p[c];
        }
        if(total < EPS) {
            for(int c = 0; c < GMM_K; c++) p[c] = 1.0 / (double)GMM_K;
        } else {
            for(int c = 0; c < GMM_K; c++) p[c] /= total;
        }
        bestRegime = 0;
        conf = p[0];
        for(int c = 1; c < GMM_K; c++) {
            if(p[c] > conf) {
                conf = p[c];
                bestRegime = c;
            }
        }
        entropy = 0;
        for(int c = 0; c < GMM_K; c++) entropy -= p[c] * log(p[c] + EPS);
#if GMM_ONLINE_UPDATE
        // Lightweight incremental EM with forgetting factor GMM_ALPHA.
        for(int c = 0; c < GMM_K; c++) {
            double w = GMM_ALPHA * p[c];
            pi[c] = (1.0 - GMM_ALPHA) * pi[c] + w;
            for(int d = 0; d < GMM_DIM; d++) {
                double diff = x[d] - mu[c][d];
                mu[c][d] += w * diff;
                var[c][d] = (1.0 - w) * var[c][d] + w * diff * diff;
                if(var[c][d] < GMM_VAR_FLOOR) var[c][d] = GMM_VAR_FLOOR;
            }
        }
#endif
    }
};
// Discrete-state HMM with diagonal-Gaussian emissions, run as a forward
// filter: predict with the fixed transition matrix A, then Bayes-update
// with the emission likelihood. Emission means/variances can adapt online
// weighted by the posterior; A itself is never re-estimated.
class HMMRegimeModel {
public:
double A[HMM_K][HMM_K];       // transition probabilities (rows sum to 1)
double mu[HMM_K][HMM_DIM];    // emission means
double var[HMM_K][HMM_DIM];   // emission per-dim variances
double posterior[HMM_K];      // filtered state distribution
double entropy;               // entropy of the posterior
double conf;                  // max posterior probability
double switchProb;            // 1 - self-transition of the current regime
int regime;                   // arg-max state
int initialized;
HMMRegimeModel() : entropy(0), conf(0), switchProb(0), regime(0), initialized(0) {
memset(A, 0, sizeof(A));
memset(mu, 0, sizeof(mu));
memset(var, 0, sizeof(var));
memset(posterior, 0, sizeof(posterior));
}
// Sticky transitions (0.90 self, remainder uniform), slightly offset
// means, unit variances, uniform posterior.
void init() {
for(int i=0;i<HMM_K;i++) {
for(int j=0;j<HMM_K;j++) A[i][j] = (i==j) ? 0.90 : 0.10/(double)(HMM_K-1);
for(int d=0; d<HMM_DIM; d++) {
mu[i][d] = 0.03 * (i - 1);
var[i][d] = 1.0;
}
posterior[i] = 1.0/(double)HMM_K;
}
regime = 0;
conf = posterior[0];
entropy = 0;
switchProb = 0;
initialized = 1;
}
// Unnormalized diagonal-Gaussian emission density, log-clamped at -80
// to avoid exact-zero underflow.
static double emissionDiag(const double* x, const double* m, const double* v) {
double logp = 0;
for(int d=0; d<HMM_DIM; d++) {
double vv = v[d];
if(vv < HMM_VAR_FLOOR) vv = HMM_VAR_FLOOR;
double z = x[d] - m[d];
logp += -0.5 * (z*z / vv + log(vv + EPS));
}
if(logp < -80.0) logp = -80.0;
return exp(logp);
}
// One forward-filter step for observation obs.
void filter(const double obs[HMM_DIM]) {
if(!initialized) init();
// Predict: propagate the posterior through the transition matrix.
double pred[HMM_K];
for(int j=0;j<HMM_K;j++) {
pred[j] = 0;
for(int i=0;i<HMM_K;i++) pred[j] += posterior[i] * A[i][j];
}
// Update: multiply by emission likelihoods, then normalize
// (uniform fallback if everything underflowed).
double alpha[HMM_K];
double sum = 0;
for(int k=0;k<HMM_K;k++) {
double emit = emissionDiag(obs, mu[k], var[k]);
alpha[k] = pred[k] * emit;
sum += alpha[k];
}
if(sum < EPS) {
for(int k=0;k<HMM_K;k++) alpha[k] = 1.0/(double)HMM_K;
} else {
for(int k=0;k<HMM_K;k++) alpha[k] /= sum;
}
for(int k=0;k<HMM_K;k++) posterior[k] = alpha[k];
// Summaries: MAP state, confidence, entropy, switch likelihood.
regime = 0;
conf = posterior[0];
for(int k=1;k<HMM_K;k++) if(posterior[k] > conf) { conf = posterior[k]; regime = k; }
entropy = 0;
for(int k=0;k<HMM_K;k++) entropy -= posterior[k] * log(posterior[k] + EPS);
switchProb = 1.0 - A[regime][regime];
if(switchProb < 0) switchProb = 0;
if(switchProb > 1) switchProb = 1;
#if HMM_ONLINE_UPDATE
// Posterior-weighted adaptation of the emission parameters.
for(int k=0;k<HMM_K;k++) {
double w = HMM_SMOOTH * posterior[k];
for(int d=0; d<HMM_DIM; d++) {
double diff = obs[d] - mu[k][d];
mu[k][d] += w * diff;
var[k][d] = (1.0 - w) * var[k][d] + w * diff * diff;
if(var[k][d] < HMM_VAR_FLOOR) var[k][d] = HMM_VAR_FLOOR;
}
}
#endif
}
};
// Online k-means regime detector with a distance-based stability score:
// the winning-centroid distance is tracked with EMAs, z-scored, and
// squashed through a logistic so unusually large distances lower stability.
class KMeansRegimeModel {
public:
    double centroids[KMEANS_K][KMEANS_DIM];
    double distEma;    // EMA of the winning distance
    double distVarEma; // EMA of its squared deviation
    int initialized;
    int regime;        // index of the nearest centroid
    double dist;       // distance to that centroid
    double stability;  // in [0,1]; high when distance is typical or small
    KMeansRegimeModel() : distEma(0), distVarEma(1), initialized(0), regime(0), dist(0), stability(0) {
        memset(centroids, 0, sizeof(centroids));
    }
    void init() {
        distEma = 0;
        distVarEma = 1;
        initialized = 0;
        regime = 0;
        dist = 0;
        stability = 0;
        memset(centroids, 0, sizeof(centroids));
    }
    // Seed all centroids as small offsets around the first sample.
    void seed(const double x[KMEANS_DIM]) {
        for(int c = 0; c < KMEANS_K; c++)
            for(int d = 0; d < KMEANS_DIM; d++)
                centroids[c][d] = x[d] + 0.03 * (c - 1);
        initialized = 1;
    }
    // Clamp x into [lo, hi].
    static double clampRange(double x, double lo, double hi) {
        return (x < lo) ? lo : (x > hi) ? hi : x;
    }
    // Classify x, refresh the distance statistics, and (if enabled) pull
    // the winning centroid toward x.
    void predictAndUpdate(const double x[KMEANS_DIM]) {
        if(!initialized) seed(x);
        int winner = 0;
        double winDist = INF;
        for(int c = 0; c < KMEANS_K; c++) {
            double sq = 0;
            for(int d = 0; d < KMEANS_DIM; d++) {
                double dd = x[d] - centroids[c][d];
                sq += dd * dd;
            }
            double dc = sqrt(sq + EPS);
            if(dc < winDist) {
                winDist = dc;
                winner = c;
            }
        }
        regime = winner;
        dist = winDist;
        distEma = (1.0 - KMEANS_DIST_EMA) * distEma + KMEANS_DIST_EMA * dist;
        double dev = dist - distEma;
        distVarEma = (1.0 - KMEANS_DIST_EMA) * distVarEma + KMEANS_DIST_EMA * dev * dev;
        double distStd = sqrt(distVarEma + EPS);
        double zDist = (dist - distEma) / (distStd + EPS);
        stability = clampRange(1.0 / (1.0 + exp(zDist)), 0.0, 1.0);
#if KMEANS_ONLINE_UPDATE
        for(int d = 0; d < KMEANS_DIM; d++)
            centroids[winner][d] += KMEANS_ETA * (x[d] - centroids[winner][d]);
#endif
    }
};
// Cheap deterministic stand-in for spectral clustering: each asset's
// summed distance row is hashed (via fmod) into one of SPECTRAL_K buckets.
class SpectralClusterModel {
public:
    int clusterId[N_ASSETS]; // bucket per asset
    int nClusters;
    void init() {
        nClusters = SPECTRAL_K;
        for(int a = 0; a < N_ASSETS; a++)
            clusterId[a] = a % SPECTRAL_K;
    }
    // Recompute bucket ids from the distance matrix rows.
    void update(const fvar* distMatrix) {
        if(!distMatrix) return;
        for(int a = 0; a < N_ASSETS; a++) {
            // Row signature: sum of finite distances to all other assets.
            double rowSum = 0;
            for(int b = 0; b < N_ASSETS; b++) {
                if(a == b) continue;
                double d = (double)distMatrix[a*N_ASSETS + b];
                if(d < INF) rowSum += d;
            }
            int bucket = (int)fmod(fabs(rowSum * 1000.0), (double)SPECTRAL_K);
            if(bucket < 0) bucket = 0;
            if(bucket >= SPECTRAL_K) bucket = SPECTRAL_K - 1;
            clusterId[a] = bucket;
        }
    }
};
// Agglomerative clustering over the asset distance matrix using
// size-weighted average linkage (UPGMA-style), producing a dendrogram
// plus coarse and fine flat cuts. Nodes 0..N_ASSETS-1 are leaves;
// merged (internal) nodes get ids from N_ASSETS upward.
class HierarchicalClusteringModel {
public:
int clusterCoarse[N_ASSETS]; // flat labels from the coarse cut
int clusterFine[N_ASSETS];   // flat labels from the fine cut
int nCoarse;                 // number of coarse clusters actually produced
int nFine;                   // number of fine clusters actually produced
int leftChild[2*N_ASSETS];   // children per node (-1 for leaves)
int rightChild[2*N_ASSETS];
int nodeSize[2*N_ASSETS];    // leaves under each node
double nodeHeight[2*N_ASSETS];          // merge distance of each internal node
double nodeDist[2*N_ASSETS][2*N_ASSETS];// working inter-node distances
int rootNode;                // final merged node (tree root)
// Trivial fallback labelling before any update() has run.
void init() {
nCoarse = HCLUST_COARSE_K;
nFine = HCLUST_FINE_K;
rootNode = N_ASSETS - 1;
for(int i=0;i<N_ASSETS;i++) {
clusterCoarse[i] = i % HCLUST_COARSE_K;
clusterFine[i] = i % HCLUST_FINE_K;
}
}
// Label every leaf under 'node' with clusterId (iterative DFS; the stack
// bound 2*N_ASSETS is safe since each pop pushes at most two children).
void collectLeaves(int node, int clusterId, int* out) {
int stack[2*N_ASSETS];
int sp = 0;
stack[sp++] = node;
while(sp > 0) {
int cur = stack[--sp];
if(cur < N_ASSETS) {
out[cur] = clusterId;
} else {
if(leftChild[cur] >= 0) stack[sp++] = leftChild[cur];
if(rightChild[cur] >= 0) stack[sp++] = rightChild[cur];
}
}
}
// Flat cut with K clusters: repeatedly split the internal node with the
// greatest merge height, then label the leaves of each resulting subtree.
void cutByK(int K, int* out) {
for(int i=0;i<N_ASSETS;i++) out[i] = -1;
if(K <= 1) {
for(int i=0;i<N_ASSETS;i++) out[i] = 0;
return;
}
int clusters[2*N_ASSETS];
int count = 1;
clusters[0] = rootNode;
while(count < K) {
int bestPos = -1;
double bestHeight = -1;
for(int i=0;i<count;i++) {
int node = clusters[i];
if(node >= N_ASSETS && nodeHeight[node] > bestHeight) {
bestHeight = nodeHeight[node];
bestPos = i;
}
}
// No splittable (internal) node left: fewer than K clusters possible.
if(bestPos < 0) break;
int node = clusters[bestPos];
int l = leftChild[node];
int r = rightChild[node];
clusters[bestPos] = l;
clusters[count++] = r;
}
for(int c=0;c<count;c++) {
collectLeaves(clusters[c], c, out);
}
// Safety: any unlabeled leaf (shouldn't happen) goes to cluster 0.
for(int i=0;i<N_ASSETS;i++) if(out[i] < 0) out[i] = 0;
}
// Build the dendrogram from distMatrix and refresh both flat cuts.
void update(const fvar* distMatrix) {
if(!distMatrix) return;
int totalNodes = 2 * N_ASSETS;
// Reset node tables; distances default to INF (unconnected).
for(int i=0;i<totalNodes;i++) {
leftChild[i] = -1;
rightChild[i] = -1;
nodeSize[i] = (i < N_ASSETS) ? 1 : 0;
nodeHeight[i] = 0;
for(int j=0;j<totalNodes;j++) nodeDist[i][j] = INF;
}
// Copy leaf distances, sanitizing invalid entries to 1.0.
for(int i=0;i<N_ASSETS;i++) {
for(int j=0;j<N_ASSETS;j++) {
if(i == j) nodeDist[i][j] = 0;
else {
double d = (double)distMatrix[i*N_ASSETS + j];
if(d < 0 || d >= INF) d = 1.0;
nodeDist[i][j] = d;
}
}
}
int active[2*N_ASSETS]; // currently unmerged nodes
int nActive = N_ASSETS;
for(int i=0;i<N_ASSETS;i++) active[i] = i;
int nextNode = N_ASSETS; // id of the next internal node to create
while(nActive > 1 && nextNode < 2*N_ASSETS) {
// Find the closest active pair.
int ai = 0, aj = 1;
double best = INF;
for(int i=0;i<nActive;i++) {
for(int j=i+1;j<nActive;j++) {
int a = active[i], b = active[j];
if(nodeDist[a][b] < best) {
best = nodeDist[a][b];
ai = i; aj = j;
}
}
}
// Merge them into a new internal node m.
int a = active[ai];
int b = active[aj];
int m = nextNode++;
leftChild[m] = a;
rightChild[m] = b;
nodeHeight[m] = best;
nodeSize[m] = nodeSize[a] + nodeSize[b];
// Size-weighted average linkage to every other active node.
for(int i=0;i<nActive;i++) {
if(i == ai || i == aj) continue;
int k = active[i];
double da = nodeDist[a][k];
double db = nodeDist[b][k];
double dm = (nodeSize[a] * da + nodeSize[b] * db) / (double)(nodeSize[a] + nodeSize[b]);
nodeDist[m][k] = dm;
nodeDist[k][m] = dm;
}
nodeDist[m][m] = 0;
// Remove a and b from the active list: ensure ai < aj, delete the
// higher index first so the lower one stays valid, then append m.
if(aj < ai) { int t=ai; ai=aj; aj=t; }
for(int i=aj;i<nActive-1;i++) active[i] = active[i+1];
nActive--;
for(int i=ai;i<nActive-1;i++) active[i] = active[i+1];
nActive--;
active[nActive++] = m;
}
rootNode = active[0];
// Clamp requested cluster counts to [1, N_ASSETS] and cut.
int kc = HCLUST_COARSE_K;
if(kc < 1) kc = 1;
if(kc > N_ASSETS) kc = N_ASSETS;
int kf = HCLUST_FINE_K;
if(kf < 1) kf = 1;
if(kf > N_ASSETS) kf = N_ASSETS;
cutByK(kc, clusterCoarse);
cutByK(kf, clusterFine);
nCoarse = kc;
nFine = kf;
}
};
// Label-propagation community detection on the |correlation| graph, with
// per-node top-M edge pruning and a (directed) modularity estimate.
// The distMatrix argument is accepted but only corrMatrix is used here.
class CommunityDetectionModel {
public:
int communityId[N_ASSETS];   // compressed community label per asset
int clusterCoarse[N_ASSETS]; // community id folded into HCLUST_COARSE_K bins
int clusterFine[N_ASSETS];   // community id folded into HCLUST_FINE_K bins
int nCommunities;            // number of distinct communities found
fvar modularityQ;            // last modularity estimate
fvar qSmooth;                // EMA-smoothed modularity
void init() {
nCommunities = 1;
modularityQ = 0;
qSmooth = 0;
for(int i=0;i<N_ASSETS;i++) {
communityId[i] = 0;
clusterCoarse[i] = i % HCLUST_COARSE_K;
clusterFine[i] = i % HCLUST_FINE_K;
}
}
// Return the label with the largest summed edge weight among node's
// neighbors. Note: bestV starts at -1 and all sums are >= 0, so an
// isolated node (all weights zero) adopts label 0 rather than keeping
// its own label.
static int argmaxLabel(const fvar w[N_ASSETS], const int label[N_ASSETS], int node) {
fvar acc[N_ASSETS];
for(int i=0;i<N_ASSETS;i++) acc[i] = 0;
for(int j=0;j<N_ASSETS;j++) {
if(j == node) continue;
int l = label[j];
if(l < 0 || l >= N_ASSETS) continue;
acc[l] += w[j];
}
int best = label[node];
fvar bestV = -1;
for(int l=0;l<N_ASSETS;l++) {
if(acc[l] > bestV) { bestV = acc[l]; best = l; }
}
return best;
}
// Rebuild communities from the current correlation matrix.
void update(const fvar* corrMatrix, const fvar* distMatrix) {
if(!corrMatrix || !distMatrix) return;
fvar W[N_ASSETS][N_ASSETS]; // edge weights = |corr|, thresholded
fvar degree[N_ASSETS];      // weighted degree BEFORE pruning
int label[N_ASSETS];
for(int i=0;i<N_ASSETS;i++) {
degree[i] = 0;
label[i] = i; // every node starts in its own community
for(int j=0;j<N_ASSETS;j++) {
if(i == j) W[i][j] = 0;
else {
fvar w = (fvar)fabs((double)corrMatrix[i*N_ASSETS + j]);
if(w < (fvar)COMM_W_MIN) w = 0;
W[i][j] = w;
degree[i] += w;
}
}
}
// Optional top-M pruning for determinism/noise control. This prunes
// rows independently, so W becomes asymmetric.
// NOTE(review): degrees were computed before pruning, so the modularity
// below mixes pre-prune degrees with the pruned W — confirm intended.
for(int i=0;i<N_ASSETS;i++) {
int keep[N_ASSETS];
for(int j=0;j<N_ASSETS;j++) keep[j] = 0;
for(int k=0;k<COMM_TOPM;k++) {
int best = -1;
fvar bestW = 0;
for(int j=0;j<N_ASSETS;j++) {
if(i==j || keep[j]) continue;
if(W[i][j] > bestW) { bestW = W[i][j]; best = j; }
}
if(best >= 0) keep[best] = 1;
}
for(int j=0;j<N_ASSETS;j++) if(i!=j && !keep[j]) W[i][j] = 0;
}
// Synchronous-in-array, sequential label-propagation sweeps.
for(int it=0; it<COMM_ITERS; it++) {
for(int i=0;i<N_ASSETS;i++) {
label[i] = argmaxLabel(W[i], label, i);
}
}
// compress labels
int map[N_ASSETS];
for(int i=0;i<N_ASSETS;i++) map[i] = -1;
int nLab = 0;
for(int i=0;i<N_ASSETS;i++) {
int l = label[i];
if(l < 0 || l >= N_ASSETS) l = 0;
if(map[l] < 0) map[l] = nLab++;
communityId[i] = map[l];
}
if(nLab < 1) nLab = 1;
nCommunities = nLab;
// modularity approximation
// Q = (1/m2) * sum over same-community pairs of (W_ij - d_i*d_j/m2),
// with m2 the total (directed) edge weight.
fvar m2 = 0;
for(int i=0;i<N_ASSETS;i++) for(int j=0;j<N_ASSETS;j++) m2 += W[i][j];
if(m2 < (fvar)EPS) {
modularityQ = 0;
} else {
fvar q = 0;
for(int i=0;i<N_ASSETS;i++) {
for(int j=0;j<N_ASSETS;j++) {
if(communityId[i] == communityId[j]) {
q += W[i][j] - (degree[i] * degree[j] / m2);
}
}
}
modularityQ = q / m2;
}
qSmooth = (fvar)(1.0 - COMM_Q_EMA) * qSmooth + (fvar)COMM_Q_EMA * modularityQ;
// Fold community ids into the fixed coarse/fine cluster ranges.
for(int i=0;i<N_ASSETS;i++) {
int c = communityId[i];
if(c < 0) c = 0;
clusterCoarse[i] = c % HCLUST_COARSE_K;
clusterFine[i] = c % HCLUST_FINE_K;
}
}
};
class AutoencoderModel {
public:
double mu[AE_INPUT_DIM];
double sigma[AE_INPUT_DIM];
double W1[AE_LATENT_DIM][AE_INPUT_DIM];
double W2[AE_INPUT_DIM][AE_LATENT_DIM];
int initialized;
void init() {
initialized = 1;
for(int i=0;i<AE_INPUT_DIM;i++) {
mu[i] = 0;
sigma[i] = 1;
}
for(int z=0;z<AE_LATENT_DIM;z++) {
for(int d=0;d<AE_INPUT_DIM;d++) {
double w = sin((double)(z+1)*(d+1)) * 0.05;
W1[z][d] = w;
W2[d][z] = w;
}
}
}
static double act(double x) {
if(x > 4) x = 4;
if(x < -4) x = -4;
return tanh(x);
}
double infer(const double xIn[AE_INPUT_DIM]) {
if(!initialized) init();
double x[AE_INPUT_DIM];
for(int d=0;d<AE_INPUT_DIM;d++) x[d] = (xIn[d] - mu[d]) / (sigma[d] + EPS);
double z[AE_LATENT_DIM];
for(int k=0;k<AE_LATENT_DIM;k++) {
double s = 0;
for(int d=0;d<AE_INPUT_DIM;d++) s += W1[k][d] * x[d];
z[k] = act(s);
}
double recon[AE_INPUT_DIM];
for(int d=0;d<AE_INPUT_DIM;d++) {
double s = 0;
for(int k=0;k<AE_LATENT_DIM;k++) s += W2[d][k] * z[k];
recon[d] = act(s);
}
double err = 0;
for(int d=0;d<AE_INPUT_DIM;d++) {
double e = x[d] - recon[d];
err += e*e;
}
err /= (double)AE_INPUT_DIM;
for(int d=0;d<AE_INPUT_DIM;d++) {
mu[d] = (1.0 - AE_NORM_ALPHA) * mu[d] + AE_NORM_ALPHA * xIn[d];
double dv = xIn[d] - mu[d];
sigma[d] = (1.0 - AE_NORM_ALPHA) * sigma[d] + AE_NORM_ALPHA * sqrt(dv*dv + EPS);
if(sigma[d] < 1e-5) sigma[d] = 1e-5;
}
return err;
}
};
// Tracks a z-score of the autoencoder reconstruction error and maps it to
// a novelty regime (0 = calm, 1 = elevated, 2 = novel) plus a risk scale;
// apply() translates the regime into topK / scoreScale adjustments.
class NoveltyController {
public:
    double errEma;    // EMA of the reconstruction error
    double errVar;    // EMA of its squared deviation
    double zRecon;    // current error z-score
    int regime;       // 0, 1 or 2 (increasing novelty)
    double riskScale; // risk multiplier derived from the regime
    void init() {
        errEma = 0;
        errVar = 1;
        zRecon = 0;
        regime = 0;
        riskScale = 1.0;
    }
    // Clamp x into [lo, hi].
    static double clampRange(double x, double lo, double hi) {
        return (x < lo) ? lo : (x > hi) ? hi : x;
    }
    // Fold a new reconstruction error into the running stats and
    // reclassify the novelty regime against the AE_Z_* thresholds.
    void update(double reconError) {
        errEma = (1.0 - AE_ERR_EMA) * errEma + AE_ERR_EMA * reconError;
        double dev = reconError - errEma;
        errVar = (1.0 - AE_ERR_EMA) * errVar + AE_ERR_EMA * dev * dev;
        double errStd = sqrt(errVar + EPS);
        zRecon = (reconError - errEma) / (errStd + EPS);
        if(zRecon >= AE_Z_HIGH)     { regime = 2; riskScale = 0.20; }
        else if(zRecon >= AE_Z_LOW) { regime = 1; riskScale = 0.60; }
        else                        { regime = 0; riskScale = 1.00; }
        riskScale = clampRange(riskScale, 0.20, 1.00);
    }
    // Tighten selection breadth and score scaling when novelty is high;
    // results are clamped to sane ranges.
    void apply(int* topK, double* scoreScale) {
        switch(regime) {
        case 2:
            if(*topK > 3) *topK -= 2;
            *scoreScale *= 0.60;
            break;
        case 1:
            if(*topK > 3) *topK -= 1;
            *scoreScale *= 0.85;
            break;
        default:
            break;
        }
        if(*topK < 1) *topK = 1;
        if(*topK > TOP_K) *topK = TOP_K;
        *scoreScale = clampRange(*scoreScale, 0.10, 2.00);
    }
};
// Small self-organizing map over SOM_DIM-dim state vectors. BMU = best
// matching unit; conf is the normalized margin between the best and
// second-best unit distances.
class SOMModel {
public:
double W[SOM_H][SOM_W][SOM_DIM]; // unit weight vectors
int hitCount[SOM_H][SOM_W];      // BMU hit counter per unit
int bmuX;                        // BMU grid coordinates
int bmuY;
double conf;                     // BMU margin confidence in [0,1]
int initialized;
// Deterministic (sin-based) weight initialization; zero hit counts.
void init() {
initialized = 1;
bmuX = 0; bmuY = 0; conf = 0;
for(int y=0;y<SOM_H;y++) {
for(int x=0;x<SOM_W;x++) {
hitCount[y][x] = 0;
for(int d=0;d<SOM_DIM;d++) {
W[y][x][d] = 0.02 * sin((double)(y+1)*(x+1)*(d+1));
}
}
}
}
// Clamp x into [lo, hi].
static double clampRange(double x,double lo,double hi){ if(x<lo) return lo; if(x>hi) return hi; return x; }
// Find the BMU for s, update confidence and hit counts, and (if enabled)
// pull the map toward s with exponentially decaying rate/neighborhood.
void inferOrUpdate(const double s[SOM_DIM], int step) {
if(!initialized) init();
int bx=0, by=0;
double best=INF, second=INF; // best and runner-up squared distances
for(int y=0;y<SOM_H;y++) {
for(int x=0;x<SOM_W;x++) {
double d2=0;
for(int k=0;k<SOM_DIM;k++) {
double z = s[k] - W[y][x][k];
d2 += z*z;
}
if(d2 < best) { second = best; best=d2; bx=x; by=y; }
else if(d2 < second) second = d2;
}
}
bmuX = bx; bmuY = by;
// Margin-based confidence: 0 when best == second-best, ->1 when the
// BMU is much closer than the runner-up.
double d1 = sqrt(best + EPS);
double d2 = sqrt(second + EPS);
conf = clampRange((d2 - d1) / (d2 + EPS), 0.0, 1.0);
hitCount[bmuY][bmuX]++;
#if SOM_ONLINE_UPDATE
// Learning rate and neighborhood radius decay with 'step'.
double alpha = SOM_ALPHA_MIN + (SOM_ALPHA_MAX - SOM_ALPHA_MIN) * exp(-0.005 * step);
double sigma = SOM_SIGMA_MIN + (SOM_SIGMA_MAX - SOM_SIGMA_MIN) * exp(-0.005 * step);
for(int y=0;y<SOM_H;y++) {
for(int x=0;x<SOM_W;x++) {
// Gaussian neighborhood kernel around the BMU.
double gd2 = (double)((x-bmuX)*(x-bmuX) + (y-bmuY)*(y-bmuY));
double h = exp(-gd2 / (2.0*sigma*sigma + EPS));
for(int k=0;k<SOM_DIM;k++) {
W[y][x][k] += alpha * h * (s[k] - W[y][x][k]);
}
}
}
#endif
}
// Flattened BMU coordinate (row-major unit id).
int regimeId() const { return bmuY * SOM_W + bmuX; }
};
// Maps the SOM best-matching-unit quadrant to score/risk adjustments.
class SOMPlaybook {
public:
    int region;       // quadrant id 0..3 (row-major: cy*2 + cx)
    double riskScale; // risk multiplier for the current region
    void init() { region = 0; riskScale = 1.0; }
    // Derive the BMU quadrant and apply per-quadrant adjustments to
    // topK / scoreScale; low BMU confidence tightens further. Outputs
    // are clamped to sane ranges.
    void apply(const SOMModel& som, int* topK, double* scoreScale) {
        int col = (som.bmuX >= SOM_W/2) ? 1 : 0;
        int row = (som.bmuY >= SOM_H/2) ? 1 : 0;
        region = row * 2 + col;
        switch(region) {
        case 0:
            *scoreScale *= 1.02;
            riskScale = 1.00;
            break;
        case 1:
            *scoreScale *= 0.95;
            riskScale = 0.85;
            if(*topK > 3) (*topK)--;
            break;
        case 2:
            *scoreScale *= 0.90;
            riskScale = 0.70;
            if(*topK > 3) (*topK)--;
            break;
        default:
            *scoreScale *= 0.80;
            riskScale = 0.50;
            if(*topK > 2) (*topK) -= 2;
            break;
        }
        if(som.conf < SOM_CONF_MIN) {
            riskScale *= 0.8;
            if(*topK > 2) (*topK)--;
        }
        if(*topK < 1) *topK = 1;
        if(*topK > TOP_K) *topK = TOP_K;
        if(*scoreScale < 0.10) *scoreScale = 0.10;
        if(*scoreScale > 2.00) *scoreScale = 2.00;
    }
};
class StrategyController {
public:
UnsupervisedModel unsup;
RLAgent rl;
PCAModel pca;
GMMRegimeModel gmm;
HMMRegimeModel hmm;
KMeansRegimeModel kmeans;
int dynamicTopK;
double scoreScale;
int regime;
double adaptiveGamma;
double adaptiveAlpha;
double adaptiveBeta;
double adaptiveLambda;
double riskScale;
int cooldown;
// Start from neutral adaptation: full top-K, unit scales, no cooldown.
StrategyController()
: dynamicTopK(TOP_K), scoreScale(1.0), regime(0),
adaptiveGamma(1.0), adaptiveAlpha(1.0), adaptiveBeta(1.0), adaptiveLambda(1.0), riskScale(1.0), cooldown(0) {}
// Clamp x into [lo, hi].
static double clampRange(double x, double lo, double hi) {
if(x < lo) return lo;
if(x > hi) return hi;
return x;
}
void init() {
unsup.init();
rl.init();
pca.init();
gmm.init();
hmm.init();
kmeans.init();
dynamicTopK = TOP_K;
scoreScale = 1.0;
regime = 0;
adaptiveGamma = 1.0;
adaptiveAlpha = 1.0;
adaptiveBeta = 1.0;
adaptiveLambda = 1.0;
riskScale = 1.0;
cooldown = 0;
}
void buildGMMState(const LearningSnapshot& snap, int reg, double conf, double x[GMM_DIM]) {
x[0] = snap.meanScore;
x[1] = snap.meanCompactness;
x[2] = snap.meanVol;
x[3] = pca.dom;
x[4] = pca.rot;
x[5] = (double)reg / 2.0;
x[6] = conf;
x[7] = snap.meanScore - snap.meanCompactness;
}
void buildHMMObs(const LearningSnapshot& snap, int reg, double conf, double x[HMM_DIM]) {
x[0] = pca.latent[0];
x[1] = pca.latent[1];
x[2] = pca.latent[2];
x[3] = snap.meanVol;
x[4] = snap.meanScore;
x[5] = snap.meanCompactness;
x[6] = (double)reg / 2.0;
x[7] = conf;
}
void buildKMeansState(const LearningSnapshot& snap, int reg, double conf, double x[KMEANS_DIM]) {
x[0] = pca.latent[0];
x[1] = pca.latent[1];
x[2] = pca.latent[2];
x[3] = snap.meanVol;
x[4] = snap.meanScore;
x[5] = snap.meanCompactness;
x[6] = (double)reg / 2.0;
x[7] = conf;
}
void onUpdate(const LearningSnapshot& snap, fvar* scores, int nScores, int updateCount) {
#if USE_ML
double unsupConf = 0;
unsup.update(snap, ®ime, &unsupConf);
#if USE_PCA
pca.update(snap, regime, unsupConf);
#else
pca.dom = 0.5;
pca.rot = 0.0;
#endif
#if USE_GMM
double gx[GMM_DIM];
buildGMMState(snap, regime, unsupConf, gx);
gmm.infer(gx);
#if USE_HMM
double hx[HMM_DIM];
buildHMMObs(snap, regime, unsupConf, hx);
hmm.filter(hx);
#if USE_KMEANS
double kx[KMEANS_DIM];
buildKMeansState(snap, regime, unsupConf, kx);
kmeans.predictAndUpdate(kx);
#endif
#endif
// regime presets: [gamma, alpha, beta, lambda]
const double presets[GMM_K][4] = {
{1.05, 1.00, 0.95, 1.00},
{0.95, 1.05, 1.05, 0.95},
{1.00, 0.95, 1.10, 1.05}
};
adaptiveGamma = 0;
adaptiveAlpha = 0;
adaptiveBeta = 0;
adaptiveLambda = 0;
for(int k=0;k<GMM_K;k++) {
#if USE_HMM
adaptiveGamma += hmm.posterior[k] * presets[k][0];
adaptiveAlpha += hmm.posterior[k] * presets[k][1];
adaptiveBeta += hmm.posterior[k] * presets[k][2];
adaptiveLambda += hmm.posterior[k] * presets[k][3];
#else
adaptiveGamma += gmm.p[k] * presets[k][0];
adaptiveAlpha += gmm.p[k] * presets[k][1];
adaptiveBeta += gmm.p[k] * presets[k][2];
adaptiveLambda += gmm.p[k] * presets[k][3];
#endif
}
#if USE_HMM
double entNorm = hmm.entropy / log((double)HMM_K + EPS);
riskScale = clampRange(1.0 - 0.45 * entNorm, HMM_MIN_RISK, 1.0);
if(hmm.entropy > HMM_ENTROPY_TH || hmm.switchProb > HMM_SWITCH_TH) cooldown = HMM_COOLDOWN_UPDATES;
else if(cooldown > 0) cooldown--;
#else
double entNorm = gmm.entropy / log((double)GMM_K + EPS);
riskScale = clampRange(1.0 - GMM_ENTROPY_COEFF * entNorm, GMM_MIN_RISK, 1.0);
#endif
#else
adaptiveGamma = 1.0 + 0.35 * pca.dom - 0.25 * pca.rot;
adaptiveAlpha = 1.0 + 0.30 * pca.dom;
adaptiveBeta = 1.0 + 0.25 * pca.rot;
adaptiveLambda = 1.0 + 0.20 * pca.dom - 0.20 * pca.rot;
riskScale = 1.0;
#endif
adaptiveGamma = clampRange(adaptiveGamma, 0.80, 1.40);
adaptiveAlpha = clampRange(adaptiveAlpha, 0.85, 1.35);
adaptiveBeta = clampRange(adaptiveBeta, 0.85, 1.35);
adaptiveLambda = clampRange(adaptiveLambda, 0.85, 1.25);
#if USE_KMEANS
const double kmPreset[KMEANS_K][4] = {
{1.02, 1.00, 0.98, 1.00},
{1.08, 0.96, 0.95, 1.02},
{0.94, 1.08, 1.08, 0.92}
};
int kr = kmeans.regime;
if(kr < 0) kr = 0;
if(kr >= KMEANS_K) kr = KMEANS_K - 1;
double wkm = clampRange(kmeans.stability, 0.0, 1.0);
adaptiveGamma = (1.0 - wkm) * adaptiveGamma + wkm * kmPreset[kr][0];
adaptiveAlpha = (1.0 - wkm) * adaptiveAlpha + wkm * kmPreset[kr][1];
adaptiveBeta = (1.0 - wkm) * adaptiveBeta + wkm * kmPreset[kr][2];
adaptiveLambda = (1.0 - wkm) * adaptiveLambda + wkm * kmPreset[kr][3];
if(kmeans.stability < KMEANS_STABILITY_MIN) {
riskScale *= 0.85;
if(cooldown < 1) cooldown = 1;
}
#endif
rl.updateReward(snap.meanScore);
rl.lastAction = rl.chooseAction(updateCount);
int baseTopK = TOP_K;
if(rl.lastAction == 0) baseTopK = TOP_K - 2;
else if(rl.lastAction == 1) baseTopK = TOP_K;
else if(rl.lastAction == 2) baseTopK = TOP_K;
else baseTopK = TOP_K - 1;
double profileBias[5] = {1.00, 0.98, 0.99, 0.97, 1.02};
scoreScale = (1.0 + 0.06 * (adaptiveGamma - 1.0) + 0.04 * (adaptiveAlpha - 1.0) - 0.04 * (adaptiveBeta - 1.0))
* profileBias[STRATEGY_PROFILE] * riskScale;
if(pca.dom > 0.60) baseTopK -= 1;
if(pca.rot > 0.15) baseTopK -= 1;
#if USE_HMM
if(hmm.regime == 2) baseTopK -= 1;
if(cooldown > 0) baseTopK -= 1;
#if USE_KMEANS
if(kmeans.regime == 2) baseTopK -= 1;
#endif
#elif USE_GMM
if(gmm.bestRegime == 2) baseTopK -= 1;
#endif
dynamicTopK = baseTopK;
if(dynamicTopK < 1) dynamicTopK = 1;
if(dynamicTopK > TOP_K) dynamicTopK = TOP_K;
for(int i=0; i<nScores; i++) {
double s = (double)scores[i] * scoreScale;
if(s > 1.0) s = 1.0;
if(s < 0.0) s = 0.0;
scores[i] = (fvar)s;
}
#else
(void)snap; (void)scores; (void)nScores; (void)updateCount;
#endif
}
};
// ---------------------------- Strategy ----------------------------
// Main strategy object: owns the feature buffers, correlation workspaces,
// the OpenCL backend and every learning model. (Class continues beyond the
// members/constructor documented here.)
class CompactDominantStrategy {
public:
ExposureTable exposureTable;
FeatureBufferSoA featSoA;            // rolling per-feature/per-asset windows
OpenCLBackend openCL;                // optional GPU path for the corr matrix
SlabAllocator<fvar> corrMatrix;      // N_ASSETS x N_ASSETS averaged correlations
SlabAllocator<fvar> distMatrix;
SlabAllocator<fvar> compactness;
SlabAllocator<fvar> scores;
SlabAllocator<float> featLinear;     // flattened features for OpenCL upload
SlabAllocator<float> corrLinear;     // flattened corr output from OpenCL
int barCount;
int updateCount;
StrategyController controller;
HierarchicalClusteringModel hclust;
CommunityDetectionModel comm;
AutoencoderModel ae;
NoveltyController novelty;
SOMModel som;
SOMPlaybook somPlaybook;
CompactDominantStrategy() : barCount(0), updateCount(0) {}
// Allocates all workspaces sized from the compile-time constants and resets
// every model. The OpenCL backend is probed here; openCL.ready reports
// whether the GPU path is available.
void init() {
printf("CompactDominant_v13: Initializing...\n");
exposureTable.init();
featSoA.init(N_ASSETS, FEAT_WINDOW);
corrMatrix.init(N_ASSETS * N_ASSETS);
distMatrix.init(N_ASSETS * N_ASSETS);
compactness.init(N_ASSETS);
scores.init(N_ASSETS);
featLinear.init(FEAT_N * N_ASSETS * FEAT_WINDOW);
corrLinear.init(N_ASSETS * N_ASSETS);
openCL.init();
printf("CompactDominant_v13: Ready (OpenCL=%d)\n", openCL.ready);
controller.init();
hclust.init();
comm.init();
ae.init();
novelty.init();
som.init();
somPlaybook.init();
barCount = 0;
updateCount = 0;
}
// Releases the OpenCL backend first, then every slab allocation.
// NOTE(review): the learning models have no shutdown() here — presumably
// they own no heap resources; confirm against their definitions.
void shutdown() {
printf("CompactDominant_v13: Shutting down...\n");
openCL.shutdown();
featSoA.shutdown();
corrMatrix.shutdown();
distMatrix.shutdown();
compactness.shutdown();
scores.shutdown();
featLinear.shutdown();
corrLinear.shutdown();
}
// Computes the per-bar feature vector for one asset and pushes it into the
// SoA feature buffer (feature slots 0..8).
// NOTE(review): in Zorro, series() calls must execute in the same order on
// every bar — keep the asset()/series() sequence stable when editing; the
// early return happens only after both series are registered.
void computeFeatures(int assetIdx) {
asset((char*)ASSET_NAMES[assetIdx]);
vars C = series(priceClose(0));
vars V = series(Volatility(C, 20));
if(Bar < 50) return; // need 50 bars of history for the z-score/range features
fvar r1 = (fvar)log(C[0] / C[1]);   // 1-bar log return
fvar rN = (fvar)log(C[0] / C[12]);  // 12-bar log return
fvar vol = (fvar)V[0];              // 20-bar volatility
fvar zscore = (fvar)((C[0] - C[50]) / (V[0] * 20.0 + EPS)); // vol-normalized 50-bar move
fvar rangeP = (fvar)((C[0] - C[50]) / (C[0] + EPS));        // 50-bar move as price fraction
fvar flow = (fvar)(r1 * vol);
fvar regime = (fvar)((vol > 0.001) ? 1.0 : 0.0); // crude high/low-vol flag
fvar volOfVol = (fvar)(vol * vol);
fvar persistence = (fvar)fabs(r1);
featSoA.push(0, assetIdx, r1);
featSoA.push(1, assetIdx, rN);
featSoA.push(2, assetIdx, vol);
featSoA.push(3, assetIdx, zscore);
featSoA.push(4, assetIdx, rangeP);
featSoA.push(5, assetIdx, flow);
featSoA.push(6, assetIdx, regime);
featSoA.push(7, assetIdx, volOfVol);
featSoA.push(8, assetIdx, persistence);
}
void computeCorrelationMatrixCPU() {
for(int i=0;i<N_ASSETS*N_ASSETS;i++) corrMatrix[i] = 0;
for(int f=0; f<FEAT_N; f++){
for(int a=0; a<N_ASSETS; a++){
for(int b=a+1; b<N_ASSETS; b++){
fvar mx = 0, my = 0;
for(int t=0; t<FEAT_WINDOW; t++){
mx += featSoA.get(f,a,t);
my += featSoA.get(f,b,t);
}
mx /= (fvar)FEAT_WINDOW;
my /= (fvar)FEAT_WINDOW;
fvar sxx = 0, syy = 0, sxy = 0;
for(int t=0; t<FEAT_WINDOW; t++){
fvar dx = featSoA.get(f,a,t) - mx;
fvar dy = featSoA.get(f,b,t) - my;
sxx += dx*dx;
syy += dy*dy;
sxy += dx*dy;
}
fvar den = (fvar)sqrt((double)(sxx*syy + (fvar)EPS));
fvar corr = 0;
if(den > (fvar)EPS) corr = sxy / den;
else corr = 0;
int idx = a*N_ASSETS + b;
corrMatrix[idx] += corr / (fvar)FEAT_N;
corrMatrix[b*N_ASSETS + a] = corrMatrix[idx];
}
}
}
}
// Flattens the SoA feature cube into featLinear in [feature][asset][time]
// order, matching the layout expected by the OpenCL kernel.
void buildFeatLinear() {
	int pos = 0;
	for(int f=0; f<FEAT_N; f++)
		for(int a=0; a<N_ASSETS; a++)
			for(int t=0; t<FEAT_WINDOW; t++)
				featLinear[pos++] = (float)featSoA.get(f, a, t);
}
void computeCorrelationMatrix() {
if(openCL.ready) {
buildFeatLinear();
for(int i=0;i<N_ASSETS*N_ASSETS;i++) corrLinear[i] = 0.0f;
int ok = openCL.computeCorrelationMatrixCL(
featLinear.data,
corrLinear.data,
N_ASSETS,
FEAT_N,
FEAT_WINDOW
);
if(ok) {
for(int i=0;i<N_ASSETS*N_ASSETS;i++) corrMatrix[
210
65,908
Read More
|
|
|
03/04/26 18:08
Currently, we have file_select(), which allows the user to select a single file in a dialog, and file_next(), which allows the programmer to iterate through all files in a given path.
I'd like to request two features: 1) Ability to select a folder with a dialog box. 2) Ability to select multiple files in the same dialog box. Maybe output the list of files to file_next().
1
107
Read More
|
|
|
03/03/26 03:17
You could potentially build with CGO shared library, it'd be a huge dll covering all exchanges or build a thin http microservice with node/python. I did the latter for hyperliquid before building a native plugin.
3
751
Read More
|
|
03/02/26 18:13
NeuroWeave Render Bridge is a single-file demonstration that stitches together three normally separate domains into one continuous runtime loop: a neural modeling domain provided by LibTorch, a massively parallel compute domain provided by OpenCL, and a real-time display domain provided by OpenGL through the Win32 windowing system. The program’s purpose is not traditional training, and it is not a trading strategy in the usual sense. Instead, it is a proof-of-integration pattern: it shows how to safely combine a machine learning library with a compute kernel and a graphics pipeline inside the same binary, while also being compatible with Zorro’s DLL lifecycle. The story begins with defensive integration. The file uses a strict include order: LibTorch comes first, Zorro comes after, and then macro cleanup happens before OpenCL and OpenGL headers are introduced. This ordering is a practical requirement because both LibTorch and Zorro bring global identifiers and macros that can collide. The code explicitly renames one of Zorro’s short identifiers before including the Zorro header, then restores it afterward. Immediately after that, it removes common macro definitions such as min, max, abs, and other short names that can silently rewrite later code. This part is not glamorous, but it is crucial: it ensures that when the program says “tanh” or “abs” or “min,” it gets the intended function and not an accidental macro substitution. In a hybrid system like this, “mathematical correctness” starts with compile-time hygiene. Once the compilation environment is stabilized, the program constructs the display side using Win32 and OpenGL. It creates a window class, spawns a window, and then establishes a WGL context, which is the Windows pathway for binding OpenGL rendering to that window. The OpenGL configuration is intentionally minimal: no depth test, a fixed viewport, and a simple texture-based draw. 
Instead of drawing complex geometry, it draws a single textured quad that covers the screen. This keeps the display pipeline simple and reliable. The key OpenGL objects are a pixel buffer object and a texture. The pixel buffer object is a GPU-resident memory region sized to hold one frame of pixels in four channels. The texture is allocated to match the window size, and it is configured with nearest-neighbor filtering so the program’s pixel output appears crisp without interpolation artifacts. In this architecture, the texture is the final display surface, but the pixel buffer object is the intermediate staging region that can be shared with OpenCL. The compute side is built around OpenCL with OpenGL sharing enabled. This is where the most important relationship between OpenCL and OpenGL appears. OpenCL and OpenGL can both operate on GPU memory, but they usually do so in separate ecosystems. Sharing is the mechanism that allows a buffer created in OpenGL to be directly visible to OpenCL, without copying data through the CPU. The program searches for a GPU device that advertises the OpenCL extension required for OpenGL interoperability. Once it finds a suitable device, it creates an OpenCL context that is explicitly linked to the active OpenGL context and the current device context. That linkage is established through context properties that pass the current OpenGL context and the window device context into OpenCL. Symbolically, this step is an agreement: OpenCL is allowed to work on objects that OpenGL created, but only under the rules of this shared context. After the shared context is created, the program compiles an OpenCL kernel from source embedded as a string. The kernel is a per-pixel renderer that writes RGBA color values into an output buffer. That output buffer is not an ordinary OpenCL buffer in this design; it is a handle created by wrapping the OpenGL pixel buffer object as an OpenCL memory object. 
This is the heart of the bridge: the same physical memory region is treated as an OpenCL output surface during computation and as an OpenGL pixel source during rendering. Next comes the learning side. LibTorch is used to define and initialize a tiny multilayer perceptron. The network is deliberately small: it accepts two inputs, produces a hidden representation of moderate size, and outputs three channels that will later be interpreted as color components. The model uses a smooth nonlinearity in each layer to produce continuous output. The important conceptual relationship between LibTorch and OpenCL is representation. LibTorch stores parameters as tensors with metadata and potential device placement. OpenCL wants raw arrays in contiguous memory blocks. The code therefore builds the model, switches it into evaluation mode, extracts the weight matrices and bias vectors, forces them into CPU memory and contiguous layout, and copies them into plain float arrays. Those arrays become the canonical parameter representation for the rest of the system. The program then uploads those parameters into OpenCL buffers. Each parameter block is stored in its own OpenCL buffer and marked read-only, because the kernel treats them as constants during inference. This stage establishes the first half of the mathematical relationship between LibTorch and OpenCL: LibTorch authors a function by defining parameter values, and OpenCL consumes those values to evaluate the function at a much larger scale than a CPU loop could easily manage. In other words, LibTorch supplies the “shape” of the neural mapping through weights, while OpenCL supplies the “reach” by running the same mapping across a full two-dimensional grid of pixels. This version extends the bridge by adding parameter evolution on the host. After the initial weights are produced by LibTorch and uploaded to OpenCL, the program continues to modify the parameters over time. 
It maintains host-side copies of all parameters in arrays and, on each frame, applies a small update step that nudges parameters based on neighboring parameter values, a slow oscillatory drift tied to the phase, and a small random disturbance derived from a per-frame seed. This evolution is not training in the machine learning sense; it is a procedural mutation rule that makes the network’s behavior shift gradually as the animation runs. The code packs all parameters into a single linear list, computes a new list by blending each parameter with its neighbors and adding controlled drift and noise, clamps the resulting values to keep them within a reasonable bound, and then writes them back into the structured parameter arrays. It then applies a secondary balancing step that pulls the means of different parameter groups toward each other, which prevents one part of the network from drifting too far away in magnitude compared to the others. This creates a self-stabilizing parameter motion that is visually interesting while remaining bounded. The relationship between this evolving parameter process and OpenCL is straightforward: each frame, after host-side evolution runs, the updated parameter arrays are written into the OpenCL buffers again. This means the OpenCL kernel always sees a fresh set of weights and biases, which makes each frame’s neural inference slightly different. The writes are performed without blocking wherever possible, and they are synchronized before rendering completes through command queue finishing. This is a classic producer-consumer rhythm: the CPU produces new parameters, OpenCL consumes them to generate pixels, and OpenGL consumes those pixels to display the frame. Inside the OpenCL kernel, the mapping from pixel location to neural inputs is done in a coordinate space normalized to a convenient range. The kernel derives two input values from the spatial coordinates, the phase, and a per-pixel jitter term. 
The jitter term comes from a deterministic hash-style mixing function seeded with a per-frame noise seed and pixel coordinates. That means the jitter is consistent for a given frame but changes across frames because the seed changes. The kernel evaluates the hidden layer by multiplying inputs by weights, adding biases, and applying the nonlinearity. It then evaluates the output layer similarly and produces three bounded output values. Those outputs are then mixed with simple procedural effects like stripes and a radial vignette to create a visually structured image. Finally, the kernel writes RGBA bytes into the shared output buffer. The OpenCL and OpenGL relationship is protected by explicit ownership transfers. Before the kernel runs, the program acquires the shared OpenGL buffer for OpenCL use. After the kernel finishes, it releases the buffer back to OpenGL. This acquire and release sequence is the synchronization contract that prevents OpenGL from reading pixels while OpenCL is still writing them. After release and a final finish call, the OpenGL side updates the texture from the pixel buffer object and draws it to the screen. No CPU readback is needed. The GPU-to-GPU pathway remains intact throughout the loop, which is the principal performance benefit of the CL and GL sharing mechanism. The program is also wrapped in a Zorro-friendly entry point. It forces Zorro to run only a single cycle to avoid repeated launches, and it watches for Zorro exit flags so that a stop request can cleanly close the window and release resources. This allows the demo to be launched from within a Zorro environment while still behaving like a normal Win32 graphical program. In symbolic terms, NeuroWeave Render Bridge is a moving tapestry built from three looms. LibTorch defines the weave pattern by providing neural parameters. The host evolution logic slowly changes that pattern over time, like a hand adjusting threads. 
OpenCL performs the weaving at scale by evaluating the network for every pixel in parallel and writing the resulting colors into a shared canvas. OpenGL then displays the canvas with minimal overhead, completing the loop. The significance of the code lies in the disciplined interfaces between systems: clear naming hygiene, careful memory representation changes, explicit synchronization, and a predictable frame pipeline that can be controlled under a host application’s lifecycle. // Mendb02.cpp
// Win32 + WGL(OpenGL) display + OpenCL compute (CL/GL sharing)
// + Tiny Neural Net inference per pixel (OpenCL kernel) using weights from LibTorch.
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
#define _CRT_SECURE_NO_WARNINGS
// ============================================================
// 1) Include LibTorch FIRST (like your working file)
// Public/shareable variant: no machine-specific include paths.
// ============================================================
#if defined(__has_include)
#if __has_include(<torch/torch.h>) && __has_include(<torch/script.h>)
#include <torch/torch.h>
#include <torch/script.h>
#else
#error "LibTorch headers not found. Add LibTorch include paths to your build configuration."
#endif
#else
#include <torch/torch.h>
#include <torch/script.h>
#endif
// (Optional) CUDA headers (safe pattern used by your working file)
// Keep them conditional so CPU-only LibTorch setups still compile.
#if defined(__has_include)
#if __has_include(<torch/cuda.h>)
#include <torch/cuda.h>
#define HAVE_TORCH_CUDA_HEADER 1
#else
#define HAVE_TORCH_CUDA_HEADER 0
#endif
#if __has_include(<cuda_runtime_api.h>)
#include <cuda_runtime_api.h>
#define HAVE_CUDA_RUNTIME_API_HEADER 1
#else
#define HAVE_CUDA_RUNTIME_API_HEADER 0
#endif
#else
#define HAVE_TORCH_CUDA_HEADER 0
#define HAVE_CUDA_RUNTIME_API_HEADER 0
#endif
#if defined(__has_include)
#if __has_include(<c10/cuda/CUDAGuard.h>) && __has_include(<c10/cuda/impl/cuda_cmake_macros.h>)
#include <c10/cuda/CUDAGuard.h>
#define HAVE_C10_CUDAGUARD 1
#else
#define HAVE_C10_CUDAGUARD 0
#endif
#else
#define HAVE_C10_CUDAGUARD 0
#endif
// ============================================================
// 2) Standard headers
// ============================================================
#include <windows.h>
#include <limits.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
// ============================================================
// 3) Include Zorro AFTER torch, rename Zorro's 'at' to avoid conflict
// (exact pattern from your working file)
// ============================================================
#define at zorro_at
#ifdef LOG
#undef LOG
#endif
#include <zorro.h>
#undef at
// ============================================================
// 4) Cleanup macro landmines (exact style from your working file)
// ============================================================
#ifdef min
#undef min
#endif
#ifdef max
#undef max
#endif
#ifdef ref
#undef ref
#endif
#ifdef swap
#undef swap
#endif
#ifdef abs
#undef abs
#endif
#ifdef NTF
#undef NTF
#endif
#ifdef LOOKBACK
#undef LOOKBACK
#endif
#ifdef BINS
#undef BINS
#endif
// ============================================================
// OpenCL + OpenGL includes (after the macro cleanup is safest)
// ============================================================
#include <CL/cl.h>
#include <CL/cl_gl.h> // cl_khr_gl_sharing
#include <CL/cl_gl_ext.h> // CL_GL_CONTEXT_KHR / CL_WGL_HDC_KHR
#include <GL/gl.h>
#ifndef GL_RGBA8
#define GL_RGBA8 0x8058
#endif
// ------------------------- Globals -------------------------
static HWND gHwnd = 0;
static HDC gHdc = 0;
static HGLRC gHgl = 0;
static int gW = 640;
static int gH = 480;
static float gPhase = 0.0f;
static unsigned int gNoiseSeed = 1u;
// Reads a positive integer from the environment variable 'key'.
// Returns 'fallback' when the variable is unset, empty, non-numeric,
// non-positive, or out of int range. Uses strtol instead of atoi because
// atoi has undefined behavior when the value is unrepresentable.
static int read_env_int(const char* key, int fallback)
{
	const char* s = getenv(key);
	if(!s || !*s) return fallback;
	char* end = NULL;
	long v = strtol(s, &end, 10);
	if(end == s) return fallback;           // no digits parsed at all
	if(v <= 0 || v > (long)INT_MAX) return fallback;
	return (int)v;
}
// ------------------------- WinProc forward -------------------------
LRESULT CALLBACK WndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
// ===========================================================
// Minimal OpenGL function loading
// ===========================================================
#ifndef GL_ARRAY_BUFFER
#define GL_ARRAY_BUFFER 0x8892
#endif
#ifndef GL_PIXEL_UNPACK_BUFFER
#define GL_PIXEL_UNPACK_BUFFER 0x88EC
#endif
#ifndef GL_DYNAMIC_DRAW
#define GL_DYNAMIC_DRAW 0x88E8
#endif
#ifndef APIENTRY
#define APIENTRY __stdcall
#endif
#ifndef APIENTRYP
#define APIENTRYP APIENTRY *
#endif
typedef void (APIENTRYP PFNGLGENBUFFERSPROC)(GLsizei, GLuint*);
typedef void (APIENTRYP PFNGLBINDBUFFERPROC)(GLenum, GLuint);
typedef void (APIENTRYP PFNGLBUFFERDATAPROC)(GLenum, ptrdiff_t, const void*, GLenum);
typedef void (APIENTRYP PFNGLDELETEBUFFERSPROC)(GLsizei, const GLuint*);
static PFNGLGENBUFFERSPROC p_glGenBuffers = 0;
static PFNGLBINDBUFFERPROC p_glBindBuffer = 0;
static PFNGLBUFFERDATAPROC p_glBufferData = 0;
static PFNGLDELETEBUFFERSPROC p_glDeleteBuffers = 0;
// Resolves an OpenGL function pointer. Tries wglGetProcAddress first
// (extension functions), then falls back to opengl32.dll exports (GL 1.1
// core functions, which wglGetProcAddress does not return).
// FIX(review): per the Khronos OpenGL wiki, wglGetProcAddress may return
// 0, 1, 2, 3 or -1 to signal failure on some drivers; the original treated
// those sentinels as valid pointers. All are normalized to NULL here.
static void* gl_get_proc(const char* name)
{
	void* p = (void*)wglGetProcAddress(name);
	if(p == (void*)0 || p == (void*)1 || p == (void*)2
	|| p == (void*)3 || p == (void*)-1)
		p = 0;
	if(!p) {
		HMODULE ogl = GetModuleHandleA("opengl32.dll");
		if(ogl) p = (void*)GetProcAddress(ogl, name);
	}
	return p;
}
// Loads the buffer-object entry points needed for the PBO path.
// Returns 1 when all four resolved, 0 if any is missing.
static int gl_load_ext()
{
	p_glGenBuffers = (PFNGLGENBUFFERSPROC)gl_get_proc("glGenBuffers");
	p_glBindBuffer = (PFNGLBINDBUFFERPROC)gl_get_proc("glBindBuffer");
	p_glBufferData = (PFNGLBUFFERDATAPROC)gl_get_proc("glBufferData");
	p_glDeleteBuffers = (PFNGLDELETEBUFFERSPROC)gl_get_proc("glDeleteBuffers");
	return (p_glGenBuffers && p_glBindBuffer
	&& p_glBufferData && p_glDeleteBuffers) ? 1 : 0;
}
// ===========================================================
// OpenGL objects
// ===========================================================
static GLuint gPBO = 0;
static GLuint gTex = 0;
// Tears down the GL side in reverse order of creation: texture, PBO,
// WGL context (after unbinding it), then the window DC. Each handle is
// zeroed after release so repeated calls are safe.
static void gl_release_all()
{
if(gTex) {
glDeleteTextures(1, &gTex);
gTex = 0;
}
if(gPBO) {
// p_glDeleteBuffers may be NULL if gl_load_ext() never succeeded.
if(p_glDeleteBuffers) p_glDeleteBuffers(1, &gPBO);
gPBO = 0;
}
if(gHgl) { wglMakeCurrent(NULL, NULL); wglDeleteContext(gHgl); gHgl = 0; }
if(gHdc && gHwnd) { ReleaseDC(gHwnd, gHdc); gHdc = 0; }
}
// Creates the WGL OpenGL context on 'hwnd' and the two GL objects the demo
// needs: a PBO sized for one gW x gH RGBA frame (later shared with OpenCL)
// and a matching RGBA8 texture with nearest filtering.
// Returns 1 on success; on failure, partially-created state is left for
// gl_release_all() to clean up. The setup order (pixel format -> context ->
// make-current -> extension load -> objects) is required by WGL.
static int gl_init_wgl(HWND hwnd)
{
gHwnd = hwnd;
gHdc = GetDC(hwnd);
if(!gHdc) return 0;
PIXELFORMATDESCRIPTOR pfd;
ZeroMemory(&pfd, sizeof(pfd));
pfd.nSize = sizeof(pfd);
pfd.nVersion = 1;
pfd.dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER;
pfd.iPixelType = PFD_TYPE_RGBA;
pfd.cColorBits = 32;
pfd.cDepthBits = 16;
pfd.iLayerType = PFD_MAIN_PLANE;
int pf = ChoosePixelFormat(gHdc, &pfd);
if(pf == 0) return 0;
if(!SetPixelFormat(gHdc, pf, &pfd)) return 0;
gHgl = wglCreateContext(gHdc);
if(!gHgl) return 0;
if(!wglMakeCurrent(gHdc, gHgl)) return 0;
// Buffer-object entry points must be loaded with a current context.
if(!gl_load_ext()) {
printf("\nOpenGL buffer functions not available (need VBO/PBO support).");
return 0;
}
glDisable(GL_DEPTH_TEST);
glViewport(0, 0, gW, gH);
// Create PBO for RGBA pixels
p_glGenBuffers(1, &gPBO);
p_glBindBuffer(GL_PIXEL_UNPACK_BUFFER, gPBO);
p_glBufferData(GL_PIXEL_UNPACK_BUFFER, (ptrdiff_t)(gW * gH * 4), 0, GL_DYNAMIC_DRAW);
p_glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
// Create texture
glGenTextures(1, &gTex);
glBindTexture(GL_TEXTURE_2D, gTex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, gW, gH, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0);
glBindTexture(GL_TEXTURE_2D, 0);
return 1;
}
// ===========================================================
// Tiny NN (LibTorch -> weights)
// ===========================================================
#define NN_IN 2
#define NN_H 16
#define NN_OUT 3
#define NN_PARAM_COUNT (NN_H*NN_IN + NN_H + NN_OUT*NN_H + NN_OUT)
static float gHost_W1[NN_H*NN_IN];
static float gHost_b1[NN_H];
static float gHost_W2[NN_OUT*NN_H];
static float gHost_b2[NN_OUT];
// Minimal NN_IN -> NN_H -> NN_OUT MLP with tanh activations. It is used
// only as a source of randomly-initialized weights; per-pixel inference
// runs in the OpenCL kernel, not through this forward().
struct TinyMLPImpl : torch::nn::Module {
torch::nn::Linear fc1{nullptr}, fc2{nullptr};
TinyMLPImpl() {
fc1 = register_module("fc1", torch::nn::Linear(NN_IN, NN_H));
fc2 = register_module("fc2", torch::nn::Linear(NN_H, NN_OUT));
}
torch::Tensor forward(torch::Tensor x) {
x = torch::tanh(fc1->forward(x));
x = torch::tanh(fc2->forward(x));
return x;
}
};
// Generates the TinyMLP shared-pointer wrapper type around TinyMLPImpl.
TORCH_MODULE(TinyMLP);
// Builds a freshly-seeded TinyMLP and copies its parameters into the four
// caller-supplied float arrays (row-major, torch's Linear layout:
// W1[NN_H*NN_IN], b1[NN_H], W2[NN_OUT*NN_H], b2[NN_OUT]).
// Tensors are detached, forced contiguous and moved to CPU so data_ptr()
// yields a plain dense float block safe to memcpy. Returns 1 on success,
// 0 on any LibTorch error (logged, never thrown past this function).
static int build_weights_from_libtorch(float* W1, float* b1, float* W2, float* b2)
{
if(!W1 || !b1 || !W2 || !b2) return 0;
try {
torch::NoGradGuard ng;
// Time-based seed so each run starts from different weights.
torch::manual_seed((uint64_t)time(NULL) ^ (uint64_t)GetTickCount64());
TinyMLP m;
m->eval();
auto w1 = m->fc1->weight.detach().contiguous().to(torch::kCPU);
auto bb1 = m->fc1->bias.detach().contiguous().to(torch::kCPU);
auto w2 = m->fc2->weight.detach().contiguous().to(torch::kCPU);
auto bb2 = m->fc2->bias.detach().contiguous().to(torch::kCPU);
memcpy(W1, w1.data_ptr<float>(), sizeof(float)*NN_H*NN_IN);
memcpy(b1, bb1.data_ptr<float>(), sizeof(float)*NN_H);
memcpy(W2, w2.data_ptr<float>(), sizeof(float)*NN_OUT*NN_H);
memcpy(b2, bb2.data_ptr<float>(), sizeof(float)*NN_OUT);
return 1;
}
catch(const c10::Error& e) {
printf("\n[LibTorch] Error: %s", e.what());
return 0;
}
catch(...) {
printf("\n[LibTorch] Unknown error.");
return 0;
}
}
// ===========================================================
// OpenCL (GL sharing)
// ===========================================================
static int gCL_Ready = 0;
static cl_platform_id gCL_Platform = 0;
static cl_device_id gCL_Device = 0;
static cl_context gCL_Context = 0;
static cl_command_queue gCL_Queue = 0;
static cl_program gCL_Program = 0;
static cl_kernel gCL_K_NN = 0;
static cl_mem gCL_PBO = 0; // CL view of GL PBO
static cl_mem gCL_W1 = 0;
static cl_mem gCL_b1 = 0;
static cl_mem gCL_W2 = 0;
static cl_mem gCL_b2 = 0;
static void pack_params(float* theta);
static void unpack_params(const float* theta);
static void evolve_params_accumulated(float phase, unsigned int seed);
// Two-step stringification so NN_H/NN_IN/NN_OUT expand to their numeric
// values before being pasted into the kernel source below.
#define STR2(x) #x
#define XSTR(x) STR2(x)
// OpenCL kernel: per-pixel MLP inference (weights supplied as buffers)
// mixed with procedural stripe/vignette effects; writes RGBA bytes into
// the shared GL PBO. Network sizes are baked in at compile time via XSTR.
static const char* gCL_Source =
"__kernel void nn_render(__global uchar4* out, int width, int height, \n"
" __global const float* W1, __global const float* b1, \n"
" __global const float* W2, __global const float* b2, float phase, uint seed) \n"
"{ \n"
" int xpix = (int)get_global_id(0); \n"
" int ypix = (int)get_global_id(1); \n"
" if(xpix >= width || ypix >= height) return; \n"
" \n"
// Normalized coords plus a deterministic per-pixel/per-frame jitter hash.
" float x = ((float)xpix / (float)(width - 1)) * 2.0f - 1.0f; \n"
" float y = ((float)ypix / (float)(height - 1)) * 2.0f - 1.0f; \n"
" uint n = (uint)(xpix*1973u) ^ (uint)(ypix*9277u) ^ (seed*26699u + 911u); \n"
" n = (n << 13) ^ n; \n"
" uint m = (n * (n*n*15731u + 789221u) + 1376312589u); \n"
" float jitter = ((float)(m & 0x00ffffffu) / 16777215.0f) * 2.0f - 1.0f; \n"
" float in0 = 2.8f*x + 0.7f*sin(3.0f*y + phase) + 0.35f*jitter; \n"
" float in1 = -2.8f*y + 0.7f*cos(3.0f*x - 1.3f*phase) - 0.35f*jitter; \n"
" \n"
// Hidden layer: tanh(W1 * in + b1).
" float h[" XSTR(NN_H) "]; \n"
" for(int j=0;j<" XSTR(NN_H) ";j++){ \n"
" float acc = b1[j]; \n"
" acc += in0 * W1[j*" XSTR(NN_IN) " + 0]; \n"
" acc += in1 * W1[j*" XSTR(NN_IN) " + 1]; \n"
" h[j] = tanh(acc); \n"
" } \n"
" \n"
// Output layer mapped to [0,1] via 0.5 + 0.5*tanh.
" float o[" XSTR(NN_OUT) "]; \n"
" for(int k=0;k<" XSTR(NN_OUT) ";k++){ \n"
" float acc = b2[k]; \n"
" for(int j=0;j<" XSTR(NN_H) ";j++){ \n"
" acc += h[j] * W2[k*" XSTR(NN_H) " + j]; \n"
" } \n"
" float s = 0.5f + 0.5f*tanh(acc); \n"
" if(s<0) s=0; if(s>1) s=1; \n"
" o[k] = s; \n"
" } \n"
" \n"
// Procedural post-processing and final RGBA pack.
" float radial = sqrt(x*x + y*y); \n"
" float vignette = clamp(1.15f - radial, 0.0f, 1.0f); \n"
" float stripe = 0.5f + 0.5f*sin(10.0f*(x + y) + phase + 2.0f*jitter); \n"
" float rcol = clamp(0.70f*o[0] + 0.30f*stripe, 0.0f, 1.0f) * vignette; \n"
" float gcol = clamp(0.85f*o[1] + 0.15f*(1.0f - stripe), 0.0f, 1.0f) * vignette; \n"
" float bcol = clamp(0.75f*o[2] + 0.25f*(0.5f + 0.5f*cos(8.0f*x - phase)),0.0f,1.0f);\n"
" uchar r = (uchar)(255.0f*rcol); \n"
" uchar g = (uchar)(255.0f*gcol); \n"
" uchar b = (uchar)(255.0f*bcol); \n"
" out[ypix*width + xpix] = (uchar4)(r,g,b,255); \n"
"} \n";
// Releases every OpenCL object in reverse order of creation (mem objects,
// kernel, program, queue, context) and zeroes the handles, so the function
// is safe to call repeatedly and from any partial-initialization state.
static void cl_release_all()
{
if(gCL_b2) { clReleaseMemObject(gCL_b2); gCL_b2 = 0; }
if(gCL_W2) { clReleaseMemObject(gCL_W2); gCL_W2 = 0; }
if(gCL_b1) { clReleaseMemObject(gCL_b1); gCL_b1 = 0; }
if(gCL_W1) { clReleaseMemObject(gCL_W1); gCL_W1 = 0; }
if(gCL_PBO) { clReleaseMemObject(gCL_PBO); gCL_PBO = 0; }
if(gCL_K_NN) { clReleaseKernel(gCL_K_NN); gCL_K_NN = 0; }
if(gCL_Program){ clReleaseProgram(gCL_Program); gCL_Program = 0; }
if(gCL_Queue) { clReleaseCommandQueue(gCL_Queue); gCL_Queue = 0; }
if(gCL_Context){ clReleaseContext(gCL_Context); gCL_Context = 0; }
gCL_Device = 0;
gCL_Platform = 0;
gCL_Ready = 0;
}
// Scans up to 8 platforms x 8 GPU devices and returns the first device
// advertising cl_khr_gl_sharing (required for zero-copy CL/GL interop).
// On success fills *outP/*outD and returns 1; returns 0 if none found.
static int cl_pick_device_with_glshare(cl_platform_id* outP, cl_device_id* outD)
{
	cl_uint platCount = 0;
	if(clGetPlatformIDs(0, 0, &platCount) != CL_SUCCESS || platCount == 0)
		return 0;
	cl_platform_id plats[8];
	if(platCount > 8) platCount = 8;
	if(clGetPlatformIDs(platCount, plats, &platCount) != CL_SUCCESS)
		return 0;
	for(cl_uint pi = 0; pi < platCount; pi++)
	{
		cl_uint devCount = 0;
		if(clGetDeviceIDs(plats[pi], CL_DEVICE_TYPE_GPU, 0, 0, &devCount) != CL_SUCCESS || devCount == 0)
			continue;
		cl_device_id devList[8];
		if(devCount > 8) devCount = 8;
		if(clGetDeviceIDs(plats[pi], CL_DEVICE_TYPE_GPU, devCount, devList, &devCount) != CL_SUCCESS)
			continue;
		for(cl_uint di = 0; di < devCount; di++)
		{
			// Query the extension string; skip devices whose string does not
			// fit the fixed buffer (the call fails and we just continue).
			char extBuf[8192];
			size_t got = 0;
			if(clGetDeviceInfo(devList[di], CL_DEVICE_EXTENSIONS, sizeof(extBuf), extBuf, &got) != CL_SUCCESS)
				continue;
			if(!strstr(extBuf, "cl_khr_gl_sharing"))
				continue;
			*outP = plats[pi];
			*outD = devList[di];
			return 1;
		}
	}
	return 0;
}
static int cl_init_glshare()
{
    // Create an OpenCL context that shares buffers with the current OpenGL
    // context, build the "nn_render" kernel, wrap the GL PBO as a CL buffer,
    // allocate the weight buffers and upload the initial LibTorch weights.
    // Must be called while the GL context from gl_init_wgl() is current.
    // Returns 1 on success; on any failure releases all partial state via
    // cl_release_all() and returns 0.
    cl_int err = CL_SUCCESS;
    cl_platform_id P = 0;
    cl_device_id D = 0;
    if(!cl_pick_device_with_glshare(&P, &D)) {
        printf("\nOpenCL: no GPU device with cl_khr_gl_sharing found.");
        return 0;
    }
    gCL_Platform = P;
    gCL_Device = D;
    // GL-sharing context properties: capture the current WGL context/DC.
    cl_context_properties props[] = {
        CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
        CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(),
        CL_CONTEXT_PLATFORM, (cl_context_properties)gCL_Platform,
        0
    };
    gCL_Context = clCreateContext(props, 1, &gCL_Device, 0, 0, &err);
    if(err != CL_SUCCESS || !gCL_Context) { cl_release_all(); return 0; }
    gCL_Queue = clCreateCommandQueue(gCL_Context, gCL_Device, 0, &err);
    if(err != CL_SUCCESS || !gCL_Queue) { cl_release_all(); return 0; }
    gCL_Program = clCreateProgramWithSource(gCL_Context, 1, &gCL_Source, 0, &err);
    if(err != CL_SUCCESS || !gCL_Program) { cl_release_all(); return 0; }
    err = clBuildProgram(gCL_Program, 1, &gCL_Device, 0, 0, 0);
    if(err != CL_SUCCESS)
    {
        // Zero-initialize so we never printf() an indeterminate buffer if
        // the build-log query itself fails (e.g. the log exceeds 8 KB,
        // which makes clGetProgramBuildInfo return CL_INVALID_VALUE
        // without writing anything).
        char logbuf[8192] = {0};
        size_t logsz = 0;
        if(clGetProgramBuildInfo(gCL_Program, gCL_Device, CL_PROGRAM_BUILD_LOG,
                                 sizeof(logbuf), logbuf, &logsz) != CL_SUCCESS)
            logbuf[0] = '\0';
        printf("\nOpenCL build failed:\n%s", logbuf);
        cl_release_all();
        return 0;
    }
    gCL_K_NN = clCreateKernel(gCL_Program, "nn_render", &err);
    if(err != CL_SUCCESS || !gCL_K_NN) { cl_release_all(); return 0; }
    // Wrap the existing GL pixel buffer object; the kernel writes into it.
    gCL_PBO = clCreateFromGLBuffer(gCL_Context, CL_MEM_WRITE_ONLY, gPBO, &err);
    if(err != CL_SUCCESS || !gCL_PBO) { cl_release_all(); return 0; }
    size_t bytesW1 = sizeof(float)*(size_t)NN_H*(size_t)NN_IN;
    size_t bytesb1 = sizeof(float)*(size_t)NN_H;
    size_t bytesW2 = sizeof(float)*(size_t)NN_OUT*(size_t)NN_H;
    size_t bytesb2 = sizeof(float)*(size_t)NN_OUT;
    // err only keeps the last creation status, but a failed clCreateBuffer
    // also returns a null cl_mem, so the pointer checks below catch every
    // failure combination.
    gCL_W1 = clCreateBuffer(gCL_Context, CL_MEM_READ_ONLY, bytesW1, 0, &err);
    gCL_b1 = clCreateBuffer(gCL_Context, CL_MEM_READ_ONLY, bytesb1, 0, &err);
    gCL_W2 = clCreateBuffer(gCL_Context, CL_MEM_READ_ONLY, bytesW2, 0, &err);
    gCL_b2 = clCreateBuffer(gCL_Context, CL_MEM_READ_ONLY, bytesb2, 0, &err);
    if(err != CL_SUCCESS || !gCL_W1 || !gCL_b1 || !gCL_W2 || !gCL_b2) { cl_release_all(); return 0; }
    if(!build_weights_from_libtorch(gHost_W1, gHost_b1, gHost_W2, gHost_b2)) {
        printf("\n[LibTorch] Failed to build weights.");
        cl_release_all();
        return 0;
    }
    // Blocking (CL_TRUE) writes: the host arrays may be modified freely
    // as soon as each call returns.
    err = clEnqueueWriteBuffer(gCL_Queue, gCL_W1, CL_TRUE, 0, bytesW1, gHost_W1, 0, 0, 0);
    if(err != CL_SUCCESS) { cl_release_all(); return 0; }
    err = clEnqueueWriteBuffer(gCL_Queue, gCL_b1, CL_TRUE, 0, bytesb1, gHost_b1, 0, 0, 0);
    if(err != CL_SUCCESS) { cl_release_all(); return 0; }
    err = clEnqueueWriteBuffer(gCL_Queue, gCL_W2, CL_TRUE, 0, bytesW2, gHost_W2, 0, 0, 0);
    if(err != CL_SUCCESS) { cl_release_all(); return 0; }
    err = clEnqueueWriteBuffer(gCL_Queue, gCL_b2, CL_TRUE, 0, bytesb2, gHost_b2, 0, 0, 0);
    if(err != CL_SUCCESS) { cl_release_all(); return 0; }
    gCL_Ready = 1;
    printf("\nOpenCL: GL-sharing enabled. NN kernel ready.");
    return 1;
}
// ===========================================================
// Render (CL -> GL)
// ===========================================================
static void RenderFrame()
{
    // Render one frame: evolve the host-side NN weights, upload them,
    // launch the CL kernel into the shared GL pixel buffer, then copy the
    // PBO into the texture and draw it as a fullscreen quad. No-op until
    // cl_init_glshare() has succeeded.
    if(!gCL_Ready) return;
    size_t global[2] = { (size_t)gW, (size_t)gH };
    size_t local[2] = { 16, 16 };  // preferred work-group size; NULL fallback below
    cl_int err = CL_SUCCESS;
    // CL must acquire ownership of the shared GL buffer before writing it.
    err = clEnqueueAcquireGLObjects(gCL_Queue, 1, &gCL_PBO, 0, 0, 0);
    if(err != CL_SUCCESS) return;
    // Fresh per-frame noise seed mixed from the high-resolution timer and
    // the tick count.
    LARGE_INTEGER qpc;
    QueryPerformanceCounter(&qpc);
    gNoiseSeed = (unsigned int)(qpc.QuadPart ^ (qpc.QuadPart >> 32) ^ (LONGLONG)GetTickCount64());
    evolve_params_accumulated(gPhase, gNoiseSeed);
    size_t bytesW1 = sizeof(float)*(size_t)NN_H*(size_t)NN_IN;
    size_t bytesb1 = sizeof(float)*(size_t)NN_H;
    size_t bytesW2 = sizeof(float)*(size_t)NN_OUT*(size_t)NN_H;
    size_t bytesb2 = sizeof(float)*(size_t)NN_OUT;
    // Non-blocking uploads of the evolved weights. On any failure we must
    // release the GL object and drain the queue before returning so GL
    // never sees a buffer still owned by CL.
    err = clEnqueueWriteBuffer(gCL_Queue, gCL_W1, CL_FALSE, 0, bytesW1, gHost_W1, 0, 0, 0);
    if(err != CL_SUCCESS) { clEnqueueReleaseGLObjects(gCL_Queue, 1, &gCL_PBO, 0, 0, 0); clFinish(gCL_Queue); return; }
    err = clEnqueueWriteBuffer(gCL_Queue, gCL_b1, CL_FALSE, 0, bytesb1, gHost_b1, 0, 0, 0);
    if(err != CL_SUCCESS) { clEnqueueReleaseGLObjects(gCL_Queue, 1, &gCL_PBO, 0, 0, 0); clFinish(gCL_Queue); return; }
    err = clEnqueueWriteBuffer(gCL_Queue, gCL_W2, CL_FALSE, 0, bytesW2, gHost_W2, 0, 0, 0);
    if(err != CL_SUCCESS) { clEnqueueReleaseGLObjects(gCL_Queue, 1, &gCL_PBO, 0, 0, 0); clFinish(gCL_Queue); return; }
    err = clEnqueueWriteBuffer(gCL_Queue, gCL_b2, CL_FALSE, 0, bytesb2, gHost_b2, 0, 0, 0);
    if(err != CL_SUCCESS) { clEnqueueReleaseGLObjects(gCL_Queue, 1, &gCL_PBO, 0, 0, 0); clFinish(gCL_Queue); return; }
    // Argument order must match the nn_render kernel signature exactly.
    int arg = 0;
    clSetKernelArg(gCL_K_NN, arg++, sizeof(cl_mem), &gCL_PBO);
    clSetKernelArg(gCL_K_NN, arg++, sizeof(int), &gW);
    clSetKernelArg(gCL_K_NN, arg++, sizeof(int), &gH);
    clSetKernelArg(gCL_K_NN, arg++, sizeof(cl_mem), &gCL_W1);
    clSetKernelArg(gCL_K_NN, arg++, sizeof(cl_mem), &gCL_b1);
    clSetKernelArg(gCL_K_NN, arg++, sizeof(cl_mem), &gCL_W2);
    clSetKernelArg(gCL_K_NN, arg++, sizeof(cl_mem), &gCL_b2);
    clSetKernelArg(gCL_K_NN, arg++, sizeof(float), &gPhase);
    clSetKernelArg(gCL_K_NN, arg++, sizeof(unsigned int), &gNoiseSeed);
    err = clEnqueueNDRangeKernel(gCL_Queue, gCL_K_NN, 2, 0, global, local, 0, 0, 0);
    if(err != CL_SUCCESS) {
        // The fixed 16x16 group may be invalid (e.g. gW/gH not multiples
        // of 16, or too large for the device); retry with a NULL local
        // size so the driver picks one.
        err = clEnqueueNDRangeKernel(gCL_Queue, gCL_K_NN, 2, 0, global, 0, 0, 0, 0);
    }
    // Hand the buffer back to GL and wait until all queued CL work is done
    // before GL reads the PBO.
    clEnqueueReleaseGLObjects(gCL_Queue, 1, &gCL_PBO, 0, 0, 0);
    clFinish(gCL_Queue);
    // PBO -> texture: with a bound unpack buffer, glTexSubImage2D sources
    // its pixels from the PBO at offset 0 (the final NULL argument).
    p_glBindBuffer(GL_PIXEL_UNPACK_BUFFER, gPBO);
    glBindTexture(GL_TEXTURE_2D, gTex);
    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, gW, gH, GL_RGBA, GL_UNSIGNED_BYTE, 0);
    p_glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
    // Draw the texture as a fullscreen quad (fixed-function pipeline).
    glClear(GL_COLOR_BUFFER_BIT);
    glEnable(GL_TEXTURE_2D);
    glBindTexture(GL_TEXTURE_2D, gTex);
    glBegin(GL_QUADS);
    glTexCoord2f(0,0); glVertex2f(-1,-1);
    glTexCoord2f(1,0); glVertex2f( 1,-1);
    glTexCoord2f(1,1); glVertex2f( 1, 1);
    glTexCoord2f(0,1); glVertex2f(-1, 1);
    glEnd();
    glBindTexture(GL_TEXTURE_2D, 0);
    SwapBuffers(gHdc);
    gPhase += 0.03f;  // advance the animation phase for the next frame
}
static void pack_params(float* theta)
{
    // Flatten all network parameters into theta in the fixed order
    // W1, b1, W2, b2 (total NN_PARAM_COUNT floats).
    float* dst = theta;
    const float* src;
    int k;
    for(src = gHost_W1, k = 0; k < NN_H*NN_IN;  k++) *dst++ = *src++;
    for(src = gHost_b1, k = 0; k < NN_H;        k++) *dst++ = *src++;
    for(src = gHost_W2, k = 0; k < NN_OUT*NN_H; k++) *dst++ = *src++;
    for(src = gHost_b2, k = 0; k < NN_OUT;      k++) *dst++ = *src++;
}
static void unpack_params(const float* theta)
{
    // Inverse of pack_params(): scatter the flat vector back into the
    // weight arrays, consuming W1, b1, W2, b2 in that fixed order.
    const float* src = theta;
    float* dst;
    int k;
    for(dst = gHost_W1, k = 0; k < NN_H*NN_IN;  k++) *dst++ = *src++;
    for(dst = gHost_b1, k = 0; k < NN_H;        k++) *dst++ = *src++;
    for(dst = gHost_W2, k = 0; k < NN_OUT*NN_H; k++) *dst++ = *src++;
    for(dst = gHost_b2, k = 0; k < NN_OUT;      k++) *dst++ = *src++;
}
static unsigned int mix_u32(unsigned int x)
{
    // Integer avalanche mix: alternating xor-shift and multiply rounds
    // (xxHash32-style finalizer constants) that diffuse every input bit
    // across the whole 32-bit word. Deterministic; mix_u32(0) == 0.
    unsigned int h = x;
    h = (h ^ (h >> 16)) * 2246822519u;
    h = (h ^ (h >> 13)) * 3266489917u;
    return h ^ (h >> 16);
}
static void evolve_params_accumulated(float phase, unsigned int seed)
{
    // Evolve the NN weights one step. Phase 1: each flattened parameter is
    // damped (0.982), blended with its ring neighbours, given a slow
    // per-index sinusoidal drift and small hashed noise, then clamped to
    // [-3, 3]. Phase 2: each tensor is nudged toward the mean of another
    // tensor, loosely coupling W1/b1/W2/b2 over time.
    float theta[NN_PARAM_COUNT];
    float nextv[NN_PARAM_COUNT];
    pack_params(theta);
    for(int i=0;i<NN_PARAM_COUNT;i++) {
        // Ring topology: left/right neighbours wrap around the flat vector.
        int l = (i == 0) ? (NN_PARAM_COUNT - 1) : (i - 1);
        int r = (i + 1) % NN_PARAM_COUNT;
        float coupled = 0.55f*theta[l] + 0.45f*theta[r];
        float drift = 0.015f*sinf(0.8f*phase + 0.17f*(float)i);
        // Deterministic per-(seed, index) noise in [-0.010, +0.010],
        // derived from the low 16 bits of the hash.
        unsigned int h = mix_u32(seed ^ (unsigned int)(i*747796405u + 2891336453u));
        float noise = (((float)(h & 0xFFFFu) / 65535.0f) * 2.0f - 1.0f) * 0.010f;
        float v = 0.982f*theta[i] + 0.022f*coupled + drift + noise;
        // Clamp so the weights stay bounded despite accumulated drift.
        if(v > 3.0f) v = 3.0f;
        if(v < -3.0f) v = -3.0f;
        nextv[i] = v;
    }
    // Write the whole next state back at once (double-buffered update so
    // the neighbour reads above all saw the previous state).
    unpack_params(nextv);
    // Per-tensor means of the freshly updated weights.
    float mW1 = 0.0f, mb1 = 0.0f, mW2 = 0.0f, mb2 = 0.0f;
    for(int i=0;i<NN_H*NN_IN;i++) mW1 += gHost_W1[i];
    for(int i=0;i<NN_H;i++) mb1 += gHost_b1[i];
    for(int i=0;i<NN_OUT*NN_H;i++) mW2 += gHost_W2[i];
    for(int i=0;i<NN_OUT;i++) mb2 += gHost_b2[i];
    mW1 /= (float)(NN_H*NN_IN);
    mb1 /= (float)NN_H;
    mW2 /= (float)(NN_OUT*NN_H);
    mb2 /= (float)NN_OUT;
    // Cross-tensor coupling: pull each tensor slightly toward the mean of
    // the next one in the cycle W1<-b1, b1<-W2, W2<-b2, b2<-W1.
    for(int i=0;i<NN_H*NN_IN;i++) gHost_W1[i] += 0.003f*(mb1 - mW1);
    for(int i=0;i<NN_H;i++) gHost_b1[i] += 0.004f*(mW2 - mb1);
    for(int i=0;i<NN_OUT*NN_H;i++) gHost_W2[i] += 0.003f*(mb2 - mW2);
    for(int i=0;i<NN_OUT;i++) gHost_b2[i] += 0.004f*(mW1 - mb2);
}
// ===========================================================
// WinMain
// ===========================================================
int WINAPI WinMain(HINSTANCE hInst, HINSTANCE, LPSTR, int)
{
    // Create the render window, initialize GL then CL (order matters: the
    // CL context shares the GL context made current by gl_init_wgl), and
    // run a continuous render/message loop until the window closes, Zorro
    // signals a stop, or the optional timeout elapses. Always returns 0.
    // 0 means no auto-close; window stays until user closes it.
    const int maxSeconds = read_env_int("MENDB02_MAX_SECONDS", 0);
    ULONGLONG startTick = GetTickCount64();
    const char* szClass = "Mendb02NNCLGLClass";
    // Drop any stale class registration from a previous run of this DLL.
    UnregisterClassA(szClass, hInst);
    WNDCLASSEXA wc;
    ZeroMemory(&wc, sizeof(wc));
    wc.cbSize = sizeof(wc);
    wc.style = CS_HREDRAW | CS_VREDRAW;
    wc.lpfnWndProc = WndProc;
    wc.hInstance = hInst;
    wc.hCursor = LoadCursor(NULL, IDC_ARROW);
    wc.lpszClassName = szClass;
    RegisterClassExA(&wc);
    // Size the outer window so the client area is exactly gW x gH pixels.
    RECT r;
    r.left=0; r.top=0; r.right=gW; r.bottom=gH;
    AdjustWindowRect(&r, WS_OVERLAPPEDWINDOW, FALSE);
    HWND hwnd = CreateWindowExA(
        0, szClass, "NN Render (LibTorch weights + OpenCL + OpenGL)",
        WS_OVERLAPPEDWINDOW,
        100, 100, (r.right-r.left), (r.bottom-r.top),
        0, 0, hInst, 0);
    if(!hwnd) return 0;
    ShowWindow(hwnd, SW_SHOW);
    UpdateWindow(hwnd);
    if(!gl_init_wgl(hwnd))
    {
        MessageBoxA(hwnd, "OpenGL init failed", "Error", MB_OK);
        gl_release_all();
        return 0;
    }
    // Must run after gl_init_wgl: cl_init_glshare captures the current GL
    // context/DC for cl_khr_gl_sharing.
    if(!cl_init_glshare())
    {
        MessageBoxA(hwnd, "OpenCL GL-sharing init failed", "Error", MB_OK);
        cl_release_all();
        gl_release_all();
        return 0;
    }
    // Message pump + render loop: drain pending messages, then render one
    // frame per pass (busy loop, no sleep/vsync wait here).
    MSG msg;
    ZeroMemory(&msg, sizeof(msg));
    while(msg.message != WM_QUIT)
    {
        while(PeekMessage(&msg, NULL, 0, 0, PM_REMOVE))
        {
            TranslateMessage(&msg);
            DispatchMessage(&msg);
        }
        // Allow Zorro STOP to close this Win32 loop cleanly, but ignore
        // the sticky FIRSTINITRUN+EXITRUN combo seen at startup.
        if(is(EXITRUN) && !is(FIRSTINITRUN)) {
            PostMessage(hwnd, WM_CLOSE, 0, 0);
        }
        // Window may have been destroyed while dispatching messages.
        if(!IsWindow(hwnd))
            break;
        // Optional wall-clock auto-close when MENDB02_MAX_SECONDS > 0.
        if(maxSeconds > 0 && (GetTickCount64() - startTick) >= (ULONGLONG)maxSeconds * 1000ULL) {
            PostMessage(hwnd, WM_CLOSE, 0, 0);
        }
        RenderFrame();
    }
    // Release CL before GL: the CL context references the GL resources.
    cl_release_all();
    gl_release_all();
    gHwnd = 0;
    return 0;
}
// ===========================================================
// Input
// ===========================================================
LRESULT CALLBACK WndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
{
    // Window procedure: close on WM_CLOSE, quit the message loop on
    // WM_DESTROY, and map ESC / F12 to a close request. Everything else
    // goes to the default handler.
    if(msg == WM_CLOSE)
    {
        DestroyWindow(hWnd);
        return 0;
    }
    if(msg == WM_KEYDOWN)
    {
        if(wParam == VK_ESCAPE || wParam == VK_F12)
            PostMessage(hWnd, WM_CLOSE, 0, 0);
        return 0;  // key handled either way
    }
    if(msg == WM_DESTROY)
    {
        PostQuitMessage(0);
        return 0;
    }
    return DefWindowProc(hWnd, msg, wParam, lParam);
}
// ===========================================================
// Zorro DLL entry
// ===========================================================
DLLFUNC int main()
{
    // Zorro script entry point: run the render window exactly once per
    // session, then ask Zorro to stop instead of relaunching the script.
    // Force single-cycle execution in Zorro to avoid automatic relaunches.
    NumTotalCycles = 1;
    NumWFOCycles = 1;
    NumSampleCycles = 1;
    set(TESTNOW|OFF,ALLCYCLES|OFF,PARAMETERS|OFF,FACTORS|OFF,RULES|OFF);
    // One-shot latch: survives repeated run calls within the same session.
    static int hasRun = 0;
    if(is(FIRSTINITRUN))
        hasRun = 0;   // new session begins: allow one window run
    if(hasRun)
        return 0;     // window already ran this session
    (void)WinMain(GetModuleHandleA(NULL), NULL, GetCommandLineA(), SW_SHOWDEFAULT);
    hasRun = 1;
    return quit("!Mendb02 finished");
}
210
65,908
Read More
|
|
|