Neural Prism Renderer is a hybrid engine that blends a trading platform runtime with a real-time graphics and compute pipeline. It is built as a Windows dynamic library that can be launched from an automated evaluation host, yet it behaves like a standalone visual instrument once running. The program opens a window, creates a hardware-accelerated drawing context, and prepares a streaming pixel surface that can be updated every frame without copying through the CPU. It then sets up a compute backend that can share that pixel surface directly, allowing the compute side to paint the image while the graphics side simply presents it.

At its core is a tiny neural network whose parameters are created through a deep learning runtime. The network is not trained interactively in this file; instead, it is instantiated and its weights are extracted in a safe, deterministic way. Those weights are packed into simple arrays and uploaded to the compute backend. This separation is intentional: the deep learning runtime is used as a reliable source of tensor layout and parameter creation, while the compute kernel remains lightweight and portable.

Each frame, the compute kernel visits every pixel as if it were a tiny sensor on a grid. For each pixel it forms a small coordinate input, pushes that input through the neural layers, and converts the output into color channels. The result is a neural field rendered as an image. The compute kernel writes directly into a shared buffer that the graphics system can map into a texture, so the presentation step is fast and consistent.

The code is also an integration blueprint. It carefully orders includes to prevent macro collisions between the trading platform headers and the deep learning headers. It cleans up common macro landmines that can silently corrupt builds. It offers controlled shutdown paths, respects user input to close the window, and can be configured to auto-exit after a chosen time. Finally, it wraps the whole interactive loop inside a single-cycle execution mode so the evaluation host does not relaunch it repeatedly. In abstract terms, it is a bridge between model parameters, parallel compute, and visual feedback, packaged to coexist with an automated trading research environment.

Code
// Mendb02.cpp
// Win32 + WGL(OpenGL) display + OpenCL compute (CL/GL sharing)
// + Tiny Neural Net inference per pixel (OpenCL kernel) using weights from LibTorch.

#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif

#define _CRT_SECURE_NO_WARNINGS

// ============================================================
// 1) Include LibTorch FIRST (like your working file)
//    Public/shareable variant: no machine-specific include paths.
// ============================================================
#if defined(__has_include)
  #if __has_include(<torch/torch.h>) && __has_include(<torch/script.h>)
    #include <torch/torch.h>
    #include <torch/script.h>
  #else
    #error "LibTorch headers not found. Add LibTorch include paths to your build configuration."
  #endif
#else
  #include <torch/torch.h>
  #include <torch/script.h>
#endif

// (Optional) CUDA headers (safe pattern used by your working file)
// Keep them conditional so CPU-only LibTorch setups still compile.
#if defined(__has_include)
  #if __has_include(<torch/cuda.h>)
    #include <torch/cuda.h>
    #define HAVE_TORCH_CUDA_HEADER 1
  #else
    #define HAVE_TORCH_CUDA_HEADER 0
  #endif
  #if __has_include(<cuda_runtime_api.h>)
    #include <cuda_runtime_api.h>
    #define HAVE_CUDA_RUNTIME_API_HEADER 1
  #else
    #define HAVE_CUDA_RUNTIME_API_HEADER 0
  #endif
#else
  #define HAVE_TORCH_CUDA_HEADER 0
  #define HAVE_CUDA_RUNTIME_API_HEADER 0
#endif

#if defined(__has_include)
  #if __has_include(<c10/cuda/CUDAGuard.h>) && __has_include(<c10/cuda/impl/cuda_cmake_macros.h>)
    #include <c10/cuda/CUDAGuard.h>
    #define HAVE_C10_CUDAGUARD 1
  #else
    #define HAVE_C10_CUDAGUARD 0
  #endif
#else
  #define HAVE_C10_CUDAGUARD 0
#endif

// ============================================================
// 2) Standard headers
// ============================================================
#include <windows.h>
#include <stdio.h>
#include <math.h>
#include <stddef.h>
#include <string.h>
#include <stdlib.h>

// ============================================================
// 3) Include Zorro AFTER torch, rename Zorro's 'at' to avoid conflict
//    (exact pattern from your working file)
// ============================================================
#define at zorro_at
#ifdef LOG
#undef LOG
#endif
#include <zorro.h>
#undef at

// ============================================================
// 4) Cleanup macro landmines (exact style from your working file)
// ============================================================
#ifdef min
#undef min
#endif
#ifdef max
#undef max
#endif
#ifdef ref
#undef ref
#endif
#ifdef swap
#undef swap
#endif
#ifdef abs
#undef abs
#endif

#ifdef NTF
#undef NTF
#endif
#ifdef LOOKBACK
#undef LOOKBACK
#endif
#ifdef BINS
#undef BINS
#endif

// ============================================================
// OpenCL + OpenGL includes (after the macro cleanup is safest)
// ============================================================
#include <CL/cl.h>
#include <CL/cl_gl.h>     // cl_khr_gl_sharing
#include <CL/cl_gl_ext.h> // CL_GL_CONTEXT_KHR / CL_WGL_HDC_KHR
#include <GL/gl.h>

#ifndef GL_RGBA8
#define GL_RGBA8 0x8058
#endif

// ------------------------- Globals -------------------------
// Window, device context and OpenGL rendering context; set by gl_init_wgl()
// and cleared again by gl_release_all().
static HWND   gHwnd = 0;
static HDC    gHdc  = 0;
static HGLRC  gHgl  = 0;

// Render surface size in pixels. Used for the window client area, the GL
// viewport, the PBO/texture dimensions and the OpenCL global work size.
static int    gW = 640;
static int    gH = 480;

// Reads a strictly positive integer from the environment variable `key`.
// Returns `fallback` when the variable is unset, empty, or when atoi()
// yields a value that is not greater than zero.
static int read_env_int(const char* key, int fallback)
{
  const char* text = getenv(key);
  if(text == NULL || text[0] == '\0')
    return fallback;

  const int parsed = atoi(text);
  if(parsed <= 0)
    return fallback;

  return parsed;
}

// ------------------------- WinProc forward -------------------------
LRESULT CALLBACK WndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);

// ===========================================================
//                Minimal OpenGL function loading
// ===========================================================

#ifndef GL_ARRAY_BUFFER
#define GL_ARRAY_BUFFER 0x8892
#endif
#ifndef GL_PIXEL_UNPACK_BUFFER
#define GL_PIXEL_UNPACK_BUFFER 0x88EC
#endif
#ifndef GL_DYNAMIC_DRAW
#define GL_DYNAMIC_DRAW 0x88E8
#endif

#ifndef APIENTRY
#define APIENTRY __stdcall
#endif
#ifndef APIENTRYP
#define APIENTRYP APIENTRY *
#endif

// Function pointer types for the buffer-object entry points that are looked
// up at runtime by gl_load_ext() instead of being linked directly.
typedef void (APIENTRYP PFNGLGENBUFFERSPROC)(GLsizei, GLuint*);
typedef void (APIENTRYP PFNGLBINDBUFFERPROC)(GLenum, GLuint);
typedef void (APIENTRYP PFNGLBUFFERDATAPROC)(GLenum, ptrdiff_t, const void*, GLenum);
typedef void (APIENTRYP PFNGLDELETEBUFFERSPROC)(GLsizei, const GLuint*);

// Resolved entry points; all remain 0 until gl_load_ext() has filled them in.
static PFNGLGENBUFFERSPROC    p_glGenBuffers    = 0;
static PFNGLBINDBUFFERPROC    p_glBindBuffer    = 0;
static PFNGLBUFFERDATAPROC    p_glBufferData    = 0;
static PFNGLDELETEBUFFERSPROC p_glDeleteBuffers = 0;

static void* gl_get_proc(const char* name)
{
  void* p = (void*)wglGetProcAddress(name);
  if(!p) {
    HMODULE ogl = GetModuleHandleA("opengl32.dll");
    if(ogl) p = (void*)GetProcAddress(ogl, name);
  }
  return p;
}

// Looks up the four buffer-object functions this file needs through
// gl_get_proc() and stores them in the p_gl* globals.
// Returns 1 only if every one of them was resolved, otherwise 0.
static int gl_load_ext()
{
  p_glGenBuffers    = (PFNGLGENBUFFERSPROC)   gl_get_proc("glGenBuffers");
  p_glBindBuffer    = (PFNGLBINDBUFFERPROC)   gl_get_proc("glBindBuffer");
  p_glBufferData    = (PFNGLBUFFERDATAPROC)   gl_get_proc("glBufferData");
  p_glDeleteBuffers = (PFNGLDELETEBUFFERSPROC)gl_get_proc("glDeleteBuffers");

  const bool haveAll = p_glGenBuffers && p_glBindBuffer &&
                       p_glBufferData && p_glDeleteBuffers;
  return haveAll ? 1 : 0;
}

// ===========================================================
//                       OpenGL objects
// ===========================================================

// Pixel buffer object that OpenCL writes into (via gCL_PBO) and the texture
// that RenderFrame() fills from it before drawing. 0 means "not created".
static GLuint gPBO = 0;
static GLuint gTex = 0;

// Releases every OpenGL/WGL resource created by gl_init_wgl(): the texture,
// the PBO, the rendering context and finally the window DC. Each handle is
// reset to 0, so calling this more than once is harmless.
static void gl_release_all()
{
  // GL objects go first, while the context may still be current.
  if(gTex != 0)
    glDeleteTextures(1, &gTex);
  gTex = 0;

  if(gPBO != 0 && p_glDeleteBuffers != 0)
    p_glDeleteBuffers(1, &gPBO);
  gPBO = 0;

  // Unbind the context before deleting it.
  if(gHgl != 0) {
    wglMakeCurrent(NULL, NULL);
    wglDeleteContext(gHgl);
    gHgl = 0;
  }

  // The DC can only be returned when the owning window handle is known.
  if(gHwnd != 0 && gHdc != 0) {
    ReleaseDC(gHwnd, gHdc);
    gHdc = 0;
  }
}

// Creates the WGL/OpenGL state used for presentation on `hwnd`:
//   1. obtains the window DC and selects a double-buffered RGBA pixel format,
//   2. creates an OpenGL context and makes it current on this thread,
//   3. loads the buffer-object entry points (gl_load_ext),
//   4. allocates a gW x gH RGBA pixel buffer object (gPBO) and a matching
//      texture (gTex).
//
// Returns 1 on success and 0 on failure. On failure, handles that were
// already stored in the globals are left in place; the caller is expected to
// call gl_release_all() to clean them up.
static int gl_init_wgl(HWND hwnd)
{
  gHwnd = hwnd;
  gHdc = GetDC(hwnd);
  if(!gHdc) return 0;

  PIXELFORMATDESCRIPTOR pfd;
  ZeroMemory(&pfd, sizeof(pfd));
  pfd.nSize      = sizeof(pfd);
  pfd.nVersion   = 1;
  pfd.dwFlags    = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER;
  pfd.iPixelType = PFD_TYPE_RGBA;
  pfd.cColorBits = 32;
  pfd.cDepthBits = 16;
  pfd.iLayerType = PFD_MAIN_PLANE;

  int pf = ChoosePixelFormat(gHdc, &pfd);
  if(pf == 0) return 0;
  if(!SetPixelFormat(gHdc, pf, &pfd)) return 0;

  gHgl = wglCreateContext(gHdc);
  if(!gHgl) return 0;
  if(!wglMakeCurrent(gHdc, gHgl)) return 0;

  if(!gl_load_ext()) {
    printf("\nOpenGL buffer functions not available (need VBO/PBO support).");
    return 0;
  }

  // A non-positive surface size cannot be allocated.
  if(gW <= 0 || gH <= 0) return 0;

  glDisable(GL_DEPTH_TEST);
  glViewport(0, 0, gW, gH);

  // Widen before multiplying so the byte count cannot overflow `int`.
  const size_t pboBytes = (size_t)gW * (size_t)gH * 4u;

  // Create PBO for RGBA pixels (4 bytes per pixel, contents left undefined).
  p_glGenBuffers(1, &gPBO);
  if(!gPBO) return 0;
  p_glBindBuffer(GL_PIXEL_UNPACK_BUFFER, gPBO);
  p_glBufferData(GL_PIXEL_UNPACK_BUFFER, (ptrdiff_t)pboBytes, 0, GL_DYNAMIC_DRAW);
  p_glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

  // Create the texture the PBO is unpacked into each frame. Nearest filtering
  // keeps the 1:1 pixel mapping and needs no mipmaps.
  glGenTextures(1, &gTex);
  if(!gTex) return 0;
  glBindTexture(GL_TEXTURE_2D, gTex);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
  glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, gW, gH, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0);
  glBindTexture(GL_TEXTURE_2D, 0);

  return 1;
}

// ===========================================================
//                  Tiny NN (LibTorch -> weights)
// ===========================================================

// Layer sizes of the tiny MLP. These are plain integer literals because they
// are also stringified (XSTR) into the OpenCL kernel source.
#define NN_IN 2   // inputs: the two pixel coordinates
#define NN_H 16   // hidden units
#define NN_OUT 3  // outputs: one per colour channel (r, g, b)

// Two-layer perceptron: NN_IN -> NN_H -> NN_OUT with tanh after each layer.
// In this file it is only instantiated to obtain initial parameters.
struct TinyMLPImpl : torch::nn::Module {
  torch::nn::Linear fc1{nullptr};
  torch::nn::Linear fc2{nullptr};

  TinyMLPImpl()
  {
    // Registration order (fc1, then fc2) is kept so parameter creation
    // consumes the random generator in the same sequence.
    fc1 = register_module("fc1", torch::nn::Linear(NN_IN, NN_H));
    fc2 = register_module("fc2", torch::nn::Linear(NN_H, NN_OUT));
  }

  // Reference forward pass; the OpenCL kernel mirrors these two layers.
  torch::Tensor forward(torch::Tensor x)
  {
    torch::Tensor hidden = torch::tanh(fc1->forward(x));
    return torch::tanh(fc2->forward(hidden));
  }
};
TORCH_MODULE(TinyMLP);

static int build_weights_from_libtorch(float* W1, float* b1, float* W2, float* b2)
{
  if(!W1 || !b1 || !W2 || !b2) return 0;

  try {
    torch::NoGradGuard ng;
    torch::manual_seed(1);
    TinyMLP m;
    m->eval();

    auto w1  = m->fc1->weight.detach().contiguous().to(torch::kCPU);
    auto bb1 = m->fc1->bias.detach().contiguous().to(torch::kCPU);

    auto w2  = m->fc2->weight.detach().contiguous().to(torch::kCPU);
    auto bb2 = m->fc2->bias.detach().contiguous().to(torch::kCPU);

    memcpy(W1, w1.data_ptr<float>(),  sizeof(float)*NN_H*NN_IN);
    memcpy(b1, bb1.data_ptr<float>(), sizeof(float)*NN_H);
    memcpy(W2, w2.data_ptr<float>(),  sizeof(float)*NN_OUT*NN_H);
    memcpy(b2, bb2.data_ptr<float>(), sizeof(float)*NN_OUT);

    return 1;
  }
  catch(const c10::Error& e) {
    printf("\n[LibTorch] Error: %s", e.what());
    return 0;
  }
  catch(...) {
    printf("\n[LibTorch] Unknown error.");
    return 0;
  }
}

// ===========================================================
//                    OpenCL (GL sharing)
// ===========================================================

// Set to 1 by cl_init_glshare() once everything below is valid; RenderFrame()
// does nothing while it is 0. cl_release_all() resets it.
static int gCL_Ready = 0;

// Core OpenCL objects, created in cl_init_glshare().
static cl_platform_id   gCL_Platform = 0;
static cl_device_id     gCL_Device   = 0;
static cl_context       gCL_Context  = 0;
static cl_command_queue gCL_Queue    = 0;
static cl_program       gCL_Program  = 0;

// The "nn_render" kernel from gCL_Source.
static cl_kernel        gCL_K_NN     = 0;

static cl_mem gCL_PBO = 0; // CL view of GL PBO

// Device-side copies of the network parameters (fc1 weight/bias, fc2 weight/bias).
static cl_mem gCL_W1 = 0;
static cl_mem gCL_b1 = 0;
static cl_mem gCL_W2 = 0;
static cl_mem gCL_b2 = 0;

// Two-step stringification so that XSTR(NN_H) yields "16" (the macro's value)
// rather than the literal text "NN_H".
#define STR2(x) #x
#define XSTR(x) STR2(x)

// OpenCL C source of the per-pixel inference kernel.
//
// Each work-item handles one pixel (global id 0 = column, 1 = row) and returns
// early when outside width x height. The pixel position is mapped to
// [-1, 1] on both axes (the second input is the negated y), pushed through
//   h = tanh(W1 * in + b1)          (NN_H hidden units)
//   o = 0.5 + 0.5 * tanh(W2 * h + b2), clamped to [0, 1]   (NN_OUT outputs)
// and written as an opaque RGBA pixel to out[ypix*width + xpix].
// W1 is indexed as [j*NN_IN + i] and W2 as [k*NN_H + j], i.e. one row per
// output unit, matching the order the host arrays are filled in.
// NOTE(review): the coordinate mapping divides by (width-1) and (height-1),
// so a 1-pixel-wide or 1-pixel-high surface would divide by zero.
static const char* gCL_Source =
"__kernel void nn_render(__global uchar4* out, int width, int height,              \n"
"  __global const float* W1, __global const float* b1,                             \n"
"  __global const float* W2, __global const float* b2)                             \n"
"{                                                                                 \n"
"  int xpix = (int)get_global_id(0);                                               \n"
"  int ypix = (int)get_global_id(1);                                               \n"
"  if(xpix >= width || ypix >= height) return;                                     \n"
"                                                                                  \n"
"  float x = ((float)xpix / (float)(width  - 1)) * 2.0f - 1.0f;                    \n"
"  float y = ((float)ypix / (float)(height - 1)) * 2.0f - 1.0f;                    \n"
"  float in0 = x;                                                                  \n"
"  float in1 = -y;                                                                 \n"
"                                                                                  \n"
"  float h[" XSTR(NN_H) "];                                                        \n"
"  for(int j=0;j<" XSTR(NN_H) ";j++){                                              \n"
"    float acc = b1[j];                                                            \n"
"    acc += in0 * W1[j*" XSTR(NN_IN) " + 0];                                       \n"
"    acc += in1 * W1[j*" XSTR(NN_IN) " + 1];                                       \n"
"    h[j] = tanh(acc);                                                             \n"
"  }                                                                               \n"
"                                                                                  \n"
"  float o[" XSTR(NN_OUT) "];                                                      \n"
"  for(int k=0;k<" XSTR(NN_OUT) ";k++){                                            \n"
"    float acc = b2[k];                                                            \n"
"    for(int j=0;j<" XSTR(NN_H) ";j++){                                            \n"
"      acc += h[j] * W2[k*" XSTR(NN_H) " + j];                                     \n"
"    }                                                                             \n"
"    float s = 0.5f + 0.5f*tanh(acc);                                              \n"
"    if(s<0) s=0; if(s>1) s=1;                                                     \n"
"    o[k] = s;                                                                     \n"
"  }                                                                               \n"
"                                                                                  \n"
"  uchar r = (uchar)(255.0f*o[0]);                                                 \n"
"  uchar g = (uchar)(255.0f*o[1]);                                                 \n"
"  uchar b = (uchar)(255.0f*o[2]);                                                 \n"
"  out[ypix*width + xpix] = (uchar4)(r,g,b,255);                                   \n"
"}                                                                                 \n";

static void cl_release_all()
{
  if(gCL_b2) { clReleaseMemObject(gCL_b2); gCL_b2 = 0; }
  if(gCL_W2) { clReleaseMemObject(gCL_W2); gCL_W2 = 0; }
  if(gCL_b1) { clReleaseMemObject(gCL_b1); gCL_b1 = 0; }
  if(gCL_W1) { clReleaseMemObject(gCL_W1); gCL_W1 = 0; }

  if(gCL_PBO)    { clReleaseMemObject(gCL_PBO);    gCL_PBO = 0; }
  if(gCL_K_NN)   { clReleaseKernel(gCL_K_NN);      gCL_K_NN = 0; }
  if(gCL_Program){ clReleaseProgram(gCL_Program);  gCL_Program = 0; }
  if(gCL_Queue)  { clReleaseCommandQueue(gCL_Queue); gCL_Queue = 0; }
  if(gCL_Context){ clReleaseContext(gCL_Context);  gCL_Context = 0; }

  gCL_Device = 0;
  gCL_Platform = 0;
  gCL_Ready = 0;
}

// Finds the first GPU device whose extension string advertises
// "cl_khr_gl_sharing".
//
// At most 8 platforms and 8 GPU devices per platform are examined.
// On success stores the platform/device in *outP / *outD and returns 1;
// returns 0 if nothing suitable is found or an output pointer is null.
//
// The second clGetPlatformIDs/clGetDeviceIDs call deliberately passes NULL
// for the count output: that parameter reports the number of entries
// *available*, not the number written, so letting it overwrite the clamped
// loop bound would index past the 8-element arrays on systems with more
// entries.
static int cl_pick_device_with_glshare(cl_platform_id* outP, cl_device_id* outD)
{
  enum { MAX_PLATFORMS = 8, MAX_DEVICES = 8 };

  if(!outP || !outD) return 0;

  cl_uint nPlatforms = 0;
  if(clGetPlatformIDs(0, 0, &nPlatforms) != CL_SUCCESS || nPlatforms == 0)
    return 0;
  if(nPlatforms > MAX_PLATFORMS) nPlatforms = MAX_PLATFORMS;

  cl_platform_id platforms[MAX_PLATFORMS];
  if(clGetPlatformIDs(nPlatforms, platforms, 0) != CL_SUCCESS)
    return 0;

  for(cl_uint p=0; p<nPlatforms; p++)
  {
    cl_uint nDev = 0;
    if(clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_GPU, 0, 0, &nDev) != CL_SUCCESS || nDev == 0)
      continue;
    if(nDev > MAX_DEVICES) nDev = MAX_DEVICES;

    cl_device_id devs[MAX_DEVICES];
    if(clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_GPU, nDev, devs, 0) != CL_SUCCESS)
      continue;

    for(cl_uint d=0; d<nDev; d++)
    {
      // A device whose extension list does not fit the buffer makes the
      // query fail and is skipped.
      char ext[8192];
      ext[0] = 0;
      if(clGetDeviceInfo(devs[d], CL_DEVICE_EXTENSIONS, sizeof(ext), ext, 0) != CL_SUCCESS)
        continue;
      ext[sizeof(ext) - 1] = 0; // never let strstr run past the buffer

      if(strstr(ext, "cl_khr_gl_sharing"))
      {
        *outP = platforms[p];
        *outD = devs[d];
        return 1;
      }
    }
  }

  return 0;
}

static int cl_init_glshare()
{
  cl_int err = CL_SUCCESS;

  cl_platform_id P = 0;
  cl_device_id   D = 0;

  if(!cl_pick_device_with_glshare(&P, &D)) {
    printf("\nOpenCL: no GPU device with cl_khr_gl_sharing found.");
    return 0;
  }

  gCL_Platform = P;
  gCL_Device   = D;

  cl_context_properties props[] = {
    CL_GL_CONTEXT_KHR,   (cl_context_properties)wglGetCurrentContext(),
    CL_WGL_HDC_KHR,      (cl_context_properties)wglGetCurrentDC(),
    CL_CONTEXT_PLATFORM, (cl_context_properties)gCL_Platform,
    0
  };

  gCL_Context = clCreateContext(props, 1, &gCL_Device, 0, 0, &err);
  if(err != CL_SUCCESS || !gCL_Context) { cl_release_all(); return 0; }

  gCL_Queue = clCreateCommandQueue(gCL_Context, gCL_Device, 0, &err);
  if(err != CL_SUCCESS || !gCL_Queue) { cl_release_all(); return 0; }

  gCL_Program = clCreateProgramWithSource(gCL_Context, 1, &gCL_Source, 0, &err);
  if(err != CL_SUCCESS || !gCL_Program) { cl_release_all(); return 0; }

  err = clBuildProgram(gCL_Program, 1, &gCL_Device, 0, 0, 0);
  if(err != CL_SUCCESS)
  {
    char logbuf[8192];
    size_t logsz = 0;
    clGetProgramBuildInfo(gCL_Program, gCL_Device, CL_PROGRAM_BUILD_LOG, sizeof(logbuf), logbuf, &logsz);
    printf("\nOpenCL build failed:\n%s", logbuf);
    cl_release_all();
    return 0;
  }

  gCL_K_NN = clCreateKernel(gCL_Program, "nn_render", &err);
  if(err != CL_SUCCESS || !gCL_K_NN) { cl_release_all(); return 0; }

  gCL_PBO = clCreateFromGLBuffer(gCL_Context, CL_MEM_WRITE_ONLY, gPBO, &err);
  if(err != CL_SUCCESS || !gCL_PBO) { cl_release_all(); return 0; }

  size_t bytesW1 = sizeof(float)*(size_t)NN_H*(size_t)NN_IN;
  size_t bytesb1 = sizeof(float)*(size_t)NN_H;
  size_t bytesW2 = sizeof(float)*(size_t)NN_OUT*(size_t)NN_H;
  size_t bytesb2 = sizeof(float)*(size_t)NN_OUT;

  gCL_W1 = clCreateBuffer(gCL_Context, CL_MEM_READ_ONLY, bytesW1, 0, &err);
  gCL_b1 = clCreateBuffer(gCL_Context, CL_MEM_READ_ONLY, bytesb1, 0, &err);
  gCL_W2 = clCreateBuffer(gCL_Context, CL_MEM_READ_ONLY, bytesW2, 0, &err);
  gCL_b2 = clCreateBuffer(gCL_Context, CL_MEM_READ_ONLY, bytesb2, 0, &err);
  if(err != CL_SUCCESS || !gCL_W1 || !gCL_b1 || !gCL_W2 || !gCL_b2) { cl_release_all(); return 0; }

  float hW1[NN_H*NN_IN];
  float hb1[NN_H];
  float hW2[NN_OUT*NN_H];
  float hb2[NN_OUT];

  if(!build_weights_from_libtorch(hW1, hb1, hW2, hb2)) {
    printf("\n[LibTorch] Failed to build weights.");
    cl_release_all();
    return 0;
  }

  err = clEnqueueWriteBuffer(gCL_Queue, gCL_W1, CL_TRUE, 0, bytesW1, hW1, 0, 0, 0);
  if(err != CL_SUCCESS) { cl_release_all(); return 0; }
  err = clEnqueueWriteBuffer(gCL_Queue, gCL_b1, CL_TRUE, 0, bytesb1, hb1, 0, 0, 0);
  if(err != CL_SUCCESS) { cl_release_all(); return 0; }
  err = clEnqueueWriteBuffer(gCL_Queue, gCL_W2, CL_TRUE, 0, bytesW2, hW2, 0, 0, 0);
  if(err != CL_SUCCESS) { cl_release_all(); return 0; }
  err = clEnqueueWriteBuffer(gCL_Queue, gCL_b2, CL_TRUE, 0, bytesb2, hb2, 0, 0, 0);
  if(err != CL_SUCCESS) { cl_release_all(); return 0; }

  gCL_Ready = 1;
  printf("\nOpenCL: GL-sharing enabled. NN kernel ready.");
  return 1;
}

// ===========================================================
//                      Render (CL -> GL)
// ===========================================================

static void RenderFrame()
{
  if(!gCL_Ready) return;

  size_t global[2] = { (size_t)gW, (size_t)gH };
  size_t local[2]  = { 16, 16 };

  cl_int err = CL_SUCCESS;

  err = clEnqueueAcquireGLObjects(gCL_Queue, 1, &gCL_PBO, 0, 0, 0);
  if(err != CL_SUCCESS) return;

  int arg = 0;
  clSetKernelArg(gCL_K_NN, arg++, sizeof(cl_mem), &gCL_PBO);
  clSetKernelArg(gCL_K_NN, arg++, sizeof(int),    &gW);
  clSetKernelArg(gCL_K_NN, arg++, sizeof(int),    &gH);
  clSetKernelArg(gCL_K_NN, arg++, sizeof(cl_mem), &gCL_W1);
  clSetKernelArg(gCL_K_NN, arg++, sizeof(cl_mem), &gCL_b1);
  clSetKernelArg(gCL_K_NN, arg++, sizeof(cl_mem), &gCL_W2);
  clSetKernelArg(gCL_K_NN, arg++, sizeof(cl_mem), &gCL_b2);

  err = clEnqueueNDRangeKernel(gCL_Queue, gCL_K_NN, 2, 0, global, local, 0, 0, 0);
  if(err != CL_SUCCESS) {
    err = clEnqueueNDRangeKernel(gCL_Queue, gCL_K_NN, 2, 0, global, 0, 0, 0, 0);
  }

  clEnqueueReleaseGLObjects(gCL_Queue, 1, &gCL_PBO, 0, 0, 0);
  clFinish(gCL_Queue);

  p_glBindBuffer(GL_PIXEL_UNPACK_BUFFER, gPBO);
  glBindTexture(GL_TEXTURE_2D, gTex);
  glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, gW, gH, GL_RGBA, GL_UNSIGNED_BYTE, 0);
  p_glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

  glClear(GL_COLOR_BUFFER_BIT);
  glEnable(GL_TEXTURE_2D);
  glBindTexture(GL_TEXTURE_2D, gTex);

  glBegin(GL_QUADS);
    glTexCoord2f(0,0); glVertex2f(-1,-1);
    glTexCoord2f(1,0); glVertex2f( 1,-1);
    glTexCoord2f(1,1); glVertex2f( 1, 1);
    glTexCoord2f(0,1); glVertex2f(-1, 1);
  glEnd();

  glBindTexture(GL_TEXTURE_2D, 0);
  SwapBuffers(gHdc);
}

// ===========================================================
//                         WinMain
// ===========================================================

int WINAPI WinMain(HINSTANCE hInst, HINSTANCE, LPSTR, int)
{
  // 0 means no auto-close; window stays until user closes it.
  const int maxSeconds = read_env_int("MENDB02_MAX_SECONDS", 0);
  ULONGLONG startTick = GetTickCount64();

  const char* szClass = "Mendb02NNCLGLClass";
  UnregisterClassA(szClass, hInst);

  WNDCLASSEXA wc;
  ZeroMemory(&wc, sizeof(wc));
  wc.cbSize = sizeof(wc);
  wc.style = CS_HREDRAW | CS_VREDRAW;
  wc.lpfnWndProc = WndProc;
  wc.hInstance = hInst;
  wc.hCursor = LoadCursor(NULL, IDC_ARROW);
  wc.lpszClassName = szClass;
  RegisterClassExA(&wc);

  RECT r;
  r.left=0; r.top=0; r.right=gW; r.bottom=gH;
  AdjustWindowRect(&r, WS_OVERLAPPEDWINDOW, FALSE);

  HWND hwnd = CreateWindowExA(
    0, szClass, "NN Render (LibTorch weights + OpenCL + OpenGL)",
    WS_OVERLAPPEDWINDOW,
    100, 100, (r.right-r.left), (r.bottom-r.top),
    0, 0, hInst, 0);

  if(!hwnd) return 0;

  ShowWindow(hwnd, SW_SHOW);
  UpdateWindow(hwnd);

  if(!gl_init_wgl(hwnd))
  {
    MessageBoxA(hwnd, "OpenGL init failed", "Error", MB_OK);
    gl_release_all();
    return 0;
  }

  if(!cl_init_glshare())
  {
    MessageBoxA(hwnd, "OpenCL GL-sharing init failed", "Error", MB_OK);
    cl_release_all();
    gl_release_all();
    return 0;
  }

  MSG msg;
  ZeroMemory(&msg, sizeof(msg));

  while(msg.message != WM_QUIT)
  {
    while(PeekMessage(&msg, NULL, 0, 0, PM_REMOVE))
    {
      TranslateMessage(&msg);
      DispatchMessage(&msg);
    }

    // Allow Zorro STOP to close this Win32 loop cleanly, but ignore
    // the sticky FIRSTINITRUN+EXITRUN combo seen at startup.
    if(is(EXITRUN) && !is(FIRSTINITRUN)) {
      PostMessage(hwnd, WM_CLOSE, 0, 0);
    }

    if(!IsWindow(hwnd))
      break;

    if(maxSeconds > 0 && (GetTickCount64() - startTick) >= (ULONGLONG)maxSeconds * 1000ULL) {
      PostMessage(hwnd, WM_CLOSE, 0, 0);
    }

    RenderFrame();
  }

  cl_release_all();
  gl_release_all();
  gHwnd = 0;
  return 0;
}

// ===========================================================
//                         Input
// ===========================================================

// Window procedure for the render window.
//   WM_CLOSE   -> destroys the window.
//   WM_KEYDOWN -> Escape or F12 posts WM_CLOSE; every key press is consumed.
//   WM_DESTROY -> posts WM_QUIT so the render loop in WinMain ends.
// Everything else is forwarded to DefWindowProc.
LRESULT CALLBACK WndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
{
  if(msg == WM_CLOSE) {
    DestroyWindow(hWnd);
    return 0;
  }

  if(msg == WM_KEYDOWN) {
    const bool wantsClose = (wParam == VK_ESCAPE) || (wParam == VK_F12);
    if(wantsClose)
      PostMessage(hWnd, WM_CLOSE, 0, 0);
    return 0;
  }

  if(msg == WM_DESTROY) {
    PostQuitMessage(0);
    return 0;
  }

  return DefWindowProc(hWnd, msg, wParam, lParam);
}

// ===========================================================
//                         Zorro DLL entry
// ===========================================================

// Zorro entry point. Configures a single cycle, then runs the interactive
// window loop (WinMain) at most once per session: a static flag remembers
// that the loop already ran and is reset whenever FIRSTINITRUN is set.
// Returns 0 when the loop has already run, otherwise the result of quit().
DLLFUNC int main()
{
  // Force single-cycle execution in Zorro to avoid automatic relaunches.
  NumTotalCycles  = 1;
  NumWFOCycles    = 1;
  NumSampleCycles = 1;
  set(TESTNOW|OFF,ALLCYCLES|OFF,PARAMETERS|OFF,FACTORS|OFF,RULES|OFF);

  static int alreadyRan = 0;
  if(is(FIRSTINITRUN)) {
    alreadyRan = 0;
  }
  if(alreadyRan != 0) {
    return 0;
  }

  // Blocks until the window is closed; the return value is not used.
  WinMain(GetModuleHandleA(NULL), NULL, GetCommandLineA(), SW_SHOWDEFAULT);
  alreadyRan = 1;

  return quit("!Mendb02 finished");
}

Last edited by TipmyPip; Yesterday at 18:04.