// mandelbrot_gl_cl_demo.cpp
// Win32 + WGL(OpenGL) display + OpenCL compute (CL/GL sharing)
//
// Build notes (MSVC):
// link: OpenGL32.lib OpenCL.lib User32.lib Gdi32.lib
//
// If you want this as a Zorro64 C++ strategy DLL, you can add:
// #include <zorro.h>
// and change entry point to DLLFUNC int main() { ...; return quit("done"); }
// For now it's a plain Win32 EXE-style main(WINARGS).
#include <windows.h>
#include <stdio.h>
#include <math.h>
#include <stddef.h>
#include <zorro.h>
#include <CL/cl.h>
#include <CL/cl_gl.h> // cl_khr_gl_sharing
#include <GL/gl.h>
// ------------------------- Globals -------------------------
static HWND gHwnd = 0;
static HDC gHdc = 0;
static HGLRC gHgl = 0;
static int gW = 640;
static int gH = 480;
static double m_x = 0.344142;
static double m_y = 0.075094;
static double m_width = 0.017813;
static int gDetail = 200;
// ------------------------- WinProc forward -------------------------
LRESULT CALLBACK WndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
// ===========================================================
// Minimal OpenGL function loading
// ===========================================================
#ifndef GL_ARRAY_BUFFER
#define GL_ARRAY_BUFFER 0x8892
#endif
#ifndef GL_PIXEL_UNPACK_BUFFER
#define GL_PIXEL_UNPACK_BUFFER 0x88EC
#endif
#ifndef GL_DYNAMIC_DRAW
#define GL_DYNAMIC_DRAW 0x88E8
#endif
#ifndef APIENTRY
#define APIENTRY __stdcall
#endif
#ifndef APIENTRYP
#define APIENTRYP APIENTRY *
#endif
typedef void (APIENTRYP PFNGLGENBUFFERSPROC)(GLsizei, GLuint*);
typedef void (APIENTRYP PFNGLBINDBUFFERPROC)(GLenum, GLuint);
typedef void (APIENTRYP PFNGLBUFFERDATAPROC)(GLenum, ptrdiff_t, const void*, GLenum);
typedef void (APIENTRYP PFNGLDELETEBUFFERSPROC)(GLsizei, const GLuint*);
static PFNGLGENBUFFERSPROC p_glGenBuffers = 0;
static PFNGLBINDBUFFERPROC p_glBindBuffer = 0;
static PFNGLBUFFERDATAPROC p_glBufferData = 0;
static PFNGLDELETEBUFFERSPROC p_glDeleteBuffers = 0;
static void* gl_get_proc(const char* name)
{
void* p = (void*)wglGetProcAddress(name);
if(!p) {
HMODULE ogl = GetModuleHandleA("opengl32.dll");
if(ogl) p = (void*)GetProcAddress(ogl, name);
}
return p;
}
static int gl_load_ext()
{
p_glGenBuffers = (PFNGLGENBUFFERSPROC)gl_get_proc("glGenBuffers");
p_glBindBuffer = (PFNGLBINDBUFFERPROC)gl_get_proc("glBindBuffer");
p_glBufferData = (PFNGLBUFFERDATAPROC)gl_get_proc("glBufferData");
p_glDeleteBuffers = (PFNGLDELETEBUFFERSPROC)gl_get_proc("glDeleteBuffers");
if(!p_glGenBuffers || !p_glBindBuffer || !p_glBufferData || !p_glDeleteBuffers)
return 0;
return 1;
}
// ===========================================================
// OpenGL objects
// ===========================================================
static GLuint gPBO = 0;
static GLuint gTex = 0;
static void gl_release_all()
{
if(gTex) {
glDeleteTextures(1, &gTex);
gTex = 0;
}
if(gPBO) {
p_glDeleteBuffers(1, &gPBO);
gPBO = 0;
}
if(gHgl) { wglMakeCurrent(NULL, NULL); wglDeleteContext(gHgl); gHgl = 0; }
if(gHdc && gHwnd) { ReleaseDC(gHwnd, gHdc); gHdc = 0; }
}
static int gl_init_wgl(HWND hwnd)
{
gHwnd = hwnd;
gHdc = GetDC(hwnd);
if(!gHdc) return 0;
PIXELFORMATDESCRIPTOR pfd;
ZeroMemory(&pfd, sizeof(pfd));
pfd.nSize = sizeof(pfd);
pfd.nVersion = 1;
pfd.dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER;
pfd.iPixelType = PFD_TYPE_RGBA;
pfd.cColorBits = 32;
pfd.cDepthBits = 16;
pfd.iLayerType = PFD_MAIN_PLANE;
int pf = ChoosePixelFormat(gHdc, &pfd);
if(pf == 0) return 0;
if(!SetPixelFormat(gHdc, pf, &pfd)) return 0;
gHgl = wglCreateContext(gHdc);
if(!gHgl) return 0;
if(!wglMakeCurrent(gHdc, gHgl)) return 0;
if(!gl_load_ext()) {
printf("\nOpenGL buffer functions not available (need VBO/PBO support).");
return 0;
}
// Setup a basic 2D projection for immediate-mode quad
glDisable(GL_DEPTH_TEST);
glViewport(0, 0, gW, gH);
// Create PBO for RGBA pixels
p_glGenBuffers(1, &gPBO);
p_glBindBuffer(GL_PIXEL_UNPACK_BUFFER, gPBO);
p_glBufferData(GL_PIXEL_UNPACK_BUFFER, (ptrdiff_t)(gW * gH * 4), 0, GL_DYNAMIC_DRAW);
p_glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
// Create texture
glGenTextures(1, &gTex);
glBindTexture(GL_TEXTURE_2D, gTex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, gW, gH, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0);
glBindTexture(GL_TEXTURE_2D, 0);
return 1;
}
// ===========================================================
// OpenCL (GL sharing)
// ===========================================================
static int gCL_Ready = 0;
static cl_platform_id gCL_Platform = 0;
static cl_device_id gCL_Device = 0;
static cl_context gCL_Context = 0;
static cl_command_queue gCL_Queue = 0;
static cl_program gCL_Program = 0;
static cl_kernel gCL_K_Mandel = 0;
static cl_mem gCL_PBO = 0; // CL view of GL PBO
static const char* gCL_Source =
"__kernel void mandelbrot(__global uchar4* out, int width, int height, \n"
" double mx, double my, double mwidth, int detail) \n"
"{ \n"
" int xpix = get_global_id(0); \n"
" int ypix = get_global_id(1); \n"
" if(xpix >= width || ypix >= height) return; \n"
" \n"
" double x0 = mx + ((double)xpix) * mwidth / (double)width; \n"
" double y0 = my + ((double)(height - 1 - ypix)) * mwidth / (double)width; \n"
" \n"
" double zx = 0.0, zy = 0.0; \n"
" int times = 0; \n"
" int inset = 1; \n"
" while(inset && times < detail) \n"
" { \n"
" times++; \n"
" double zxs = zx*zx; \n"
" double zys = zy*zy; \n"
" zy = 2.0*zx*zy + y0; \n"
" zx = zxs - zys + x0; \n"
" if(zxs + zys >= 4.0) inset = 0; \n"
" } \n"
" \n"
" uchar4 c; \n"
" if(inset) { \n"
" c = (uchar4)(0,0,0,255); \n"
" } else { \n"
" float t = (float)times / (float)detail; \n"
" float r = clamp(1.5f - fabs(4.f*t - 3.f), 0.f, 1.f); \n"
" float g = clamp(1.5f - fabs(4.f*t - 2.f), 0.f, 1.f); \n"
" float b = clamp(1.5f - fabs(4.f*t - 1.f), 0.f, 1.f); \n"
" c = (uchar4)((uchar)(255*r),(uchar)(255*g),(uchar)(255*b),255); \n"
" } \n"
" \n"
" out[ypix*width + xpix] = c; \n"
"} \n";
static void cl_release_all()
{
if(gCL_PBO) { clReleaseMemObject(gCL_PBO); gCL_PBO = 0; }
if(gCL_K_Mandel){ clReleaseKernel(gCL_K_Mandel); gCL_K_Mandel = 0; }
if(gCL_Program) { clReleaseProgram(gCL_Program); gCL_Program = 0; }
if(gCL_Queue) { clReleaseCommandQueue(gCL_Queue);gCL_Queue = 0; }
if(gCL_Context) { clReleaseContext(gCL_Context); gCL_Context = 0; }
gCL_Device = 0;
gCL_Platform = 0;
gCL_Ready = 0;
}
// Find a GPU device that supports cl_khr_gl_sharing (best effort)
static int cl_pick_device_with_glshare(cl_platform_id* outP, cl_device_id* outD)
{
cl_uint nPlatforms = 0;
if(clGetPlatformIDs(0, 0, &nPlatforms) != CL_SUCCESS || nPlatforms == 0)
return 0;
cl_platform_id platforms[8];
if(nPlatforms > 8) nPlatforms = 8;
if(clGetPlatformIDs(nPlatforms, platforms, &nPlatforms) != CL_SUCCESS)
return 0;
for(cl_uint p=0; p<nPlatforms; p++)
{
cl_uint nDev = 0;
if(clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_GPU, 0, 0, &nDev) != CL_SUCCESS || nDev == 0)
continue;
cl_device_id devs[8];
if(nDev > 8) nDev = 8;
if(clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_GPU, nDev, devs, &nDev) != CL_SUCCESS)
continue;
for(cl_uint d=0; d<nDev; d++)
{
char ext[8192];
size_t sz = 0;
if(clGetDeviceInfo(devs[d], CL_DEVICE_EXTENSIONS, sizeof(ext), ext, &sz) != CL_SUCCESS)
continue;
// Must contain cl_khr_gl_sharing for Acquire/Release GL objects
if(strstr(ext, "cl_khr_gl_sharing"))
{
*outP = platforms[p];
*outD = devs[d];
return 1;
}
}
}
return 0;
}
static int cl_init_glshare()
{
cl_int err = CL_SUCCESS;
cl_platform_id P = 0;
cl_device_id D = 0;
if(!cl_pick_device_with_glshare(&P, &D)) {
printf("\nOpenCL: no GPU device with cl_khr_gl_sharing found.");
return 0;
}
gCL_Platform = P;
gCL_Device = D;
// Create an OpenCL context that shares with the CURRENT WGL context
cl_context_properties props[] = {
CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(),
CL_CONTEXT_PLATFORM, (cl_context_properties)gCL_Platform,
0
};
gCL_Context = clCreateContext(props, 1, &gCL_Device, 0, 0, &err);
if(err != CL_SUCCESS || !gCL_Context) { cl_release_all(); return 0; }
gCL_Queue = clCreateCommandQueue(gCL_Context, gCL_Device, 0, &err);
if(err != CL_SUCCESS || !gCL_Queue) { cl_release_all(); return 0; }
gCL_Program = clCreateProgramWithSource(gCL_Context, 1, &gCL_Source, 0, &err);
if(err != CL_SUCCESS || !gCL_Program) { cl_release_all(); return 0; }
// Enable double precision if needed by your device; some require -cl-fast-relaxed-math etc.
// We keep it empty; if build fails, print log.
err = clBuildProgram(gCL_Program, 1, &gCL_Device, 0, 0, 0);
if(err != CL_SUCCESS)
{
char logbuf[8192];
size_t logsz = 0;
clGetProgramBuildInfo(gCL_Program, gCL_Device, CL_PROGRAM_BUILD_LOG, sizeof(logbuf), logbuf, &logsz);
printf("\nOpenCL build failed:\n%s", logbuf);
cl_release_all();
return 0;
}
gCL_K_Mandel = clCreateKernel(gCL_Program, "mandelbrot", &err);
if(err != CL_SUCCESS || !gCL_K_Mandel) { cl_release_all(); return 0; }
// Create CL memory object from GL PBO
gCL_PBO = clCreateFromGLBuffer(gCL_Context, CL_MEM_WRITE_ONLY, gPBO, &err);
if(err != CL_SUCCESS || !gCL_PBO) { cl_release_all(); return 0; }
gCL_Ready = 1;
printf("\nOpenCL: GL-sharing enabled.");
return 1;
}
// ===========================================================
// Render (CL -> GL)
// ===========================================================
static void RenderFrame()
{
if(!gCL_Ready) return;
size_t global[2];
global[0] = (size_t)gW;
global[1] = (size_t)gH;
// Optional local size (often helps). If it fails on some devices, set NULL instead.
size_t local[2];
local[0] = 16;
local[1] = 16;
cl_int err = CL_SUCCESS;
// Acquire the GL PBO for CL
err = clEnqueueAcquireGLObjects(gCL_Queue, 1, &gCL_PBO, 0, 0, 0);
if(err != CL_SUCCESS) return;
int arg = 0;
clSetKernelArg(gCL_K_Mandel, arg++, sizeof(cl_mem), &gCL_PBO);
clSetKernelArg(gCL_K_Mandel, arg++, sizeof(int), &gW);
clSetKernelArg(gCL_K_Mandel, arg++, sizeof(int), &gH);
clSetKernelArg(gCL_K_Mandel, arg++, sizeof(double), &m_x);
clSetKernelArg(gCL_K_Mandel, arg++, sizeof(double), &m_y);
clSetKernelArg(gCL_K_Mandel, arg++, sizeof(double), &m_width);
clSetKernelArg(gCL_K_Mandel, arg++, sizeof(int), &gDetail);
err = clEnqueueNDRangeKernel(gCL_Queue, gCL_K_Mandel, 2, 0, global, local, 0, 0, 0);
if(err != CL_SUCCESS)
{
// Fallback: try without local size
err = clEnqueueNDRangeKernel(gCL_Queue, gCL_K_Mandel, 2, 0, global, 0, 0, 0, 0);
}
clEnqueueReleaseGLObjects(gCL_Queue, 1, &gCL_PBO, 0, 0, 0);
clFinish(gCL_Queue);
// Upload PBO -> texture (GPU->GPU)
p_glBindBuffer(GL_PIXEL_UNPACK_BUFFER, gPBO);
glBindTexture(GL_TEXTURE_2D, gTex);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, gW, gH, GL_RGBA, GL_UNSIGNED_BYTE, 0);
p_glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
// Draw fullscreen quad (compat immediate mode)
glClear(GL_COLOR_BUFFER_BIT);
glEnable(GL_TEXTURE_2D);
glBindTexture(GL_TEXTURE_2D, gTex);
glBegin(GL_QUADS);
glTexCoord2f(0,0); glVertex2f(-1,-1);
glTexCoord2f(1,0); glVertex2f( 1,-1);
glTexCoord2f(1,1); glVertex2f( 1, 1);
glTexCoord2f(0,1); glVertex2f(-1, 1);
glEnd();
glBindTexture(GL_TEXTURE_2D, 0);
SwapBuffers(gHdc);
}
// ===========================================================
// WinMain
// ===========================================================
int WINAPI WinMain(HINSTANCE hInst, HINSTANCE, LPSTR, int)
{
const char* szClass = "MandelCLGLClass";
UnregisterClassA(szClass, hInst);
WNDCLASSEXA wc;
ZeroMemory(&wc, sizeof(wc));
wc.cbSize = sizeof(wc);
wc.style = CS_HREDRAW | CS_VREDRAW;
wc.lpfnWndProc = WndProc;
wc.hInstance = hInst;
wc.hCursor = LoadCursor(NULL, IDC_ARROW);
wc.lpszClassName = szClass;
RegisterClassExA(&wc);
RECT r;
r.left=0; r.top=0; r.right=gW; r.bottom=gH;
AdjustWindowRect(&r, WS_OVERLAPPEDWINDOW, FALSE);
HWND hwnd = CreateWindowExA(
0, szClass, "Mandelbrot (OpenCL compute + OpenGL display)",
WS_OVERLAPPEDWINDOW,
100, 100, (r.right-r.left), (r.bottom-r.top),
0, 0, hInst, 0);
if(!hwnd) return 0;
ShowWindow(hwnd, SW_SHOW);
UpdateWindow(hwnd);
// Init OpenGL first (we need a current GL context before creating CL shared context)
if(!gl_init_wgl(hwnd))
{
MessageBoxA(hwnd, "OpenGL init failed", "Error", MB_OK);
gl_release_all();
return 0;
}
if(!cl_init_glshare())
{
MessageBoxA(hwnd, "OpenCL GL-sharing init failed", "Error", MB_OK);
cl_release_all();
gl_release_all();
return 0;
}
// Message loop + continuous redraw (idle rendering)
MSG msg;
ZeroMemory(&msg, sizeof(msg));
while(msg.message != WM_QUIT)
{
while(PeekMessage(&msg, NULL, 0, 0, PM_REMOVE))
{
TranslateMessage(&msg);
DispatchMessage(&msg);
}
RenderFrame();
}
cl_release_all();
gl_release_all();
gHwnd = 0;
return 0;
}
// ===========================================================
// Input / Zoom
// ===========================================================
LRESULT CALLBACK WndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
{
switch(msg)
{
case WM_RBUTTONDOWN:
// reset to full fractal
m_x = -2.5;
m_y = -2.0;
m_width = 4.0;
return 0;
case WM_LBUTTONDOWN:
{
RECT rect;
GetClientRect(hWnd, &rect);
int x = (int)(lParam & 0xFFFF);
int y = (int)((lParam >> 16) & 0xFFFF);
double zoom = 0.5;
double ax = (double)x / (double)(rect.right ? rect.right : 1);
double ay = (double)y / (double)(rect.bottom ? rect.bottom : 1);
m_x += m_width * (ax - 0.5);
m_y += m_width * (0.5 - ay);
m_width *= zoom;
return 0;
}
case WM_KEYDOWN:
if(wParam == VK_ESCAPE || wParam == VK_F12) {
PostMessage(hWnd, WM_CLOSE, 0, 0);
return 0;
}
return 0;
case WM_DESTROY:
PostQuitMessage(0);
return 0;
}
return DefWindowProc(hWnd, msg, wParam, lParam);
}
DLLFUNC int main()
{
return WinMain(GetModuleHandleA(NULL), NULL, GetCommandLineA(), SW_SHOWDEFAULT);
}