I reproduced the behaviour in a short example.
It uses "fisch1.mdl", "explo.wav", "explosin.wav" and "explosion.wav", which I found in the 3DGS samples.
It does a simple animation of the model (could be any model) and plays a sound if you press a number key (1-3).
It turned out that the delay occurs only the first time any sound is played, not for each sound. So it should not occur in the final version of the game if an intro sound is played.

If you want to reproduce the effect, here is the code:


///////////////////////////////
#include <acknex.h>
#include <default.c>
#include <mtlFX.c>
///////////////////////////////

// Sound assets used by Sound(): each one is played when the
// matching number key (1, 2 or 3) is pressed.
SOUND* Sound1 = "explo.wav";     // played on key 1
SOUND* Sound2 = "explosin.wav";  // played on key 2
SOUND* Sound3 = "explosion.wav"; // played on key 3

// Entity action: do some silly movements.
// Moves the entity along a circle of radius 100 in the XY plane, turns it
// by the negated circle angle, and loops its default animation. Runs forever,
// advancing once per frame.
action AnimateFish()
{
	var degree = 0;      // current angle on the circle, in degrees
	var movePercent = 0; // animation progress, in percent

	while (1)
	{
		// Keep both counters inside one period. 'var' is a fixed-point type
		// with a limited range, so an ever-growing accumulator would
		// eventually overflow during a long run. Wrapping does not change
		// the result: sin/cos and pan repeat every 360 degrees, and
		// ANM_CYCLE already wraps the animation percentage at 100.
		degree = cycle(degree + time_step*5, 0, 360);
		movePercent = cycle(movePercent + time_step*30, 0, 100);

		me.x = 100*sin(degree);
		me.y = 100*cos(degree);
		me.pan = -degree;
		ent_animate(me,"",movePercent,ANM_CYCLE);
		wait(1); // continue on the next frame
	}
}

function Sound()
{
while(1)
{
if (key_1)
{
snd_play(Sound1,50,0);
}
if (key_2)
{
snd_play(Sound2,50,0);
}
if (key_3)
{
snd_play(Sound3,50,0);
}
wait(1);
}
}


// Entry point of the demo: configures the engine, loads with no level file,
// positions the camera, creates the animated model and starts the key/sound
// polling loop.
function main()
{
	// engine settings, applied before loading
	preload_mode = 3; // preload entities
	video_screen = 1; // full screen

	level_load(NULL);

	// camera placement: behind the origin, raised, looking slightly down
	camera.tilt = -20;
	camera.z = 100;
	camera.x = -300;

	// the model at the origin, driven by the AnimateFish action
	ent_create("fisch1.mdl", vector(0,0,0), AnimateFish);

	// start polling keys 1-3 for sound playback
	Sound();
}