Oh, I see now: the matrix is correct, but you forgot the perspective divide.

Try this:
// Output structure; each member is bound to the semantic named after the colon.
struct VS_OUTPUT
{
    float4 Pos      : POSITION;   // four-component position
    float4 texCoord : TEXCOORD0;  // declared as float4 (all four components, including w, are carried)
    float3 lightVec : TEXCOORD1;  // three-component vector on the second texcoord slot
};


// Transform Pos by matMtl; the result keeps all four components (tps is a float4).
float4 tps = mul(Pos, matMtl);
// Scale and bias every component, w included.
// NOTE(review): the scale/bias is applied to w as well, before the divide below —
// confirm that this ordering is what the projection setup expects.
tps = (tps * 0.005f) + 0.5f;
// Perspective divide: bring x, y and z into the w = 1 plane.
tps.xyz = tps.xyz / tps.w;