OK, this looks quite good to me now,
and it is still very fast.

Code:
////////////////////////////////////////////////////
// Simple shadow mapping shader
// Copyright (c) 2007 Conitec.
// (with reduced darkness..)
// based on modifications by Jibb Smart 
////////////////////////////////////////////////////

// Enables the 9-tap percentage closer filtering path in ShadowPS.
// Without this define a single shadow-map lookup is used instead.
#define USE_PCF

// ---- Tweakables ----
// Shadow term returned by fDist() when the shadow-map sample is closer
// than the (scaled) fragment depth, i.e. the fragment is in shadow.
static const float fDark        = 0.6f;
// Lower end of the diffuse term (used when the surface faces away from the sun).
static const float fDarkDiffuse = 0.2f;
// Shadow term returned by fDist() for lit fragments; also the upper end
// of the diffuse term.
static const float fBright      = 1.4f;
// Factor the fragment depth is multiplied by before the shadow-map comparison.
static const float fDepthOffset = 0.99f;
// Scale applied to the PCF tap offsets before they are added to the
// projective shadow-map coordinate.
static const float fPCF         = 0.9f;

// ---- Data supplied by the application ----
const float4x4 matWorldViewProj; // World*view*projection matrix
const float4x4 matWorld;         // World matrix
const float4x4 matMtl;           // Precalculated texture projection matrix
const float4   vecSunDir;        // Sun direction vector

// Texture read through DepthSampler for the shadow depth comparison.
texture TargetMap;
// Texture read through TexSampler for the surface colour.
texture entSkin1;

// Sampler for the depth comparison; all sampler states are left at their defaults.
sampler DepthSampler = sampler_state
{
	Texture = <TargetMap>;
};

// Sampler for the surface texture, with linear filtering between mip levels.
sampler TexSampler = sampler_state
{
	Texture   = <entSkin1>;
	Mipfilter = Linear;
};

// Shadow mapping vertex shader.
//
// Inputs:
//   inPos     - vertex position in object space
//   inTex     - texture coordinate of the vertex
//   inNormal  - vertex normal in object space
// Outputs:
//   outPos    - vertex position in clip space
//   outTex    - texture coordinate, passed through unchanged
//   outNormal - normalized normal in world space
//   outDepth  - projective texture coordinate used for the shadow-map lookup
void ShadowVS (in float4 inPos: POSITION,
		in float2 inTex: TEXCOORD0,
		in float3 inNormal: NORMAL,
		out float4 outPos: POSITION,
		out float2 outTex: TEXCOORD0,
		out float3 outNormal: TEXCOORD1,
		out float4 outDepth: TEXCOORD2)
{
// Transform the vertex from object space to clip space:
	outPos = mul(inPos, matWorldViewProj);

// Transform the normal from object space to world space.
// A normal is a direction, so only the upper-left 3x3 part of the world
// matrix is applied; the explicit cast avoids relying on the compiler's
// implicit truncation when multiplying a float3 by a float4x4.
// NOTE(review): a non-uniformly scaled world matrix would need the
// inverse-transpose here - confirm whether such entities are rendered.
	outNormal = normalize(mul(inNormal, (float3x3)matWorld));

// Pass the texture coordinate to the pixel shader:
	outTex = inTex;

// Output the projective texture coordinates: the world-space position
// transformed by the precalculated texture projection matrix.
	float4 vWorldPos = mul(inPos, matWorld);
	outDepth = mul(vWorldPos, matMtl);
}

// Depth comparison for one shadow-map tap.
//
//   DepthCoord - projective texture coordinate of the tap
//   fDepth     - depth of the fragment being shaded
//
// Returns fDark when the red channel of the shadow-map sample is smaller
// than the fragment depth scaled by fDepthOffset, otherwise fBright.
float fDist(float4 DepthCoord,float fDepth)
{
	// Value read from the shadow map at the projected coordinate.
	float fStored = tex2Dproj(DepthSampler, DepthCoord).r;

	// Fragment depth scaled by fDepthOffset before the comparison
	// (presumably to reduce self-shadowing - tune via fDepthOffset).
	float fLimit = fDepth * fDepthOffset;

	float fResult = fBright;
	if (fStored < fLimit)
	{
		fResult = fDark;
	}
	return fResult;
}

#ifdef USE_PCF
// Offsets of the 9 PCF taps, a 3x3 grid centred on the fragment.
// Only x and y are offset; z and w are zero, so adding a tap leaves the
// z and w components of the shadow-map coordinate unchanged.
static const float4 fTaps_PCF[9] = {
	float4(-1.0, -1.0, 0.0, 0.0), float4(-1.0,  0.0, 0.0, 0.0), float4(-1.0,  1.0, 0.0, 0.0),
	float4( 0.0, -1.0, 0.0, 0.0), float4( 0.0,  0.0, 0.0, 0.0), float4( 0.0,  1.0, 0.0, 0.0),
	float4( 1.0, -1.0, 0.0, 0.0), float4( 1.0,  0.0, 0.0, 0.0), float4( 1.0,  1.0, 0.0, 0.0)
};
#endif

// Shadow mapping pixel shader.
//
// Inputs:
//   inPos    - position semantic; not read by this shader
//   inTex    - texture coordinate for the surface texture
//   inNormal - interpolated world-space normal (re-normalized here)
//   inDepth  - projective shadow-map coordinate; its z is used as the
//              fragment depth for the comparison
// Returns the surface texture colour with its rgb scaled by the darker of
// the shadow term and the diffuse term. The texture's alpha is passed
// through unchanged so lighting does not alter transparency.
float4 ShadowPS (in float4 inPos: POSITION,
					in float2 inTex: TEXCOORD0,
					in float3 inNormal: TEXCOORD1,
					in float4 inDepth: TEXCOORD2) : COLOR0
{
// Calculate the diffuse term:
// N.L clamped to [0,1], remapped to the range fDarkDiffuse..fBright.
// Only the xyz part of the sun direction takes part in the dot product.
	float fLambert = saturate(dot(-vecSunDir.xyz, normalize(inNormal)));
	float fDiffuse = lerp(fDarkDiffuse, fBright, fLambert);

// Calculate the shadow term
#ifdef USE_PCF
	// Average the depth comparison over the 9 taps.
	float fShadow = 0.0;
	for (int i=0; i < 9; i++)
	{
		float4 fTap = inDepth + fPCF*fTaps_PCF[i];
		fShadow += fDist(fTap,inDepth.z)/9;
	}
#else
	// Single depth comparison at the fragment's own coordinate.
	float fShadow = fDist(inDepth,inDepth.z);
#endif

// Apply the darker of the two terms to the colour channels only;
// scaling the whole float4 would also scale the texture's alpha.
	float4 vColor = tex2D(TexSampler,inTex);
	vColor.rgb *= min(fShadow, fDiffuse);
	return vColor;
}

// Single-pass technique: renders with ShadowVS and ShadowPS,
// compiled for the vs_2_0 and ps_2_0 shader profiles.
technique techShadow
{
	pass p0
	{
		VertexShader = compile vs_2_0 ShadowVS();
		PixelShader  = compile ps_2_0 ShadowPS();
	}
}