Show More
Commit Description:
Various UI improvements.
Commit Description:
Various UI improvements.
References:
File last commit:
Show/Diff file:
Action:
FNA/lib/MojoShader/profiles/mojoshader_profile_arb1.c
2252 lines | 79.5 KiB | text/x-c | CLexer
2252 lines | 79.5 KiB | text/x-c | CLexer
r0 | /** | |||
* MojoShader; generate shader programs from bytecode of compiled | ||||
* Direct3D shaders. | ||||
* | ||||
* Please see the file LICENSE.txt in the source's root directory. | ||||
* | ||||
* This file written by Ryan C. Gordon. | ||||
*/ | ||||
#define __MOJOSHADER_INTERNAL__ 1 | ||||
#include "mojoshader_profile.h" | ||||
#pragma GCC visibility push(hidden) | ||||
#if SUPPORT_PROFILE_ARB1 | ||||
static inline const char *get_ARB1_register_string(Context *ctx, | ||||
const RegisterType regtype, const int regnum, | ||||
char *regnum_str, const size_t regnum_size) | ||||
{ | ||||
// turns out these are identical at the moment. | ||||
return get_D3D_register_string(ctx,regtype,regnum,regnum_str,regnum_size); | ||||
} // get_ARB1_register_string | ||||
// Hands out the next scratch register index and bumps the in-use counter.
// The high-water mark (max_scratch_registers) is raised when the new index
// reaches or exceeds it.
int allocate_scratch_register(Context *ctx)
{
    const int idx = ctx->scratch_registers;
    ctx->scratch_registers++;

    if (ctx->max_scratch_registers <= idx)
        ctx->max_scratch_registers = idx + 1;

    return idx;
} // allocate_scratch_register
// Returns the current branch label id and advances the counter by one.
int allocate_branch_label(Context *ctx)
{
    const int label = ctx->assigned_branch_labels;
    ctx->assigned_branch_labels++;
    return label;
} // allocate_branch_label
// Allocates a new scratch register and writes its name ("scratchN") into
// buf (at most buflen bytes). Returns buf.
const char *allocate_ARB1_scratch_reg_name(Context *ctx, char *buf,
                                           const size_t buflen)
{
    snprintf(buf, buflen, "scratch%d", allocate_scratch_register(ctx));
    return buf;
} // allocate_ARB1_scratch_reg_name
// Formats the name of branch label `id` ("branch_labelN") into buf
// (at most buflen bytes). Returns buf. ctx is not consulted.
static inline const char *get_ARB1_branch_label_name(Context *ctx, const int id,
                                                char *buf, const size_t buflen)
{
    (void) ctx;  // unused; kept for a uniform helper signature.
    snprintf(buf, buflen, "branch_label%d", id);
    return buf;
} // get_ARB1_branch_label_name
// Writes the variable name for register (rt, regnum) into buf by forwarding
// to get_D3D_varname_in_buf(), and returns that function's result.
const char *get_ARB1_varname_in_buf(Context *ctx, const RegisterType rt,
                                    const int regnum, char *buf,
                                    const size_t buflen)
{
    // ARB1 variable names match the D3D ones at the moment.
    const char *name = get_D3D_varname_in_buf(ctx, rt, regnum, buf, buflen);
    return name;
} // get_ARB1_varname_in_buf
// Returns the variable name for register (rt, regnum) by forwarding to
// get_D3D_varname().
const char *get_ARB1_varname(Context *ctx, const RegisterType rt,
                             const int regnum)
{
    // ARB1 variable names match the D3D ones at the moment.
    const char *name = get_D3D_varname(ctx, rt, regnum);
    return name;
} // get_ARB1_varname
// Formats the name of the constant array covering `size` registers starting
// at `base` ("c_array_<base>_<size>") into buf (at most buflen bytes).
// Returns buf. ctx is not consulted.
static inline const char *get_ARB1_const_array_varname_in_buf(Context *ctx,
                                                const int base, const int size,
                                                char *buf, const size_t buflen)
{
    (void) ctx;  // unused; kept for a uniform helper signature.
    snprintf(buf, buflen, "c_array_%d_%d", base, size);
    return buf;
} // get_ARB1_const_array_varname_in_buf
// Builds the constant array name for (base, size) in a local buffer and
// returns a copy made with StrDup().
const char *get_ARB1_const_array_varname(Context *ctx, int base, int size)
{
    char name[64];
    const char *built = get_ARB1_const_array_varname_in_buf(ctx, base, size,
                                                            name,
                                                            sizeof (name));
    return StrDup(ctx, built);
} // get_ARB1_const_array_varname
const char *make_ARB1_srcarg_string_in_buf(Context *ctx, | ||||
const SourceArgInfo *arg, | ||||
char *buf, size_t buflen) | ||||
{ | ||||
// !!! FIXME: this can hit pathological cases where we look like this... | ||||
// | ||||
// dp3 r1.xyz, t0_bx2, t0_bx2 | ||||
// mad r1.xyz, t0_bias, 1-r1, t0_bx2 | ||||
// | ||||
// ...which do a lot of duplicate work in arb1... | ||||
// | ||||
// SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 }; | ||||
// MUL scratch0, scratch0, { 2.0, 2.0, 2.0, 2.0 }; | ||||
// SUB scratch1, t0, { 0.5, 0.5, 0.5, 0.5 }; | ||||
// MUL scratch1, scratch1, { 2.0, 2.0, 2.0, 2.0 }; | ||||
// DP3 r1.xyz, scratch0, scratch1; | ||||
// SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 }; | ||||
// SUB scratch1, { 1.0, 1.0, 1.0, 1.0 }, r1; | ||||
// SUB scratch2, t0, { 0.5, 0.5, 0.5, 0.5 }; | ||||
// MUL scratch2, scratch2, { 2.0, 2.0, 2.0, 2.0 }; | ||||
// MAD r1.xyz, scratch0, scratch1, scratch2; | ||||
// | ||||
// ...notice that the dp3 calculates the same value into two scratch | ||||
// registers. This case is easier to handle; just see if multiple | ||||
// source args are identical, build it up once, and use the same | ||||
// scratch register for multiple arguments in that opcode. | ||||
// Even better still, only calculate things once across instructions, | ||||
// and be smart about letting it linger in a scratch register until we | ||||
// definitely don't need the calculation anymore. That's harder to | ||||
// write, though. | ||||
char regnum_str[16] = { '\0' }; | ||||
// !!! FIXME: use get_ARB1_varname_in_buf() instead? | ||||
const char *regtype_str = NULL; | ||||
if (!arg->relative) | ||||
{ | ||||
regtype_str = get_ARB1_register_string(ctx, arg->regtype, | ||||
arg->regnum, regnum_str, | ||||
sizeof (regnum_str)); | ||||
} // if | ||||
const char *rel_lbracket = ""; | ||||
char rel_offset[32] = { '\0' }; | ||||
const char *rel_rbracket = ""; | ||||
char rel_swizzle[4] = { '\0' }; | ||||
const char *rel_regtype_str = ""; | ||||
if (arg->relative) | ||||
{ | ||||
rel_regtype_str = get_ARB1_varname_in_buf(ctx, arg->relative_regtype, | ||||
arg->relative_regnum, | ||||
(char *) alloca(64), 64); | ||||
rel_swizzle[0] = '.'; | ||||
rel_swizzle[1] = swizzle_channels[arg->relative_component]; | ||||
rel_swizzle[2] = '\0'; | ||||
if (!support_nv2(ctx)) | ||||
{ | ||||
// The address register in ARB1 only allows the '.x' component, so | ||||
// we need to load the component we need from a temp vector | ||||
// register into .x as needed. | ||||
assert(arg->relative_regtype == REG_TYPE_ADDRESS); | ||||
assert(arg->relative_regnum == 0); | ||||
if (ctx->last_address_reg_component != arg->relative_component) | ||||
{ | ||||
output_line(ctx, "ARL %s.x, addr%d.%c;", rel_regtype_str, | ||||
arg->relative_regnum, | ||||
swizzle_channels[arg->relative_component]); | ||||
ctx->last_address_reg_component = arg->relative_component; | ||||
} // if | ||||
rel_swizzle[1] = 'x'; | ||||
} // if | ||||
if (arg->regtype == REG_TYPE_INPUT) | ||||
regtype_str = "vertex.attrib"; | ||||
else | ||||
{ | ||||
assert(arg->regtype == REG_TYPE_CONST); | ||||
const int arrayidx = arg->relative_array->index; | ||||
const int arraysize = arg->relative_array->count; | ||||
const int offset = arg->regnum - arrayidx; | ||||
assert(offset >= 0); | ||||
regtype_str = get_ARB1_const_array_varname_in_buf(ctx, arrayidx, | ||||
arraysize, (char *) alloca(64), 64); | ||||
if (offset != 0) | ||||
snprintf(rel_offset, sizeof (rel_offset), " + %d", offset); | ||||
} // else | ||||
rel_lbracket = "["; | ||||
rel_rbracket = "]"; | ||||
} // if | ||||
// This is the source register with everything but swizzle and source mods. | ||||
snprintf(buf, buflen, "%s%s%s%s%s%s%s", regtype_str, regnum_str, | ||||
rel_lbracket, rel_regtype_str, rel_swizzle, rel_offset, | ||||
rel_rbracket); | ||||
// Some of the source mods need to generate instructions to a temp | ||||
// register, in which case we'll replace the register name. | ||||
const SourceMod mod = arg->src_mod; | ||||
const int inplace = ( (mod == SRCMOD_NONE) || (mod == SRCMOD_NEGATE) || | ||||
((mod == SRCMOD_ABS) && support_nv2(ctx)) ); | ||||
if (!inplace) | ||||
{ | ||||
const size_t len = 64; | ||||
char *stackbuf = (char *) alloca(len); | ||||
regtype_str = allocate_ARB1_scratch_reg_name(ctx, stackbuf, len); | ||||
regnum_str[0] = '\0'; // move value to scratch register. | ||||
rel_lbracket = ""; // scratch register won't use array. | ||||
rel_rbracket = ""; | ||||
rel_offset[0] = '\0'; | ||||
rel_swizzle[0] = '\0'; | ||||
rel_regtype_str = ""; | ||||
} // if | ||||
const char *premod_str = ""; | ||||
const char *postmod_str = ""; | ||||
switch (mod) | ||||
{ | ||||
case SRCMOD_NEGATE: | ||||
premod_str = "-"; | ||||
break; | ||||
case SRCMOD_BIASNEGATE: | ||||
premod_str = "-"; | ||||
// fall through. | ||||
case SRCMOD_BIAS: | ||||
output_line(ctx, "SUB %s, %s, { 0.5, 0.5, 0.5, 0.5 };", | ||||
regtype_str, buf); | ||||
break; | ||||
case SRCMOD_SIGNNEGATE: | ||||
premod_str = "-"; | ||||
// fall through. | ||||
case SRCMOD_SIGN: | ||||
output_line(ctx, | ||||
"MAD %s, %s, { 2.0, 2.0, 2.0, 2.0 }, { -1.0, -1.0, -1.0, -1.0 };", | ||||
regtype_str, buf); | ||||
break; | ||||
case SRCMOD_COMPLEMENT: | ||||
output_line(ctx, "SUB %s, { 1.0, 1.0, 1.0, 1.0 }, %s;", | ||||
regtype_str, buf); | ||||
break; | ||||
case SRCMOD_X2NEGATE: | ||||
premod_str = "-"; | ||||
// fall through. | ||||
case SRCMOD_X2: | ||||
output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", | ||||
regtype_str, buf); | ||||
break; | ||||
case SRCMOD_DZ: | ||||
fail(ctx, "SRCMOD_DZ currently unsupported in arb1"); | ||||
postmod_str = "_dz"; | ||||
break; | ||||
case SRCMOD_DW: | ||||
fail(ctx, "SRCMOD_DW currently unsupported in arb1"); | ||||
postmod_str = "_dw"; | ||||
break; | ||||
case SRCMOD_ABSNEGATE: | ||||
premod_str = "-"; | ||||
// fall through. | ||||
case SRCMOD_ABS: | ||||
if (!support_nv2(ctx)) // GL_NV_vertex_program2_option adds this. | ||||
output_line(ctx, "ABS %s, %s;", regtype_str, buf); | ||||
else | ||||
{ | ||||
premod_str = (mod == SRCMOD_ABSNEGATE) ? "-|" : "|"; | ||||
postmod_str = "|"; | ||||
} // else | ||||
break; | ||||
case SRCMOD_NOT: | ||||
fail(ctx, "SRCMOD_NOT currently unsupported in arb1"); | ||||
premod_str = "!"; | ||||
break; | ||||
case SRCMOD_NONE: | ||||
case SRCMOD_TOTAL: | ||||
break; // stop compiler whining. | ||||
} // switch | ||||
char swizzle_str[6]; | ||||
size_t i = 0; | ||||
if (support_nv4(ctx)) // vFace must be output as "vFace.x" in nv4. | ||||
{ | ||||
if (arg->regtype == REG_TYPE_MISCTYPE) | ||||
{ | ||||
if ( ((const MiscTypeType) arg->regnum) == MISCTYPE_TYPE_FACE ) | ||||
{ | ||||
swizzle_str[i++] = '.'; | ||||
swizzle_str[i++] = 'x'; | ||||
} // if | ||||
} // if | ||||
} // if | ||||
const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum); | ||||
if (!scalar && !no_swizzle(arg->swizzle)) | ||||
{ | ||||
swizzle_str[i++] = '.'; | ||||
// .xxxx is the same as .x, but .xx is illegal...scalar or full! | ||||
if (replicate_swizzle(arg->swizzle)) | ||||
swizzle_str[i++] = swizzle_channels[arg->swizzle_x]; | ||||
else | ||||
{ | ||||
swizzle_str[i++] = swizzle_channels[arg->swizzle_x]; | ||||
swizzle_str[i++] = swizzle_channels[arg->swizzle_y]; | ||||
swizzle_str[i++] = swizzle_channels[arg->swizzle_z]; | ||||
swizzle_str[i++] = swizzle_channels[arg->swizzle_w]; | ||||
} // else | ||||
} // if | ||||
swizzle_str[i] = '\0'; | ||||
assert(i < sizeof (swizzle_str)); | ||||
snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s%s", premod_str, | ||||
regtype_str, regnum_str, rel_lbracket, | ||||
rel_regtype_str, rel_swizzle, rel_offset, rel_rbracket, | ||||
swizzle_str, postmod_str); | ||||
// !!! FIXME: make sure the scratch buffer was large enough. | ||||
return buf; | ||||
} // make_ARB1_srcarg_string_in_buf | ||||
// Writes the variable name of the current destination argument
// (ctx->dest_arg) into buf and returns the result of
// get_ARB1_varname_in_buf().
const char *get_ARB1_destarg_varname(Context *ctx, char *buf,
                                     const size_t buflen)
{
    return get_ARB1_varname_in_buf(ctx, ctx->dest_arg.regtype,
                                   ctx->dest_arg.regnum, buf, buflen);
} // get_ARB1_destarg_varname
// Writes the variable name of source argument `idx` into buf. If idx is
// outside ctx->source_args, reports "Too many source args" through fail()
// and returns buf holding an empty string.
const char *get_ARB1_srcarg_varname(Context *ctx, const size_t idx,
                                    char *buf, const size_t buflen)
{
    if (idx < STATICARRAYLEN(ctx->source_args))
    {
        const SourceArgInfo *src = &ctx->source_args[idx];
        return get_ARB1_varname_in_buf(ctx, src->regtype, src->regnum,
                                       buf, buflen);
    } // if

    fail(ctx, "Too many source args");
    *buf = '\0';
    return buf;
} // get_ARB1_srcarg_varname
// Builds the text that follows an opcode name for the current destination
// argument (ctx->dest_arg) into buf (at most buflen bytes) and returns buf.
//
// The result has the form "<pp><sat> <register><writemask>", e.g. the
// modifier suffixes that attach directly to the opcode, a space, then the
// destination register. On failure (unknown register type, or a predicated
// destination, which is unsupported) fail() is called and buf is returned
// as an empty string.
const char *make_ARB1_destarg_string(Context *ctx, char *buf,
                                     const size_t buflen)
{
    const DestArgInfo *arg = &ctx->dest_arg;

    *buf = '\0';  // early returns below hand back an empty string.

    const char *sat_str = "";
    if (arg->result_mod & MOD_SATURATE)
    {
        // nv4 can use ".SAT" in all program types.
        // For less than nv4, the "_SAT" modifier is only available in
        //  fragment shaders. Every thing else will fake it later in
        //  emit_ARB1_dest_modifiers() ...
        if (support_nv4(ctx))
            sat_str = ".SAT";
        else if (shader_is_pixel(ctx))
            sat_str = "_SAT";
    } // if

    const char *pp_str = "";
    if (arg->result_mod & MOD_PP)
    {
        // Most ARB1 profiles can't do partial precision (MOD_PP), but that's
        //  okay. The spec says lots of Direct3D implementations ignore the
        //  flag anyhow.
        if (support_nv4(ctx))
            pp_str = "H";
    } // if

    // CENTROID only allowed in DCL opcodes, which shouldn't come through here.
    assert((arg->result_mod & MOD_CENTROID) == 0);

    char regnum_str[16] = { '\0' };
    const char *regtype_str = get_ARB1_register_string(ctx, arg->regtype,
                                                       arg->regnum, regnum_str,
                                                       sizeof (regnum_str));
    if (regtype_str == NULL)
    {
        fail(ctx, "Unknown destination register type.");
        return buf;
    } // if

    char writemask_str[6];
    size_t i = 0;
    const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
    if (!scalar && !writemask_xyzw(arg->writemask))
    {
        writemask_str[i++] = '.';
        if (arg->writemask0) writemask_str[i++] = 'x';
        if (arg->writemask1) writemask_str[i++] = 'y';
        if (arg->writemask2) writemask_str[i++] = 'z';
        if (arg->writemask3) writemask_str[i++] = 'w';
    } // if
    writemask_str[i] = '\0';
    assert(i < sizeof (writemask_str));

    // Predicated destinations are not implemented for arb1; report it and
    //  return the empty string.
    if (ctx->predicated)
    {
        fail(ctx, "dest register predication currently unsupported in arb1");
        return buf;
    } // if

    snprintf(buf, buflen, "%s%s %s%s%s", pp_str, sat_str,
             regtype_str, regnum_str, writemask_str);
    // !!! FIXME: make sure the scratch buffer was large enough.
    return buf;
} // make_ARB1_destarg_string
// Emits follow-up instructions that apply the destination's result
// modifiers to the value just written:
//  - a MUL by the scale factor selected by result_shift, and
//  - a MIN/MAX pair clamping to [0, 1] for MOD_SATURATE when neither nv4
//    nor a pixel shader is in use (those use the SAT opcode modifier).
void emit_ARB1_dest_modifiers(Context *ctx)
{
    const DestArgInfo *dest = &ctx->dest_arg;

    if (dest->result_shift != 0x0)
    {
        char dst[64];
        make_ARB1_destarg_string(ctx, dst, sizeof (dst));

        const char *scale = NULL;
        switch (dest->result_shift)
        {
            case 0x1: scale = "2.0"; break;
            case 0x2: scale = "4.0"; break;
            case 0x3: scale = "8.0"; break;
            case 0xD: scale = "0.125"; break;
            case 0xE: scale = "0.25"; break;
            case 0xF: scale = "0.5"; break;
            default: break;  // any other shift value emits nothing.
        } // switch

        if (scale != NULL)
        {
            char var[64];
            get_ARB1_destarg_varname(ctx, var, sizeof (var));
            output_line(ctx, "MUL%s, %s, %s;", dst, var, scale);
        } // if
    } // if

    if (!(dest->result_mod & MOD_SATURATE))
        return;

    // nv4 and/or pixel shaders just used the "SAT" modifier, instead.
    if (support_nv4(ctx) || shader_is_pixel(ctx))
        return;

    char var[64];
    get_ARB1_destarg_varname(ctx, var, sizeof (var));
    char dst[64];
    make_ARB1_destarg_string(ctx, dst, sizeof (dst));
    output_line(ctx, "MIN%s, %s, 1.0;", dst, var);
    output_line(ctx, "MAX%s, %s, 0.0;", dst, var);
} // emit_ARB1_dest_modifiers
// Builds the ARB1 text for source argument `idx` into buf via
// make_ARB1_srcarg_string_in_buf(). If idx is outside ctx->source_args,
// reports "Too many source args" through fail() and returns buf holding an
// empty string.
const char *make_ARB1_srcarg_string(Context *ctx, const size_t idx,
                                    char *buf, const size_t buflen)
{
    if (idx < STATICARRAYLEN(ctx->source_args))
    {
        const SourceArgInfo *src = &ctx->source_args[idx];
        return make_ARB1_srcarg_string_in_buf(ctx, src, buf, buflen);
    } // if

    fail(ctx, "Too many source args");
    *buf = '\0';
    return buf;
} // make_ARB1_srcarg_string
// Emits "<opcode><dest>, <src0>;" for the current instruction, then applies
// any destination modifiers.
void emit_ARB1_opcode_ds(Context *ctx, const char *opcode)
{
    char dst[64];
    char src0[64];

    // Argument strings are built in order: destination first, then source.
    make_ARB1_destarg_string(ctx, dst, sizeof (dst));
    make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));

    output_line(ctx, "%s%s, %s;", opcode, dst, src0);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_opcode_ds
// Emits "<opcode><dest>, <src0>, <src1>;" for the current instruction, then
// applies any destination modifiers.
void emit_ARB1_opcode_dss(Context *ctx, const char *opcode)
{
    char dst[64];
    char src0[64];
    char src1[64];

    // Argument strings are built in order: destination, then each source.
    make_ARB1_destarg_string(ctx, dst, sizeof (dst));
    make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
    make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));

    output_line(ctx, "%s%s, %s, %s;", opcode, dst, src0, src1);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_opcode_dss
// Emits "<opcode><dest>, <src0>, <src1>, <src2>;" for the current
// instruction, then applies any destination modifiers.
void emit_ARB1_opcode_dsss(Context *ctx, const char *opcode)
{
    char dst[64];
    char src0[64];
    char src1[64];
    char src2[64];

    // Argument strings are built in order: destination, then each source.
    make_ARB1_destarg_string(ctx, dst, sizeof (dst));
    make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
    make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
    make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));

    output_line(ctx, "%s%s, %s, %s, %s;", opcode, dst, src0, src1, src2);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_opcode_dsss
// Generators for per-opcode emitters. Each macro defines emit_ARB1_<op>(),
// which forwards to the emit_ARB1_opcode* helper named after the argument
// layout and passes the opcode name as a string literal.
#define EMIT_ARB1_OPCODE_FUNC(op) \
    void emit_ARB1_##op(Context *ctx) \
    { \
        emit_ARB1_opcode(ctx, #op); \
    }

#define EMIT_ARB1_OPCODE_D_FUNC(op) \
    void emit_ARB1_##op(Context *ctx) \
    { \
        emit_ARB1_opcode_d(ctx, #op); \
    }

#define EMIT_ARB1_OPCODE_S_FUNC(op) \
    void emit_ARB1_##op(Context *ctx) \
    { \
        emit_ARB1_opcode_s(ctx, #op); \
    }

#define EMIT_ARB1_OPCODE_SS_FUNC(op) \
    void emit_ARB1_##op(Context *ctx) \
    { \
        emit_ARB1_opcode_ss(ctx, #op); \
    }

#define EMIT_ARB1_OPCODE_DS_FUNC(op) \
    void emit_ARB1_##op(Context *ctx) \
    { \
        emit_ARB1_opcode_ds(ctx, #op); \
    }

#define EMIT_ARB1_OPCODE_DSS_FUNC(op) \
    void emit_ARB1_##op(Context *ctx) \
    { \
        emit_ARB1_opcode_dss(ctx, #op); \
    }

#define EMIT_ARB1_OPCODE_DSSS_FUNC(op) \
    void emit_ARB1_##op(Context *ctx) \
    { \
        emit_ARB1_opcode_dsss(ctx, #op); \
    }

#define EMIT_ARB1_OPCODE_DSSSS_FUNC(op) \
    void emit_ARB1_##op(Context *ctx) \
    { \
        emit_ARB1_opcode_dssss(ctx, #op); \
    }

// Defines emit_ARB1_<op>() as a stub that reports the opcode as
// unimplemented for the active profile.
#define EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(op) \
    void emit_ARB1_##op(Context *ctx) \
    { \
        failf(ctx, #op " unimplemented in %s profile", ctx->profile->name); \
    }
// Writes the program header for the selected profile to the preflight
// output and records which NV extension levels the profile supports.
// Only vertex and pixel shaders are accepted; anything else is reported
// through failf() and nothing is written. Output is left pointing at the
// mainline section on return.
void emit_ARB1_start(Context *ctx, const char *profilestr)
{
    const int is_vertex = shader_is_vertex(ctx);
    if (!is_vertex && !shader_is_pixel(ctx))
    {
        failf(ctx, "Shader type %u unsupported in this profile.",
              (uint) ctx->shader_type);
        return;
    } // if

    // "vp"/"fp" goes in the program header, "vertex"/"fragment" in the
    //  OPTION lines.
    const char *shader_str = is_vertex ? "vp" : "fp";
    const char *shader_full_str = is_vertex ? "vertex" : "fragment";

    set_output(ctx, &ctx->preflight);

    if (strcmp(profilestr, MOJOSHADER_PROFILE_ARB1) == 0)
        output_line(ctx, "!!ARB%s1.0", shader_str);

    #if SUPPORT_PROFILE_ARB1_NV
    else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV2) == 0)
    {
        ctx->profile_supports_nv2 = 1;
        output_line(ctx, "!!ARB%s1.0", shader_str);
        output_line(ctx, "OPTION NV_%s_program2;", shader_full_str);
    } // else if

    else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV3) == 0)
    {
        // there's no NV_fragment_program3, so just use 2.
        const int ver = shader_is_pixel(ctx) ? 2 : 3;
        ctx->profile_supports_nv2 = 1;
        ctx->profile_supports_nv3 = 1;
        output_line(ctx, "!!ARB%s1.0", shader_str);
        output_line(ctx, "OPTION NV_%s_program%d;", shader_full_str, ver);
    } // else if

    else if (strcmp(profilestr, MOJOSHADER_PROFILE_NV4) == 0)
    {
        ctx->profile_supports_nv2 = 1;
        ctx->profile_supports_nv3 = 1;
        ctx->profile_supports_nv4 = 1;
        output_line(ctx, "!!NV%s4.0", shader_str);
    } // else if
    #endif

    else
    {
        failf(ctx, "Profile '%s' unsupported or unknown.", profilestr);
    } // else

    set_output(ctx, &ctx->mainline);
} // emit_ARB1_start
// Terminates the program with "END". For pixel shaders below version 2.0,
// first copies r0 to oC0 and marks the color output register as used.
void emit_ARB1_end(Context *ctx)
{
    // ps_1_* writes color to r0 instead oC0. We move it to the right place.
    // We don't have to worry about a RET opcode messing this up, since
    //  RET isn't available before ps_2_0.
    const int is_ps1 = shader_is_pixel(ctx) &&
                       !shader_version_atleast(ctx, 2, 0);
    if (is_ps1)
    {
        set_used_register(ctx, REG_TYPE_COLOROUT, 0, 1);
        output_line(ctx, "MOV oC0, r0;");
    } // if

    output_line(ctx, "END");
} // emit_ARB1_end
// The PHASE opcode produces no output in arb1.
void emit_ARB1_phase(Context *ctx)
{
    (void) ctx;  // intentionally a no-op.
} // emit_ARB1_phase
// Returns the declaration keyword(s) for a float temporary: "FLOAT TEMP"
// when nv4 is supported (it lets you specify the data type), otherwise
// "TEMP".
static inline const char *arb1_float_temp(const Context *ctx)
{
    if (support_nv4(ctx))
        return "FLOAT TEMP";
    return "TEMP";
} // arb1_float_temp
// Emits the declarations that can only be decided once the whole shader has
// been processed: OPTION lines (to the preflight section), one temporary
// per scratch register that was needed, and -- for nv2 non-pixel shaders
// without nv4 -- the "rep" temporaries used to emulate REP/ENDREP (both to
// the globals section).
void emit_ARB1_finalize(Context *ctx)
{
    push_output(ctx, &ctx->preflight);

    if (shader_is_vertex(ctx) && !ctx->arb1_wrote_position)
        output_line(ctx, "OPTION ARB_position_invariant;");

    if (shader_is_pixel(ctx) && ctx->have_multi_color_outputs)
        output_line(ctx, "OPTION ARB_draw_buffers;");

    pop_output(ctx);

    const char *temp_decl = arb1_float_temp(ctx);

    push_output(ctx, &ctx->globals);

    // Each pass allocates the next scratch register name, which also
    //  advances ctx->scratch_registers (checked by the assert below).
    int n = 0;
    while (n < ctx->max_scratch_registers)
    {
        char name[64];
        allocate_ARB1_scratch_reg_name(ctx, name, sizeof (name));
        output_line(ctx, "%s %s;", temp_decl, name);
        n++;
    } // while

    // nv2 fragment programs (and anything nv4) have a real REP/ENDREP.
    if ( (support_nv2(ctx)) && (!shader_is_pixel(ctx)) && (!support_nv4(ctx)) )
    {
        // set up temps for nv2 REP/ENDREP emulation through branching.
        int rep = 0;
        while (rep < ctx->max_reps)
        {
            output_line(ctx, "TEMP rep%d;", rep);
            rep++;
        } // while
    } // if

    pop_output(ctx);

    assert(ctx->scratch_registers == ctx->max_scratch_registers);
} // emit_ARB1_finalize
// Declares a global register in the globals section. Only address/texture
// registers and temporaries are handled; any other register type is
// reported through fail().
void emit_ARB1_global(Context *ctx, RegisterType regtype, int regnum)
{
    // !!! FIXME: dependency on ARB1 profile.  // !!! FIXME about FIXME: huh?
    char varname[64];
    get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));

    push_output(ctx, &ctx->globals);

    if (regtype == REG_TYPE_ADDRESS)
    {
        if (shader_is_pixel(ctx))  // actually REG_TYPE_TEXTURE.
        {
            // We have to map texture registers to temps for ps_1_1, since
            //  they work like temps, initialize with tex coords, and the
            //  ps_1_1 TEX opcode expects to overwrite it.
            if (!shader_version_atleast(ctx, 1, 4))
            {
                output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname);
                push_output(ctx, &ctx->mainline_top);
                output_line(ctx, "MOV %s, fragment.texcoord[%d];",
                            varname, regnum);
                pop_output(ctx);
            } // if
        } // if
        else if (support_nv4(ctx))
        {
            // nv4 replaced address registers with generic int registers.
            output_line(ctx, "INT TEMP %s;", varname);
        } // else if
        else
        {
            // nv2 has four-component address already, but stock arb1 has
            //  to emulate it in a temporary, and move components to the
            //  scalar ADDRESS register on demand.
            output_line(ctx, "ADDRESS %s;", varname);
            if (!support_nv2(ctx))
                output_line(ctx, "TEMP addr%d;", regnum);
        } // else
    } // if
    else if (regtype == REG_TYPE_TEMP)
    {
        output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname);
    } // else if
    else
    {
        // Predicate, loop and label registers are not declared here.
        fail(ctx, "BUG: we used a register we don't know how to define.");
    } // else

    pop_output(ctx);
} // emit_ARB1_global
// Declares a PARAM array for a uniform array and records where it landed
// in var->emit_position.
//
// All uniforms are packed tightly into the program.local array instead of
//  being mapped to the d3d registers, so the array is bound to the next
//  unused slice of program.local. arb1 lets a PARAM array map to a subset
//  of another array, so myarray[0] can map to program.local[5] without any
//  extra offset math.
void emit_ARB1_array(Context *ctx, VariableList *var)
{
    // First free slot: everything already handed out, of any type.
    const int first = ctx->uniform_float4_count +
                      ctx->uniform_int4_count +
                      ctx->uniform_bool_count;
    const int count = var->count;
    const int last = (first + count) - 1;

    char name[64];
    get_ARB1_const_array_varname_in_buf(ctx, var->index, count, name,
                                        sizeof (name));

    push_output(ctx, &ctx->globals);
    output_line(ctx, "PARAM %s[%d] = { program.local[%d..%d] };", name,
                count, first, last);
    pop_output(ctx);

    var->emit_position = first;
} // emit_ARB1_array
// Declares a PARAM array initialized with literal float constants taken
// from clist. Entries in clist that are not float constants are skipped;
// the i-th float constant found is asserted to have index (base + i).
void emit_ARB1_const_array(Context *ctx, const ConstantsList *clist,
                           int base, int size)
{
    char name[64];
    get_ARB1_const_array_varname_in_buf(ctx, base, size, name, sizeof (name));

    push_output(ctx, &ctx->globals);
    output_line(ctx, "PARAM %s[%d] = {", name, size);
    ctx->indent++;

    int i;
    for (i = 0; i < size; i++)
    {
        // Advance to the next float constant in the list.
        while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT)
            clist = clist->next;
        assert(clist->constant.index == (base + i));

        // Format the four components, x through w, in order.
        char vals[4][32];
        int c;
        for (c = 0; c < 4; c++)
        {
            floatstr(ctx, vals[c], sizeof (vals[c]),
                     clist->constant.value.f[c], 1);
        } // for

        // Every row but the last is followed by a comma.
        const char *sep = ((i + 1) < size) ? "," : "";
        output_line(ctx, "{ %s, %s, %s, %s }%s", vals[0], vals[1], vals[2],
                    vals[3], sep);

        clist = clist->next;
    } // for

    ctx->indent--;
    output_line(ctx, "};");
    pop_output(ctx);
} // emit_ARB1_const_array
void emit_ARB1_uniform(Context *ctx, RegisterType regtype, int regnum, | ||||
const VariableList *var) | ||||
{ | ||||
// We pack these down into the program.local array, so if we only use | ||||
// register c439, it'll actually map to program.local[0]. This will | ||||
// prevent overflows when we actually have enough resources to run. | ||||
const char *arrayname = "program.local"; | ||||
int index = 0; | ||||
char varname[64]; | ||||
get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname)); | ||||
push_output(ctx, &ctx->globals); | ||||
if (var == NULL) | ||||
{ | ||||
// all types share one array (rather, all types convert to float4). | ||||
index = ctx->uniform_float4_count + ctx->uniform_int4_count + | ||||
ctx->uniform_bool_count; | ||||
} // if | ||||
else | ||||
{ | ||||
const int arraybase = var->index; | ||||
if (var->constant) | ||||
{ | ||||
const int arraysize = var->count; | ||||
arrayname = get_ARB1_const_array_varname_in_buf(ctx, arraybase, | ||||
arraysize, (char *) alloca(64), 64); | ||||
index = (regnum - arraybase); | ||||
} // if | ||||
else | ||||
{ | ||||
assert(var->emit_position != -1); | ||||
index = (regnum - arraybase) + var->emit_position; | ||||
} // else | ||||
} // else | ||||
output_line(ctx, "PARAM %s = %s[%d];", varname, arrayname, index); | ||||
pop_output(ctx); | ||||
} // emit_ARB1_uniform | ||||
// Samplers are not predeclared in arb1, so this is mostly a no-op. When the
// sampler was used by a ps_1_1 TEXBEM opcode (tb != 0), two PARAMs
// ("<sampler>_texbem" and "<sampler>_texbeml") are bound to the next two
// free program.local slots and the float4 uniform count grows by two.
void emit_ARB1_sampler(Context *ctx,int stage,TextureType ttype,int tb)
{
    (void) ttype;  // not needed for arb1.

    if (!tb)
        return;  // nothing to declare.

    const int slot = ctx->uniform_float4_count + ctx->uniform_int4_count +
                     ctx->uniform_bool_count;

    char name[64];
    get_ARB1_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, name, sizeof(name));

    push_output(ctx, &ctx->globals);
    output_line(ctx, "PARAM %s_texbem = program.local[%d];", name, slot);
    output_line(ctx, "PARAM %s_texbeml = program.local[%d];", name, slot+1);
    pop_output(ctx);

    ctx->uniform_float4_count += 2;
} // emit_ARB1_sampler
// !!! FIXME: a lot of cut-and-paste here from emit_GLSL_attribute(). | ||||
void emit_ARB1_attribute(Context *ctx, RegisterType regtype, int regnum, | ||||
MOJOSHADER_usage usage, int index, int wmask, | ||||
int flags) | ||||
{ | ||||
// !!! FIXME: this function doesn't deal with write masks at all yet! | ||||
const char *usage_str = NULL; | ||||
const char *arrayleft = ""; | ||||
const char *arrayright = ""; | ||||
char index_str[16] = { '\0' }; | ||||
char varname[64]; | ||||
get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname)); | ||||
//assert((flags & MOD_PP) == 0); // !!! FIXME: is PP allowed? | ||||
if (index != 0) // !!! FIXME: a lot of these MUST be zero. | ||||
snprintf(index_str, sizeof (index_str), "%u", (uint) index); | ||||
if (shader_is_vertex(ctx)) | ||||
{ | ||||
// pre-vs3 output registers. | ||||
// these don't ever happen in DCL opcodes, I think. Map to vs_3_* | ||||
// output registers. | ||||
if (!shader_version_atleast(ctx, 3, 0)) | ||||
{ | ||||
if (regtype == REG_TYPE_RASTOUT) | ||||
{ | ||||
regtype = REG_TYPE_OUTPUT; | ||||
index = regnum; | ||||
switch ((const RastOutType) regnum) | ||||
{ | ||||
case RASTOUT_TYPE_POSITION: | ||||
usage = MOJOSHADER_USAGE_POSITION; | ||||
break; | ||||
case RASTOUT_TYPE_FOG: | ||||
usage = MOJOSHADER_USAGE_FOG; | ||||
break; | ||||
case RASTOUT_TYPE_POINT_SIZE: | ||||
usage = MOJOSHADER_USAGE_POINTSIZE; | ||||
break; | ||||
} // switch | ||||
} // if | ||||
else if (regtype == REG_TYPE_ATTROUT) | ||||
{ | ||||
regtype = REG_TYPE_OUTPUT; | ||||
usage = MOJOSHADER_USAGE_COLOR; | ||||
index = regnum; | ||||
} // else if | ||||
else if (regtype == REG_TYPE_TEXCRDOUT) | ||||
{ | ||||
regtype = REG_TYPE_OUTPUT; | ||||
usage = MOJOSHADER_USAGE_TEXCOORD; | ||||
index = regnum; | ||||
} // else if | ||||
} // if | ||||
// to avoid limitations of various GL entry points for input | ||||
// attributes (glSecondaryColorPointer() can only take 3 component | ||||
// items, glVertexPointer() can't do GL_UNSIGNED_BYTE, many other | ||||
// issues), we set up all inputs as generic vertex attributes, so we | ||||
// can pass data in just about any form, and ignore the built-in GLSL | ||||
// attributes like gl_SecondaryColor. Output needs to use the the | ||||
// built-ins, though, but we don't have to worry about the GL entry | ||||
// point limitations there. | ||||
if (regtype == REG_TYPE_INPUT) | ||||
{ | ||||
const int attr = ctx->assigned_vertex_attributes++; | ||||
push_output(ctx, &ctx->globals); | ||||
output_line(ctx, "ATTRIB %s = vertex.attrib[%d];", varname, attr); | ||||
pop_output(ctx); | ||||
} // if | ||||
else if (regtype == REG_TYPE_OUTPUT) | ||||
{ | ||||
switch (usage) | ||||
{ | ||||
case MOJOSHADER_USAGE_POSITION: | ||||
ctx->arb1_wrote_position = 1; | ||||
usage_str = "result.position"; | ||||
break; | ||||
case MOJOSHADER_USAGE_POINTSIZE: | ||||
usage_str = "result.pointsize"; | ||||
break; | ||||
case MOJOSHADER_USAGE_COLOR: | ||||
index_str[0] = '\0'; // no explicit number. | ||||
if (index == 0) | ||||
usage_str = "result.color.primary"; | ||||
else if (index == 1) | ||||
usage_str = "result.color.secondary"; | ||||
break; | ||||
case MOJOSHADER_USAGE_FOG: | ||||
usage_str = "result.fogcoord"; | ||||
break; | ||||
case MOJOSHADER_USAGE_TEXCOORD: | ||||
snprintf(index_str, sizeof (index_str), "%u", (uint) index); | ||||
usage_str = "result.texcoord"; | ||||
arrayleft = "["; | ||||
arrayright = "]"; | ||||
break; | ||||
default: | ||||
// !!! FIXME: we need to deal with some more built-in varyings here. | ||||
break; | ||||
} // switch | ||||
// !!! FIXME: the #define is a little hacky, but it means we don't | ||||
// !!! FIXME: have to track these separately if this works. | ||||
push_output(ctx, &ctx->globals); | ||||
// no mapping to built-in var? Just make it a regular global, pray. | ||||
if (usage_str == NULL) | ||||
output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname); | ||||
else | ||||
{ | ||||
output_line(ctx, "OUTPUT %s = %s%s%s%s;", varname, usage_str, | ||||
arrayleft, index_str, arrayright); | ||||
} // else | ||||
pop_output(ctx); | ||||
} // else if | ||||
else | ||||
{ | ||||
fail(ctx, "unknown vertex shader attribute register"); | ||||
} // else | ||||
} // if | ||||
else if (shader_is_pixel(ctx)) | ||||
{ | ||||
const char *paramtype_str = "ATTRIB"; | ||||
// samplers DCLs get handled in emit_ARB1_sampler(). | ||||
if (flags & MOD_CENTROID) | ||||
{ | ||||
if (!support_nv4(ctx)) // GL_NV_fragment_program4 adds centroid. | ||||
{ | ||||
// !!! FIXME: should we just wing it without centroid here? | ||||
failf(ctx, "centroid unsupported in %s profile", | ||||
ctx->profile->name); | ||||
return; | ||||
} // if | ||||
paramtype_str = "CENTROID ATTRIB"; | ||||
} // if | ||||
if (regtype == REG_TYPE_COLOROUT) | ||||
{ | ||||
paramtype_str = "OUTPUT"; | ||||
usage_str = "result.color"; | ||||
if (ctx->have_multi_color_outputs) | ||||
{ | ||||
// We have to gamble that you have GL_ARB_draw_buffers. | ||||
// You probably do at this point if you have a sane setup. | ||||
snprintf(index_str, sizeof (index_str), "%u", (uint) regnum); | ||||
arrayleft = "["; | ||||
arrayright = "]"; | ||||
} // if | ||||
} // if | ||||
else if (regtype == REG_TYPE_DEPTHOUT) | ||||
{ | ||||
paramtype_str = "OUTPUT"; | ||||
usage_str = "result.depth"; | ||||
} // else if | ||||
// !!! FIXME: can you actualy have a texture register with COLOR usage? | ||||
else if ((regtype == REG_TYPE_TEXTURE) || (regtype == REG_TYPE_INPUT)) | ||||
{ | ||||
if (usage == MOJOSHADER_USAGE_TEXCOORD) | ||||
{ | ||||
// ps_1_1 does a different hack for this attribute. | ||||
// Refer to emit_ARB1_global()'s REG_TYPE_TEXTURE code. | ||||
if (shader_version_atleast(ctx, 1, 4)) | ||||
{ | ||||
snprintf(index_str, sizeof (index_str), "%u", (uint) index); | ||||
usage_str = "fragment.texcoord"; | ||||
arrayleft = "["; | ||||
arrayright = "]"; | ||||
} // if | ||||
} // if | ||||
else if (usage == MOJOSHADER_USAGE_COLOR) | ||||
{ | ||||
index_str[0] = '\0'; // no explicit number. | ||||
if (index == 0) | ||||
usage_str = "fragment.color.primary"; | ||||
else if (index == 1) | ||||
usage_str = "fragment.color.secondary"; | ||||
else | ||||
fail(ctx, "unsupported color index"); | ||||
} // else if | ||||
} // else if | ||||
else if (regtype == REG_TYPE_MISCTYPE) | ||||
{ | ||||
const MiscTypeType mt = (MiscTypeType) regnum; | ||||
if (mt == MISCTYPE_TYPE_FACE) | ||||
{ | ||||
if (support_nv4(ctx)) // FINALLY, a vFace equivalent in nv4! | ||||
{ | ||||
index_str[0] = '\0'; // no explicit number. | ||||
usage_str = "fragment.facing"; | ||||
} // if | ||||
else | ||||
{ | ||||
failf(ctx, "vFace unsupported in %s profile", | ||||
ctx->profile->name); | ||||
} // else | ||||
} // if | ||||
else if (mt == MISCTYPE_TYPE_POSITION) | ||||
{ | ||||
index_str[0] = '\0'; // no explicit number. | ||||
usage_str = "fragment.position"; // !!! FIXME: is this the same coord space as D3D? | ||||
} // else if | ||||
else | ||||
{ | ||||
fail(ctx, "BUG: unhandled misc register"); | ||||
} // else | ||||
} // else if | ||||
else | ||||
{ | ||||
fail(ctx, "unknown pixel shader attribute register"); | ||||
} // else | ||||
if (usage_str != NULL) | ||||
{ | ||||
push_output(ctx, &ctx->globals); | ||||
output_line(ctx, "%s %s = %s%s%s%s;", paramtype_str, varname, | ||||
usage_str, arrayleft, index_str, arrayright); | ||||
pop_output(ctx); | ||||
} // if | ||||
} // else if | ||||
else | ||||
{ | ||||
fail(ctx, "Unknown shader type"); // state machine should catch this. | ||||
} // else | ||||
} // emit_ARB1_attribute | ||||
// RESERVED opcode: deliberately writes nothing to the generated program.
void emit_ARB1_RESERVED(Context *ctx)
{
    (void) ctx;  // no-op.
} // emit_ARB1_RESERVED
// NOP opcode: arb1 has no NOP instruction, so the opcode is simply dropped
// and nothing is written to the output.
void emit_ARB1_NOP(Context *ctx)
{
    (void) ctx;
} // emit_ARB1_NOP
// Emitters for MOV, ADD, SUB, MAD, MUL and RCP, stamped out by the
// EMIT_ARB1_OPCODE_*_FUNC macros (their definitions are outside this section).
// NOTE(review): the DS / DSS / DSSS suffix presumably means one destination
// followed by one, two or three sources -- confirm against the macro
// definitions.
EMIT_ARB1_OPCODE_DS_FUNC(MOV) | ||||
EMIT_ARB1_OPCODE_DSS_FUNC(ADD) | ||||
EMIT_ARB1_OPCODE_DSS_FUNC(SUB) | ||||
EMIT_ARB1_OPCODE_DSSS_FUNC(MAD) | ||||
EMIT_ARB1_OPCODE_DSS_FUNC(MUL) | ||||
EMIT_ARB1_OPCODE_DS_FUNC(RCP) | ||||
// RSQ opcode: reciprocal square root of the absolute value of source 0.
void emit_ARB1_RSQ(Context *ctx)
{
    // Before nv4 the RSQ instruction applies abs() on its own, so the
    // generic one-source emitter is all we need.
    if (!support_nv4(ctx))
    {
        emit_ARB1_opcode_ds(ctx, "RSQ");
        return;
    } // if

    // nv4 leaves the sign alone (negative input would produce NaN), so the
    // abs() has to be explicit. Any modifier whose only effect is on the
    // sign collapses into a plain |abs| source modifier.
    switch (ctx->source_args[0].src_mod)
    {
        case SRCMOD_NONE:
        case SRCMOD_NEGATE:
        case SRCMOD_ABSNEGATE:
            ctx->source_args[0].src_mod = SRCMOD_ABS;
            break;
        default:
            break;
    } // switch

    char destarg[64];
    char srcarg[64];
    make_ARB1_destarg_string(ctx, destarg, sizeof (destarg));
    make_ARB1_srcarg_string(ctx, 0, srcarg, sizeof (srcarg));

    if (ctx->source_args[0].src_mod != SRCMOD_ABS)
    {
        // Some other modifier is in play: take the abs() in a scratch
        // register first, then feed its x component to RSQ.
        char tmp[64];
        allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp));
        output_line(ctx, "ABS %s, %s;", tmp, srcarg);
        output_line(ctx, "RSQ%s, %s.x;", destarg, tmp);
    } // if
    else
    {
        output_line(ctx, "RSQ%s, %s;", destarg, srcarg);
    } // else

    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_RSQ
// Emitters for DP3, DP4, MIN, MAX, SLT and SGE, all generated through the
// EMIT_ARB1_OPCODE_DSS_FUNC macro (defined outside this section).
// NOTE(review): presumably each forwards to emit_ARB1_opcode_dss() with the
// same opcode name -- confirm against the macro definition.
EMIT_ARB1_OPCODE_DSS_FUNC(DP3) | ||||
EMIT_ARB1_OPCODE_DSS_FUNC(DP4) | ||||
EMIT_ARB1_OPCODE_DSS_FUNC(MIN) | ||||
EMIT_ARB1_OPCODE_DSS_FUNC(MAX) | ||||
EMIT_ARB1_OPCODE_DSS_FUNC(SLT) | ||||
EMIT_ARB1_OPCODE_DSS_FUNC(SGE) | ||||
// EXP opcode: emitted through the one-dest/one-source helper under the arb1
// opcode name EX2.
void emit_ARB1_EXP(Context *ctx)
{
    emit_ARB1_opcode_ds(ctx, "EX2");
} // emit_ARB1_EXP
// Shared emitter for logarithm-style opcodes, which operate on the absolute
// value of source 0.
//
//  ctx:    translation context; source_args[0].src_mod may be rewritten.
//  opcode: arb1 opcode name to emit (emit_ARB1_LOG passes "LG2").
static void arb1_log(Context *ctx, const char *opcode)
{
    // abs() throws the sign away, so any modifier that only touches the sign
    // (none, negate, abs-then-negate) is equivalent to a plain |abs| source
    // modifier here. This mirrors what emit_ARB1_RSQ does and saves the
    // scratch register and extra ABS instruction for negated sources.
    if ( (ctx->source_args[0].src_mod == SRCMOD_NONE) ||
         (ctx->source_args[0].src_mod == SRCMOD_NEGATE) ||
         (ctx->source_args[0].src_mod == SRCMOD_ABSNEGATE) )
        ctx->source_args[0].src_mod = SRCMOD_ABS;

    char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
    char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));

    if (ctx->source_args[0].src_mod == SRCMOD_ABS)
        output_line(ctx, "%s%s, %s;", opcode, dst, src0);
    else
    {
        // Some other modifier is applied: compute abs() of the modified
        // source into a scratch register, then use its x component.
        char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
        output_line(ctx, "ABS %s, %s;", buf, src0);
        output_line(ctx, "%s%s, %s.x;", opcode, dst, buf);
    } // else

    emit_ARB1_dest_modifiers(ctx);
} // arb1_log
// LOG opcode: handled by arb1_log() using the arb1 opcode name LG2.
void emit_ARB1_LOG(Context *ctx) { arb1_log(ctx, "LG2"); }
// Emitters for LIT (one source) and DST (two sources), generated by the
// EMIT_ARB1_OPCODE_*_FUNC macros defined outside this section.
EMIT_ARB1_OPCODE_DS_FUNC(LIT) | ||||
EMIT_ARB1_OPCODE_DSS_FUNC(DST) | ||||
// LRP opcode: linear interpolation, dest = src2 + src0 * (src1 - src2).
void emit_ARB1_LRP(Context *ctx)
{
    // Fragment programs have a native LRP; use it directly.
    if (shader_is_pixel(ctx))
    {
        emit_ARB1_opcode_dsss(ctx, "LRP");
        return;
    } // if

    // Otherwise build it from a subtract and a multiply-add.
    char target[64];
    char weight[64];
    char first[64];
    char second[64];
    char delta[64];

    make_ARB1_destarg_string(ctx, target, sizeof (target));
    make_ARB1_srcarg_string(ctx, 0, weight, sizeof (weight));
    make_ARB1_srcarg_string(ctx, 1, first, sizeof (first));
    make_ARB1_srcarg_string(ctx, 2, second, sizeof (second));
    allocate_ARB1_scratch_reg_name(ctx, delta, sizeof (delta));

    // delta = src1 - src2
    output_line(ctx, "SUB %s, %s, %s;", delta, first, second);
    // dest = delta * src0 + src2
    output_line(ctx, "MAD%s, %s, %s, %s;", target, delta, weight, second);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_LRP
// Emitter for FRC, generated by the one-source EMIT_ARB1_OPCODE_DS_FUNC macro
// (defined outside this section).
EMIT_ARB1_OPCODE_DS_FUNC(FRC) | ||||
// Shared emitter for the MxXy matrix opcodes. Emits `y` dot products of
// width `x` (DP3 or DP4); the i-th one multiplies source 0 by source i+1
// and writes destination component i.
//
//  x: dot product width, used to form the opcode name "DP<x>".
//  y: number of dot products (one per destination component).
static void arb1_MxXy(Context *ctx, const int x, const int y)
{
    DestArgInfo *dest = &ctx->dest_arg;
    const int saved_mask = dest->writemask;  // restored once we're done.
    char vec[64];
    int row = 0;

    make_ARB1_srcarg_string(ctx, 0, vec, sizeof (vec));

    while (row < y)
    {
        char matrow[64];
        char target[64];
        make_ARB1_srcarg_string(ctx, row + 1, matrow, sizeof (matrow));
        // Narrow the writemask to a single component for this row.
        set_dstarg_writemask(dest, 1 << row);
        make_ARB1_destarg_string(ctx, target, sizeof (target));
        output_line(ctx, "DP%d%s, %s, %s;", x, target, vec, matrow);
        row++;
    } // while

    set_dstarg_writemask(dest, saved_mask);
    emit_ARB1_dest_modifiers(ctx);
} // arb1_MxXy
// Matrix multiply opcodes. Each forwards to arb1_MxXy() with the dot product
// width followed by the number of dot products taken from the opcode name.
void emit_ARB1_M4X4(Context *ctx)
{
    arb1_MxXy(ctx, 4, 4);
} // emit_ARB1_M4X4

void emit_ARB1_M4X3(Context *ctx)
{
    arb1_MxXy(ctx, 4, 3);
} // emit_ARB1_M4X3

void emit_ARB1_M3X4(Context *ctx)
{
    arb1_MxXy(ctx, 3, 4);
} // emit_ARB1_M3X4

void emit_ARB1_M3X3(Context *ctx)
{
    arb1_MxXy(ctx, 3, 3);
} // emit_ARB1_M3X3

void emit_ARB1_M3X2(Context *ctx)
{
    arb1_MxXy(ctx, 3, 2);
} // emit_ARB1_M3X2
// CALL opcode: emits a CAL to the label named by source 0. Fails the
// translation when the profile lacks nv2 support (stock ARB1 cannot branch).
void emit_ARB1_CALL(Context *ctx)
{
    if (support_nv2(ctx))
    {
        char target[64];
        get_ARB1_srcarg_varname(ctx, 0, target, sizeof (target));
        output_line(ctx, "CAL %s;", target);
    } // if
    else
    {
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
    } // else
} // emit_ARB1_CALL
// CALLNZ opcode: conditional call. Source 1 is moved into a scratch register
// with MOVC (which updates the condition code), then a CAL to the label in
// source 0 is emitted, predicated on NE.x.
//
// !!! FIXME: a constant-bool source 1 could be resolved at translation time
// !!! FIXME: (always call, or no-op) instead of testing at runtime.
void emit_ARB1_CALLNZ(Context *ctx)
{
    if (!support_nv2(ctx))  // stock ARB1 cannot branch.
    {
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
        return;
    } // if

    // !!! FIXME: double-check this.
    char target[64];
    char cond_tmp[64];
    char predicate[64];

    get_ARB1_srcarg_varname(ctx, 0, target, sizeof (target));
    get_ARB1_srcarg_varname(ctx, 1, predicate, sizeof (predicate));
    allocate_ARB1_scratch_reg_name(ctx, cond_tmp, sizeof (cond_tmp));

    output_line(ctx, "MOVC %s, %s;", cond_tmp, predicate);
    output_line(ctx, "CAL %s (NE.x);", target);
} // emit_ARB1_CALLNZ
// LOOP is not implemented for this profile yet.
// !!! FIXME: needs BRA in nv2, LOOP in nv2 fragment progs, and REP in nv4.
// NOTE(review): EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC is defined outside this
// section; presumably it generates an emitter that fails the translation --
// confirm against the macro definition.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(LOOP) | ||||
// RET opcode: emits "RET;" when the profile supports nv2, then points output
// back at the mainline buffer.
//
// A missing nv2 is deliberately not treated as a failure: the RET may just
// be the end of the mainline, and a LABEL body that was never CALLed is
// being written to ctx->ignore anyway, so it gets thrown away.
void emit_ARB1_RET(Context *ctx)
{
    const int can_branch = support_nv2(ctx);

    if (can_branch)
        output_line(ctx, "RET;");

    // We may have been discarding an unused function; resume real output.
    set_output(ctx, &ctx->mainline);
} // emit_ARB1_RET
// ENDLOOP is not implemented for this profile yet (it pairs with LOOP, which
// is also generated by EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC).
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(ENDLOOP) | ||||
// LABEL opcode: starts a subroutine by emitting "<name>:" for source 0.
//
// Without nv2 support nothing is emitted and no failure is raised here; the
// label may never be used, and emit_ARB1_CALL fails if it is.
void emit_ARB1_LABEL(Context *ctx)
{
    if (support_nv2(ctx))
    {
        const int labelnum = ctx->source_args[0].regnum;
        char name[64];

        // MSDN says every CALL* precedes its LABEL, so a label that is not
        // in the used-register list is never called: keep parsing the
        // function, but send its output to the ignore buffer.
        if (reglist_find(&ctx->used_registers, REG_TYPE_LABEL, labelnum) == NULL)
            set_output(ctx, &ctx->ignore);

        // !!! FIXME: a function that is called exactly once could be
        // !!! FIXME: forcibly inlined instead.

        get_ARB1_srcarg_varname(ctx, 0, name, sizeof (name));
        output_line(ctx, "%s:", name);
    } // if
} // emit_ARB1_LABEL
// POW opcode: raises the absolute value of source 0 to the power source 1.
void emit_ARB1_POW(Context *ctx)
{
    // abs() discards the sign, so a source that is unmodified, negated, or
    // abs-then-negated can all be expressed with a plain |abs| modifier.
    // Including SRCMOD_NEGATE here matches emit_ARB1_RSQ and avoids a
    // scratch register plus an extra ABS instruction for negated sources.
    if ( (ctx->source_args[0].src_mod == SRCMOD_NONE) ||
         (ctx->source_args[0].src_mod == SRCMOD_NEGATE) ||
         (ctx->source_args[0].src_mod == SRCMOD_ABSNEGATE) )
        ctx->source_args[0].src_mod = SRCMOD_ABS;

    char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
    char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
    char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));

    if (ctx->source_args[0].src_mod == SRCMOD_ABS)
        output_line(ctx, "POW%s, %s, %s;", dst, src0, src1);
    else
    {
        // Another modifier is applied: take abs() of the modified source in
        // a scratch register and use its x component as the base.
        char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
        output_line(ctx, "ABS %s, %s;", buf, src0);
        output_line(ctx, "POW%s, %s.x, %s;", dst, buf, src1);
    } // else

    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_POW
// CRS opcode: emitted through the two-source helper under the arb1 opcode
// name XPD.
void emit_ARB1_CRS(Context *ctx)
{
    emit_ARB1_opcode_dss(ctx, "XPD");
} // emit_ARB1_CRS
// SGN opcode: writes -1, 0 or 1 according to the sign of source 0.
//
// nv2 and later have a native SSG instruction. Otherwise the result is built
// from two SLT comparisons:  sign = (0 < src) - (src < 0).
void emit_ARB1_SGN(Context *ctx)
{
    if (support_nv2(ctx))
        emit_ARB1_opcode_ds(ctx, "SSG");
    else
    {
        char dst[64];
        char src0[64];
        char scratch1[64];
        char scratch2[64];
        make_ARB1_destarg_string(ctx, dst, sizeof (dst));
        make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
        allocate_ARB1_scratch_reg_name(ctx, scratch1, sizeof (scratch1));
        allocate_ARB1_scratch_reg_name(ctx, scratch2, sizeof (scratch2));

        // scratch1 = (src < 0) ? 1.0 : 0.0
        output_line(ctx, "SLT %s, %s, 0.0;", scratch1, src0);

        // scratch2 = (0 < src) ? 1.0 : 0.0
        // Written with swapped operands instead of prefixing '-' to the
        // source string: the string may already carry a negate modifier, and
        // a doubled sign is not valid operand syntax.
        output_line(ctx, "SLT %s, 0.0, %s;", scratch2, src0);

        // dest = scratch2 - scratch1. Note the comma after the destination;
        // the destination string does not supply one.
        output_line(ctx, "ADD%s, -%s, %s;", dst, scratch1, scratch2);
        emit_ARB1_dest_modifiers(ctx);
    } // else
} // emit_ARB1_SGN
// Emitter for ABS, generated by the one-source EMIT_ARB1_OPCODE_DS_FUNC macro
// (defined outside this section).
EMIT_ARB1_OPCODE_DS_FUNC(ABS) | ||||
// NRM opcode: normalizes source 0.
//
// A native NRM is used on nv4, and on nv2 fragment programs. Everything else
// gets the expansion  dest = src * rsq(dot3(src, src)).
void emit_ARB1_NRM(Context *ctx)
{
    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
    {
        emit_ARB1_opcode_ds(ctx, "NRM");
        return;
    } // if

    char target[64];
    char vec[64];
    char tmp[64];

    make_ARB1_destarg_string(ctx, target, sizeof (target));
    make_ARB1_srcarg_string(ctx, 0, vec, sizeof (vec));
    allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp));

    // The w component of the scratch register carries the squared length,
    // then its reciprocal square root.
    output_line(ctx, "DP3 %s.w, %s, %s;", tmp, vec, vec);
    output_line(ctx, "RSQ %s.w, %s.w;", tmp, tmp);
    output_line(ctx, "MUL%s, %s.w, %s;", target, tmp, vec);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_NRM
// SINCOS opcode: writes cos(src) to dest.x and/or sin(src) to dest.y,
// depending on whether the writemask is exactly .x, .y or .xy. Any other
// writemask emits no instruction.
//
// Three strategies, picked by profile:
//  - fragment programs and anything nv4: native COS / SIN / SCS.
//  - nv2 vertex programs: native COS and SIN, one component at a time.
//  - stock arb1 vertex programs: a polynomial approximation.
void emit_ARB1_SINCOS(Context *ctx)
{
    // we don't care about the temp registers that <= sm2 demands; ignore them.
    const int mask = ctx->dest_arg.writemask;

    // arb1 fragment programs and everything nv4 have sin/cos/sincos opcodes.
    if ((shader_is_pixel(ctx)) || (support_nv4(ctx)))
    {
        char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
        char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
        if (writemask_x(mask))
            output_line(ctx, "COS%s, %s;", dst, src0);
        else if (writemask_y(mask))
            output_line(ctx, "SIN%s, %s;", dst, src0);
        else if (writemask_xy(mask))
            output_line(ctx, "SCS%s, %s;", dst, src0);
    } // if

    // nv2+ profiles have sin and cos opcodes.
    else if (support_nv2(ctx))
    {
        char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
        char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
        if (writemask_x(mask))
            output_line(ctx, "COS %s.x, %s;", dst, src0);
        else if (writemask_y(mask))
            output_line(ctx, "SIN %s.y, %s;", dst, src0);
        else if (writemask_xy(mask))
        {
            // Same component assignment as the single-component cases
            // above: sine goes to y, cosine goes to x.
            output_line(ctx, "SIN %s.y, %s;", dst, src0);
            output_line(ctx, "COS %s.x, %s;", dst, src0);
        } // else if
    } // else if

    else  // big nasty.
    {
        char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
        // !!! FIXME: this reads the .x component of the source register and
        // !!! FIXME: ignores whatever swizzle the shader asked for.
        char src0[64]; get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
        const int need_cos = (writemask_x(mask) || writemask_xy(mask));
        const int need_sin = (writemask_y(mask) || writemask_xy(mask));
        char scratch[64];

        if (need_sin || need_cos)
            allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));

        // These sin() and cos() approximations originally found here:
        //    http://www.devmaster.net/forums/showthread.php?t=5784
        //
        // const float B = 4.0f / M_PI;
        // const float C = -4.0f / (M_PI * M_PI);
        // float y = B * x + C * x * fabs(x);
        //
        // // optional better precision...
        // const float P = 0.225f;
        // y = P * (y * fabs(y) - y) + y;
        //
        // That first thing can be reduced to:
        // const float y = ((1.2732395447351626861510701069801f * x) +
        //             ((-0.40528473456935108577551785283891f * x) * fabs(x)));
        //
        // All intermediates live in the scratch register, so destination
        // components outside the writemask are never touched, and each
        // result is written only by the final MAD.

        if (need_sin)
        {
            // scratch.x = C * |x|
            output_line(ctx, "ABS %s.x, %s.x;", scratch, src0);
            output_line(ctx, "MUL %s.x, %s.x, -0.40528473456935108577551785283891;", scratch, scratch);
            // scratch.y = B * x
            output_line(ctx, "MUL %s.y, %s.x, 1.2732395447351626861510701069801;", scratch, src0);
            // dst.y = (C * |x|) * x + (B * x)
            output_line(ctx, "MAD %s.y, %s.x, %s.x, %s.y;", dst, scratch, src0, scratch);
        } // if

        // cosine is sin(x + M_PI/2), but you have to wrap x to pi:
        //  if (x+(M_PI/2) > M_PI)
        //      x -= 2 * M_PI;
        //
        // which is...
        //  if (x+(1.57079637050628662109375) > 3.1415927410125732421875)
        //      x += -6.283185482025146484375;

        if (need_cos)
        {
            // scratch.x = t = wrapped (x + pi/2)
            output_line(ctx, "ADD %s.x, %s.x, 1.57079637050628662109375;", scratch, src0);
            output_line(ctx, "SGE %s.y, %s.x, 3.1415927410125732421875;", scratch, scratch);
            output_line(ctx, "MAD %s.x, %s.y, -6.283185482025146484375, %s.x;", scratch, scratch, scratch);
            // scratch.y = C * |t|
            output_line(ctx, "ABS %s.y, %s.x;", scratch, scratch);
            output_line(ctx, "MUL %s.y, %s.y, -0.40528473456935108577551785283891;", scratch, scratch);
            // scratch.z = B * t
            output_line(ctx, "MUL %s.z, %s.x, 1.2732395447351626861510701069801;", scratch, scratch);
            // dst.x = (C * |t|) * t + (B * t)
            output_line(ctx, "MAD %s.x, %s.y, %s.x, %s.z;", dst, scratch, scratch, scratch);
        } // if
    } // else

    // !!! FIXME: might not have done anything. Don't emit if we didn't.
    if (!(ctx->isfail))
        emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_SINCOS
// REP opcode: opens a counted loop.
//
// nv4, and nv2 fragment programs, get a native "REP". Other nv2 programs
// emulate the loop with two branch labels (top of the loop, and the exit),
// both pushed on the branch label stack for ENDREP to pop. Stock ARB1
// cannot branch, so it fails.
void emit_ARB1_REP(Context *ctx)
{
    char count[64];
    make_ARB1_srcarg_string(ctx, 0, count, sizeof (count));

    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
    {
        output_line(ctx, "REP %s;", count);
        return;
    } // if

    if (!support_nv2(ctx))
    {
        fail(ctx, "branching unsupported in this profile");
        return;
    } // if

    // No REP here, but branches will do.
    char exit_name[32];
    char top_name[32];
    char counter[32];
    const int top_id = allocate_branch_label(ctx);
    const int exit_id = allocate_branch_label(ctx);

    get_ARB1_branch_label_name(ctx, exit_id, exit_name, sizeof (exit_name));
    get_ARB1_branch_label_name(ctx, top_id, top_name, sizeof (top_name));

    // Two slots are needed: the top label goes in first, the exit label
    // ends up on top of the stack.
    assert(((size_t) ctx->branch_labels_stack_index) <
            STATICARRAYLEN(ctx->branch_labels_stack)-1);
    ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = top_id;
    ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = exit_id;

    // Load the loop counter (setting the condition code), and skip the
    // whole loop body when the count is not positive.
    snprintf(counter, sizeof (counter), "rep%d", ctx->reps);
    output_line(ctx, "MOVC %s.x, %s;", counter, count);
    output_line(ctx, "BRA %s (LE.x);", exit_name);
    output_line(ctx, "%s:", top_name);
} // emit_ARB1_REP
// ENDREP opcode: closes a loop opened by emit_ARB1_REP.
//
// nv4, and nv2 fragment programs, get a native "ENDREP". Other nv2 programs
// pop the two labels REP pushed, decrement the counter, branch back to the
// top while it is still positive, and then place the exit label. Stock ARB1
// cannot branch, so it fails.
void emit_ARB1_ENDREP(Context *ctx)
{
    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
    {
        output_line(ctx, "ENDREP;");
        return;
    } // if

    if (!support_nv2(ctx))
    {
        fail(ctx, "branching unsupported in this profile");
        return;
    } // if

    // No ENDREP here, but branches will do.
    assert(ctx->branch_labels_stack_index >= 2);

    char exit_name[32];
    char top_name[32];
    char counter[32];

    // Pop in the reverse of REP's push order: exit label, then top label.
    const int exit_id = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];
    const int top_id = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];

    get_ARB1_branch_label_name(ctx, exit_id, exit_name, sizeof (exit_name));
    get_ARB1_branch_label_name(ctx, top_id, top_name, sizeof (top_name));

    snprintf(counter, sizeof (counter), "rep%d", ctx->reps);
    output_line(ctx, "SUBC %s.x, %s.x, 1.0;", counter, counter);
    output_line(ctx, "BRA %s (GT.x);", top_name);
    output_line(ctx, "%s:", exit_name);
} // emit_ARB1_ENDREP
// Opens a conditional block on an nv2-or-better profile. The caller MUST
// have set up the condition code register before calling this.
//
// nv4, and fragment programs, get a native IF. Otherwise a fresh branch
// label is pushed on the label stack and a conditional branch to it is
// emitted; ELSE/ENDIF place the label later.
//
// NOTE(review): the native path tests EQ.x to *enter* the block while the
// branch path tests EQ.x to *skip* it, so the two look like opposites --
// confirm which polarity is intended.
void nv2_if(Context *ctx)
{
    if ( (support_nv4(ctx)) || (shader_is_pixel(ctx)) )
    {
        output_line(ctx, "IF EQ.x;");
        return;
    } // if

    // No IF construct here; branch past the block instead.
    const int skip_id = allocate_branch_label(ctx);
    char skip_name[32];
    get_ARB1_branch_label_name(ctx, skip_id, skip_name, sizeof (skip_name));

    assert(((size_t) ctx->branch_labels_stack_index)
             < STATICARRAYLEN(ctx->branch_labels_stack));
    ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = skip_id;

    // !!! FIXME: should this be NE? (EQ would jump to the ELSE for the IF condition, right?).
    output_line(ctx, "BRA %s (EQ.x);", skip_name);
} // nv2_if
// IF opcode: moves source 0 into the x component of a scratch register with
// MOVC (updating the condition code), then lets nv2_if() open the block.
// Fails on stock ARB1, which has no branching.
void emit_ARB1_IF(Context *ctx)
{
    if (!support_nv2(ctx))
    {
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
        return;
    } // if

    char cond_tmp[64];
    char predicate[64];
    allocate_ARB1_scratch_reg_name(ctx, cond_tmp, sizeof (cond_tmp));
    get_ARB1_srcarg_varname(ctx, 0, predicate, sizeof (predicate));
    output_line(ctx, "MOVC %s.x, %s;", cond_tmp, predicate);
    nv2_if(ctx);
} // emit_ARB1_IF
// ELSE opcode.
//
// nv4, and nv2 fragment programs, get a native "ELSE". Other nv2 programs
// finish the IF body with an unconditional jump to a new end label, place
// the label that IF pushed (so a failed condition lands here), and leave the
// new end label on the stack for ENDIF. Stock ARB1 fails.
void emit_ARB1_ELSE(Context *ctx)
{
    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
    {
        output_line(ctx, "ELSE;");
        return;
    } // if

    if (!support_nv2(ctx))
    {
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
        return;
    } // if

    // No ELSE construct here; use labels and branches.
    assert(ctx->branch_labels_stack_index > 0);

    // The IF body is over: hop unconditionally to the (new) ENDIF label.
    const int end_id = allocate_branch_label(ctx);
    char end_name[32];
    get_ARB1_branch_label_name(ctx, end_id, end_name, sizeof (end_name));
    output_line(ctx, "BRA %s;", end_name);

    // The label on top of the stack marks where the ELSE section begins.
    const int top = ctx->branch_labels_stack_index - 1;
    const int else_id = ctx->branch_labels_stack[top];
    char else_name[32];
    get_ARB1_branch_label_name(ctx, else_id, else_name, sizeof (else_name));
    output_line(ctx, "%s:", else_name);

    // ENDIF should now place the end label instead.
    ctx->branch_labels_stack[top] = end_id;
} // emit_ARB1_ELSE
// ENDIF opcode.
//
// nv4, and nv2 fragment programs, get a native "ENDIF". Other nv2 programs
// pop the pending label off the branch label stack and place it here.
// Stock ARB1 fails, since it has no branching.
void emit_ARB1_ENDIF(Context *ctx)
{
    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
    {
        output_line(ctx, "ENDIF;");
        return;
    } // if

    if (!support_nv2(ctx))
    {
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
        return;
    } // if

    // No ENDIF construct here; the conditional simply ends at a label.
    assert(ctx->branch_labels_stack_index > 0);
    const int end_id = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];
    char end_name[32];
    get_ARB1_branch_label_name(ctx, end_id, end_name, sizeof (end_name));
    output_line(ctx, "%s:", end_name);
} // emit_ARB1_ENDIF
// BREAK opcode: leaves the innermost loop.
//
// nv4, and nv2 fragment programs, get a native "BRK". Other nv2 programs
// branch to the loop's exit label, which emit_ARB1_REP pushed last on the
// branch label stack. Stock ARB1 fails, since it has no branching.
void emit_ARB1_BREAK(Context *ctx)
{
    // nv2 fragment programs (and everything nv4) have a real BREAK.
    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
        output_line(ctx, "BRK;");
    else if (support_nv2(ctx))
    {
        // no BREAK, but we can use branches.
        assert(ctx->branch_labels_stack_index >= 2);

        // branch_labels_stack_index points one past the top of the stack,
        // so the most recently pushed label (the exit label) is at index-1.
        // NOTE(review): if this BREAK sits inside an emulated IF nested in
        // the loop, the top entry would be the IF's label rather than the
        // loop's exit label -- confirm whether that case needs handling.
        const int faillabel = ctx->branch_labels_stack[ctx->branch_labels_stack_index-1];
        char failbranch[32];
        get_ARB1_branch_label_name(ctx,faillabel,failbranch,sizeof(failbranch));
        output_line(ctx, "BRA %s;", failbranch);
    } // else if
    else  // stock ARB1 has no branching.
    {
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
    } // else
} // emit_ARB1_BREAK
// MOVA opcode: moves a rounded value into an address register.
//
//  - nv4 removed ARR and ADDRESS registers, so we just ROUND to an int.
//  - nv2 and nv3 use the ARR opcode.
//  - stock arb1 computes sign(src) * floor(abs(src) + 0.5) into a variable
//    named "addr<N>"; the real address register is loaded later, as needed.
void emit_ARB1_MOVA(Context *ctx)
{
    if (support_nv4(ctx))
    {
        emit_ARB1_opcode_ds(ctx, "ROUND.S");  // !!! FIXME: don't use a modifier here.
        return;
    } // if

    if ((support_nv2(ctx)) || (support_nv3(ctx)))
    {
        emit_ARB1_opcode_ds(ctx, "ARR");
        return;
    } // if

    char value[64];
    char sign[64];
    char addrname[32];

    make_ARB1_srcarg_string(ctx, 0, value, sizeof (value));
    allocate_ARB1_scratch_reg_name(ctx, sign, sizeof (sign));
    snprintf(addrname, sizeof (addrname), "addr%d", ctx->dest_arg.regnum);

    // !!! FIXME: we can optimize this if src_mod is ABS or ABSNEGATE.

    // ARL uses floor(), but D3D expects round-to-nearest.
    // There is probably a more efficient way to do this.

    // First, get the sign of the source (-1.0 or 1.0) into the scratch reg.
    if (!shader_is_pixel(ctx))
    {
        // CMP only exists in fragment programs, so build it by hand:
        // (src < 0) gives 1 or 0, and  that * -2 + 1  gives -1 or 1.
        output_line(ctx, "SLT %s, %s, 0.0;", sign, value);
        output_line(ctx, "MAD %s, %s, -2.0, 1.0;", sign, sign);
    } // if
    else
    {
        output_line(ctx, "CMP %s, %s, -1.0, 1.0;", sign, value);
    } // else

    // Round the magnitude, then put the sign back.
    output_line(ctx, "ABS %s, %s;", addrname, value);
    output_line(ctx, "ADD %s, %s, 0.5;", addrname, addrname);
    output_line(ctx, "FLR %s, %s;", addrname, addrname);
    output_line(ctx, "MUL %s, %s, %s;", addrname, addrname, sign);

    // Destination modifiers aren't handled on this path, because
    // emit_ARB1_dest_modifiers() wants to look at dest_arg, not our
    // temp register.
    assert(ctx->dest_arg.result_mod == 0);
    assert(ctx->dest_arg.result_shift == 0);

    // we assign to the actual address register as needed.
    ctx->last_address_reg_component = -1;
} // emit_ARB1_MOVA
// TEXKILL opcode: emits KIL on the destination register.
//
// D3D tests xyz while arb1 tests xyzw, so the swizzle repeats x in the w
// slot. If x is negative the fragment was going to be discarded anyway;
// otherwise the duplicate changes nothing. This avoids a temp register.
void emit_ARB1_TEXKILL(Context *ctx)
{
    char regname[64];
    get_ARB1_destarg_varname(ctx, regname, sizeof (regname));
    output_line(ctx, "KIL %s.xyzx;", regname);
} // emit_ARB1_TEXKILL
// Shared emitter for TEXBEM and TEXBEML.
//
// Multiplies source 0 by the sampler's "<sampler>_texbem" variable, adds the
// result to the destination register's xy, and samples the 2D texture for
// this stage with that coordinate. When `luminance` is non-zero (TEXBEML),
// the sample is additionally scaled using "<sampler>_texbeml".
//
// The stage number is the destination register number.
//
// !!! FIXME: this code counts on the register not having swizzles, etc.
static void arb1_texbem(Context *ctx, const int luminance)
{
    const int stage = ctx->dest_arg.regnum;
    char target[64];
    char bump[64];
    char work[64];
    char samplername[64];

    get_ARB1_destarg_varname(ctx, target, sizeof (target));
    get_ARB1_srcarg_varname(ctx, 0, bump, sizeof (bump));
    allocate_ARB1_scratch_reg_name(ctx, work, sizeof (work));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage,
                            samplername, sizeof (samplername));

    output_line(ctx, "MUL %s, %s_texbem.xzyw, %s.xyxy;", work, samplername, bump);
    output_line(ctx, "ADD %s.xy, %s.xzxx, %s.ywxx;", work, work, work);
    output_line(ctx, "ADD %s.xy, %s, %s;", work, work, target);
    output_line(ctx, "TEX %s, %s, texture[%d], 2D;", target, work, stage);

    if (luminance != 0)  // TEXBEML, not just TEXBEM?
    {
        output_line(ctx, "MAD %s, %s.zzzz, %s_texbeml.xxxx, %s_texbeml.yyyy;",
                    work, bump, samplername, samplername);
        output_line(ctx, "MUL %s, %s, %s;", target, target, work);
    } // if

    emit_ARB1_dest_modifiers(ctx);
} // arb1_texbem
// TEXBEM opcode: arb1_texbem() without the luminance step.
void emit_ARB1_TEXBEM(Context *ctx) { arb1_texbem(ctx, 0); }
// TEXBEML opcode: arb1_texbem() with the luminance step enabled.
void emit_ARB1_TEXBEML(Context *ctx) { arb1_texbem(ctx, 1); }
// TEXREG2AR and TEXREG2GB are not implemented for this profile; their
// emitters come from EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC (defined outside
// this section).
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR) | ||||
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB) | ||||
// TEXM3X2PAD opcode: emits nothing by itself. The instructions are generated
// when the matching TEXM3X2TEX arrives (see emit_ARB1_TEXM3X2TEX()).
void emit_ARB1_TEXM3X2PAD(Context *ctx)
{
    (void) ctx;  // no-op.
} // emit_ARB1_TEXM3X2PAD
// TEXM3X2TEX opcode: second half of a TEXM3X2PAD/TEXM3X2TEX pair.
//
// Emits two DP3s that build a 2D coordinate in the destination register
// (x from the registers recorded in ctx->texm3x2pad_src0/dst0, y from this
// instruction's source and destination), then samples the 2D texture for
// this stage. Does nothing if ctx->texm3x2pad_src0 is -1.
//
// !!! FIXME: this code counts on the register not having swizzles, etc.
void emit_ARB1_TEXM3X2TEX(Context *ctx)
{
    if (ctx->texm3x2pad_src0 != -1)
    {
        const int stage = ctx->dest_arg.regnum;
        char target[64];
        char pad_src[64];
        char pad_dst[64];
        char this_src[64];

        get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_src0,
                                pad_src, sizeof (pad_src));
        get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_dst0,
                                pad_dst, sizeof (pad_dst));
        get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
                                this_src, sizeof (this_src));
        get_ARB1_destarg_varname(ctx, target, sizeof (target));

        // y is computed first because it reads the destination register.
        output_line(ctx, "DP3 %s.y, %s, %s;", target, this_src, target);
        output_line(ctx, "DP3 %s.x, %s, %s;", target, pad_src, pad_dst);
        output_line(ctx, "TEX %s, %s, texture[%d], 2D;", target, target, stage);
        emit_ARB1_dest_modifiers(ctx);
    } // if
} // emit_ARB1_TEXM3X2TEX
// TEXM3X3PAD opcode: emits nothing by itself. The instructions are generated
// by whichever emit_ARB1_TEXM3X3*() opcode completes the sequence.
void emit_ARB1_TEXM3X3PAD(Context *ctx)
{
    (void) ctx;  // no-op.
} // emit_ARB1_TEXM3X3PAD
// TEXM3X3TEX opcode: final step of a TEXM3X3PAD/PAD/TEX sequence.
//
// Emits three DP3s that build a 3-component coordinate in the destination
// register (x and y from the register pairs recorded by the two PAD
// instructions, z from this instruction's destination and source), then
// samples this stage's texture. The texture target is "CUBE" when the
// stage's sampler is registered as a cube map, and "3D" otherwise
// (including when no sampler is registered for the stage).
// Does nothing if ctx->texm3x3pad_src1 is -1.
//
// !!! FIXME: this code counts on the register not having swizzles, etc.
void emit_ARB1_TEXM3X3TEX(Context *ctx)
{
    if (ctx->texm3x3pad_src1 != -1)
    {
        const int stage = ctx->dest_arg.regnum;
        char target[64];
        char pad0_dst[64];
        char pad0_src[64];
        char pad1_dst[64];
        char pad1_src[64];
        char this_src[64];

        get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
                                pad0_dst, sizeof (pad0_dst));
        get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
                                pad0_src, sizeof (pad0_src));
        get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
                                pad1_dst, sizeof (pad1_dst));
        get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
                                pad1_src, sizeof (pad1_src));
        get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
                                this_src, sizeof (this_src));
        get_ARB1_destarg_varname(ctx, target, sizeof (target));

        // Pick the texture target from the sampler registered for the stage.
        RegisterList *sampler = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage);
        TextureType textype = (TextureType) 0;
        if (sampler)
            textype = (TextureType) sampler->index;

        const char *targetstr = "3D";
        if (textype == TEXTURE_TYPE_CUBE)
            targetstr = "CUBE";

        // z is computed first because it reads the destination register.
        output_line(ctx, "DP3 %s.z, %s, %s;", target, target, this_src);
        output_line(ctx, "DP3 %s.x, %s, %s;", target, pad0_dst, pad0_src);
        output_line(ctx, "DP3 %s.y, %s, %s;", target, pad1_dst, pad1_src);
        output_line(ctx, "TEX %s, %s, texture[%d], %s;", target, target, stage, targetstr);
        emit_ARB1_dest_modifiers(ctx);
    } // if
} // emit_ARB1_TEXM3X3TEX
// Emit the ARB1 code for TEXM3X3SPEC: three DP3s build a normal in dst.xyz,
//  that normal is combined with the eye-ray register (source 1) in two
//  scratch registers, and the resulting vector is used for a TEX lookup on
//  the texture stage named by the destination register number.
void emit_ARB1_TEXM3X3SPEC(Context *ctx)
{
    // NOTE(review): -1 presumably means no TEXM3X3PAD state was recorded.
    if (ctx->texm3x3pad_src1 == -1)
        return;

    // !!! FIXME: this code counts on the register not having swizzles, etc.
    const int stage = ctx->dest_arg.regnum;

    char work[64];
    allocate_ARB1_scratch_reg_name(ctx, work, sizeof (work));
    char recip[64];
    allocate_ARB1_scratch_reg_name(ctx, recip, sizeof (recip));

    char pad0_dst[64];
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
                            pad0_dst, sizeof (pad0_dst));
    char pad0_src[64];
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
                            pad0_src, sizeof (pad0_src));
    char pad1_dst[64];
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
                            pad1_dst, sizeof (pad1_dst));
    char pad1_src[64];
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
                            pad1_src, sizeof (pad1_src));
    char last_src[64];
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
                            last_src, sizeof (last_src));
    char eyeray[64];
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[1].regnum,
                            eyeray, sizeof (eyeray));
    char dst[64];
    get_ARB1_destarg_varname(ctx, dst, sizeof (dst));

    // An undeclared sampler is treated as texture type 0.
    RegisterList *sampler = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
                                         stage);
    TextureType textype = (TextureType) 0;
    if (sampler != NULL)
        textype = (TextureType) sampler->index;

    const char *target = "3D";
    if (textype == TEXTURE_TYPE_CUBE)
        target = "CUBE";

    // dst.z goes first, since that DP3 still reads the original dst.
    output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, last_src);
    output_line(ctx, "DP3 %s.x, %s, %s;", dst, pad0_dst, pad0_src);
    output_line(ctx, "DP3 %s.y, %s, %s;", dst, pad1_dst, pad1_src);

    output_line(ctx, "MUL %s, %s, %s;", work, dst, dst);  // normal * normal
    output_line(ctx, "MUL %s, %s, %s;", recip, dst, eyeray);  // normal * eyeray

    // !!! FIXME: This is goofy. There's got to be a way to do vector-wide
    // !!! FIXME:  divides or reciprocals...right?
    output_line(ctx, "RCP %s.x, %s.x;", recip, recip);
    output_line(ctx, "RCP %s.y, %s.y;", recip, recip);
    output_line(ctx, "RCP %s.z, %s.z;", recip, recip);
    output_line(ctx, "RCP %s.w, %s.w;", recip, recip);

    output_line(ctx, "MUL %s, %s, %s;", work, work, recip);
    output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", work, work);
    output_line(ctx, "MAD %s, %s, %s, -%s;", work, work, dst, eyeray);
    output_line(ctx, "TEX %s, %s, texture[%d], %s;", dst, work, stage, target);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_TEXM3X3SPEC
// Emit the ARB1 code for TEXM3X3VSPEC. Same shape as TEXM3X3SPEC, except
//  the eye-ray is not a source register: it is gathered into a scratch
//  register from the .w components of the three row registers before the
//  DP3s overwrite dst.
void emit_ARB1_TEXM3X3VSPEC(Context *ctx)
{
    // NOTE(review): -1 presumably means no TEXM3X3PAD state was recorded.
    if (ctx->texm3x3pad_src1 == -1)
        return;

    // !!! FIXME: this code counts on the register not having swizzles, etc.
    const int stage = ctx->dest_arg.regnum;

    char work[64];
    allocate_ARB1_scratch_reg_name(ctx, work, sizeof (work));
    char recip[64];
    allocate_ARB1_scratch_reg_name(ctx, recip, sizeof (recip));
    char eyeray[64];
    allocate_ARB1_scratch_reg_name(ctx, eyeray, sizeof (eyeray));

    char pad0_dst[64];
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
                            pad0_dst, sizeof (pad0_dst));
    char pad0_src[64];
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
                            pad0_src, sizeof (pad0_src));
    char pad1_dst[64];
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
                            pad1_dst, sizeof (pad1_dst));
    char pad1_src[64];
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
                            pad1_src, sizeof (pad1_src));
    char last_src[64];
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
                            last_src, sizeof (last_src));
    char dst[64];
    get_ARB1_destarg_varname(ctx, dst, sizeof (dst));

    // An undeclared sampler is treated as texture type 0.
    RegisterList *sampler = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
                                         stage);
    TextureType textype = (TextureType) 0;
    if (sampler != NULL)
        textype = (TextureType) sampler->index;

    const char *target = "3D";
    if (textype == TEXTURE_TYPE_CUBE)
        target = "CUBE";

    // Collect the eye-ray from the rows' .w components.
    output_line(ctx, "MOV %s.x, %s.w;", eyeray, pad0_dst);
    output_line(ctx, "MOV %s.y, %s.w;", eyeray, pad1_dst);
    output_line(ctx, "MOV %s.z, %s.w;", eyeray, dst);

    // dst.z goes first, since that DP3 still reads the original dst.
    output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, last_src);
    output_line(ctx, "DP3 %s.x, %s, %s;", dst, pad0_dst, pad0_src);
    output_line(ctx, "DP3 %s.y, %s, %s;", dst, pad1_dst, pad1_src);

    output_line(ctx, "MUL %s, %s, %s;", work, dst, dst);  // normal * normal
    output_line(ctx, "MUL %s, %s, %s;", recip, dst, eyeray);  // normal * eyeray

    // !!! FIXME: This is goofy. There's got to be a way to do vector-wide
    // !!! FIXME:  divides or reciprocals...right?
    output_line(ctx, "RCP %s.x, %s.x;", recip, recip);
    output_line(ctx, "RCP %s.y, %s.y;", recip, recip);
    output_line(ctx, "RCP %s.z, %s.z;", recip, recip);
    output_line(ctx, "RCP %s.w, %s.w;", recip, recip);

    output_line(ctx, "MUL %s, %s, %s;", work, work, recip);
    output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", work, work);
    output_line(ctx, "MAD %s, %s, %s, -%s;", work, work, dst, eyeray);
    output_line(ctx, "TEX %s, %s, texture[%d], %s;", dst, work, stage, target);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_TEXM3X3VSPEC
// EXPP is emitted as ARB1's "EX2" opcode.
void emit_ARB1_EXPP(Context *ctx)
{
    emit_ARB1_opcode_ds(ctx, "EX2");
} // emit_ARB1_EXPP
// LOGP goes through the shared arb1_log() helper with the "LG2" opcode.
void emit_ARB1_LOGP(Context *ctx)
{
    arb1_log(ctx, "LG2");
} // emit_ARB1_LOGP
// Emit the ARB1 code for D3D's CND:
//    dst = (src0 > 0.5) ? src1 : src2
// ARB1 has no direct equivalent, so this is built from a SUB into a scratch
//  register followed by a CMP.
void emit_ARB1_CND(Context *ctx)
{
    char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
    char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
    char src1[64]; make_ARB1_srcarg_string(ctx, 1, src1, sizeof (src1));
    char src2[64]; make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
    char tmp[64]; allocate_ARB1_scratch_reg_name(ctx, tmp, sizeof (tmp));

    // CND tests (src0 > 0.5), but ARB1's CMP tests (operand < 0.0) and
    //  picks its second operand when that is true. Computing (0.5 - src0)
    //  gives a value that is negative exactly when (src0 > 0.5), so src1
    //  and src2 can be passed through in their natural order. (Computing
    //  (src0 - 0.5) and swapping src1/src2 instead would select src1 when
    //  src0 is exactly 0.5, which is not what D3D does.)
    output_line(ctx, "SUB %s, { 0.5, 0.5, 0.5, 0.5 }, %s;", tmp, src0);
    output_line(ctx, "CMP%s, %s, %s, %s;", dst, tmp, src1, src2);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_CND
// These opcodes have no hand-written ARB1 emitter in this file.
// NOTE(review): EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC is defined outside this
//  view; presumably it generates an emit_ARB1_<opcode>() that reports the
//  opcode as unimplemented -- confirm against the macro definition.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB) | ||||
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX) | ||||
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH) | ||||
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3) | ||||
// Emit the ARB1 code for TEXM3X3: three DP3s (using the registers saved in
//  ctx->texm3x3pad_* plus this instruction's own dest/source) fill in
//  dst.xyz, and dst.w is set to 1.0. No texture lookup is performed.
void emit_ARB1_TEXM3X3(Context *ctx)
{
    // NOTE(review): -1 presumably means no TEXM3X3PAD state was recorded.
    if (ctx->texm3x3pad_src1 == -1)
        return;

    // !!! FIXME: this code counts on the register not having swizzles, etc.
    char pad0_dst[64];
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
                            pad0_dst, sizeof (pad0_dst));
    char pad0_src[64];
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
                            pad0_src, sizeof (pad0_src));
    char pad1_dst[64];
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
                            pad1_dst, sizeof (pad1_dst));
    char pad1_src[64];
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
                            pad1_src, sizeof (pad1_src));
    char last_src[64];
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
                            last_src, sizeof (last_src));
    char dst[64];
    get_ARB1_destarg_varname(ctx, dst, sizeof (dst));

    // dst.z goes first, since that DP3 still reads the original dst.
    output_line(ctx, "DP3 %s.z, %s, %s;", dst, dst, last_src);
    output_line(ctx, "DP3 %s.x, %s, %s;", dst, pad0_dst, pad0_src);
    output_line(ctx, "DP3 %s.y, %s, %s;", dst, pad1_dst, pad1_src);
    output_line(ctx, "MOV %s.w, { 1.0, 1.0, 1.0, 1.0 };", dst);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_TEXM3X3
// TEXDEPTH has no hand-written ARB1 emitter in this file.
// NOTE(review): the macro is defined outside this view; presumably it
//  generates an emitter that reports the opcode as unimplemented -- confirm.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH) | ||||
// Emit the ARB1 code for D3D's CMP. Both instruction sets have a CMP
//  opcode, but they test in opposite directions, so the two value operands
//  are exchanged when emitting.
void emit_ARB1_CMP(Context *ctx)
{
    char dest[64];
    char cond[64];
    char if_ge[64];
    char if_lt[64];

    make_ARB1_destarg_string(ctx, dest, sizeof (dest));
    make_ARB1_srcarg_string(ctx, 0, cond, sizeof (cond));
    make_ARB1_srcarg_string(ctx, 1, if_ge, sizeof (if_ge));
    make_ARB1_srcarg_string(ctx, 2, if_lt, sizeof (if_lt));

    // D3D tests (src0 >= 0.0), but ARB1 tests (src0 < 0.0) ... so just
    //  switch src1 and src2 to get the same results.
    output_line(ctx, "CMP%s, %s, %s, %s;", dest, cond, if_lt, if_ge);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_CMP
// BEM has no hand-written ARB1 emitter in this file.
// NOTE(review): the macro is defined outside this view; presumably it
//  generates an emitter that reports the opcode as unimplemented -- confirm.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BEM) | ||||
// Emit the ARB1 code for DP2ADD:
//    dst = (src0.r * src1.r) + (src0.g * src1.g) + src2.replicate_swiz
// nv4 gets the native "DP2A" opcode; everything else gets a MUL/ADD/ADD
//  sequence through a scratch register.
void emit_ARB1_DP2ADD(Context *ctx)
{
    if (support_nv4(ctx))  // nv4 has a built-in equivalent to DP2ADD.
    {
        emit_ARB1_opcode_dsss(ctx, "DP2A");
        return;
    } // if

    char dest[64];
    char a[64];
    char b[64];
    char addend[64];
    char products[64];

    make_ARB1_destarg_string(ctx, dest, sizeof (dest));
    make_ARB1_srcarg_string(ctx, 0, a, sizeof (a));
    make_ARB1_srcarg_string(ctx, 1, b, sizeof (b));
    make_ARB1_srcarg_string(ctx, 2, addend, sizeof (addend));
    allocate_ARB1_scratch_reg_name(ctx, products, sizeof (products));

    // Per-component products, then sum the first two, then add src2.
    output_line(ctx, "MUL %s, %s, %s;", products, a, b);
    output_line(ctx, "ADD %s, %s.x, %s.y;", products, products, products);
    output_line(ctx, "ADD%s, %s.x, %s;", dest, products, addend);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_DP2ADD
// Emit DSX as the "DDX" opcode when nv2 is available; otherwise report
//  that the profile can't do it.
void emit_ARB1_DSX(Context *ctx)
{
    if (!support_nv2(ctx))
    {
        failf(ctx, "DSX unsupported in %s profile", ctx->profile->name);
        return;
    } // if

    // nv2 has a built-in equivalent to DSX.
    emit_ARB1_opcode_ds(ctx, "DDX");
} // emit_ARB1_DSX
// Emit DSY as the "DDY" opcode when nv2 is available; otherwise report
//  that the profile can't do it.
void emit_ARB1_DSY(Context *ctx)
{
    if (!support_nv2(ctx))
    {
        failf(ctx, "DSY unsupported in %s profile", ctx->profile->name);
        return;
    } // if

    // nv2 has a built-in equivalent to DSY.
    emit_ARB1_opcode_ds(ctx, "DDY");
} // emit_ARB1_DSY
// Shared emitter for the texture-load opcodes.
//  opcode: the ARB1/NV opcode name to emit ("TEX", "TXP", "TXD", ...).
//  texldd: non-zero to also emit source args 2 and 3 as extra operands.
// Before Shader Model 1.4, the destination register supplies both the
//  coordinate register and the sampler number; otherwise those come from
//  source args 0 and 1. Fails if the sampler was never declared or has an
//  unknown texture type.
static void arb1_texld(Context *ctx, const char *opcode, const int texldd)
{
    // !!! FIXME: Hack: "TEXH" is invalid in nv4. Fix this more cleanly.
    if ((ctx->dest_arg.result_mod & MOD_PP) && (support_nv4(ctx)))
        ctx->dest_arg.result_mod &= ~MOD_PP;

    char dest[64];
    make_ARB1_destarg_string(ctx, dest, sizeof (dest));

    const int sm1 = !shader_version_atleast(ctx, 1, 4);

    int samplernum;
    if (sm1)
        samplernum = ctx->dest_arg.regnum;
    else
        samplernum = ctx->source_args[1].regnum;

    RegisterList *sampler = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER,
                                         samplernum);

    char coord[64];
    if (sm1)
        get_ARB1_destarg_varname(ctx, coord, sizeof (coord));
    else
        get_ARB1_srcarg_varname(ctx, 0, coord, sizeof (coord));

    // !!! FIXME: source arg 1's modifiers (SRC_MOD?) are not looked at here.

    char extra2[64] = { 0 };
    char extra3[64] = { 0 };
    if (texldd)
    {
        make_ARB1_srcarg_string(ctx, 2, extra2, sizeof (extra2));
        make_ARB1_srcarg_string(ctx, 3, extra3, sizeof (extra3));
    } // if

    // !!! FIXME: this should be in state_TEXLD, not in the arb1/glsl emitters.
    if (sampler == NULL)
    {
        fail(ctx, "TEXLD using undeclared sampler");
        return;
    } // if

    // SM1 only specifies dst, so don't check swizzle there.
    if ((!sm1) && (!no_swizzle(ctx->source_args[1].swizzle)))
    {
        // !!! FIXME: does this ever actually happen?
        fail(ctx, "BUG: can't handle TEXLD with sampler swizzle at the moment");
    } // if

    const char *target = NULL;
    switch ((const TextureType) sampler->index)
    {
        case TEXTURE_TYPE_2D:
            target = "2D";  // !!! FIXME: "RECT"?
            break;
        case TEXTURE_TYPE_CUBE:
            target = "CUBE";
            break;
        case TEXTURE_TYPE_VOLUME:
            target = "3D";
            break;
        default:
            fail(ctx, "unknown texture type");
            return;
    } // switch

    if (!texldd)
    {
        output_line(ctx, "%s%s, %s, texture[%d], %s;", opcode, dest, coord,
                    samplernum, target);
        return;
    } // if

    output_line(ctx, "%s%s, %s, %s, %s, texture[%d], %s;", opcode, dest,
                coord, extra2, extra3, samplernum, target);
} // arb1_texld
// Emit TEXLDD. With GL_NV_fragment_program2 there's a TXD opcode for this;
//  stock arb1 has to settle for a standard TEX, which isn't perfect, but
//  oh well.
void emit_ARB1_TEXLDD(Context *ctx)
{
    const int have_txd = support_nv2(ctx) ? 1 : 0;
    arb1_texld(ctx, have_txd ? "TXD" : "TEX", have_txd);
} // emit_ARB1_TEXLDD
// Emit TEXLDL as the "TXL" opcode. Vertex shaders need nv3 for this and
//  pixel shaders need nv2; anything else is reported as unsupported.
void emit_ARB1_TEXLDL(Context *ctx)
{
    if (shader_is_vertex(ctx) && !support_nv3(ctx))
    {
        failf(ctx, "Vertex shader TEXLDL unsupported in %s profile",
              ctx->profile->name);
        return;
    } // if

    if (shader_is_pixel(ctx) && !support_nv2(ctx))
    {
        failf(ctx, "Pixel shader TEXLDL unsupported in %s profile",
              ctx->profile->name);
        return;
    } // if

    // !!! FIXME: this doesn't map exactly to TEXLDL. Review this.
    arb1_texld(ctx, "TXL", 0);
} // emit_ARB1_TEXLDL
// BREAKP and BREAKC have no hand-written ARB1 emitters in this file.
// NOTE(review): the macro is defined outside this view; presumably it
//  generates emitters that report the opcode as unimplemented -- confirm.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BREAKP) | ||||
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BREAKC) | ||||
// Emit the start of an IFC (if-with-comparison) block. On nv2 this emits a
//  "set on <comparison>" opcode with the condition-code suffix into a
//  scratch register, then hands off to nv2_if() (defined elsewhere in this
//  file). Stock ARB1 has no branching, so it fails there.
void emit_ARB1_IFC(Context *ctx)
{
    if (support_nv2(ctx))
    {
        // Indexed by the D3D comparison control:
        //  1 = GT, 2 = EQ, 3 = GE, 4 = LT, 5 = NE, 6 = LE.
        // (Index 4 must be "SLTC"; it previously duplicated "SGTC", which
        //  turned every less-than test into a greater-than test.)
        static const char *comps[] = {
            "", "SGTC", "SEQC", "SGEC", "SLTC", "SNEC", "SLEC"
        };

        if (ctx->instruction_controls >= STATICARRAYLEN(comps))
        {
            fail(ctx, "unknown comparison control");
            return;
        } // if

        char src0[64];
        char src1[64];
        char scratch[64];
        const char *comp = comps[ctx->instruction_controls];
        get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
        get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1));
        allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
        output_line(ctx, "%s %s.x, %s, %s;", comp, scratch, src0, src1);
        nv2_if(ctx);
    } // if

    else  // stock ARB1 has no branching.
    {
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
    } // else
} // emit_ARB1_IFC
// SETP has no hand-written ARB1 emitter in this file.
// NOTE(review): the macro is defined outside this view; presumably it
//  generates an emitter that reports the opcode as unimplemented -- confirm.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(SETP) | ||||
// Emit a DEF (float constant definition) as a four-component PARAM
//  declaration, written to the globals output section instead of the
//  current one.
void emit_ARB1_DEF(Context *ctx)
{
    const float *val = (const float *) ctx->dwords; // !!! FIXME: could be int?

    char name[64];
    get_ARB1_destarg_varname(ctx, name, sizeof (name));

    // Format each of the four components as text.
    char comps[4][32];
    int i;
    for (i = 0; i < 4; i++)
        floatstr(ctx, comps[i], sizeof (comps[i]), val[i], 1);

    push_output(ctx, &ctx->globals);
    output_line(ctx, "PARAM %s = { %s, %s, %s, %s };",
                name, comps[0], comps[1], comps[2], comps[3]);
    pop_output(ctx);
} // emit_ARB1_DEF
// Emit a DEFI (integer constant definition) as a four-component PARAM
//  declaration, written to the globals output section instead of the
//  current one.
void emit_ARB1_DEFI(Context *ctx)
{
    char name[64];
    get_ARB1_destarg_varname(ctx, name, sizeof (name));

    const int32 *ints = (const int32 *) ctx->dwords;
    const int x = (int) ints[0];
    const int y = (int) ints[1];
    const int z = (int) ints[2];
    const int w = (int) ints[3];

    push_output(ctx, &ctx->globals);
    output_line(ctx, "PARAM %s = { %d, %d, %d, %d };", name, x, y, z, w);
    pop_output(ctx);
} // emit_ARB1_DEFI
// Emit a DEFB (boolean constant definition) as a PARAM set to 1 or 0,
//  written to the globals output section instead of the current one.
void emit_ARB1_DEFB(Context *ctx)
{
    char name[64];
    get_ARB1_destarg_varname(ctx, name, sizeof (name));

    // Any non-zero dword counts as true.
    int truth = 0;
    if (ctx->dwords[0])
        truth = 1;

    push_output(ctx, &ctx->globals);
    output_line(ctx, "PARAM %s = %d;", name, truth);
    pop_output(ctx);
} // emit_ARB1_DEFB
// DCL produces no output here: declarations are handled by our
//  emit_attribute() and emit_uniform().
void emit_ARB1_DCL(Context *ctx)
{
    (void) ctx;  // intentionally unused.
} // emit_ARB1_DCL
// TEXCRD has no hand-written ARB1 emitter in this file.
// NOTE(review): the macro is defined outside this view; presumably it
//  generates an emitter that reports the opcode as unimplemented -- confirm.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD) | ||||
// Emit TEXLD. Shaders older than 1.4 always get a plain "TEX"; Shader
//  Model 1.4 is not implemented; 2.0 and later pick the opcode from the
//  instruction's control value (TEXLD/TEXLDP/TEXLDB). Any other control
//  value emits nothing.
void emit_ARB1_TEXLD(Context *ctx)
{
    if (!shader_version_atleast(ctx, 1, 4))
    {
        arb1_texld(ctx, "TEX", 0);
        return;
    } // if

    if (!shader_version_atleast(ctx, 2, 0))
    {
        // ps_1_4 is different, too!
        fail(ctx, "TEXLD == Shader Model 1.4 unimplemented.");  // !!! FIXME
        return;
    } // if

    // !!! FIXME: do texldb and texldp map between OpenGL and D3D correctly?
    const char *opcode = NULL;
    if (ctx->instruction_controls == CONTROL_TEXLD)
        opcode = "TEX";
    else if (ctx->instruction_controls == CONTROL_TEXLDP)
        opcode = "TXP";
    else if (ctx->instruction_controls == CONTROL_TEXLDB)
        opcode = "TXB";

    if (opcode != NULL)
        arb1_texld(ctx, opcode, 0);
} // emit_ARB1_TEXLD
#undef EMIT_ARB1_OPCODE_FUNC | ||||
#undef EMIT_ARB1_OPCODE_D_FUNC | ||||
#undef EMIT_ARB1_OPCODE_S_FUNC | ||||
#undef EMIT_ARB1_OPCODE_SS_FUNC | ||||
#undef EMIT_ARB1_OPCODE_DS_FUNC | ||||
#undef EMIT_ARB1_OPCODE_DSS_FUNC | ||||
#undef EMIT_ARB1_OPCODE_DSSS_FUNC | ||||
#undef EMIT_ARB1_OPCODE_DSSSS_FUNC | ||||
#undef EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC | ||||
#endif // SUPPORT_PROFILE_ARB1 | ||||
#pragma GCC visibility pop | ||||