Show More
Commit Description:
Add timers for Simulation and various engines...
Commit Description:
Add timers for Simulation and various engines. Starting to add additional timers for the different stages of the update process in order to get more insight into what is slowing it down. The update takes 9ms, which is much longer than it used to. Engine-specific timers are coming later.
File last commit:
Show/Diff file:
Action:
FNA/lib/MojoShader/profiles/mojoshader_profile_arb1.c
2252 lines | 79.5 KiB | text/x-c | CLexer
/**
* MojoShader; generate shader programs from bytecode of compiled
* Direct3D shaders.
*
* Please see the file LICENSE.txt in the source's root directory.
*
* This file written by Ryan C. Gordon.
*/
#define __MOJOSHADER_INTERNAL__ 1
#include "mojoshader_profile.h"
#pragma GCC visibility push(hidden)
#if SUPPORT_PROFILE_ARB1
// Resolve the register-type text for (regtype, regnum); the register number
// text is placed in regnum_str by the D3D helper this forwards to.
// ARB1 uses the same register naming as the D3D profile for now.
static inline const char *get_ARB1_register_string(Context *ctx,
                        const RegisterType regtype, const int regnum,
                        char *regnum_str, const size_t regnum_size)
{
    const char *type_name = get_D3D_register_string(ctx, regtype, regnum,
                                                    regnum_str, regnum_size);
    return type_name;
} // get_ARB1_register_string
// Hand out the next scratch register index and bump the in-use counter.
// Also raises the high-water mark (max_scratch_registers) when the new index
// goes past it. Returns the index that was just allocated.
int allocate_scratch_register(Context *ctx)
{
    const int idx = ctx->scratch_registers;
    ctx->scratch_registers = idx + 1;

    if (ctx->max_scratch_registers <= idx)
        ctx->max_scratch_registers = idx + 1;

    return idx;
} // allocate_scratch_register
// Return the next unused branch label id and advance the counter.
int allocate_branch_label(Context *ctx)
{
    const int label = ctx->assigned_branch_labels;
    ctx->assigned_branch_labels++;
    return label;
} // allocate_branch_label
// Allocate a new scratch register and write its name ("scratchN") into buf.
// Returns buf for convenience.
const char *allocate_ARB1_scratch_reg_name(Context *ctx, char *buf,
                                           const size_t buflen)
{
    snprintf(buf, buflen, "scratch%d", allocate_scratch_register(ctx));
    return buf;
} // allocate_ARB1_scratch_reg_name
// Format the name of branch label `id` ("branch_labelN") into buf.
// ctx is not used here. Returns buf.
static inline const char *get_ARB1_branch_label_name(Context *ctx, const int id,
                                                char *buf, const size_t buflen)
{
    const char *fmt = "branch_label%d";
    snprintf(buf, buflen, fmt, id);
    return buf;
} // get_ARB1_branch_label_name
// Write the variable name for register (rt, regnum) into buf.
// ARB1 currently names variables exactly like the D3D profile, so this
// forwards to the D3D helper and returns whatever it returns.
const char *get_ARB1_varname_in_buf(Context *ctx, const RegisterType rt,
                                    const int regnum, char *buf,
                                    const size_t buflen)
{
    const char *name = get_D3D_varname_in_buf(ctx, rt, regnum, buf, buflen);
    return name;
} // get_ARB1_varname_in_buf
// Variable name for register (rt, regnum), as produced by the D3D helper;
// ARB1 shares the D3D naming scheme at the moment.
const char *get_ARB1_varname(Context *ctx, const RegisterType rt,
                             const int regnum)
{
    const char *name = get_D3D_varname(ctx, rt, regnum);
    return name;
} // get_ARB1_varname
// Format the name of the constant array starting at register `base` with
// `size` entries ("c_array_<base>_<size>") into buf. ctx is not used.
// Returns buf.
static inline const char *get_ARB1_const_array_varname_in_buf(Context *ctx,
                                                const int base, const int size,
                                                char *buf, const size_t buflen)
{
    const char *fmt = "c_array_%d_%d";
    snprintf(buf, buflen, fmt, base, size);
    return buf;
} // get_ARB1_const_array_varname_in_buf
// Build the constant-array variable name ("c_array_<base>_<size>") in a
// local buffer and return a copy made with StrDup().
const char *get_ARB1_const_array_varname(Context *ctx, int base, int size)
{
    char name[64];
    snprintf(name, sizeof (name), "c_array_%d_%d", base, size);
    return StrDup(ctx, name);
} // get_ARB1_const_array_varname
// Build the ARB1 text for one source operand into buf and return buf.
//
//  ctx:    shader context; instructions needed to realize the operand are
//          emitted through output_line() and scratch registers may be
//          allocated as a side effect.
//  arg:    the source argument to render.
//  buf:    output buffer; also used as scratch for the intermediate
//          "register without swizzle/modifier" text.
//  buflen: size of buf in bytes.
//
// Source modifiers that ARB1 cannot express inline (bias, sign, complement,
// x2, and abs without nv2) are computed into a scratch register first and
// the scratch register's name replaces the original register in the result.
// DZ, DW and NOT are reported via fail().
const char *make_ARB1_srcarg_string_in_buf(Context *ctx,
                                           const SourceArgInfo *arg,
                                           char *buf, size_t buflen)
{
    // !!! FIXME: this can hit pathological cases where we look like this...
    //
    //    dp3 r1.xyz, t0_bx2, t0_bx2
    //    mad r1.xyz, t0_bias, 1-r1, t0_bx2
    //
    // ...which do a lot of duplicate work in arb1...
    //
    //    SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 };
    //    MUL scratch0, scratch0, { 2.0, 2.0, 2.0, 2.0 };
    //    SUB scratch1, t0, { 0.5, 0.5, 0.5, 0.5 };
    //    MUL scratch1, scratch1, { 2.0, 2.0, 2.0, 2.0 };
    //    DP3 r1.xyz, scratch0, scratch1;
    //    SUB scratch0, t0, { 0.5, 0.5, 0.5, 0.5 };
    //    SUB scratch1, { 1.0, 1.0, 1.0, 1.0 }, r1;
    //    SUB scratch2, t0, { 0.5, 0.5, 0.5, 0.5 };
    //    MUL scratch2, scratch2, { 2.0, 2.0, 2.0, 2.0 };
    //    MAD r1.xyz, scratch0, scratch1, scratch2;
    //
    // ...notice that the dp3 calculates the same value into two scratch
    //  registers. This case is easier to handle; just see if multiple
    //  source args are identical, build it up once, and use the same
    //  scratch register for multiple arguments in that opcode.
    //  Even better still, only calculate things once across instructions,
    //  and be smart about letting it linger in a scratch register until we
    //  definitely don't need the calculation anymore. That's harder to
    //  write, though.

    char regnum_str[16] = { '\0' };

    // !!! FIXME: use get_ARB1_varname_in_buf() instead?
    const char *regtype_str = NULL;
    if (!arg->relative)
    {
        regtype_str = get_ARB1_register_string(ctx, arg->regtype,
                                               arg->regnum, regnum_str,
                                               sizeof (regnum_str));
    } // if

    const char *rel_lbracket = "";
    char rel_offset[32] = { '\0' };
    const char *rel_rbracket = "";
    char rel_swizzle[4] = { '\0' };
    const char *rel_regtype_str = "";
    if (arg->relative)
    {
        rel_regtype_str = get_ARB1_varname_in_buf(ctx, arg->relative_regtype,
                                                  arg->relative_regnum,
                                                  (char *) alloca(64), 64);

        rel_swizzle[0] = '.';
        rel_swizzle[1] = swizzle_channels[arg->relative_component];
        rel_swizzle[2] = '\0';

        if (!support_nv2(ctx))
        {
            // The address register in ARB1 only allows the '.x' component, so
            //  we need to load the component we need from a temp vector
            //  register into .x as needed.
            assert(arg->relative_regtype == REG_TYPE_ADDRESS);
            assert(arg->relative_regnum == 0);
            if (ctx->last_address_reg_component != arg->relative_component)
            {
                output_line(ctx, "ARL %s.x, addr%d.%c;", rel_regtype_str,
                            arg->relative_regnum,
                            swizzle_channels[arg->relative_component]);
                ctx->last_address_reg_component = arg->relative_component;
            } // if

            rel_swizzle[1] = 'x';
        } // if

        if (arg->regtype == REG_TYPE_INPUT)
            regtype_str = "vertex.attrib";
        else
        {
            assert(arg->regtype == REG_TYPE_CONST);
            const int arrayidx = arg->relative_array->index;
            const int arraysize = arg->relative_array->count;
            const int offset = arg->regnum - arrayidx;
            assert(offset >= 0);
            regtype_str = get_ARB1_const_array_varname_in_buf(ctx, arrayidx,
                                           arraysize, (char *) alloca(64), 64);
            if (offset != 0)
                snprintf(rel_offset, sizeof (rel_offset), " + %d", offset);
        } // else

        rel_lbracket = "[";
        rel_rbracket = "]";
    } // if

    // This is the source register with everything but swizzle and source mods.
    snprintf(buf, buflen, "%s%s%s%s%s%s%s", regtype_str, regnum_str,
             rel_lbracket, rel_regtype_str, rel_swizzle, rel_offset,
             rel_rbracket);

    // Some of the source mods need to generate instructions to a temp
    //  register, in which case we'll replace the register name.
    //
    // With nv2 both |x| and -|x| are expressed directly on the operand, so
    //  neither needs a scratch register. (Treating -|x| as "not in place"
    //  would name a scratch register that no instruction ever writes.)
    const SourceMod mod = arg->src_mod;
    const int inplace = ( (mod == SRCMOD_NONE) || (mod == SRCMOD_NEGATE) ||
                          ( ((mod == SRCMOD_ABS) ||
                             (mod == SRCMOD_ABSNEGATE)) &&
                            support_nv2(ctx) ) );

    if (!inplace)
    {
        const size_t len = 64;
        char *stackbuf = (char *) alloca(len);
        regtype_str = allocate_ARB1_scratch_reg_name(ctx, stackbuf, len);
        regnum_str[0] = '\0'; // move value to scratch register.
        rel_lbracket = "";   // scratch register won't use array.
        rel_rbracket = "";
        rel_offset[0] = '\0';
        rel_swizzle[0] = '\0';
        rel_regtype_str = "";
    } // if

    const char *premod_str = "";
    const char *postmod_str = "";
    switch (mod)
    {
        case SRCMOD_NEGATE:
            premod_str = "-";
            break;

        case SRCMOD_BIASNEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_BIAS:
            output_line(ctx, "SUB %s, %s, { 0.5, 0.5, 0.5, 0.5 };",
                        regtype_str, buf);
            break;

        case SRCMOD_SIGNNEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_SIGN:
            output_line(ctx,
                "MAD %s, %s, { 2.0, 2.0, 2.0, 2.0 }, { -1.0, -1.0, -1.0, -1.0 };",
                regtype_str, buf);
            break;

        case SRCMOD_COMPLEMENT:
            output_line(ctx, "SUB %s, { 1.0, 1.0, 1.0, 1.0 }, %s;",
                        regtype_str, buf);
            break;

        case SRCMOD_X2NEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_X2:
            output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };",
                        regtype_str, buf);
            break;

        case SRCMOD_DZ:
            fail(ctx, "SRCMOD_DZ currently unsupported in arb1");
            postmod_str = "_dz";
            break;

        case SRCMOD_DW:
            fail(ctx, "SRCMOD_DW currently unsupported in arb1");
            postmod_str = "_dw";
            break;

        case SRCMOD_ABSNEGATE:
            premod_str = "-";
            // fall through.
        case SRCMOD_ABS:
            if (!support_nv2(ctx))  // GL_NV_vertex_program2_option adds this.
                output_line(ctx, "ABS %s, %s;", regtype_str, buf);
            else
            {
                premod_str = (mod == SRCMOD_ABSNEGATE) ? "-|" : "|";
                postmod_str = "|";
            } // else
            break;

        case SRCMOD_NOT:
            fail(ctx, "SRCMOD_NOT currently unsupported in arb1");
            premod_str = "!";
            break;

        case SRCMOD_NONE:
        case SRCMOD_TOTAL:
            break;  // stop compiler whining.
    } // switch

    // Worst case is ".x" (nv4 vFace) followed by '.' and four channels,
    //  plus the terminator.
    char swizzle_str[8];
    size_t i = 0;

    if (support_nv4(ctx))  // vFace must be output as "vFace.x" in nv4.
    {
        if (arg->regtype == REG_TYPE_MISCTYPE)
        {
            if ( ((const MiscTypeType) arg->regnum) == MISCTYPE_TYPE_FACE )
            {
                swizzle_str[i++] = '.';
                swizzle_str[i++] = 'x';
            } // if
        } // if
    } // if

    const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
    if (!scalar && !no_swizzle(arg->swizzle))
    {
        swizzle_str[i++] = '.';

        // .xxxx is the same as .x, but .xx is illegal...scalar or full!
        if (replicate_swizzle(arg->swizzle))
            swizzle_str[i++] = swizzle_channels[arg->swizzle_x];
        else
        {
            swizzle_str[i++] = swizzle_channels[arg->swizzle_x];
            swizzle_str[i++] = swizzle_channels[arg->swizzle_y];
            swizzle_str[i++] = swizzle_channels[arg->swizzle_z];
            swizzle_str[i++] = swizzle_channels[arg->swizzle_w];
        } // else
    } // if
    assert(i < sizeof (swizzle_str));  // check before writing the terminator.
    swizzle_str[i] = '\0';

    snprintf(buf, buflen, "%s%s%s%s%s%s%s%s%s%s", premod_str,
             regtype_str, regnum_str, rel_lbracket,
             rel_regtype_str, rel_swizzle, rel_offset, rel_rbracket,
             swizzle_str, postmod_str);
    // !!! FIXME: make sure the scratch buffer was large enough.
    return buf;
} // make_ARB1_srcarg_string_in_buf
// Write the variable name of the current instruction's destination register
// (ctx->dest_arg) into buf and return the helper's result.
const char *get_ARB1_destarg_varname(Context *ctx, char *buf,
                                     const size_t buflen)
{
    const DestArgInfo *dest = &ctx->dest_arg;
    const char *name = get_ARB1_varname_in_buf(ctx, dest->regtype,
                                               dest->regnum, buf, buflen);
    return name;
} // get_ARB1_destarg_varname
// Write the variable name of source argument `idx` into buf.
// If idx is past the end of ctx->source_args, reports a failure, leaves buf
// as an empty string and returns it.
const char *get_ARB1_srcarg_varname(Context *ctx, const size_t idx,
                                    char *buf, const size_t buflen)
{
    if (idx < STATICARRAYLEN(ctx->source_args))
    {
        const SourceArgInfo *src = &ctx->source_args[idx];
        return get_ARB1_varname_in_buf(ctx, src->regtype, src->regnum,
                                       buf, buflen);
    } // if

    fail(ctx, "Too many source args");
    buf[0] = '\0';
    return buf;
} // get_ARB1_srcarg_varname
// Build the destination operand text for the current instruction into buf
// and return buf.
//
// The result has the form "<pp><sat> <register><writemask>", i.e. it begins
// with any opcode suffixes and a space, so callers append it directly after
// the opcode name. On failure (unknown register type, or a predicated
// destination, which is unsupported) fail() is called and buf is returned
// as an empty string.
const char *make_ARB1_destarg_string(Context *ctx, char *buf,
                                     const size_t buflen)
{
    const DestArgInfo *arg = &ctx->dest_arg;

    *buf = '\0';

    const char *sat_str = "";
    if (arg->result_mod & MOD_SATURATE)
    {
        // nv4 can use ".SAT" in all program types.
        // For less than nv4, the "_SAT" modifier is only available in
        //  fragment shaders. Every thing else will fake it later in
        //  emit_ARB1_dest_modifiers() ...
        if (support_nv4(ctx))
            sat_str = ".SAT";
        else if (shader_is_pixel(ctx))
            sat_str = "_SAT";
    } // if

    const char *pp_str = "";
    if (arg->result_mod & MOD_PP)
    {
        // Most ARB1 profiles can't do partial precision (MOD_PP), but that's
        //  okay. The spec says lots of Direct3D implementations ignore the
        //  flag anyhow.
        if (support_nv4(ctx))
            pp_str = "H";
    } // if

    // CENTROID only allowed in DCL opcodes, which shouldn't come through here.
    assert((arg->result_mod & MOD_CENTROID) == 0);

    char regnum_str[16];
    const char *regtype_str = get_ARB1_register_string(ctx, arg->regtype,
                                                       arg->regnum, regnum_str,
                                                       sizeof (regnum_str));
    if (regtype_str == NULL)
    {
        fail(ctx, "Unknown destination register type.");
        return buf;
    } // if

    // '.' plus up to four channel letters plus the terminator.
    char writemask_str[6];
    size_t i = 0;
    const int scalar = isscalar(ctx, ctx->shader_type, arg->regtype, arg->regnum);
    if (!scalar && !writemask_xyzw(arg->writemask))
    {
        writemask_str[i++] = '.';
        if (arg->writemask0) writemask_str[i++] = 'x';
        if (arg->writemask1) writemask_str[i++] = 'y';
        if (arg->writemask2) writemask_str[i++] = 'z';
        if (arg->writemask3) writemask_str[i++] = 'w';
    } // if
    assert(i < sizeof (writemask_str));
    writemask_str[i] = '\0';

    // Predicated destinations are not implemented for arb1; bail out with an
    //  empty string rather than emit an unpredicated write.
    if (ctx->predicated)
    {
        fail(ctx, "dest register predication currently unsupported in arb1");
        return buf;
    } // if

    snprintf(buf, buflen, "%s%s %s%s%s", pp_str, sat_str,
             regtype_str, regnum_str, writemask_str);
    // !!! FIXME: make sure the scratch buffer was large enough.
    return buf;
} // make_ARB1_destarg_string
// Emit follow-up instructions that apply the destination's result modifiers
// to the value just written: a MUL for the result shift, and a MIN/MAX pair
// to emulate saturation where the SAT opcode suffix was not available.
void emit_ARB1_dest_modifiers(Context *ctx)
{
    const DestArgInfo *dest = &ctx->dest_arg;

    if (dest->result_shift != 0x0)
    {
        char dest_text[64];
        make_ARB1_destarg_string(ctx, dest_text, sizeof (dest_text));

        // Map the shift encoding to its scale factor; unknown encodings
        //  produce no instruction.
        const char *scale = NULL;
        if (dest->result_shift == 0x1)
            scale = "2.0";
        else if (dest->result_shift == 0x2)
            scale = "4.0";
        else if (dest->result_shift == 0x3)
            scale = "8.0";
        else if (dest->result_shift == 0xD)
            scale = "0.125";
        else if (dest->result_shift == 0xE)
            scale = "0.25";
        else if (dest->result_shift == 0xF)
            scale = "0.5";

        if (scale != NULL)
        {
            char dest_var[64];
            get_ARB1_destarg_varname(ctx, dest_var, sizeof (dest_var));
            output_line(ctx, "MUL%s, %s, %s;", dest_text, dest_var, scale);
        } // if
    } // if

    if (!(dest->result_mod & MOD_SATURATE))
        return;

    // nv4 and/or pixel shaders already used the "SAT" opcode modifier.
    if (support_nv4(ctx) || shader_is_pixel(ctx))
        return;

    char sat_var[64];
    char sat_dest[64];
    get_ARB1_destarg_varname(ctx, sat_var, sizeof (sat_var));
    make_ARB1_destarg_string(ctx, sat_dest, sizeof (sat_dest));
    output_line(ctx, "MIN%s, %s, 1.0;", sat_dest, sat_var);
    output_line(ctx, "MAX%s, %s, 0.0;", sat_dest, sat_var);
} // emit_ARB1_dest_modifiers
// Build the operand text for source argument `idx` of the current
// instruction into buf. An out-of-range idx reports a failure and yields an
// empty string.
const char *make_ARB1_srcarg_string(Context *ctx, const size_t idx,
                                    char *buf, const size_t buflen)
{
    if (idx < STATICARRAYLEN(ctx->source_args))
    {
        const SourceArgInfo *src = &ctx->source_args[idx];
        return make_ARB1_srcarg_string_in_buf(ctx, src, buf, buflen);
    } // if

    fail(ctx, "Too many source args");
    buf[0] = '\0';
    return buf;
} // make_ARB1_srcarg_string
// Emit "<opcode><dest>, <src0>;" for the current instruction, then apply any
// destination modifiers.
void emit_ARB1_opcode_ds(Context *ctx, const char *opcode)
{
    char dest_text[64];
    char arg0_text[64];

    make_ARB1_destarg_string(ctx, dest_text, sizeof (dest_text));
    make_ARB1_srcarg_string(ctx, 0, arg0_text, sizeof (arg0_text));

    output_line(ctx, "%s%s, %s;", opcode, dest_text, arg0_text);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_opcode_ds
// Emit "<opcode><dest>, <src0>, <src1>;" for the current instruction, then
// apply any destination modifiers.
void emit_ARB1_opcode_dss(Context *ctx, const char *opcode)
{
    char dest_text[64];
    char arg0_text[64];
    char arg1_text[64];

    make_ARB1_destarg_string(ctx, dest_text, sizeof (dest_text));
    make_ARB1_srcarg_string(ctx, 0, arg0_text, sizeof (arg0_text));
    make_ARB1_srcarg_string(ctx, 1, arg1_text, sizeof (arg1_text));

    output_line(ctx, "%s%s, %s, %s;", opcode, dest_text, arg0_text, arg1_text);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_opcode_dss
// Emit "<opcode><dest>, <src0>, <src1>, <src2>;" for the current
// instruction, then apply any destination modifiers.
void emit_ARB1_opcode_dsss(Context *ctx, const char *opcode)
{
    char dest_text[64];
    char arg0_text[64];
    char arg1_text[64];
    char arg2_text[64];

    make_ARB1_destarg_string(ctx, dest_text, sizeof (dest_text));
    make_ARB1_srcarg_string(ctx, 0, arg0_text, sizeof (arg0_text));
    make_ARB1_srcarg_string(ctx, 1, arg1_text, sizeof (arg1_text));
    make_ARB1_srcarg_string(ctx, 2, arg2_text, sizeof (arg2_text));

    output_line(ctx, "%s%s, %s, %s, %s;", opcode, dest_text,
                arg0_text, arg1_text, arg2_text);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_opcode_dsss
// Generators for trivial opcode emitters. Each macro defines a function
// named emit_ARB1_<op>(Context *ctx) that forwards to the matching
// emit_ARB1_opcode* helper, passing the opcode name as a string (#op).
// The suffix names the operand layout the helper is called for:
// D = destination, S = one source argument per letter.
// NOTE(review): emit_ARB1_opcode, _d, _s, _ss and _dssss are not defined in
// the part of the file visible here; only _ds, _dss and _dsss are.

// No-operand form.
#define EMIT_ARB1_OPCODE_FUNC(op) \
void emit_ARB1_##op(Context *ctx) { \
emit_ARB1_opcode(ctx, #op); \
}
// Destination only.
#define EMIT_ARB1_OPCODE_D_FUNC(op) \
void emit_ARB1_##op(Context *ctx) { \
emit_ARB1_opcode_d(ctx, #op); \
}
// One source, no destination.
#define EMIT_ARB1_OPCODE_S_FUNC(op) \
void emit_ARB1_##op(Context *ctx) { \
emit_ARB1_opcode_s(ctx, #op); \
}
// Two sources, no destination.
#define EMIT_ARB1_OPCODE_SS_FUNC(op) \
void emit_ARB1_##op(Context *ctx) { \
emit_ARB1_opcode_ss(ctx, #op); \
}
// Destination and one source.
#define EMIT_ARB1_OPCODE_DS_FUNC(op) \
void emit_ARB1_##op(Context *ctx) { \
emit_ARB1_opcode_ds(ctx, #op); \
}
// Destination and two sources.
#define EMIT_ARB1_OPCODE_DSS_FUNC(op) \
void emit_ARB1_##op(Context *ctx) { \
emit_ARB1_opcode_dss(ctx, #op); \
}
// Destination and three sources.
#define EMIT_ARB1_OPCODE_DSSS_FUNC(op) \
void emit_ARB1_##op(Context *ctx) { \
emit_ARB1_opcode_dsss(ctx, #op); \
}
// Destination and four sources.
#define EMIT_ARB1_OPCODE_DSSSS_FUNC(op) \
void emit_ARB1_##op(Context *ctx) { \
emit_ARB1_opcode_dssss(ctx, #op); \
}
// Emitter that only reports "<op> unimplemented in <profile> profile".
#define EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(op) \
void emit_ARB1_##op(Context *ctx) { \
failf(ctx, #op " unimplemented in %s profile", ctx->profile->name); \
}
// Begin program output: write the program header (and any NV OPTION line)
// for the requested profile into the preflight section, record which NV
// capability levels the profile supports, then switch output to mainline.
// Shader types other than vertex/pixel fail before anything is written; an
// unknown profile string fails but output is still switched to mainline.
void emit_ARB1_start(Context *ctx, const char *profilestr)
{
    const int is_vertex = shader_is_vertex(ctx);
    if (!is_vertex && !shader_is_pixel(ctx))
    {
        failf(ctx, "Shader type %u unsupported in this profile.",
              (uint) ctx->shader_type);
        return;
    } // if

    // Short tag used in the header, long name used in OPTION lines.
    const char *kind_tag = is_vertex ? "vp" : "fp";
    const char *kind_name = is_vertex ? "vertex" : "fragment";

    set_output(ctx, &ctx->preflight);

    if (!strcmp(profilestr, MOJOSHADER_PROFILE_ARB1))
        output_line(ctx, "!!ARB%s1.0", kind_tag);

    #if SUPPORT_PROFILE_ARB1_NV
    else if (!strcmp(profilestr, MOJOSHADER_PROFILE_NV2))
    {
        ctx->profile_supports_nv2 = 1;
        output_line(ctx, "!!ARB%s1.0", kind_tag);
        output_line(ctx, "OPTION NV_%s_program2;", kind_name);
    } // else if

    else if (!strcmp(profilestr, MOJOSHADER_PROFILE_NV3))
    {
        // there's no NV_fragment_program3, so pixel shaders ask for 2.
        const int option_ver = shader_is_pixel(ctx) ? 2 : 3;
        ctx->profile_supports_nv2 = 1;
        ctx->profile_supports_nv3 = 1;
        output_line(ctx, "!!ARB%s1.0", kind_tag);
        output_line(ctx, "OPTION NV_%s_program%d;", kind_name, option_ver);
    } // else if

    else if (!strcmp(profilestr, MOJOSHADER_PROFILE_NV4))
    {
        ctx->profile_supports_nv2 = 1;
        ctx->profile_supports_nv3 = 1;
        ctx->profile_supports_nv4 = 1;
        output_line(ctx, "!!NV%s4.0", kind_tag);
    } // else if
    #endif

    else
    {
        failf(ctx, "Profile '%s' unsupported or unknown.", profilestr);
    } // else

    set_output(ctx, &ctx->mainline);
} // emit_ARB1_start
// Finish the program body and write the closing "END" line.
void emit_ARB1_end(Context *ctx)
{
    // Pixel shaders below version 2.0 leave their color result in r0 rather
    //  than oC0, so copy it across. RET doesn't exist before ps_2_0, so
    //  nothing can skip over this move.
    const int color_in_r0 = shader_is_pixel(ctx) &&
                            !shader_version_atleast(ctx, 2, 0);
    if (color_in_r0)
    {
        set_used_register(ctx, REG_TYPE_COLOROUT, 0, 1);
        output_line(ctx, "MOV oC0, r0;");
    } // if

    output_line(ctx, "END");
} // emit_ARB1_end
// PHASE produces no output in arb1.
void emit_ARB1_phase(Context *ctx)
{
    (void) ctx;  // intentionally unused.
} // emit_ARB1_phase
// Declaration keyword(s) for a float temporary. nv4 allows an explicit data
// type on the declaration; everything else uses a plain TEMP.
static inline const char *arb1_float_temp(const Context *ctx)
{
    if (support_nv4(ctx))
        return "FLOAT TEMP";
    return "TEMP";
} // arb1_float_temp
// Write the declarations that can only be known once the whole shader has
// been processed: late OPTION lines in the preflight section, then one
// temporary per scratch register (and per REP emulation counter where
// needed) in the globals section.
void emit_ARB1_finalize(Context *ctx)
{
    push_output(ctx, &ctx->preflight);

    if (shader_is_vertex(ctx) && !ctx->arb1_wrote_position)
        output_line(ctx, "OPTION ARB_position_invariant;");

    if (shader_is_pixel(ctx) && ctx->have_multi_color_outputs)
        output_line(ctx, "OPTION ARB_draw_buffers;");

    pop_output(ctx);

    const char *temp_decl = arb1_float_temp(ctx);
    int n;

    push_output(ctx, &ctx->globals);

    // Re-allocating each scratch register in turn regenerates its name; the
    //  bound is re-read every pass, exactly as the allocator updates it.
    n = 0;
    while (n < ctx->max_scratch_registers)
    {
        char scratch_name[64];
        allocate_ARB1_scratch_reg_name(ctx, scratch_name,
                                       sizeof (scratch_name));
        output_line(ctx, "%s %s;", temp_decl, scratch_name);
        n++;
    } // while

    // nv2 fragment programs (and anything nv4) have a real REP/ENDREP; the
    //  remaining nv2 case emulates it through branching and needs temps.
    if ( (support_nv2(ctx)) && (!shader_is_pixel(ctx)) && (!support_nv4(ctx)) )
    {
        for (n = 0; n < ctx->max_reps; n++)
            output_line(ctx, "TEMP rep%d;", n);
    } // if

    pop_output(ctx);

    assert(ctx->scratch_registers == ctx->max_scratch_registers);
} // emit_ARB1_finalize
// Declare the global backing register (regtype, regnum) in the globals
// section. Handles address registers (which are really texture registers in
// pixel shaders) and temp registers; anything else is reported as a bug.
void emit_ARB1_global(Context *ctx, RegisterType regtype, int regnum)
{
    // !!! FIXME: dependency on ARB1 profile.  // !!! FIXME about FIXME: huh?
    char name[64];
    get_ARB1_varname_in_buf(ctx, regtype, regnum, name, sizeof (name));

    push_output(ctx, &ctx->globals);

    if (regtype == REG_TYPE_ADDRESS)
    {
        if (shader_is_pixel(ctx))  // actually REG_TYPE_TEXTURE.
        {
            // Before ps_1_4, texture registers behave like temps that start
            //  out holding the tex coords and get overwritten by the TEX
            //  opcode, so declare a temp and preload it at the top of the
            //  mainline. Later versions need nothing here.
            if (!shader_version_atleast(ctx, 1, 4))
            {
                output_line(ctx, "%s %s;", arb1_float_temp(ctx), name);
                push_output(ctx, &ctx->mainline_top);
                output_line(ctx, "MOV %s, fragment.texcoord[%d];",
                            name, regnum);
                pop_output(ctx);
            } // if
        } // if
        else if (support_nv4(ctx))
        {
            // nv4 replaced address registers with generic int registers.
            output_line(ctx, "INT TEMP %s;", name);
        } // else if
        else
        {
            // nv2 has four-component address already, but stock arb1 has
            //  to emulate it in a temporary, and move components to the
            //  scalar ADDRESS register on demand.
            output_line(ctx, "ADDRESS %s;", name);
            if (!support_nv2(ctx))
                output_line(ctx, "TEMP addr%d;", regnum);
        } // else
    } // if
    else if (regtype == REG_TYPE_TEMP)
    {
        output_line(ctx, "%s %s;", arb1_float_temp(ctx), name);
    } // else if
    else
    {
        // Predicate, loop and label registers are not handled by this
        //  profile and land here with every other unknown type.
        fail(ctx, "BUG: we used a register we don't know how to define.");
    } // else

    pop_output(ctx);
} // emit_ARB1_global
// Declare a PARAM array for a relatively-addressed uniform range and record
// where it was placed.
//
// Uniforms are packed tightly into program.local rather than mirroring the
// d3d register numbers, so the array is bound to the next unused slice.
// arb1 lets a PARAM array alias a subrange of program.local, so element 0 of
// the array can sit at any program.local index with no offset math.
void emit_ARB1_array(Context *ctx, VariableList *var)
{
    const int first_reg = var->index;
    const int count = var->count;
    const int first_slot = ctx->uniform_float4_count +
                           ctx->uniform_int4_count +
                           ctx->uniform_bool_count;
    const int last_slot = (first_slot + count) - 1;

    char name[64];
    get_ARB1_const_array_varname_in_buf(ctx, first_reg, count,
                                        name, sizeof (name));

    push_output(ctx, &ctx->globals);
    output_line(ctx, "PARAM %s[%d] = { program.local[%d..%d] };", name,
                count, first_slot, last_slot);
    pop_output(ctx);

    var->emit_position = first_slot;
} // emit_ARB1_array
// Declare a PARAM array initialized with literal float4 constants.
// Walks clist, skipping non-float entries, and writes one "{ x, y, z, w }"
// row per array element; each float entry's index is asserted to be
// base + row.
void emit_ARB1_const_array(Context *ctx, const ConstantsList *clist,
                           int base, int size)
{
    char name[64];
    get_ARB1_const_array_varname_in_buf(ctx, base, size, name, sizeof (name));

    int row;
    push_output(ctx, &ctx->globals);
    output_line(ctx, "PARAM %s[%d] = {", name, size);
    ctx->indent++;

    for (row = 0; row < size; row++)
    {
        // advance to the next float constant in the list.
        while (clist->constant.type != MOJOSHADER_UNIFORM_FLOAT)
            clist = clist->next;
        assert(clist->constant.index == (base + row));

        char val[4][32];
        int c;
        for (c = 0; c < 4; c++)
        {
            floatstr(ctx, val[c], sizeof (val[c]),
                     clist->constant.value.f[c], 1);
        } // for

        // every row but the last is followed by a comma.
        const char *sep = (row < (size-1)) ? "," : "";
        output_line(ctx, "{ %s, %s, %s, %s }%s", val[0], val[1], val[2],
                    val[3], sep);

        clist = clist->next;
    } // for

    ctx->indent--;
    output_line(ctx, "};");
    pop_output(ctx);
} // emit_ARB1_const_array
// Declare a PARAM alias for a single uniform register.
//
// Without an owning array (var == NULL) the uniform is bound to the next
// packed slot of program.local, so a lone c439 maps to program.local[0]
// and does not overflow implementation limits. With an array, it aliases
// the matching element of either the literal constant array or the
// program.local slice recorded in var->emit_position.
void emit_ARB1_uniform(Context *ctx, RegisterType regtype, int regnum,
                       const VariableList *var)
{
    const char *source_array = "program.local";
    char const_array_name[64];
    int slot = 0;

    char name[64];
    get_ARB1_varname_in_buf(ctx, regtype, regnum, name, sizeof (name));

    push_output(ctx, &ctx->globals);

    if (var != NULL)
    {
        const int first_reg = var->index;
        if (!var->constant)
        {
            assert(var->emit_position != -1);
            slot = (regnum - first_reg) + var->emit_position;
        } // if
        else
        {
            source_array = get_ARB1_const_array_varname_in_buf(ctx, first_reg,
                                    var->count, const_array_name,
                                    sizeof (const_array_name));
            slot = (regnum - first_reg);
        } // else
    } // if
    else
    {
        // all types share one array (rather, all types convert to float4).
        slot = ctx->uniform_float4_count + ctx->uniform_int4_count +
               ctx->uniform_bool_count;
    } // else

    output_line(ctx, "PARAM %s = %s[%d];", name, source_array, slot);
    pop_output(ctx);
} // emit_ARB1_uniform
// Samplers are not predeclared in arb1, so this normally emits nothing.
// When the sampler was used by a ps_1_1 TEXBEM opcode (tb != 0), two
// program.local slots are bound as <sampler>_texbem and <sampler>_texbeml
// and the float4 uniform count grows by two.
void emit_ARB1_sampler(Context *ctx,int stage,TextureType ttype,int tb)
{
    if (!tb)
        return;  // nothing to declare.

    const int slot = ctx->uniform_float4_count + ctx->uniform_int4_count +
                     ctx->uniform_bool_count;

    char sampler_name[64];
    get_ARB1_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, sampler_name,
                            sizeof (sampler_name));

    push_output(ctx, &ctx->globals);
    output_line(ctx, "PARAM %s_texbem = program.local[%d];",
                sampler_name, slot);
    output_line(ctx, "PARAM %s_texbeml = program.local[%d];",
                sampler_name, slot+1);
    pop_output(ctx);

    ctx->uniform_float4_count += 2;
} // emit_ARB1_sampler
// !!! FIXME: a lot of cut-and-paste here from emit_GLSL_attribute().
// Declare a shader input/output register in the globals section, binding it
// to the matching ARB1 built-in where one exists.
//
//  regtype/regnum: the register being declared.
//  usage/index:    the D3D usage and usage index of the register.
//  wmask:          write mask; not consulted by this function (see FIXME).
//  flags:          result modifier flags; only MOD_CENTROID is examined.
//
// Vertex shaders: inputs become generic vertex attributes, outputs map to
// "result.*" bindings (or a plain temp if no binding is known).
// Pixel shaders: inputs map to "fragment.*" bindings and outputs to
// "result.color"/"result.depth". Unsupported cases are reported via
// fail()/failf().
void emit_ARB1_attribute(Context *ctx, RegisterType regtype, int regnum,
MOJOSHADER_usage usage, int index, int wmask,
int flags)
{
// !!! FIXME: this function doesn't deal with write masks at all yet!
const char *usage_str = NULL;
const char *arrayleft = "";
const char *arrayright = "";
char index_str[16] = { '\0' };
char varname[64];
get_ARB1_varname_in_buf(ctx, regtype, regnum, varname, sizeof (varname));
//assert((flags & MOD_PP) == 0); // !!! FIXME: is PP allowed?
// index_str stays empty for index 0, so unnumbered bindings need no suffix.
if (index != 0) // !!! FIXME: a lot of these MUST be zero.
snprintf(index_str, sizeof (index_str), "%u", (uint) index);
if (shader_is_vertex(ctx))
{
// pre-vs3 output registers.
// these don't ever happen in DCL opcodes, I think. Map to vs_3_*
// output registers.
if (!shader_version_atleast(ctx, 3, 0))
{
if (regtype == REG_TYPE_RASTOUT)
{
regtype = REG_TYPE_OUTPUT;
index = regnum;
// the rasterizer output number selects the usage.
switch ((const RastOutType) regnum)
{
case RASTOUT_TYPE_POSITION:
usage = MOJOSHADER_USAGE_POSITION;
break;
case RASTOUT_TYPE_FOG:
usage = MOJOSHADER_USAGE_FOG;
break;
case RASTOUT_TYPE_POINT_SIZE:
usage = MOJOSHADER_USAGE_POINTSIZE;
break;
} // switch
} // if
else if (regtype == REG_TYPE_ATTROUT)
{
regtype = REG_TYPE_OUTPUT;
usage = MOJOSHADER_USAGE_COLOR;
index = regnum;
} // else if
else if (regtype == REG_TYPE_TEXCRDOUT)
{
regtype = REG_TYPE_OUTPUT;
usage = MOJOSHADER_USAGE_TEXCOORD;
index = regnum;
} // else if
} // if
// to avoid limitations of various GL entry points for input
// attributes (glSecondaryColorPointer() can only take 3 component
// items, glVertexPointer() can't do GL_UNSIGNED_BYTE, many other
// issues), we set up all inputs as generic vertex attributes, so we
// can pass data in just about any form, and ignore the built-in GLSL
// attributes like gl_SecondaryColor. Output needs to use the
// built-ins, though, but we don't have to worry about the GL entry
// point limitations there.
if (regtype == REG_TYPE_INPUT)
{
// inputs are numbered in the order they are declared.
const int attr = ctx->assigned_vertex_attributes++;
push_output(ctx, &ctx->globals);
output_line(ctx, "ATTRIB %s = vertex.attrib[%d];", varname, attr);
pop_output(ctx);
} // if
else if (regtype == REG_TYPE_OUTPUT)
{
switch (usage)
{
case MOJOSHADER_USAGE_POSITION:
// remembered so emit_ARB1_finalize() can decide whether to request
// ARB_position_invariant.
ctx->arb1_wrote_position = 1;
usage_str = "result.position";
break;
case MOJOSHADER_USAGE_POINTSIZE:
usage_str = "result.pointsize";
break;
case MOJOSHADER_USAGE_COLOR:
index_str[0] = '\0'; // no explicit number.
// any other color index leaves usage_str NULL and falls back to a temp.
if (index == 0)
usage_str = "result.color.primary";
else if (index == 1)
usage_str = "result.color.secondary";
break;
case MOJOSHADER_USAGE_FOG:
usage_str = "result.fogcoord";
break;
case MOJOSHADER_USAGE_TEXCOORD:
// rebuilt here because index may have been replaced by regnum above.
snprintf(index_str, sizeof (index_str), "%u", (uint) index);
usage_str = "result.texcoord";
arrayleft = "[";
arrayright = "]";
break;
default:
// !!! FIXME: we need to deal with some more built-in varyings here.
break;
} // switch
// !!! FIXME: the #define is a little hacky, but it means we don't
// !!! FIXME: have to track these separately if this works.
push_output(ctx, &ctx->globals);
// no mapping to built-in var? Just make it a regular global, pray.
if (usage_str == NULL)
output_line(ctx, "%s %s;", arb1_float_temp(ctx), varname);
else
{
output_line(ctx, "OUTPUT %s = %s%s%s%s;", varname, usage_str,
arrayleft, index_str, arrayright);
} // else
pop_output(ctx);
} // else if
else
{
fail(ctx, "unknown vertex shader attribute register");
} // else
} // if
else if (shader_is_pixel(ctx))
{
const char *paramtype_str = "ATTRIB";
// samplers DCLs get handled in emit_ARB1_sampler().
if (flags & MOD_CENTROID)
{
if (!support_nv4(ctx)) // GL_NV_fragment_program4 adds centroid.
{
// !!! FIXME: should we just wing it without centroid here?
failf(ctx, "centroid unsupported in %s profile",
ctx->profile->name);
return;
} // if
paramtype_str = "CENTROID ATTRIB";
} // if
if (regtype == REG_TYPE_COLOROUT)
{
paramtype_str = "OUTPUT";
usage_str = "result.color";
if (ctx->have_multi_color_outputs)
{
// We have to gamble that you have GL_ARB_draw_buffers.
// You probably do at this point if you have a sane setup.
snprintf(index_str, sizeof (index_str), "%u", (uint) regnum);
arrayleft = "[";
arrayright = "]";
} // if
} // if
else if (regtype == REG_TYPE_DEPTHOUT)
{
paramtype_str = "OUTPUT";
usage_str = "result.depth";
} // else if
// !!! FIXME: can you actually have a texture register with COLOR usage?
else if ((regtype == REG_TYPE_TEXTURE) || (regtype == REG_TYPE_INPUT))
{
if (usage == MOJOSHADER_USAGE_TEXCOORD)
{
// ps_1_1 does a different hack for this attribute.
// Refer to emit_ARB1_global()'s REG_TYPE_TEXTURE code.
if (shader_version_atleast(ctx, 1, 4))
{
snprintf(index_str, sizeof (index_str), "%u", (uint) index);
usage_str = "fragment.texcoord";
arrayleft = "[";
arrayright = "]";
} // if
} // if
else if (usage == MOJOSHADER_USAGE_COLOR)
{
index_str[0] = '\0'; // no explicit number.
if (index == 0)
usage_str = "fragment.color.primary";
else if (index == 1)
usage_str = "fragment.color.secondary";
else
fail(ctx, "unsupported color index");
} // else if
} // else if
else if (regtype == REG_TYPE_MISCTYPE)
{
const MiscTypeType mt = (MiscTypeType) regnum;
if (mt == MISCTYPE_TYPE_FACE)
{
if (support_nv4(ctx)) // FINALLY, a vFace equivalent in nv4!
{
index_str[0] = '\0'; // no explicit number.
usage_str = "fragment.facing";
} // if
else
{
failf(ctx, "vFace unsupported in %s profile",
ctx->profile->name);
} // else
} // if
else if (mt == MISCTYPE_TYPE_POSITION)
{
index_str[0] = '\0'; // no explicit number.
usage_str = "fragment.position"; // !!! FIXME: is this the same coord space as D3D?
} // else if
else
{
fail(ctx, "BUG: unhandled misc register");
} // else
} // else if
else
{
fail(ctx, "unknown pixel shader attribute register");
} // else
// nothing is declared when no binding was chosen above.
if (usage_str != NULL)
{
push_output(ctx, &ctx->globals);
output_line(ctx, "%s %s = %s%s%s%s;", paramtype_str, varname,
usage_str, arrayleft, index_str, arrayright);
pop_output(ctx);
} // if
} // else if
else
{
fail(ctx, "Unknown shader type"); // state machine should catch this.
} // else
} // emit_ARB1_attribute
// Reserved opcode slot: emits nothing.
void emit_ARB1_RESERVED(Context *ctx)
{
    (void) ctx;  // no-op.
} // emit_ARB1_RESERVED
// arb1 has no NOP instruction, so a D3D NOP produces no output at all.
void emit_ARB1_NOP(Context *ctx)
{
    (void) ctx;  // intentionally nothing to emit.
} // emit_ARB1_NOP
// Opcodes emitted under their own name via the generic helpers: each line
// defines emit_ARB1_<op>() forwarding to emit_ARB1_opcode_ds/dss/dsss with
// the opcode name as a string.
EMIT_ARB1_OPCODE_DS_FUNC(MOV)
EMIT_ARB1_OPCODE_DSS_FUNC(ADD)
EMIT_ARB1_OPCODE_DSS_FUNC(SUB)
EMIT_ARB1_OPCODE_DSSS_FUNC(MAD)
EMIT_ARB1_OPCODE_DSS_FUNC(MUL)
EMIT_ARB1_OPCODE_DS_FUNC(RCP)
// Emit RSQ. Before nv4 the instruction takes the absolute value of its
// operand implicitly; nv4 does not, so the abs() is applied explicitly
// there to avoid NaN on negative inputs.
// Note: on nv4 this may rewrite ctx->source_args[0].src_mod to SRCMOD_ABS.
void emit_ARB1_RSQ(Context *ctx)
{
    if (!support_nv4(ctx))
    {
        emit_ARB1_opcode_ds(ctx, "RSQ");  // pre-nv4 implies ABS.
        return;
    } // if

    // Modifiers that differ from |x| only by sign collapse to |x|, which
    //  can use nv2's inline |abs| operand syntax.
    switch (ctx->source_args[0].src_mod)
    {
        case SRCMOD_NONE:
        case SRCMOD_NEGATE:
        case SRCMOD_ABSNEGATE:
            ctx->source_args[0].src_mod = SRCMOD_ABS;
            break;
        default:
            break;
    } // switch

    char dest_text[64];
    char arg0_text[64];
    make_ARB1_destarg_string(ctx, dest_text, sizeof (dest_text));
    make_ARB1_srcarg_string(ctx, 0, arg0_text, sizeof (arg0_text));

    if (ctx->source_args[0].src_mod != SRCMOD_ABS)
    {
        // Some other modifier is in play: take abs() in a scratch register.
        char scratch[64];
        allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
        output_line(ctx, "ABS %s, %s;", scratch, arg0_text);
        output_line(ctx, "RSQ%s, %s.x;", dest_text, scratch);
    } // if
    else
    {
        output_line(ctx, "RSQ%s, %s;", dest_text, arg0_text);
    } // else

    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_RSQ
// Two-source opcodes emitted under their own name: each line defines
// emit_ARB1_<op>() forwarding to emit_ARB1_opcode_dss(ctx, "<op>").
EMIT_ARB1_OPCODE_DSS_FUNC(DP3)
EMIT_ARB1_OPCODE_DSS_FUNC(DP4)
EMIT_ARB1_OPCODE_DSS_FUNC(MIN)
EMIT_ARB1_OPCODE_DSS_FUNC(MAX)
EMIT_ARB1_OPCODE_DSS_FUNC(SLT)
EMIT_ARB1_OPCODE_DSS_FUNC(SGE)
// EXP is emitted as the arb1 "EX2" opcode.
void emit_ARB1_EXP(Context *ctx)
{
    emit_ARB1_opcode_ds(ctx, "EX2");
} // emit_ARB1_EXP
// Emit a logarithm-style opcode (`opcode`, e.g. "LG2") on the absolute
// value of source argument 0, then apply destination modifiers.
//
// Source modifiers that differ from |x| only by sign (none, negate,
// abs-negate) are rewritten to SRCMOD_ABS so the operand can be rendered
// directly; this matches what emit_ARB1_RSQ does. Any other modifier is
// rendered first and abs() is then taken in a scratch register.
// Note: this may rewrite ctx->source_args[0].src_mod.
static void arb1_log(Context *ctx, const char *opcode)
{
    SourceArgInfo *src = &ctx->source_args[0];

    // |−x| == |x|, so a plain negate folds into abs just like the others;
    //  this lets nv2 use its inline |abs| construct.
    if ( (src->src_mod == SRCMOD_NONE) ||
         (src->src_mod == SRCMOD_NEGATE) ||
         (src->src_mod == SRCMOD_ABSNEGATE) )
        src->src_mod = SRCMOD_ABS;

    char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
    char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));

    if (src->src_mod == SRCMOD_ABS)
        output_line(ctx, "%s%s, %s;", opcode, dst, src0);
    else
    {
        char buf[64]; allocate_ARB1_scratch_reg_name(ctx, buf, sizeof (buf));
        output_line(ctx, "ABS %s, %s;", buf, src0);
        output_line(ctx, "%s%s, %s.x;", opcode, dst, buf);
    } // else

    emit_ARB1_dest_modifiers(ctx);
} // arb1_log
// LOG is emitted through arb1_log() using the "LG2" mnemonic.
void emit_ARB1_LOG(Context *ctx)
{
    const char *mnemonic = "LG2";
    arb1_log(ctx, mnemonic);
} // emit_ARB1_LOG
// LIT and DST emitters, generated by macros defined outside this part of
// the file.
// NOTE(review): presumably these emit the same-named ARB1 opcodes -- confirm.
EMIT_ARB1_OPCODE_DS_FUNC(LIT)
EMIT_ARB1_OPCODE_DSS_FUNC(DST)
// Emit LRP (linear interpolate).
//
// Pixel shaders use the LRP opcode directly; otherwise the interpolation is
// expanded to a SUB into a scratch register followed by a MAD.
void emit_ARB1_LRP(Context *ctx)
{
    if (shader_is_pixel(ctx))
    {
        // fragment shaders have a matching LRP opcode.
        emit_ARB1_opcode_dsss(ctx, "LRP");
        return;
    } // if

    char dest[64];
    char weight[64];
    char a[64];
    char b[64];
    char delta[64];

    make_ARB1_destarg_string(ctx, dest, sizeof (dest));
    make_ARB1_srcarg_string(ctx, 0, weight, sizeof (weight));
    make_ARB1_srcarg_string(ctx, 1, a, sizeof (a));
    make_ARB1_srcarg_string(ctx, 2, b, sizeof (b));
    allocate_ARB1_scratch_reg_name(ctx, delta, sizeof (delta));

    // LRP is: dest = src2 + src0 * (src1 - src2)
    output_line(ctx, "SUB %s, %s, %s;", delta, a, b);
    output_line(ctx, "MAD%s, %s, %s, %s;", dest, delta, weight, b);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_LRP
// FRC emitter, generated by a macro defined outside this part of the file.
// NOTE(review): presumably emits the same-named ARB1 opcode -- confirm.
EMIT_ARB1_OPCODE_DS_FUNC(FRC)
// Shared emitter for the MxXy matrix-multiply opcodes.
//
// Emits one "DP<x>" per row: row i multiplies source 0 with source (i + 1)
// and writes only destination component i. The destination writemask is
// restored before the destination modifiers are emitted.
//
// x: width of the dot product (used in the "DP%d" mnemonic).
// y: number of rows, i.e. number of DP instructions emitted.
static void arb1_MxXy(Context *ctx, const int x, const int y)
{
    DestArgInfo *dest = &ctx->dest_arg;
    const int saved_mask = dest->writemask;
    char vec[64];
    int row = 0;

    make_ARB1_srcarg_string(ctx, 0, vec, sizeof (vec));

    while (row < y)
    {
        char matrow[64];
        char target[64];
        make_ARB1_srcarg_string(ctx, row + 1, matrow, sizeof (matrow));
        set_dstarg_writemask(dest, 1 << row);  // write just this component.
        make_ARB1_destarg_string(ctx, target, sizeof (target));
        output_line(ctx, "DP%d%s, %s, %s;", x, target, vec, matrow);
        row++;
    } // while

    set_dstarg_writemask(dest, saved_mask);
    emit_ARB1_dest_modifiers(ctx);
} // arb1_MxXy
// Matrix-multiply opcodes. Each forwards to arb1_MxXy(ctx, x, y) with the
// two numbers taken from the opcode name.
void emit_ARB1_M4X4(Context *ctx)
{
    arb1_MxXy(ctx, 4, 4);
} // emit_ARB1_M4X4

void emit_ARB1_M4X3(Context *ctx)
{
    arb1_MxXy(ctx, 4, 3);
} // emit_ARB1_M4X3

void emit_ARB1_M3X4(Context *ctx)
{
    arb1_MxXy(ctx, 3, 4);
} // emit_ARB1_M3X4

void emit_ARB1_M3X3(Context *ctx)
{
    arb1_MxXy(ctx, 3, 3);
} // emit_ARB1_M3X3

void emit_ARB1_M3X2(Context *ctx)
{
    arb1_MxXy(ctx, 3, 2);
} // emit_ARB1_M3X2
// Emit CALL as a "CAL <label>;" instruction. Fails on profiles without nv2
// support, since stock ARB1 has no branching.
void emit_ARB1_CALL(Context *ctx)
{
    if (support_nv2(ctx))
    {
        char target[64];
        get_ARB1_srcarg_varname(ctx, 0, target, sizeof (target));
        output_line(ctx, "CAL %s;", target);
    } // if
    else
    {
        // no branching in stock ARB1.
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
    } // else
} // emit_ARB1_CALL
// Emit CALLNZ: load source 1 into a scratch register with MOVC (which
// updates the condition code), then do a conditional "CAL ... (NE.x)".
// Fails on profiles without nv2 support.
void emit_ARB1_CALLNZ(Context *ctx)
{
    // !!! FIXME: if src1 is a constbool that's true, we can remove the
    // !!! FIXME:  if. If it's false, we can make this a no-op.

    if (!support_nv2(ctx))  // no branching in stock ARB1.
    {
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
        return;
    } // if

    // !!! FIXME: double-check this.
    char target[64];
    char condreg[64];
    char predicate[64];

    get_ARB1_srcarg_varname(ctx, 0, target, sizeof (target));
    get_ARB1_srcarg_varname(ctx, 1, predicate, sizeof (predicate));
    allocate_ARB1_scratch_reg_name(ctx, condreg, sizeof (condreg));

    output_line(ctx, "MOVC %s, %s;", condreg, predicate);
    output_line(ctx, "CAL %s (NE.x);", target);
} // emit_ARB1_CALLNZ
// LOOP is not implemented for the arb1 profiles yet.
// NOTE(review): EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC is defined outside this
// part of the file; presumably it generates an emitter that reports
// failure -- confirm against the macro body.
// !!! FIXME: needs BRA in nv2, LOOP in nv2 fragment progs, and REP in nv4.
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(LOOP)
// Emit RET. Writes "RET;" only when nv2 is supported, and always switches
// the output back to the mainline buffer afterwards.
void emit_ARB1_RET(Context *ctx)
{
    // Deliberately no fail() without nv2: this may just be the end of the
    // mainline. If it ends a LABEL that had no CALL, the text went to
    // ctx->ignore anyhow, so this should be "safe" ... the arb1 profile
    // simply ends up throwing all of that code out.
    const int have_branching = support_nv2(ctx);  // none in stock ARB1.
    if (have_branching)
    {
        output_line(ctx, "RET;");
    } // if

    // in case we were ignoring this function.
    set_output(ctx, &ctx->mainline);
} // emit_ARB1_RET
// ENDLOOP is not implemented for the arb1 profiles (the macro is defined
// outside this part of the file).
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(ENDLOOP)
// Emit a subroutine LABEL ("<name>:").
//
// Without nv2 support nothing is written. If the label is not present in
// ctx->used_registers, output is redirected to ctx->ignore first, so the
// function body is parsed but discarded.
void emit_ARB1_LABEL(Context *ctx)
{
    if (!support_nv2(ctx))  // no branching in stock ARB1.
    {
        // don't fail()...maybe we never use it, but do fail in CALL.
        return;
    } // if

    RegisterList *used = reglist_find(&ctx->used_registers, REG_TYPE_LABEL,
                                      ctx->source_args[0].regnum);

    // MSDN specs say CALL* has to come before the LABEL, so we know if we
    // can ditch the entire function here as unused.
    if (!used)
    {
        // Func not used. Parse, but don't output.
        set_output(ctx, &ctx->ignore);
    } // if

    // !!! FIXME: it would be nice if we could determine if a function is
    // !!! FIXME:  only called once and, if so, forcibly inline it.

    char name[64];
    get_ARB1_srcarg_varname(ctx, 0, name, sizeof (name));
    output_line(ctx, "%s:", name);
} // emit_ARB1_LABEL
// Emit POW with the absolute value of source 0 as the base, either through
// the |abs| source modifier or through an explicit ABS into a scratch
// register.
void emit_ARB1_POW(Context *ctx)
{
    // we can optimize this to use nv2's |abs| construct in some cases.
    switch (ctx->source_args[0].src_mod)
    {
        case SRCMOD_NONE:
        case SRCMOD_ABSNEGATE:
            ctx->source_args[0].src_mod = SRCMOD_ABS;
            break;
        default:
            break;
    } // switch

    char dest[64];
    char base[64];
    char exponent[64];
    make_ARB1_destarg_string(ctx, dest, sizeof (dest));
    make_ARB1_srcarg_string(ctx, 0, base, sizeof (base));
    make_ARB1_srcarg_string(ctx, 1, exponent, sizeof (exponent));

    if (ctx->source_args[0].src_mod != SRCMOD_ABS)
    {
        // The abs couldn't be folded into the operand; use a scratch register.
        char absreg[64];
        allocate_ARB1_scratch_reg_name(ctx, absreg, sizeof (absreg));
        output_line(ctx, "ABS %s, %s;", absreg, base);
        output_line(ctx, "POW%s, %s.x, %s;", dest, absreg, exponent);
    } // if
    else
    {
        output_line(ctx, "POW%s, %s, %s;", dest, base, exponent);
    } // else

    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_POW
// CRS (cross product) is emitted as the ARB1 "XPD" opcode.
void emit_ARB1_CRS(Context *ctx)
{
    emit_ARB1_opcode_dss(ctx, "XPD");
} // emit_ARB1_CRS
// Emit SGN (sign of source 0).
//
// nv2 and later use the SSG opcode. Otherwise the sign is built from two
// comparisons:
//   scratch1 = (src0 < 0)    -> 1.0 for negative inputs
//   scratch2 = (-src0 < 0)   -> 1.0 for positive inputs
//   dst      = scratch2 - scratch1
void emit_ARB1_SGN(Context *ctx)
{
    if (support_nv2(ctx))
        emit_ARB1_opcode_ds(ctx, "SSG");
    else
    {
        char dst[64];
        char src0[64];
        char scratch1[64];
        char scratch2[64];
        make_ARB1_destarg_string(ctx, dst, sizeof (dst));
        make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
        allocate_ARB1_scratch_reg_name(ctx, scratch1, sizeof (scratch1));
        allocate_ARB1_scratch_reg_name(ctx, scratch2, sizeof (scratch2));
        output_line(ctx, "SLT %s, %s, 0.0;", scratch1, src0);
        output_line(ctx, "SLT %s, -%s, 0.0;", scratch2, src0);
        // The destination operand must be followed by a comma, like every
        // other instruction emitted from a destarg string in this file;
        // without it the ADD is not valid ARB1 syntax.
        output_line(ctx, "ADD%s, -%s, %s;", dst, scratch1, scratch2);
        emit_ARB1_dest_modifiers(ctx);
    } // else
} // emit_ARB1_SGN
// ABS emitter, generated by a macro defined outside this part of the file.
// NOTE(review): presumably emits the same-named ARB1 opcode -- confirm.
EMIT_ARB1_OPCODE_DS_FUNC(ABS)
// Emit NRM (normalize).
//
// Uses the NRM opcode where one exists; otherwise expands to
// DP3 (squared length) -> RSQ -> MUL, keeping the intermediate value in the
// .w component of a scratch register.
void emit_ARB1_NRM(Context *ctx)
{
    // nv2 fragment programs (and anything nv4) have a real NRM.
    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
    {
        emit_ARB1_opcode_ds(ctx, "NRM");
        return;
    } // if

    char dest[64];
    char vec[64];
    char len[64];
    make_ARB1_destarg_string(ctx, dest, sizeof (dest));
    make_ARB1_srcarg_string(ctx, 0, vec, sizeof (vec));
    allocate_ARB1_scratch_reg_name(ctx, len, sizeof (len));

    output_line(ctx, "DP3 %s.w, %s, %s;", len, vec, vec);
    output_line(ctx, "RSQ %s.w, %s.w;", len, len);
    output_line(ctx, "MUL%s, %s.w, %s;", dest, len, vec);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_NRM
// Emit SINCOS.
//
// The destination writemask selects what is computed: .x alone requests the
// cosine, .y alone the sine, and .xy both (cosine in .x, sine in .y). Three
// strategies are used depending on the profile:
//   - pixel shaders and nv4: COS / SIN / SCS opcodes.
//   - other nv2+ profiles: separate COS and SIN opcodes.
//   - everything else: a polynomial approximation built from basic opcodes.
void emit_ARB1_SINCOS(Context *ctx)
{
    // we don't care about the temp registers that <= sm2 demands; ignore them.
    const int mask = ctx->dest_arg.writemask;

    // arb1 fragment programs and everything nv4 have sin/cos/sincos opcodes.
    if ((shader_is_pixel(ctx)) || (support_nv4(ctx)))
    {
        char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));
        char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
        if (writemask_x(mask))
            output_line(ctx, "COS%s, %s;", dst, src0);
        else if (writemask_y(mask))
            output_line(ctx, "SIN%s, %s;", dst, src0);
        else if (writemask_xy(mask))
            output_line(ctx, "SCS%s, %s;", dst, src0);
    } // if

    // nv2+ profiles have sin and cos opcodes.
    else if (support_nv2(ctx))
    {
        char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
        char src0[64]; make_ARB1_srcarg_string(ctx, 0, src0, sizeof (src0));
        if (writemask_x(mask))
            output_line(ctx, "COS %s.x, %s;", dst, src0);
        else if (writemask_y(mask))
            output_line(ctx, "SIN %s.y, %s;", dst, src0);
        else if (writemask_xy(mask))
        {
            // Keep the component layout of the single-component cases
            // above: cosine goes to .x and sine goes to .y.
            output_line(ctx, "COS %s.x, %s;", dst, src0);
            output_line(ctx, "SIN %s.y, %s;", dst, src0);
        } // else if
    } // if

    else  // big nasty.
    {
        // NOTE(review): this fallback stores its sine approximation in .x
        // and its cosine approximation in .y, the reverse of the two
        // branches above, and the cosine block overwrites the wrapped angle
        // in scratch.x before using it. It is left as-is here; it needs a
        // careful rewrite with test shaders to validate against.
        char dst[64]; get_ARB1_destarg_varname(ctx, dst, sizeof (dst));
        char src0[64]; get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
        const int need_sin = (writemask_x(mask) || writemask_xy(mask));
        const int need_cos = (writemask_y(mask) || writemask_xy(mask));
        char scratch[64];

        if (need_sin || need_cos)
            allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));

        // These sin() and cos() approximations originally found here:
        //    http://www.devmaster.net/forums/showthread.php?t=5784
        //
        // const float B = 4.0f / M_PI;
        // const float C = -4.0f / (M_PI * M_PI);
        // float y = B * x + C * x * fabs(x);
        //
        // // optional better precision...
        // const float P = 0.225f;
        // y = P * (y * fabs(y) - y) + y;
        //
        //
        // That first thing can be reduced to:
        // const float y = ((1.2732395447351626861510701069801f * x) +
        //             ((-0.40528473456935108577551785283891f * x) * fabs(x)));

        if (need_sin)
        {
            // !!! FIXME: use SRCMOD_ABS here?
            output_line(ctx, "ABS %s.x, %s.x;", dst, src0);
            output_line(ctx, "MUL %s.x, %s.x, -0.40528473456935108577551785283891;", dst, dst);
            output_line(ctx, "MUL %s.x, %s.x, 1.2732395447351626861510701069801;", scratch, src0);
            output_line(ctx, "MAD %s.x, %s.x, %s.x, %s.x;", dst, dst, src0, scratch);
        } // if

        // cosine is sin(x + M_PI/2), but you have to wrap x to pi:
        //  if (x+(M_PI/2) > M_PI)
        //      x -= 2 * M_PI;
        //
        // which is...
        //  if (x+(1.57079637050628662109375) > 3.1415927410125732421875)
        //      x += -6.283185482025146484375;

        if (need_cos)
        {
            output_line(ctx, "ADD %s.x, %s.x, 1.57079637050628662109375;", scratch, src0);
            output_line(ctx, "SGE %s.y, %s.x, 3.1415927410125732421875;", scratch, scratch);
            output_line(ctx, "MAD %s.x, %s.y, -6.283185482025146484375, %s.x;", scratch, scratch, scratch);
            output_line(ctx, "ABS %s.x, %s.x;", dst, src0);
            output_line(ctx, "MUL %s.x, %s.x, -0.40528473456935108577551785283891;", dst, dst);
            output_line(ctx, "MUL %s.x, %s.x, 1.2732395447351626861510701069801;", scratch, src0);
            output_line(ctx, "MAD %s.y, %s.x, %s.x, %s.x;", dst, dst, src0, scratch);
        } // if
    } // else

    // !!! FIXME: might not have done anything. Don't emit if we didn't.
    if (!(ctx->isfail))
        emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_SINCOS
// Emit REP (start of a repeat block).
//
// Uses the REP opcode where available. On other nv2 profiles the loop is
// built from branches: two labels (loop top, then loop exit) are allocated
// and pushed on ctx->branch_labels_stack in that order, the count is loaded
// into the "rep<N>" register with MOVC, and a conditional BRA skips the loop
// when the count is not positive. Stock ARB1 fails.
void emit_ARB1_REP(Context *ctx)
{
    char count[64];
    make_ARB1_srcarg_string(ctx, 0, count, sizeof (count));

    // nv2 fragment programs (and everything nv4) have a real REP.
    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
    {
        output_line(ctx, "REP %s;", count);
        return;
    } // if

    if (!support_nv2(ctx))  // stock ARB1 has no branching.
    {
        fail(ctx, "branching unsupported in this profile");
        return;
    } // if

    // no REP, but we can use branches.
    char exitname[32];
    char topname[32];
    char counter[32];

    const int toplabel = allocate_branch_label(ctx);
    const int exitlabel = allocate_branch_label(ctx);
    get_ARB1_branch_label_name(ctx, exitlabel, exitname, sizeof (exitname));
    get_ARB1_branch_label_name(ctx, toplabel, topname, sizeof (topname));

    // Two slots are needed on the label stack.
    assert(((size_t) ctx->branch_labels_stack_index) <
            STATICARRAYLEN(ctx->branch_labels_stack)-1);
    ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = toplabel;
    ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = exitlabel;

    snprintf(counter, sizeof (counter), "rep%d", ctx->reps);
    output_line(ctx, "MOVC %s.x, %s;", counter, count);
    output_line(ctx, "BRA %s (LE.x);", exitname);
    output_line(ctx, "%s:", topname);
} // emit_ARB1_REP
// Emit ENDREP (end of a repeat block).
//
// Uses the ENDREP opcode where available. On other nv2 profiles it pops the
// exit and top labels from ctx->branch_labels_stack, decrements the
// "rep<N>" counter with SUBC, branches back to the top while the counter is
// positive, and finally places the exit label. Stock ARB1 fails.
void emit_ARB1_ENDREP(Context *ctx)
{
    // nv2 fragment programs (and everything nv4) have a real ENDREP.
    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
    {
        output_line(ctx, "ENDREP;");
        return;
    } // if

    if (!support_nv2(ctx))  // stock ARB1 has no branching.
    {
        fail(ctx, "branching unsupported in this profile");
        return;
    } // if

    // no ENDREP, but we can use branches.
    assert(ctx->branch_labels_stack_index >= 2);

    char exitname[32];
    char topname[32];
    char counter[32];

    // Popped in the reverse of the order they were pushed: exit, then top.
    const int exitlabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];
    const int toplabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];
    get_ARB1_branch_label_name(ctx, exitlabel, exitname, sizeof (exitname));
    get_ARB1_branch_label_name(ctx, toplabel, topname, sizeof (topname));

    snprintf(counter, sizeof (counter), "rep%d", ctx->reps);
    output_line(ctx, "SUBC %s.x, %s.x, 1.0;", counter, counter);
    output_line(ctx, "BRA %s (GT.x);", topname);
    output_line(ctx, "%s:", exitname);
} // emit_ARB1_ENDREP
// Emit the opening of a conditional block for nv2+ profiles.
//
// The condition code register MUST be set up before this!
// Writes "IF EQ.x;" for nv4 and for pixel shaders; otherwise allocates a
// branch label, pushes it on ctx->branch_labels_stack and emits a
// conditional BRA to it.
void nv2_if(Context *ctx)
{
    // nv2 fragment programs (and everything nv4) have a real IF.
    if ( (!support_nv4(ctx)) && (!shader_is_pixel(ctx)) )
    {
        // there's no IF construct, but we can use a branch to a label.
        char skipname[32];
        const int skiplabel = allocate_branch_label(ctx);
        get_ARB1_branch_label_name(ctx, skiplabel, skipname, sizeof (skipname));

        assert(((size_t) ctx->branch_labels_stack_index)
                 < STATICARRAYLEN(ctx->branch_labels_stack));
        ctx->branch_labels_stack[ctx->branch_labels_stack_index++] = skiplabel;

        // !!! FIXME: should this be NE? (EQ would jump to the ELSE for the IF condition, right?).
        output_line(ctx, "BRA %s (EQ.x);", skipname);
    } // if
    else
    {
        output_line(ctx, "IF EQ.x;");
    } // else
} // nv2_if
// Emit IF: load source 0 into a scratch register with MOVC to set the
// condition code, then open the conditional through nv2_if(). Fails on
// profiles without nv2 support.
void emit_ARB1_IF(Context *ctx)
{
    if (!support_nv2(ctx))  // stock ARB1 has no branching.
    {
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
        return;
    } // if

    char condreg[64];
    char predicate[64];
    allocate_ARB1_scratch_reg_name(ctx, condreg, sizeof (condreg));
    get_ARB1_srcarg_varname(ctx, 0, predicate, sizeof (predicate));
    output_line(ctx, "MOVC %s.x, %s;", condreg, predicate);
    nv2_if(ctx);
} // emit_ARB1_IF
// Emit ELSE.
//
// Uses the ELSE opcode where available. On other nv2 profiles the IF block
// is ended with an unconditional jump to a freshly allocated ENDIF label,
// the label on top of ctx->branch_labels_stack is placed to start the ELSE
// section, and that stack slot is replaced with the ENDIF label.
// Stock ARB1 fails.
void emit_ARB1_ELSE(Context *ctx)
{
    // nv2 fragment programs (and everything nv4) have a real ELSE.
    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
    {
        output_line(ctx, "ELSE;");
        return;
    } // if

    if (!support_nv2(ctx))  // stock ARB1 has no branching.
    {
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
        return;
    } // if

    // there's no ELSE construct, but we can use a branch to a label.
    assert(ctx->branch_labels_stack_index > 0);

    // At the end of the IF block, unconditionally jump to the ENDIF.
    char endname[32];
    const int endlabel = allocate_branch_label(ctx);
    get_ARB1_branch_label_name(ctx, endlabel, endname, sizeof (endname));
    output_line(ctx, "BRA %s;", endname);

    // Now mark the ELSE section with a label.
    char elsename[32];
    const int top = ctx->branch_labels_stack_index - 1;
    const int elselabel = ctx->branch_labels_stack[top];
    get_ARB1_branch_label_name(ctx, elselabel, elsename, sizeof (elsename));
    output_line(ctx, "%s:", elsename);

    // Replace the ELSE label with the ENDIF on the label stack.
    ctx->branch_labels_stack[top] = endlabel;
} // emit_ARB1_ELSE
// Emit ENDIF.
//
// Uses the ENDIF opcode where available. On other nv2 profiles it pops the
// label on top of ctx->branch_labels_stack and places it. Stock ARB1 fails.
void emit_ARB1_ENDIF(Context *ctx)
{
    // nv2 fragment programs (and everything nv4) have a real ENDIF.
    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
    {
        output_line(ctx, "ENDIF;");
        return;
    } // if

    if (!support_nv2(ctx))  // stock ARB1 has no branching.
    {
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
        return;
    } // if

    // there's no ENDIF construct, but we can use a branch to a label.
    assert(ctx->branch_labels_stack_index > 0);

    char endname[32];
    const int endlabel = ctx->branch_labels_stack[--ctx->branch_labels_stack_index];
    get_ARB1_branch_label_name(ctx, endlabel, endname, sizeof (endname));
    output_line(ctx, "%s:", endname);
} // emit_ARB1_ENDIF
// Emit BREAK.
//
// Uses the BRK opcode where available. On other nv2 profiles it emits an
// unconditional branch to the loop's exit label, which emit_ARB1_REP()
// pushed last on ctx->branch_labels_stack. Stock ARB1 fails.
void emit_ARB1_BREAK(Context *ctx)
{
    // nv2 fragment programs (and everything nv4) have a real BREAK.
    if ( (support_nv4(ctx)) || ((support_nv2(ctx)) && (shader_is_pixel(ctx))) )
        output_line(ctx, "BRK;");

    else if (support_nv2(ctx))
    {
        // no BREAK, but we can use branches.
        assert(ctx->branch_labels_stack_index >= 2);

        // branch_labels_stack_index points at the next free slot, so the
        // most recently pushed label lives at index-1. Reading [index]
        // would pick up a slot that is not live (and can be past the end of
        // the array when the stack is full).
        // NOTE(review): if the BREAK sits inside an IF that pushed its own
        //  label, the top of the stack is that IF's label rather than the
        //  loop exit; the stack doesn't record which kind each entry is.
        const int faillabel = ctx->branch_labels_stack[ctx->branch_labels_stack_index - 1];
        char failbranch[32];
        get_ARB1_branch_label_name(ctx,faillabel,failbranch,sizeof(failbranch));
        output_line(ctx, "BRA %s;", failbranch);
    } // else if

    else  // stock ARB1 has no branching.
    {
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
    } // else
} // emit_ARB1_BREAK
// Emit MOVA (move to address register).
//
// nv4 uses ROUND.S, nv2/nv3 use ARR. Stock ARB1 computes a
// round-to-nearest value by hand into the "addr<N>" register and marks the
// cached address-register component as invalid.
void emit_ARB1_MOVA(Context *ctx)
{
    // nv2 and nv3 can use the ARR opcode.
    // But nv4 removed ARR (and ADDRESS registers!). Just ROUND to an INT.
    if (support_nv4(ctx))
    {
        emit_ARB1_opcode_ds(ctx, "ROUND.S");  // !!! FIXME: don't use a modifier here.
        return;
    } // if

    if ((support_nv2(ctx)) || (support_nv3(ctx)))
    {
        emit_ARB1_opcode_ds(ctx, "ARR");
        return;
    } // if

    char value[64];
    char sign[64];
    char addrreg[32];

    make_ARB1_srcarg_string(ctx, 0, value, sizeof (value));
    allocate_ARB1_scratch_reg_name(ctx, sign, sizeof (sign));
    snprintf(addrreg, sizeof (addrreg), "addr%d", ctx->dest_arg.regnum);

    // !!! FIXME: we can optimize this if src_mod is ABS or ABSNEGATE.

    // ARL uses floor(), but D3D expects round-to-nearest.
    // There is probably a more efficient way to do this.
    // First build a -1.0/1.0 sign multiplier in the scratch register...
    if (!shader_is_pixel(ctx))
    {
        output_line(ctx, "SLT %s, %s, 0.0;", sign, value);
        output_line(ctx, "MAD %s, %s, -2.0, 1.0;", sign, sign);
    } // if
    else  // CMP only exists in fragment programs.  :/
    {
        output_line(ctx, "CMP %s, %s, -1.0, 1.0;", sign, value);
    } // else

    // ...then floor(abs(value) + 0.5), with the sign applied afterwards.
    output_line(ctx, "ABS %s, %s;", addrreg, value);
    output_line(ctx, "ADD %s, %s, 0.5;", addrreg, addrreg);
    output_line(ctx, "FLR %s, %s;", addrreg, addrreg);
    output_line(ctx, "MUL %s, %s, %s;", addrreg, addrreg, sign);

    // we don't handle these right now, since emit_ARB1_dest_modifiers(ctx)
    //  wants to look at dest_arg, not our temp register.
    assert(ctx->dest_arg.result_mod == 0);
    assert(ctx->dest_arg.result_shift == 0);

    // we assign to the actual address register as needed.
    ctx->last_address_reg_component = -1;
} // emit_ARB1_MOVA
// Emit TEXKILL as "KIL <reg>.xyzx;".
void emit_ARB1_TEXKILL(Context *ctx)
{
    char reg[64];

    // d3d kills on xyz, arb1 kills on xyzw. Fix the swizzle.
    //  The x component is simply replicated into w: if it's negative the
    //  fragment would discard anyhow, otherwise it passes through okay.
    //  This saves us a temp register.
    get_ARB1_destarg_varname(ctx, reg, sizeof (reg));
    output_line(ctx, "KIL %s.xyzx;", reg);
} // emit_ARB1_TEXKILL
// Shared emitter for TEXBEM and TEXBEML.
//
// Perturbs the destination register's coordinates using source 0 and the
// "<sampler>_texbem" parameter, samples texture[stage] as 2D, and -- when
// `luminance` is non-zero -- scales the result using "<sampler>_texbeml".
static void arb1_texbem(Context *ctx, const int luminance)
{
    // !!! FIXME: this code counts on the register not having swizzles, etc.
    const int stage = ctx->dest_arg.regnum;
    char dest[64];
    char bump[64];
    char work[64];
    char samp[64];

    get_ARB1_destarg_varname(ctx, dest, sizeof (dest));
    get_ARB1_srcarg_varname(ctx, 0, bump, sizeof (bump));
    allocate_ARB1_scratch_reg_name(ctx, work, sizeof (work));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_SAMPLER, stage, samp, sizeof (samp));

    output_line(ctx, "MUL %s, %s_texbem.xzyw, %s.xyxy;", work, samp, bump);
    output_line(ctx, "ADD %s.xy, %s.xzxx, %s.ywxx;", work, work, work);
    output_line(ctx, "ADD %s.xy, %s, %s;", work, work, dest);
    output_line(ctx, "TEX %s, %s, texture[%d], 2D;", dest, work, stage);

    if (luminance != 0)  // TEXBEML, not just TEXBEM?
    {
        output_line(ctx, "MAD %s, %s.zzzz, %s_texbeml.xxxx, %s_texbeml.yyyy;",
                    work, bump, samp, samp);
        output_line(ctx, "MUL %s, %s, %s;", dest, dest, work);
    } // if

    emit_ARB1_dest_modifiers(ctx);
} // arb1_texbem
// TEXBEM: bump-environment-map lookup without the luminance step.
void emit_ARB1_TEXBEM(Context *ctx)
{
    const int luminance = 0;
    arb1_texbem(ctx, luminance);
} // emit_ARB1_TEXBEM
// TEXBEML: bump-environment-map lookup with the luminance step.
void emit_ARB1_TEXBEML(Context *ctx)
{
    const int luminance = 1;
    arb1_texbem(ctx, luminance);
} // emit_ARB1_TEXBEML
// TEXREG2AR and TEXREG2GB are not implemented for the arb1 profiles (the
// macro is defined outside this part of the file).
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2AR)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2GB)
// TEXM3X2PAD emits nothing by itself.
void emit_ARB1_TEXM3X2PAD(Context *ctx)
{
    // no-op ... work happens in emit_ARB1_TEXM3X2TEX().
    (void) ctx;
} // emit_ARB1_TEXM3X2PAD
// Emit TEXM3X2TEX: two DP3s build the .y and .x coordinates in the
// destination register (using the registers recorded in
// ctx->texm3x2pad_src0 / ctx->texm3x2pad_dst0), then texture[stage] is
// sampled as 2D. Does nothing when ctx->texm3x2pad_src0 is -1.
void emit_ARB1_TEXM3X2TEX(Context *ctx)
{
    if (ctx->texm3x2pad_src0 == -1)
        return;

    // !!! FIXME: this code counts on the register not having swizzles, etc.
    const int stage = ctx->dest_arg.regnum;
    char dest[64];
    char pad_src[64];
    char pad_dst[64];
    char row_src[64];

    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_src0,
                            pad_src, sizeof (pad_src));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x2pad_dst0,
                            pad_dst, sizeof (pad_dst));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
                            row_src, sizeof (row_src));
    get_ARB1_destarg_varname(ctx, dest, sizeof (dest));

    output_line(ctx, "DP3 %s.y, %s, %s;", dest, row_src, dest);
    output_line(ctx, "DP3 %s.x, %s, %s;", dest, pad_src, pad_dst);
    output_line(ctx, "TEX %s, %s, texture[%d], 2D;", dest, dest, stage);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_TEXM3X2TEX
// TEXM3X3PAD emits nothing by itself.
void emit_ARB1_TEXM3X3PAD(Context *ctx)
{
    // no-op ... work happens in emit_ARB1_TEXM3X3*().
    (void) ctx;
} // emit_ARB1_TEXM3X3PAD
// Emit TEXM3X3TEX: three DP3s build the .z/.x/.y coordinates in the
// destination register (using the registers recorded by the two preceding
// pad instructions), then texture[stage] is sampled as CUBE when the
// sampler was declared as a cube map and as 3D otherwise.
// Does nothing when ctx->texm3x3pad_src1 is -1.
void emit_ARB1_TEXM3X3TEX(Context *ctx)
{
    if (ctx->texm3x3pad_src1 == -1)
        return;

    // !!! FIXME: this code counts on the register not having swizzles, etc.
    const int stage = ctx->dest_arg.regnum;
    char dest[64];
    char pad0_dst[64];
    char pad0_src[64];
    char pad1_dst[64];
    char pad1_src[64];
    char row_src[64];

    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
                            pad0_dst, sizeof (pad0_dst));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
                            pad0_src, sizeof (pad0_src));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
                            pad1_dst, sizeof (pad1_dst));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
                            pad1_src, sizeof (pad1_src));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
                            row_src, sizeof (row_src));
    get_ARB1_destarg_varname(ctx, dest, sizeof (dest));

    // An undeclared sampler is treated as texture type 0.
    RegisterList *sampler = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage);
    TextureType textype = (TextureType) 0;
    if (sampler)
        textype = (TextureType) sampler->index;
    const char *target = "3D";
    if (textype == TEXTURE_TYPE_CUBE)
        target = "CUBE";

    output_line(ctx, "DP3 %s.z, %s, %s;", dest, dest, row_src);
    output_line(ctx, "DP3 %s.x, %s, %s;", dest, pad0_dst, pad0_src);
    output_line(ctx, "DP3 %s.y, %s, %s;", dest, pad1_dst, pad1_src);
    output_line(ctx, "TEX %s, %s, texture[%d], %s;", dest, dest, stage, target);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_TEXM3X3TEX
// Emit TEXM3X3SPEC: builds a normal in the destination register with three
// DP3s, computes a lookup vector from it and the eye-ray register
// (source 1) in a scratch register, and samples texture[stage] (CUBE or 3D)
// with that vector. Does nothing when ctx->texm3x3pad_src1 is -1.
void emit_ARB1_TEXM3X3SPEC(Context *ctx)
{
    if (ctx->texm3x3pad_src1 == -1)
        return;

    // !!! FIXME: this code counts on the register not having swizzles, etc.
    const int stage = ctx->dest_arg.regnum;
    char dest[64];
    char pad0_dst[64];
    char pad0_src[64];
    char pad1_dst[64];
    char pad1_src[64];
    char row_src[64];
    char eye[64];
    char accum[64];
    char recip[64];

    // Scratch registers are claimed first, in this order.
    allocate_ARB1_scratch_reg_name(ctx, accum, sizeof (accum));
    allocate_ARB1_scratch_reg_name(ctx, recip, sizeof (recip));

    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
                            pad0_dst, sizeof (pad0_dst));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
                            pad0_src, sizeof (pad0_src));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
                            pad1_dst, sizeof (pad1_dst));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
                            pad1_src, sizeof (pad1_src));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
                            row_src, sizeof (row_src));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[1].regnum,
                            eye, sizeof (eye));
    get_ARB1_destarg_varname(ctx, dest, sizeof (dest));

    // An undeclared sampler is treated as texture type 0.
    RegisterList *sampler = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage);
    TextureType textype = (TextureType) 0;
    if (sampler)
        textype = (TextureType) sampler->index;
    const char *target = "3D";
    if (textype == TEXTURE_TYPE_CUBE)
        target = "CUBE";

    output_line(ctx, "DP3 %s.z, %s, %s;", dest, dest, row_src);
    output_line(ctx, "DP3 %s.x, %s, %s;", dest, pad0_dst, pad0_src);
    output_line(ctx, "DP3 %s.y, %s, %s;", dest, pad1_dst, pad1_src);
    output_line(ctx, "MUL %s, %s, %s;", accum, dest, dest);  // normal * normal
    output_line(ctx, "MUL %s, %s, %s;", recip, dest, eye);  // normal * eyeray

    // !!! FIXME: This is goofy. There's got to be a way to do vector-wide
    // !!! FIXME:  divides or reciprocals...right?
    output_line(ctx, "RCP %s.x, %s.x;", recip, recip);
    output_line(ctx, "RCP %s.y, %s.y;", recip, recip);
    output_line(ctx, "RCP %s.z, %s.z;", recip, recip);
    output_line(ctx, "RCP %s.w, %s.w;", recip, recip);
    output_line(ctx, "MUL %s, %s, %s;", accum, accum, recip);

    output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", accum, accum);
    output_line(ctx, "MAD %s, %s, %s, -%s;", accum, accum, dest, eye);
    output_line(ctx, "TEX %s, %s, texture[%d], %s;", dest, accum, stage, target);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_TEXM3X3SPEC
// Emit TEXM3X3VSPEC: like TEXM3X3SPEC, but the eye ray is gathered from the
// .w components of the two pad destination registers and of the
// destination register instead of coming from a second source.
// Does nothing when ctx->texm3x3pad_src1 is -1.
void emit_ARB1_TEXM3X3VSPEC(Context *ctx)
{
    if (ctx->texm3x3pad_src1 == -1)
        return;

    // !!! FIXME: this code counts on the register not having swizzles, etc.
    const int stage = ctx->dest_arg.regnum;
    char dest[64];
    char pad0_dst[64];
    char pad0_src[64];
    char pad1_dst[64];
    char pad1_src[64];
    char row_src[64];
    char accum[64];
    char recip[64];
    char eye[64];

    // Scratch registers are claimed first, in this order.
    allocate_ARB1_scratch_reg_name(ctx, accum, sizeof (accum));
    allocate_ARB1_scratch_reg_name(ctx, recip, sizeof (recip));
    allocate_ARB1_scratch_reg_name(ctx, eye, sizeof (eye));

    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
                            pad0_dst, sizeof (pad0_dst));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
                            pad0_src, sizeof (pad0_src));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
                            pad1_dst, sizeof (pad1_dst));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
                            pad1_src, sizeof (pad1_src));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
                            row_src, sizeof (row_src));
    get_ARB1_destarg_varname(ctx, dest, sizeof (dest));

    // An undeclared sampler is treated as texture type 0.
    RegisterList *sampler = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, stage);
    TextureType textype = (TextureType) 0;
    if (sampler)
        textype = (TextureType) sampler->index;
    const char *target = "3D";
    if (textype == TEXTURE_TYPE_CUBE)
        target = "CUBE";

    // Collect the eye ray before the DP3s overwrite the destination.
    output_line(ctx, "MOV %s.x, %s.w;", eye, pad0_dst);
    output_line(ctx, "MOV %s.y, %s.w;", eye, pad1_dst);
    output_line(ctx, "MOV %s.z, %s.w;", eye, dest);

    output_line(ctx, "DP3 %s.z, %s, %s;", dest, dest, row_src);
    output_line(ctx, "DP3 %s.x, %s, %s;", dest, pad0_dst, pad0_src);
    output_line(ctx, "DP3 %s.y, %s, %s;", dest, pad1_dst, pad1_src);
    output_line(ctx, "MUL %s, %s, %s;", accum, dest, dest);  // normal * normal
    output_line(ctx, "MUL %s, %s, %s;", recip, dest, eye);  // normal * eyeray

    // !!! FIXME: This is goofy. There's got to be a way to do vector-wide
    // !!! FIXME:  divides or reciprocals...right?
    output_line(ctx, "RCP %s.x, %s.x;", recip, recip);
    output_line(ctx, "RCP %s.y, %s.y;", recip, recip);
    output_line(ctx, "RCP %s.z, %s.z;", recip, recip);
    output_line(ctx, "RCP %s.w, %s.w;", recip, recip);
    output_line(ctx, "MUL %s, %s, %s;", accum, accum, recip);

    output_line(ctx, "MUL %s, %s, { 2.0, 2.0, 2.0, 2.0 };", accum, accum);
    output_line(ctx, "MAD %s, %s, %s, -%s;", accum, accum, dest, eye);
    output_line(ctx, "TEX %s, %s, texture[%d], %s;", dest, accum, stage, target);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_TEXM3X3VSPEC
// EXPP is emitted as the ARB1 "EX2" opcode, the same as EXP.
void emit_ARB1_EXPP(Context *ctx)
{
    emit_ARB1_opcode_ds(ctx, "EX2");
} // emit_ARB1_EXPP
// LOGP is emitted through arb1_log() with "LG2", the same as LOG.
void emit_ARB1_LOGP(Context *ctx)
{
    arb1_log(ctx, "LG2");
} // emit_ARB1_LOGP
// Emit CND using ARB1's CMP: source 0 is biased by -0.5 into a scratch
// register, then CMP picks between sources 2 and 1 based on its sign.
void emit_ARB1_CND(Context *ctx)
{
    char dest[64];
    char selector[64];
    char if_above[64];
    char if_below[64];
    char biased[64];

    make_ARB1_destarg_string(ctx, dest, sizeof (dest));
    make_ARB1_srcarg_string(ctx, 0, selector, sizeof (selector));
    make_ARB1_srcarg_string(ctx, 1, if_above, sizeof (if_above));
    make_ARB1_srcarg_string(ctx, 2, if_below, sizeof (if_below));
    allocate_ARB1_scratch_reg_name(ctx, biased, sizeof (biased));

    // CND compares against 0.5, but we need to compare against 0.0...
    //  ...subtract to make up the difference.
    output_line(ctx, "SUB %s, %s, { 0.5, 0.5, 0.5, 0.5 };", biased, selector);

    // D3D tests (src0 >= 0.0), but ARB1 tests (src0 < 0.0) ... so just
    //  switch src1 and src2 to get the same results.
    output_line(ctx, "CMP%s, %s, %s, %s;", dest, biased, if_below, if_above);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_CND
// Texture opcodes that are not implemented for the arb1 profiles (the macro
// is defined outside this part of the file).
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXREG2RGB)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3TEX)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXM3X2DEPTH)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDP3)
// Emit TEXM3X3: three DP3s write .z/.x/.y of the destination register
// (using the registers recorded by the two preceding pad instructions) and
// .w is set to 1.0. No texture lookup is performed.
// Does nothing when ctx->texm3x3pad_src1 is -1.
void emit_ARB1_TEXM3X3(Context *ctx)
{
    if (ctx->texm3x3pad_src1 == -1)
        return;

    char dest[64];
    char pad0_dst[64];
    char pad0_src[64];
    char pad1_dst[64];
    char pad1_src[64];
    char row_src[64];

    // !!! FIXME: this code counts on the register not having swizzles, etc.
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst0,
                            pad0_dst, sizeof (pad0_dst));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src0,
                            pad0_src, sizeof (pad0_src));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_dst1,
                            pad1_dst, sizeof (pad1_dst));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->texm3x3pad_src1,
                            pad1_src, sizeof (pad1_src));
    get_ARB1_varname_in_buf(ctx, REG_TYPE_TEXTURE, ctx->source_args[0].regnum,
                            row_src, sizeof (row_src));
    get_ARB1_destarg_varname(ctx, dest, sizeof (dest));

    output_line(ctx, "DP3 %s.z, %s, %s;", dest, dest, row_src);
    output_line(ctx, "DP3 %s.x, %s, %s;", dest, pad0_dst, pad0_src);
    output_line(ctx, "DP3 %s.y, %s, %s;", dest, pad1_dst, pad1_src);
    output_line(ctx, "MOV %s.w, { 1.0, 1.0, 1.0, 1.0 };", dest);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_TEXM3X3
// TEXDEPTH is not implemented for the arb1 profiles (the macro is defined
// outside this part of the file).
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXDEPTH)
// Emit CMP. ARB1's CMP is used with sources 1 and 2 exchanged.
void emit_ARB1_CMP(Context *ctx)
{
    char dest[64];
    char selector[64];
    char first[64];
    char second[64];

    make_ARB1_destarg_string(ctx, dest, sizeof (dest));
    make_ARB1_srcarg_string(ctx, 0, selector, sizeof (selector));
    make_ARB1_srcarg_string(ctx, 1, first, sizeof (first));
    make_ARB1_srcarg_string(ctx, 2, second, sizeof (second));

    // D3D tests (src0 >= 0.0), but ARB1 tests (src0 < 0.0) ... so just
    //  switch src1 and src2 to get the same results.
    output_line(ctx, "CMP%s, %s, %s, %s;", dest, selector, second, first);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_CMP
// BEM is not implemented for the arb1 profiles (the macro is defined outside
// this part of the file).
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BEM)
// Emit DP2ADD.
//
// nv4 uses the DP2A opcode. Otherwise the operation is expanded to a MUL
// and two ADDs using a scratch register.
void emit_ARB1_DP2ADD(Context *ctx)
{
    if (support_nv4(ctx))  // nv4 has a built-in equivalent to DP2ADD.
    {
        emit_ARB1_opcode_dsss(ctx, "DP2A");
        return;
    } // if

    char dest[64];
    char lhs[64];
    char rhs[64];
    char addend[64];
    char work[64];

    make_ARB1_destarg_string(ctx, dest, sizeof (dest));
    make_ARB1_srcarg_string(ctx, 0, lhs, sizeof (lhs));
    make_ARB1_srcarg_string(ctx, 1, rhs, sizeof (rhs));
    make_ARB1_srcarg_string(ctx, 2, addend, sizeof (addend));

    // DP2ADD is:
    //  dst = (src0.r * src1.r) + (src0.g * src1.g) + src2.replicate_swiz
    allocate_ARB1_scratch_reg_name(ctx, work, sizeof (work));
    output_line(ctx, "MUL %s, %s, %s;", work, lhs, rhs);
    output_line(ctx, "ADD %s, %s.x, %s.y;", work, work, work);
    output_line(ctx, "ADD%s, %s.x, %s;", dest, work, addend);
    emit_ARB1_dest_modifiers(ctx);
} // emit_ARB1_DP2ADD
// Emit DSX as the "DDX" opcode; fails on profiles without nv2 support.
void emit_ARB1_DSX(Context *ctx)
{
    if (!support_nv2(ctx))
    {
        failf(ctx, "DSX unsupported in %s profile", ctx->profile->name);
        return;
    } // if

    // nv2 has a built-in equivalent to DSX.
    emit_ARB1_opcode_ds(ctx, "DDX");
} // emit_ARB1_DSX
// Emit DSY as the "DDY" opcode; fails on profiles without nv2 support.
void emit_ARB1_DSY(Context *ctx)
{
    if (!support_nv2(ctx))
    {
        failf(ctx, "DSY unsupported in %s profile", ctx->profile->name);
        return;
    } // if

    // nv2 has a built-in equivalent to DSY.
    emit_ARB1_opcode_ds(ctx, "DDY");
} // emit_ARB1_DSY
// Shared emitter for the texture-load opcodes.
//
// ctx:    translation context.
// opcode: ARB1 mnemonic to emit ("TEX", "TXD", "TXL", ...).
// texldd: non-zero to also pass sources 2 and 3 to the instruction.
//
// For shader models below 1.4 the destination register supplies both the
// coordinates and the sampler number; otherwise source 0 holds the
// coordinates and source 1 names the sampler. Fails (and emits nothing)
// when the sampler is undeclared, when the sampler argument carries a
// swizzle, or when the sampler's texture type is unknown.
static void arb1_texld(Context *ctx, const char *opcode, const int texldd)
{
    // !!! FIXME: Hack: "TEXH" is invalid in nv4. Fix this more cleanly.
    if ((ctx->dest_arg.result_mod & MOD_PP) && (support_nv4(ctx)))
        ctx->dest_arg.result_mod &= ~MOD_PP;

    char dst[64]; make_ARB1_destarg_string(ctx, dst, sizeof (dst));

    const int sm1 = !shader_version_atleast(ctx, 1, 4);
    const int regnum = sm1 ? ctx->dest_arg.regnum : ctx->source_args[1].regnum;
    RegisterList *sreg = reglist_find(&ctx->samplers, REG_TYPE_SAMPLER, regnum);

    const char *ttype = NULL;
    char src0[64];
    if (sm1)
        get_ARB1_destarg_varname(ctx, src0, sizeof (src0));
    else
        get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
    //char src1[64]; get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1)); // !!! FIXME: SRC_MOD?

    char src2[64] = { 0 };
    char src3[64] = { 0 };

    if (texldd)
    {
        make_ARB1_srcarg_string(ctx, 2, src2, sizeof (src2));
        make_ARB1_srcarg_string(ctx, 3, src3, sizeof (src3));
    } // if

    // !!! FIXME: this should be in state_TEXLD, not in the arb1/glsl emitters.
    if (sreg == NULL)
    {
        fail(ctx, "TEXLD using undeclared sampler");
        return;
    } // if

    // SM1 only specifies dst, so don't check swizzle there.
    if ( !sm1 && (!no_swizzle(ctx->source_args[1].swizzle)) )
    {
        // !!! FIXME: does this ever actually happen?
        fail(ctx, "BUG: can't handle TEXLD with sampler swizzle at the moment");
        // Stop here like the other failure paths in this function do,
        //  instead of going on to emit an instruction for a failed shader.
        return;
    } // if

    switch ((const TextureType) sreg->index)
    {
        case TEXTURE_TYPE_2D: ttype = "2D"; break; // !!! FIXME: "RECT"?
        case TEXTURE_TYPE_CUBE: ttype = "CUBE"; break;
        case TEXTURE_TYPE_VOLUME: ttype = "3D"; break;
        default: fail(ctx, "unknown texture type"); return;
    } // switch

    if (texldd)
    {
        output_line(ctx, "%s%s, %s, %s, %s, texture[%d], %s;", opcode, dst,
                    src0, src2, src3, regnum, ttype);
    } // if
    else
    {
        output_line(ctx, "%s%s, %s, texture[%d], %s;", opcode, dst, src0,
                    regnum, ttype);
    } // else
} // arb1_texld
// Emit TEXLDD: "TXD" with both extra sources when nv2 is supported,
// otherwise an ordinary "TEX" lookup.
void emit_ARB1_TEXLDD(Context *ctx)
{
    // With GL_NV_fragment_program2, we can use the TXD opcode.
    //  In stock arb1, we can settle for a standard texld, which isn't
    //  perfect, but oh well.
    if (!support_nv2(ctx))
    {
        arb1_texld(ctx, "TEX", 0);
        return;
    } // if

    arb1_texld(ctx, "TXD", 1);
} // emit_ARB1_TEXLDD
// Emit TEXLDL as a TXL instruction. Vertex shaders need the nv3 feature
//  level and pixel shaders need nv2; anything less is reported as an error
//  and nothing is emitted.
void emit_ARB1_TEXLDL(Context *ctx)
{
    if (shader_is_vertex(ctx))
    {
        if (!support_nv3(ctx))
        {
            failf(ctx, "Vertex shader TEXLDL unsupported in %s profile",
                  ctx->profile->name);
            return;
        } // if
    } // if

    if (shader_is_pixel(ctx))
    {
        if (!support_nv2(ctx))
        {
            failf(ctx, "Pixel shader TEXLDL unsupported in %s profile",
                  ctx->profile->name);
            return;
        } // if
    } // if

    // !!! FIXME: this doesn't map exactly to TEXLDL. Review this.
    arb1_texld(ctx, "TXL", 0);
} // emit_ARB1_TEXLDL
// BREAKP and BREAKC have no real emitter in this profile. The macro is
//  defined outside this part of the file; presumably it generates an
//  emit_ARB1_<opcode>() stub that reports the opcode as unimplemented
//  (confirm against the macro definition).
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BREAKP)
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(BREAKC)
// Emit the start of an IFC (if-with-comparison) block.
// With nv2 support, this writes a comparison of source args 0 and 1 into
//  the .x component of a freshly allocated scratch register, using the
//  "C"-suffixed form of the set-on-compare opcode, and then hands off to
//  nv2_if() for the branch itself. Stock ARB1 has no branching, so
//  without nv2 this reports an error and emits nothing.
void emit_ARB1_IFC(Context *ctx)
{
    // Indexed by the instruction's comparison control, which follows the
    //  Direct3D ordering: 1=GT, 2=EQ, 3=GE, 4=LT, 5=NE, 6=LE. Slot 0 is
    //  reserved. Slot 4 must be the less-than opcode; it used to repeat
    //  "SGTC", which turned every "less than" test into "greater than".
    static const char *const comps[] = {
        "", "SGTC", "SEQC", "SGEC", "SLTC", "SNEC", "SLEC"
    };

    if (!support_nv2(ctx))  // stock ARB1 has no branching.
    {
        failf(ctx, "branching unsupported in %s profile", ctx->profile->name);
        return;
    } // if

    if (ctx->instruction_controls >= STATICARRAYLEN(comps))
    {
        fail(ctx, "unknown comparison control");
        return;
    } // if

    char src0[64];
    char src1[64];
    char scratch[64];
    const char *comp = comps[ctx->instruction_controls];
    get_ARB1_srcarg_varname(ctx, 0, src0, sizeof (src0));
    get_ARB1_srcarg_varname(ctx, 1, src1, sizeof (src1));
    allocate_ARB1_scratch_reg_name(ctx, scratch, sizeof (scratch));
    output_line(ctx, "%s %s.x, %s, %s;", comp, scratch, src0, src1);
    nv2_if(ctx);
} // emit_ARB1_IFC
// SETP has no real emitter in this profile. The macro is defined outside
//  this part of the file; presumably it generates an emit_ARB1_SETP() stub
//  that reports the opcode as unimplemented (confirm against the macro
//  definition).
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(SETP)
// Emit a DEF: a four-component float constant, written to the globals
//  output section as a "PARAM name = { a, b, c, d };" line. The values
//  are read from ctx->dwords.
void emit_ARB1_DEF(Context *ctx)
{
    const float *fvals = (const float *) ctx->dwords; // !!! FIXME: could be int?
    char name[64];
    char comps[4][32];
    int i;

    get_ARB1_destarg_varname(ctx, name, sizeof (name));

    // Stringify the components in order.
    for (i = 0; i < 4; i++)
        floatstr(ctx, comps[i], sizeof (comps[i]), fvals[i], 1);

    push_output(ctx, &ctx->globals);
    output_line(ctx, "PARAM %s = { %s, %s, %s, %s };",
                name, comps[0], comps[1], comps[2], comps[3]);
    pop_output(ctx);
} // emit_ARB1_DEF
// Emit a DEFI: a four-component integer constant, written to the globals
//  output section as a "PARAM name = { a, b, c, d };" line. The values
//  are read from ctx->dwords.
void emit_ARB1_DEFI(Context *ctx)
{
    char name[64];
    get_ARB1_destarg_varname(ctx, name, sizeof (name));

    const int32 *raw = (const int32 *) ctx->dwords;
    const int c0 = (int) raw[0];
    const int c1 = (int) raw[1];
    const int c2 = (int) raw[2];
    const int c3 = (int) raw[3];

    push_output(ctx, &ctx->globals);
    output_line(ctx, "PARAM %s = { %d, %d, %d, %d };", name, c0, c1, c2, c3);
    pop_output(ctx);
} // emit_ARB1_DEFI
// Emit a DEFB: a boolean constant, written to the globals output section
//  as "PARAM name = 0;" or "PARAM name = 1;". Any non-zero first dword
//  counts as true.
void emit_ARB1_DEFB(Context *ctx)
{
    char name[64];
    get_ARB1_destarg_varname(ctx, name, sizeof (name));

    const int truth = (ctx->dwords[0] != 0);

    push_output(ctx, &ctx->globals);
    output_line(ctx, "PARAM %s = %d;", name, truth);
    pop_output(ctx);
} // emit_ARB1_DEFB
// DCL emits nothing in this profile: declarations are handled by our
//  emit_attribute() and emit_uniform() instead.
void emit_ARB1_DCL(Context *ctx)
{
    (void) ctx;  // intentionally unused.
} // emit_ARB1_DCL
// TEXCRD has no real emitter in this profile. The macro is defined outside
//  this part of the file; presumably it generates an emit_ARB1_TEXCRD()
//  stub that reports the opcode as unimplemented (confirm against the
//  macro definition).
EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC(TEXCRD)
// Emit TEXLD. Shaders older than 1.4 always become a plain TEX. Shader
//  Model 1.4 is not implemented and reports an error. From 2.0 on, the
//  instruction control selects between TEX, TXP and TXB; any other control
//  value emits nothing.
void emit_ARB1_TEXLD(Context *ctx)
{
    const char *opcode = NULL;

    if (!shader_version_atleast(ctx, 1, 4))
    {
        arb1_texld(ctx, "TEX", 0);
        return;
    } // if

    if (!shader_version_atleast(ctx, 2, 0))
    {
        // ps_1_4 is different, too!
        fail(ctx, "TEXLD == Shader Model 1.4 unimplemented."); // !!! FIXME
        return;
    } // if

    // !!! FIXME: do texldb and texldp map between OpenGL and D3D correctly?
    if (ctx->instruction_controls == CONTROL_TEXLD)
        opcode = "TEX";
    else if (ctx->instruction_controls == CONTROL_TEXLDP)
        opcode = "TXP";
    else if (ctx->instruction_controls == CONTROL_TEXLDB)
        opcode = "TXB";

    if (opcode != NULL)
        arb1_texld(ctx, opcode, 0);
} // emit_ARB1_TEXLD
// The EMIT_ARB1_OPCODE_* helper macros are not used past this point;
//  undefine them so they don't outlive the emitter definitions.
#undef EMIT_ARB1_OPCODE_FUNC
#undef EMIT_ARB1_OPCODE_D_FUNC
#undef EMIT_ARB1_OPCODE_S_FUNC
#undef EMIT_ARB1_OPCODE_SS_FUNC
#undef EMIT_ARB1_OPCODE_DS_FUNC
#undef EMIT_ARB1_OPCODE_DSS_FUNC
#undef EMIT_ARB1_OPCODE_DSSS_FUNC
#undef EMIT_ARB1_OPCODE_DSSSS_FUNC
#undef EMIT_ARB1_OPCODE_UNIMPLEMENTED_FUNC
#endif // SUPPORT_PROFILE_ARB1
// Matches the "#pragma GCC visibility push(hidden)" at the top of this file.
#pragma GCC visibility pop