From 23e02d17c0709ad96fa87104a410738627b299b5 Mon Sep 17 00:00:00 2001 From: Hannu Hanhi Date: Sun, 18 Oct 2020 21:27:22 +0300 Subject: [PATCH 1/2] NPO2 span function optimization --- src/r_draw8_npo2.c | 184 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 174 insertions(+), 10 deletions(-) diff --git a/src/r_draw8_npo2.c b/src/r_draw8_npo2.c index 020155694..f82292ea2 100644 --- a/src/r_draw8_npo2.c +++ b/src/r_draw8_npo2.c @@ -23,6 +23,8 @@ void R_DrawSpan_NPO2_8 (void) fixed_t xposition; fixed_t yposition; fixed_t xstep, ystep; + fixed_t x, y; + fixed_t fixedwidth, fixedheight; UINT8 *source; UINT8 *colormap; @@ -41,9 +43,22 @@ void R_DrawSpan_NPO2_8 (void) if (dest+8 > deststop) return; + fixedwidth = ds_flatwidth << FRACBITS; + fixedheight = ds_flatheight << FRACBITS; + + // Fix xposition and yposition if they are out of bounds. + if (xposition < 0) + xposition = fixedwidth - ((UINT32)(fixedwidth - xposition) % fixedwidth); + else if (xposition >= fixedwidth) + xposition %= fixedwidth; + if (yposition < 0) + yposition = fixedheight - ((UINT32)(fixedheight - yposition) % fixedheight); + else if (yposition >= fixedheight) + yposition %= fixedheight; + while (count-- && dest <= deststop) { - fixed_t x = (xposition >> FRACBITS); +/* fixed_t x = (xposition >> FRACBITS); fixed_t y = (yposition >> FRACBITS); // Carefully align all of my Friends. @@ -53,7 +68,26 @@ void R_DrawSpan_NPO2_8 (void) y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); x %= ds_flatwidth; - y %= ds_flatheight; + y %= ds_flatheight;*/ + + // The loops here keep the texture coordinates within the texture. + // They will rarely iterate multiple times, and are cheaper than a modulo operation, + // even if using libdivide. + if (xstep < 0) // These if statements are hopefully hoisted by the compiler to above this loop + while (xposition < 0) + xposition += fixedwidth; + else + while (xposition >= fixedwidth) + xposition -= fixedwidth; + if (ystep < 0) + while (yposition < 0) + yposition += fixedheight; + else + while (yposition >= fixedheight) + yposition -= fixedheight; + + x = (xposition >> FRACBITS); + y = (yposition >> FRACBITS); *dest++ = colormap[source[((y * ds_flatwidth) + x)]]; xposition += xstep; @@ -668,6 +702,8 @@ void R_DrawSplat_NPO2_8 (void) fixed_t xposition; fixed_t yposition; fixed_t xstep, ystep; + fixed_t x, y; + fixed_t fixedwidth, fixedheight; UINT8 *source; UINT8 *colormap; @@ -684,9 +720,22 @@ void R_DrawSplat_NPO2_8 (void) colormap = ds_colormap; dest = ylookup[ds_y] + columnofs[ds_x1]; + fixedwidth = ds_flatwidth << FRACBITS; + fixedheight = ds_flatheight << FRACBITS; + + // Fix xposition and yposition if they are out of bounds. + if (xposition < 0) + xposition = fixedwidth - ((UINT32)(fixedwidth - xposition) % fixedwidth); + else if (xposition >= fixedwidth) + xposition %= fixedwidth; + if (yposition < 0) + yposition = fixedheight - ((UINT32)(fixedheight - yposition) % fixedheight); + else if (yposition >= fixedheight) + yposition %= fixedheight; + while (count-- && dest <= deststop) { - fixed_t x = (xposition >> FRACBITS); +/* fixed_t x = (xposition >> FRACBITS); fixed_t y = (yposition >> FRACBITS); // Carefully align all of my Friends. @@ -696,8 +745,25 @@ void R_DrawSplat_NPO2_8 (void) y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); x %= ds_flatwidth; - y %= ds_flatheight; + y %= ds_flatheight;*/ + // The loops here keep the texture coordinates within the texture. + // They will rarely iterate multiple times, and are cheaper than a modulo operation, + // even if using libdivide. + if (xstep < 0) // These if statements are hopefully hoisted by the compiler to above this loop + while (xposition < 0) + xposition += fixedwidth; + else + while (xposition >= fixedwidth) + xposition -= fixedwidth; + if (ystep < 0) + while (yposition < 0) + yposition += fixedheight; + else + while (yposition >= fixedheight) + yposition -= fixedheight; + x = (xposition >> FRACBITS); + y = (yposition >> FRACBITS); val = source[((y * ds_flatwidth) + x)]; if (val != TRANSPARENTPIXEL) *dest = colormap[val]; @@ -715,6 +781,8 @@ void R_DrawTranslucentSplat_NPO2_8 (void) fixed_t xposition; fixed_t yposition; fixed_t xstep, ystep; + fixed_t x, y; + fixed_t fixedwidth, fixedheight; UINT8 *source; UINT8 *colormap; @@ -731,9 +799,22 @@ void R_DrawTranslucentSplat_NPO2_8 (void) colormap = ds_colormap; dest = ylookup[ds_y] + columnofs[ds_x1]; + fixedwidth = ds_flatwidth << FRACBITS; + fixedheight = ds_flatheight << FRACBITS; + + // Fix xposition and yposition if they are out of bounds. + if (xposition < 0) + xposition = fixedwidth - ((UINT32)(fixedwidth - xposition) % fixedwidth); + else if (xposition >= fixedwidth) + xposition %= fixedwidth; + if (yposition < 0) + yposition = fixedheight - ((UINT32)(fixedheight - yposition) % fixedheight); + else if (yposition >= fixedheight) + yposition %= fixedheight; + while (count-- && dest <= deststop) { - fixed_t x = (xposition >> FRACBITS); + /*fixed_t x = (xposition >> FRACBITS); fixed_t y = (yposition >> FRACBITS); // Carefully align all of my Friends. @@ -743,8 +824,26 @@ void R_DrawTranslucentSplat_NPO2_8 (void) y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); x %= ds_flatwidth; - y %= ds_flatheight; + y %= ds_flatheight;*/ + // The loops here keep the texture coordinates within the texture. + // They will rarely iterate multiple times, and are cheaper than a modulo operation, + // even if using libdivide. + if (xstep < 0) // These if statements are hopefully hoisted by the compiler to above this loop + while (xposition < 0) + xposition += fixedwidth; + else + while (xposition >= fixedwidth) + xposition -= fixedwidth; + if (ystep < 0) + while (yposition < 0) + yposition += fixedheight; + else + while (yposition >= fixedheight) + yposition -= fixedheight; + + x = (xposition >> FRACBITS); + y = (yposition >> FRACBITS); val = source[((y * ds_flatwidth) + x)]; if (val != TRANSPARENTPIXEL) *dest = *(ds_transmap + (colormap[val] << 8) + *dest); @@ -762,6 +861,8 @@ void R_DrawTranslucentSpan_NPO2_8 (void) fixed_t xposition; fixed_t yposition; fixed_t xstep, ystep; + fixed_t x, y; + fixed_t fixedwidth, fixedheight; UINT8 *source; UINT8 *colormap; @@ -778,9 +879,22 @@ void R_DrawTranslucentSpan_NPO2_8 (void) colormap = ds_colormap; dest = ylookup[ds_y] + columnofs[ds_x1]; + fixedwidth = ds_flatwidth << FRACBITS; + fixedheight = ds_flatheight << FRACBITS; + + // Fix xposition and yposition if they are out of bounds. + if (xposition < 0) + xposition = fixedwidth - ((UINT32)(fixedwidth - xposition) % fixedwidth); + else if (xposition >= fixedwidth) + xposition %= fixedwidth; + if (yposition < 0) + yposition = fixedheight - ((UINT32)(fixedheight - yposition) % fixedheight); + else if (yposition >= fixedheight) + yposition %= fixedheight; + while (count-- && dest <= deststop) { - fixed_t x = (xposition >> FRACBITS); + /*fixed_t x = (xposition >> FRACBITS); fixed_t y = (yposition >> FRACBITS); // Carefully align all of my Friends. @@ -790,8 +904,26 @@ void R_DrawTranslucentSpan_NPO2_8 (void) y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); x %= ds_flatwidth; - y %= ds_flatheight; + y %= ds_flatheight;*/ + // The loops here keep the texture coordinates within the texture. + // They will rarely iterate multiple times, and are cheaper than a modulo operation, + // even if using libdivide. + if (xstep < 0) // These if statements are hopefully hoisted by the compiler to above this loop + while (xposition < 0) + xposition += fixedwidth; + else + while (xposition >= fixedwidth) + xposition -= fixedwidth; + if (ystep < 0) + while (yposition < 0) + yposition += fixedheight; + else + while (yposition >= fixedheight) + yposition -= fixedheight; + + x = (xposition >> FRACBITS); + y = (yposition >> FRACBITS); val = ((y * ds_flatwidth) + x); *dest = *(ds_transmap + (colormap[source[val]] << 8) + *dest); dest++; @@ -806,6 +938,8 @@ void R_DrawTranslucentWaterSpan_NPO2_8(void) fixed_t xposition; fixed_t yposition; fixed_t xstep, ystep; + fixed_t x, y; + fixed_t fixedwidth, fixedheight; UINT8 *source; UINT8 *colormap; @@ -823,9 +957,22 @@ void R_DrawTranslucentWaterSpan_NPO2_8(void) dest = ylookup[ds_y] + columnofs[ds_x1]; dsrc = screens[1] + (ds_y+ds_bgofs)*vid.width + ds_x1; + fixedwidth = ds_flatwidth << FRACBITS; + fixedheight = ds_flatheight << FRACBITS; + + // Fix xposition and yposition if they are out of bounds. + if (xposition < 0) + xposition = fixedwidth - ((UINT32)(fixedwidth - xposition) % fixedwidth); + else if (xposition >= fixedwidth) + xposition %= fixedwidth; + if (yposition < 0) + yposition = fixedheight - ((UINT32)(fixedheight - yposition) % fixedheight); + else if (yposition >= fixedheight) + yposition %= fixedheight; + while (count-- && dest <= deststop) { - fixed_t x = (xposition >> FRACBITS); + /*fixed_t x = (xposition >> FRACBITS); fixed_t y = (yposition >> FRACBITS); // Carefully align all of my Friends. @@ -835,8 +982,25 @@ void R_DrawTranslucentWaterSpan_NPO2_8(void) y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); x %= ds_flatwidth; - y %= ds_flatheight; + y %= ds_flatheight;*/ + // The loops here keep the texture coordinates within the texture. + // They will rarely iterate multiple times, and are cheaper than a modulo operation, + // even if using libdivide. + if (xstep < 0) // These if statements are hopefully hoisted by the compiler to above this loop + while (xposition < 0) + xposition += fixedwidth; + else + while (xposition >= fixedwidth) + xposition -= fixedwidth; + if (ystep < 0) + while (yposition < 0) + yposition += fixedheight; + else + while (yposition >= fixedheight) + yposition -= fixedheight; + x = (xposition >> FRACBITS); + y = (yposition >> FRACBITS); *dest++ = colormap[*(ds_transmap + (source[((y * ds_flatwidth) + x)] << 8) + *dsrc++)]; xposition += xstep; yposition += ystep; From 382ed1c31e0be865c3f4ca75bb70768b4a3777be Mon Sep 17 00:00:00 2001 From: Hannu Hanhi Date: Sun, 18 Oct 2020 23:04:01 +0300 Subject: [PATCH 2/2] Remove leftover commented code from previous commit --- src/r_draw8_npo2.c | 58 ---------------------------------------------- 1 file changed, 58 deletions(-) diff --git a/src/r_draw8_npo2.c b/src/r_draw8_npo2.c index f82292ea2..630b36e6f 100644 --- a/src/r_draw8_npo2.c +++ b/src/r_draw8_npo2.c @@ -58,18 +58,6 @@ void R_DrawSpan_NPO2_8 (void) while (count-- && dest <= deststop) { -/* fixed_t x = (xposition >> FRACBITS); - fixed_t y = (yposition >> FRACBITS); - - // Carefully align all of my Friends. - if (x < 0) - x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); - if (y < 0) - y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); - - x %= ds_flatwidth; - y %= ds_flatheight;*/ - // The loops here keep the texture coordinates within the texture. // They will rarely iterate multiple times, and are cheaper than a modulo operation, // even if using libdivide. @@ -735,17 +723,6 @@ void R_DrawSplat_NPO2_8 (void) while (count-- && dest <= deststop) { -/* fixed_t x = (xposition >> FRACBITS); - fixed_t y = (yposition >> FRACBITS); - - // Carefully align all of my Friends. - if (x < 0) - x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); - if (y < 0) - y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); - - x %= ds_flatwidth; - y %= ds_flatheight;*/ // The loops here keep the texture coordinates within the texture. // They will rarely iterate multiple times, and are cheaper than a modulo operation, // even if using libdivide. @@ -814,18 +791,6 @@ void R_DrawTranslucentSplat_NPO2_8 (void) while (count-- && dest <= deststop) { - /*fixed_t x = (xposition >> FRACBITS); - fixed_t y = (yposition >> FRACBITS); - - // Carefully align all of my Friends. - if (x < 0) - x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); - if (y < 0) - y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); - - x %= ds_flatwidth; - y %= ds_flatheight;*/ - // The loops here keep the texture coordinates within the texture. // They will rarely iterate multiple times, and are cheaper than a modulo operation, // even if using libdivide. @@ -894,18 +859,6 @@ void R_DrawTranslucentSpan_NPO2_8 (void) while (count-- && dest <= deststop) { - /*fixed_t x = (xposition >> FRACBITS); - fixed_t y = (yposition >> FRACBITS); - - // Carefully align all of my Friends. - if (x < 0) - x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); - if (y < 0) - y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); - - x %= ds_flatwidth; - y %= ds_flatheight;*/ - // The loops here keep the texture coordinates within the texture. // They will rarely iterate multiple times, and are cheaper than a modulo operation, // even if using libdivide. @@ -972,17 +925,6 @@ void R_DrawTranslucentWaterSpan_NPO2_8(void) while (count-- && dest <= deststop) { - /*fixed_t x = (xposition >> FRACBITS); - fixed_t y = (yposition >> FRACBITS); - - // Carefully align all of my Friends. - if (x < 0) - x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth); - if (y < 0) - y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight); - - x %= ds_flatwidth; - y %= ds_flatheight;*/ // The loops here keep the texture coordinates within the texture. // They will rarely iterate multiple times, and are cheaper than a modulo operation, // even if using libdivide.