diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b0a593bb1..7a3f0564d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -418,6 +418,7 @@ endif() if(${SRB2_CONFIG_HWRENDER}) add_definitions(-DHWRENDER) set(SRB2_HWRENDER_SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/hardware/hw_batching.c ${CMAKE_CURRENT_SOURCE_DIR}/hardware/hw_bsp.c ${CMAKE_CURRENT_SOURCE_DIR}/hardware/hw_cache.c ${CMAKE_CURRENT_SOURCE_DIR}/hardware/hw_clip.c @@ -433,6 +434,7 @@ if(${SRB2_CONFIG_HWRENDER}) ) set (SRB2_HWRENDER_HEADERS + ${CMAKE_CURRENT_SOURCE_DIR}/hardware/hw_batching.h ${CMAKE_CURRENT_SOURCE_DIR}/hardware/hw_clip.h ${CMAKE_CURRENT_SOURCE_DIR}/hardware/hw_data.h ${CMAKE_CURRENT_SOURCE_DIR}/hardware/hw_defs.h diff --git a/src/Makefile b/src/Makefile index d2dd4757a..e00c84bc9 100644 --- a/src/Makefile +++ b/src/Makefile @@ -225,7 +225,7 @@ else OPTS+=-DHWRENDER OBJS+=$(OBJDIR)/hw_bsp.o $(OBJDIR)/hw_draw.o $(OBJDIR)/hw_light.o \ $(OBJDIR)/hw_main.o $(OBJDIR)/hw_clip.o $(OBJDIR)/hw_md2.o $(OBJDIR)/hw_cache.o $(OBJDIR)/hw_trick.o \ - $(OBJDIR)/hw_md2load.o $(OBJDIR)/hw_md3load.o $(OBJDIR)/hw_model.o $(OBJDIR)/u_list.o + $(OBJDIR)/hw_md2load.o $(OBJDIR)/hw_md3load.o $(OBJDIR)/hw_model.o $(OBJDIR)/u_list.o $(OBJDIR)/hw_batching.o endif ifdef NOHS diff --git a/src/d_main.c b/src/d_main.c index fe06be2a4..e4978205c 100644 --- a/src/d_main.c +++ b/src/d_main.c @@ -634,6 +634,28 @@ static void D_Display(void) V_DrawThinString(30, 70, V_MONOSPACE | V_YELLOWMAP, s); snprintf(s, sizeof s - 1, "fin %d", rs_swaptime / divisor); V_DrawThinString(30, 80, V_MONOSPACE | V_YELLOWMAP, s); + if (cv_grbatching.value) + { + snprintf(s, sizeof s - 1, "bsrt %d", rs_hw_batchsorttime / divisor); + V_DrawThinString(80, 55, V_MONOSPACE | V_REDMAP, s); + snprintf(s, sizeof s - 1, "bdrw %d", rs_hw_batchdrawtime / divisor); + V_DrawThinString(80, 65, V_MONOSPACE | V_REDMAP, s); + + snprintf(s, sizeof s - 1, "npol %d", rs_hw_numpolys); + V_DrawThinString(130, 10, V_MONOSPACE | V_PURPLEMAP, s); + 
snprintf(s, sizeof s - 1, "ndc %d", rs_hw_numcalls); + V_DrawThinString(130, 20, V_MONOSPACE | V_PURPLEMAP, s); + snprintf(s, sizeof s - 1, "nshd %d", rs_hw_numshaders); + V_DrawThinString(130, 30, V_MONOSPACE | V_PURPLEMAP, s); + snprintf(s, sizeof s - 1, "nvrt %d", rs_hw_numverts); + V_DrawThinString(130, 40, V_MONOSPACE | V_PURPLEMAP, s); + snprintf(s, sizeof s - 1, "ntex %d", rs_hw_numtextures); + V_DrawThinString(185, 10, V_MONOSPACE | V_PURPLEMAP, s); + snprintf(s, sizeof s - 1, "npf %d", rs_hw_numpolyflags); + V_DrawThinString(185, 20, V_MONOSPACE | V_PURPLEMAP, s); + snprintf(s, sizeof s - 1, "ncol %d", rs_hw_numcolors); + V_DrawThinString(185, 30, V_MONOSPACE | V_PURPLEMAP, s); + } } else // software specific stats { diff --git a/src/hardware/hw_batching.c b/src/hardware/hw_batching.c new file mode 100644 index 000000000..4e11b0ab7 --- /dev/null +++ b/src/hardware/hw_batching.c @@ -0,0 +1,450 @@ +// SONIC ROBO BLAST 2 +//----------------------------------------------------------------------------- +// Copyright (C) 1998-2000 by DooM Legacy Team. +// Copyright (C) 1999-2020 by Sonic Team Junior. +// +// This program is free software distributed under the +// terms of the GNU General Public License, version 2. +// See the 'LICENSE' file for more details. +//----------------------------------------------------------------------------- +/// \file hw_batching.c +/// \brief Draw call batching and related things. + +#ifdef HWRENDER +#include "hw_glob.h" +#include "hw_batching.h" +#include "../i_system.h" + +// The texture for the next polygon given to HWR_ProcessPolygon. +// Set with HWR_SetCurrentTexture. 
+GLMipmap_t *current_texture = NULL;
+
+// True between HWR_StartBatching and HWR_RenderBatches; while set, polygons and texture
+// selections are collected instead of being sent to the rendering backend.
+boolean currently_batching = false;
+
+FOutVector* finalVertexArray = NULL;// holds the vertices and texture coordinates of the batch currently being built, in sorted order, to be sent to the gpu
+UINT32* finalVertexIndexArray = NULL;// holds the indexes for the indexed draw call, taking into account the fan->triangles conversion
+// NOTE: finalVertexIndexArray is always allocated with 3x the element count of finalVertexArray
+int finalVertexArrayAllocSize = 65536;// capacity of finalVertexArray, counted in FOutVector elements
+//GLubyte* colorArray = NULL;// contains color data to be sent to gpu, if needed
+//int colorArrayAllocSize = 65536;
+// The color array is not used for now: polygons are sorted by color and the state is changed when the color changes.
+// It may become useful later if vertex attributes are used.
+
+PolygonArrayEntry* polygonArray = NULL;// holds the polygon data collected by HWR_ProcessPolygon, waiting to be processed
+int polygonArraySize = 0;// number of entries currently stored in polygonArray
+unsigned int* polygonIndexArray = NULL;// holds indexes into polygonArray; this is the array that actually gets sorted
+int polygonArrayAllocSize = 65536;// capacity of polygonArray and polygonIndexArray, counted in elements
+
+FOutVector* unsortedVertexArray = NULL;// holds the vertices and texture coordinates in the order HWR_ProcessPolygon received them
+int unsortedVertexArraySize = 0;// number of vertices currently stored in unsortedVertexArray
+int unsortedVertexArrayAllocSize = 65536;// capacity of unsortedVertexArray, counted in FOutVector elements
+
+// Enables batching mode. HWR_ProcessPolygon will collect polygons instead of passing them directly to the rendering backend.
+// Call HWR_RenderBatches to render all the collected geometry.
+void HWR_StartBatching(void)
+{
+	// Batching must be closed with HWR_RenderBatches before it can be started again.
+	if (currently_batching)
+		I_Error("Repeat call to HWR_StartBatching without HWR_RenderBatches");
+
+	// init arrays if that has not been done yet
+	if (!finalVertexArray)
+	{
+		finalVertexArray = malloc(finalVertexArrayAllocSize * sizeof(FOutVector));
+		// the index array gets 3x the element count: turning fans into triangles
+		// can produce up to three indexes per vertex
+		finalVertexIndexArray = malloc(finalVertexArrayAllocSize * 3 * sizeof(UINT32));
+		polygonArray = malloc(polygonArrayAllocSize * sizeof(PolygonArrayEntry));
+		polygonIndexArray = malloc(polygonArrayAllocSize * sizeof(unsigned int));
+		unsortedVertexArray = malloc(unsortedVertexArrayAllocSize * sizeof(FOutVector));
+
+		// Fail loudly here instead of dereferencing a NULL array later on
+		// in HWR_ProcessPolygon or HWR_RenderBatches.
+		if (!finalVertexArray || !finalVertexIndexArray || !polygonArray
+			|| !polygonIndexArray || !unsortedVertexArray)
+			I_Error("HWR_StartBatching: out of memory while allocating the batching arrays");
+	}
+
+	currently_batching = true;
+}
+
+// This replaces the direct calls to pfnSetTexture in cases where batching is available.
+// While batching, the texture selection is only remembered and gets attached to the
+// polygon given to the next HWR_ProcessPolygon call; otherwise it is forwarded to the
+// rendering backend right away.
+// Doing this was easier than getting a texture pointer to HWR_ProcessPolygon.
+void HWR_SetCurrentTexture(GLMipmap_t *texture)
+{
+	if (currently_batching)
+	{
+		current_texture = texture;
+	}
+	else
+	{
+		HWD.pfnSetTexture(texture);
+	}
+}
+
+// If batching is enabled, this function collects the polygon data and the chosen texture
+// for later use in HWR_RenderBatches. Otherwise the rendering backend is used to
+// render the polygon immediately.
+// pSurf, pOutVerts, iNumPts and PolyFlags are the same arguments pfnDrawPolygon takes.
+// shader is the shader number to draw the polygon with; horizonSpecial marks a plane that
+// belongs to a horizon line and must keep its draw order relative to the skywalls.
+void HWR_ProcessPolygon(FSurfaceInfo *pSurf, FOutVector *pOutVerts, FUINT iNumPts, FBITFIELD PolyFlags, int shader, boolean horizonSpecial)
+{
+	if (currently_batching)
+	{
+		if (!pSurf)
+			I_Error("Got a null FSurfaceInfo in batching");// nulls should not come in the stuff that batching currently applies to
+		if (polygonArraySize == polygonArrayAllocSize)
+		{
+			PolygonArrayEntry *new_array;
+			// ran out of space, double the capacity
+			polygonArrayAllocSize *= 2;
+			new_array = realloc(polygonArray, polygonArrayAllocSize * sizeof(PolygonArrayEntry));
+			if (!new_array)
+				I_Error("HWR_ProcessPolygon: out of memory while growing polygonArray");
+			polygonArray = new_array;
+			// also need to redo the index array, dont need to copy it though
+			// (it is rebuilt from scratch in HWR_RenderBatches)
+			free(polygonIndexArray);
+			polygonIndexArray = malloc(polygonArrayAllocSize * sizeof(unsigned int));
+			if (!polygonIndexArray)
+				I_Error("HWR_ProcessPolygon: out of memory while growing polygonIndexArray");
+		}
+
+		if (unsortedVertexArraySize + (int)iNumPts > unsortedVertexArrayAllocSize)
+		{
+			FOutVector *new_array;
+			// need more space for vertices in unsortedVertexArray:
+			// keep doubling until the polygon fits, then reallocate once
+			while (unsortedVertexArraySize + (int)iNumPts > unsortedVertexArrayAllocSize)
+				unsortedVertexArrayAllocSize *= 2;
+			new_array = realloc(unsortedVertexArray, unsortedVertexArrayAllocSize * sizeof(FOutVector));
+			if (!new_array)
+				I_Error("HWR_ProcessPolygon: out of memory while growing unsortedVertexArray");
+			unsortedVertexArray = new_array;
+		}
+
+		// add the polygon data to the arrays
+
+		polygonArray[polygonArraySize].surf = *pSurf;
+		polygonArray[polygonArraySize].vertsIndex = unsortedVertexArraySize;
+		polygonArray[polygonArraySize].numVerts = iNumPts;
+		polygonArray[polygonArraySize].polyFlags = PolyFlags;
+		polygonArray[polygonArraySize].texture = current_texture;
+		polygonArray[polygonArraySize].shader = shader;
+		polygonArray[polygonArraySize].horizonSpecial = horizonSpecial;
+		polygonArraySize++;
+
+		memcpy(&unsortedVertexArray[unsortedVertexArraySize], pOutVerts, iNumPts * sizeof(FOutVector));
+		unsortedVertexArraySize += iNumPts;
+	}
+	else
+	{
+		// not batching: draw right away (shader 0 leaves the current shader untouched)
+		if (shader)
+			HWD.pfnSetShader(shader);
+		HWD.pfnDrawPolygon(pSurf, pOutVerts, iNumPts, PolyFlags);
+	}
+}
+
+// Three-way comparison that yields -1, 0 or 1.
+// The comparators must not return a plain difference: differences of unsigned values never
+// go negative and 64-bit differences get truncated when returned as int, both of which make
+// the ordering inconsistent for qsort.
+// Both arguments are evaluated twice, so only pass expressions without side effects.
+#define BATCH_CMP(a, b) (((a) > (b)) - ((a) < (b)))
+
+// qsort comparator used when shaders are enabled. p1 and p2 point to entries of
+// polygonIndexArray, which are indexes into polygonArray.
+// Sort order: shader, texture, polyflags, colors, light info.
+static int comparePolygons(const void *p1, const void *p2)
+{
+	unsigned int index1 = *(const unsigned int*)p1;
+	unsigned int index2 = *(const unsigned int*)p2;
+	const PolygonArrayEntry *poly1 = &polygonArray[index1];
+	const PolygonArrayEntry *poly2 = &polygonArray[index2];
+	int diff;
+
+	int shader1 = poly1->shader;
+	int shader2 = poly2->shader;
+	// make skywalls and horizon lines first in order
+	if (poly1->polyFlags & PF_NoTexture || poly1->horizonSpecial)
+		shader1 = -1;
+	if (poly2->polyFlags & PF_NoTexture || poly2->horizonSpecial)
+		shader2 = -1;
+	diff = BATCH_CMP(shader1, shader2);
+	if (diff != 0) return diff;
+
+	// skywalls and horizon lines must retain their order for horizon lines to work
+	if (shader1 == -1 && shader2 == -1)
+		return BATCH_CMP(index1, index2);
+
+	// Textures only need some consistent order so that equal ones end up next to each other.
+	// The addresses are compared as integers because subtracting pointers to unrelated objects is undefined.
+	diff = BATCH_CMP((size_t)poly1->texture, (size_t)poly2->texture);
+	if (diff != 0) return diff;
+
+	diff = BATCH_CMP(poly1->polyFlags, poly2->polyFlags);
+	if (diff != 0) return diff;
+
+	diff = BATCH_CMP(poly1->surf.PolyColor.rgba, poly2->surf.PolyColor.rgba);
+	if (diff != 0) return diff;
+	diff = BATCH_CMP(poly1->surf.TintColor.rgba, poly2->surf.TintColor.rgba);
+	if (diff != 0) return diff;
+	diff = BATCH_CMP(poly1->surf.FadeColor.rgba, poly2->surf.FadeColor.rgba);
+	if (diff != 0) return diff;
+
+	diff = BATCH_CMP(poly1->surf.LightInfo.light_level, poly2->surf.LightInfo.light_level);
+	if (diff != 0) return diff;
+	diff = BATCH_CMP(poly1->surf.LightInfo.fade_start, poly2->surf.LightInfo.fade_start);
+	if (diff != 0) return diff;
+	return BATCH_CMP(poly1->surf.LightInfo.fade_end, poly2->surf.LightInfo.fade_end);
+}
+
+// qsort comparator used when shaders are disabled. p1 and p2 point to entries of
+// polygonIndexArray, which are indexes into polygonArray.
+// Sort order: texture, polyflags, poly color.
+static int comparePolygonsNoShaders(const void *p1, const void *p2)
+{
+	unsigned int index1 = *(const unsigned int*)p1;
+	unsigned int index2 = *(const unsigned int*)p2;
+	const PolygonArrayEntry *poly1 = &polygonArray[index1];
+	const PolygonArrayEntry *poly2 = &polygonArray[index2];
+	int diff;
+
+	// skywalls and horizon lines are treated as having no texture, which sorts them first
+	GLMipmap_t *texture1 = poly1->texture;
+	GLMipmap_t *texture2 = poly2->texture;
+	if (poly1->polyFlags & PF_NoTexture || poly1->horizonSpecial)
+		texture1 = NULL;
+	if (poly2->polyFlags & PF_NoTexture || poly2->horizonSpecial)
+		texture2 = NULL;
+	// addresses are compared as integers, see comparePolygons
+	diff = BATCH_CMP((size_t)texture1, (size_t)texture2);
+	if (diff != 0) return diff;
+
+	// skywalls and horizon lines must retain their order for horizon lines to work
+	if (texture1 == NULL && texture2 == NULL)
+		return BATCH_CMP(index1, index2);
+
+	diff = BATCH_CMP(poly1->polyFlags, poly2->polyFlags);
+	if (diff != 0) return diff;
+
+	return BATCH_CMP(poly1->surf.PolyColor.rgba, poly2->surf.PolyColor.rgba);
+}
+
+// This function organizes the geometry collected by HWR_ProcessPolygon calls into batches and uses
+// the rendering backend to draw them.
+// It ends batching mode, empties the collection arrays and updates the rs_hw_* render stats.
+void HWR_RenderBatches(void)
+{
+	int finalVertexWritePos = 0;// position in finalVertexArray
+	int finalIndexWritePos = 0;// position in finalVertexIndexArray
+
+	int polygonReadPos = 0;// position in polygonIndexArray
+
+	int currentShader;
+	GLMipmap_t *currentTexture;
+	FBITFIELD currentPolyFlags;
+	FSurfaceInfo currentSurfaceInfo;
+
+	int i;
+
+	if (!currently_batching)
+		I_Error("HWR_RenderBatches called without starting batching");
+
+	currently_batching = false;// no longer collecting batches
+	if (!polygonArraySize)
+	{
+		// nothing was collected: zero every counter, including rs_hw_numverts,
+		// so that no value from an earlier call stays on the stats display
+		rs_hw_numpolys = rs_hw_numcalls = rs_hw_numverts = rs_hw_numshaders = rs_hw_numtextures = rs_hw_numpolyflags = rs_hw_numcolors = 0;
+		return;// nothing to draw
+	}
+	// init stats vars
+	rs_hw_numpolys = polygonArraySize;
+	rs_hw_numcalls = rs_hw_numverts = 0;
+	rs_hw_numshaders = rs_hw_numtextures = rs_hw_numpolyflags = rs_hw_numcolors = 1;
+	// init polygonIndexArray
+	for (i = 0; i < polygonArraySize; i++)
+	{
+		polygonIndexArray[i] = i;
+	}
+
+	// sort polygons
+	rs_hw_batchsorttime = I_GetTimeMicros();
+	if (cv_grshaders.value) // TODO also have the shader availability check here when its done
+		qsort(polygonIndexArray, polygonArraySize, sizeof(unsigned int), comparePolygons);
+	else
+		qsort(polygonIndexArray, polygonArraySize, sizeof(unsigned int), comparePolygonsNoShaders);
+	rs_hw_batchsorttime = I_GetTimeMicros() - rs_hw_batchsorttime;
+	// sort order
+	// 1. shader
+	// 2. texture
+	// 3. polyflags
+	// 4. colors + light level
+	// not sure about what order of the last 2 should be, or if it even matters
+
+	rs_hw_batchdrawtime = I_GetTimeMicros();
+
+	currentShader = polygonArray[polygonIndexArray[0]].shader;
+	currentTexture = polygonArray[polygonIndexArray[0]].texture;
+	currentPolyFlags = polygonArray[polygonIndexArray[0]].polyFlags;
+	currentSurfaceInfo = polygonArray[polygonIndexArray[0]].surf;
+	// For now, will sort and track the colors. Vertex attributes could be used instead of uniforms
+	// and a color array could replace the color calls.
+
+	// set state for first batch
+
+	if (cv_grshaders.value) // TODO also have the shader availability check here when its done
+	{
+		HWD.pfnSetShader(currentShader);
+	}
+
+	if (currentPolyFlags & PF_NoTexture)
+		currentTexture = NULL;
+	else
+		HWD.pfnSetTexture(currentTexture);
+
+	while (1)// note: remember handling notexture polyflag as having texture number 0 (also in comparePolygons)
+	{
+		int firstIndex;
+		int lastIndex;
+		int vertexPos;
+
+		boolean stopFlag = false;
+		boolean changeState = false;
+		boolean changeShader = false;
+		int nextShader;
+		boolean changeTexture = false;
+		GLMipmap_t *nextTexture;
+		boolean changePolyFlags = false;
+		FBITFIELD nextPolyFlags;
+		boolean changeSurfaceInfo = false;
+		FSurfaceInfo nextSurfaceInfo;
+
+		// steps:
+		// write vertices
+		// check for changes or end, otherwise go back to writing
+			// changes will affect the next vars and the change bools
+			// end could set flag for stopping
+		// execute draw call
+		// could check ending flag here
+		// change states according to next vars and change bools, updating the current vars and reseting the bools
+		// reset write pos
+		// repeat loop
+
+		int index = polygonIndexArray[polygonReadPos++];
+		int numVerts = polygonArray[index].numVerts;
+		// before writing, check if there is enough room
+		if (finalVertexWritePos + numVerts > finalVertexArrayAllocSize)
+		{
+			FOutVector *new_array;
+			UINT32 *new_index_array;
+			// keep doubling until the polygon fits, so that there will *always* be enough room
+			while (finalVertexWritePos + numVerts > finalVertexArrayAllocSize)
+				finalVertexArrayAllocSize *= 2;
+			new_array = realloc(finalVertexArray, finalVertexArrayAllocSize * sizeof(FOutVector));
+			if (!new_array)
+				I_Error("HWR_RenderBatches: out of memory while growing finalVertexArray");
+			finalVertexArray = new_array;
+			// also increase size of index array, 3x of vertex array since
+			// going from fans to triangles increases vertex count to 3x
+			new_index_array = realloc(finalVertexIndexArray, finalVertexArrayAllocSize * 3 * sizeof(UINT32));
+			if (!new_index_array)
+				I_Error("HWR_RenderBatches: out of memory while growing finalVertexIndexArray");
+			finalVertexIndexArray = new_index_array;
+		}
+		// write the vertices of the polygon
+		memcpy(&finalVertexArray[finalVertexWritePos], &unsortedVertexArray[polygonArray[index].vertsIndex],
+			numVerts * sizeof(FOutVector));
+		// write the indexes, pointing to the fan vertexes but in triangles format:
+		// a fan v0 v1 v2 v3 ... becomes the triangles (v0 v1 v2), (v0 v2 v3), ...
+		firstIndex = finalVertexWritePos;
+		lastIndex = finalVertexWritePos + numVerts;
+		for (vertexPos = firstIndex + 2; vertexPos < lastIndex; vertexPos++)
+		{
+			finalVertexIndexArray[finalIndexWritePos++] = firstIndex;
+			finalVertexIndexArray[finalIndexWritePos++] = vertexPos - 1;
+			finalVertexIndexArray[finalIndexWritePos++] = vertexPos;
+		}
+		// advance by exactly the number of vertices copied; this also holds for a polygon with
+		// less than 3 vertices, which must not move the write position past the checked space
+		finalVertexWritePos = lastIndex;
+
+		if (polygonReadPos >= polygonArraySize)
+		{
+			stopFlag = true;
+		}
+		else
+		{
+			// check if a state change is required, set the change bools and next vars
+			int nextIndex = polygonIndexArray[polygonReadPos];
+			nextShader = polygonArray[nextIndex].shader;
+			nextTexture = polygonArray[nextIndex].texture;
+			nextPolyFlags = polygonArray[nextIndex].polyFlags;
+			nextSurfaceInfo = polygonArray[nextIndex].surf;
+			if (nextPolyFlags & PF_NoTexture)
+				nextTexture = 0;
+			// NOTE(review): shader changes end the batch even when cv_grshaders is off, although
+			// the polygons are not sorted by shader in that case - confirm whether this check
+			// should be skipped when shaders are disabled
+			if (currentShader != nextShader)
+			{
+				changeState = true;
+				changeShader = true;
+			}
+			if (currentTexture != nextTexture)
+			{
+				changeState = true;
+				changeTexture = true;
+			}
+			if (currentPolyFlags != nextPolyFlags)
+			{
+				changeState = true;
+				changePolyFlags = true;
+			}
+			if (cv_grshaders.value) // TODO also have the shader availability check here when its done
+			{
+				if (currentSurfaceInfo.PolyColor.rgba != nextSurfaceInfo.PolyColor.rgba ||
+					currentSurfaceInfo.TintColor.rgba != nextSurfaceInfo.TintColor.rgba ||
+					currentSurfaceInfo.FadeColor.rgba != nextSurfaceInfo.FadeColor.rgba ||
+					currentSurfaceInfo.LightInfo.light_level != nextSurfaceInfo.LightInfo.light_level ||
+					currentSurfaceInfo.LightInfo.fade_start != nextSurfaceInfo.LightInfo.fade_start ||
+					currentSurfaceInfo.LightInfo.fade_end != nextSurfaceInfo.LightInfo.fade_end)
+				{
+					changeState = true;
+					changeSurfaceInfo = true;
+				}
+			}
+			else
+			{
+				// without shaders only the poly color is compared
+				if (currentSurfaceInfo.PolyColor.rgba != nextSurfaceInfo.PolyColor.rgba)
+				{
+					changeState = true;
+					changeSurfaceInfo = true;
+				}
+			}
+		}
+
+		if (changeState || stopFlag)
+		{
+			// execute draw call
+			HWD.pfnDrawIndexedTriangles(&currentSurfaceInfo, finalVertexArray, finalIndexWritePos, currentPolyFlags, finalVertexIndexArray);
+			// update stats (note that this adds up the number of indexes given to the draw calls)
+			rs_hw_numcalls++;
+			rs_hw_numverts += finalIndexWritePos;
+			// reset write positions
+			finalVertexWritePos = 0;
+			finalIndexWritePos = 0;
+		}
+		else continue;
+
+		// if we're here then either its time to stop or time to change state
+		if (stopFlag) break;
+
+		// change state according to change bools and next vars, update current vars and reset bools
+		if (changeShader)
+		{
+			HWD.pfnSetShader(nextShader);
+			currentShader = nextShader;
+			changeShader = false;
+
+			rs_hw_numshaders++;
+		}
+		if (changeTexture)
+		{
+			// texture should be already ready for use from calls to SetTexture during batch collection
+			HWD.pfnSetTexture(nextTexture);
+			currentTexture = nextTexture;
+			changeTexture = false;
+
+			rs_hw_numtextures++;
+		}
+		if (changePolyFlags)
+		{
+			currentPolyFlags = nextPolyFlags;
+			changePolyFlags = false;
+
+			rs_hw_numpolyflags++;
+		}
+		if (changeSurfaceInfo)
+		{
+			currentSurfaceInfo = nextSurfaceInfo;
+			changeSurfaceInfo = false;
+
+			rs_hw_numcolors++;
+		}
+		// and that should be it?
+	}
+	// reset the arrays (set sizes to 0)
+	polygonArraySize = 0;
+	unsortedVertexArraySize = 0;
+
+	rs_hw_batchdrawtime = I_GetTimeMicros() - rs_hw_batchdrawtime;
+}
+
+
+#endif // HWRENDER
diff --git a/src/hardware/hw_batching.h b/src/hardware/hw_batching.h
new file mode 100644
index 000000000..7c108a4bd
--- /dev/null
+++ b/src/hardware/hw_batching.h
@@ -0,0 +1,37 @@
+// SONIC ROBO BLAST 2
+//-----------------------------------------------------------------------------
+// Copyright (C) 1998-2000 by DooM Legacy Team.
+// Copyright (C) 1999-2020 by Sonic Team Junior.
+//
+// This program is free software distributed under the
+// terms of the GNU General Public License, version 2.
+// See the 'LICENSE' file for more details.
+//-----------------------------------------------------------------------------
+/// \file hw_batching.h
+/// \brief Draw call batching and related things.
+ +#ifndef __HWR_BATCHING_H__ +#define __HWR_BATCHING_H__ + +#include "hw_defs.h" +#include "hw_data.h" +#include "hw_drv.h" + +typedef struct +{ + FSurfaceInfo surf;// surf also has its own polyflags for some reason, but it seems unused + unsigned int vertsIndex;// location of verts in unsortedVertexArray + FUINT numVerts; + FBITFIELD polyFlags; + GLMipmap_t *texture; + int shader; + // this tells batching that the plane belongs to a horizon line and must be drawn in correct order with the skywalls + boolean horizonSpecial; +} PolygonArrayEntry; + +void HWR_StartBatching(void); +void HWR_SetCurrentTexture(GLMipmap_t *texture); +void HWR_ProcessPolygon(FSurfaceInfo *pSurf, FOutVector *pOutVerts, FUINT iNumPts, FBITFIELD PolyFlags, int shader, boolean horizonSpecial); +void HWR_RenderBatches(void); + +#endif diff --git a/src/hardware/hw_cache.c b/src/hardware/hw_cache.c index a64feab08..ab9a50dd5 100644 --- a/src/hardware/hw_cache.c +++ b/src/hardware/hw_cache.c @@ -15,6 +15,7 @@ #ifdef HWRENDER #include "hw_glob.h" #include "hw_drv.h" +#include "hw_batching.h" #include "../doomstat.h" //gamemode #include "../i_video.h" //rendermode @@ -738,8 +739,11 @@ GLTexture_t *HWR_GetTexture(INT32 tex) if (!grtex->mipmap.grInfo.data && !grtex->mipmap.downloaded) HWR_GenerateTexture(tex, grtex); - // Tell the hardware driver to bind the current texture to the flat's mipmap - HWD.pfnSetTexture(&grtex->mipmap); + // If hardware does not have the texture, then call pfnSetTexture to upload it + if (!grtex->mipmap.downloaded) + HWD.pfnSetTexture(&grtex->mipmap); + + HWR_SetCurrentTexture(&grtex->mipmap); // The system-memory data can be purged now. 
Z_ChangeTag(grtex->mipmap.grInfo.data, PU_HWRCACHE_UNLOCKED); @@ -818,7 +822,11 @@ void HWR_LiterallyGetFlat(lumpnum_t flatlumpnum) if (!grmip->downloaded && !grmip->grInfo.data) HWR_CacheFlat(grmip, flatlumpnum); - HWD.pfnSetTexture(grmip); + // If hardware does not have the texture, then call pfnSetTexture to upload it + if (!grmip->downloaded) + HWD.pfnSetTexture(grmip); + + HWR_SetCurrentTexture(grmip); // The system-memory data can be purged now. Z_ChangeTag(grmip->grInfo.data, PU_HWRCACHE_UNLOCKED); @@ -852,14 +860,17 @@ void HWR_GetLevelFlat(levelflat_t *levelflat) if (!grtex->mipmap.grInfo.data && !grtex->mipmap.downloaded) HWR_CacheTextureAsFlat(&grtex->mipmap, texturenum); - // Tell the hardware driver to bind the current texture to the flat's mipmap - HWD.pfnSetTexture(&grtex->mipmap); + // If hardware does not have the texture, then call pfnSetTexture to upload it + if (!grtex->mipmap.downloaded) + HWD.pfnSetTexture(&grtex->mipmap); + + HWR_SetCurrentTexture(&grtex->mipmap); // The system-memory data can be purged now. Z_ChangeTag(grtex->mipmap.grInfo.data, PU_HWRCACHE_UNLOCKED); } else // set no texture - HWD.pfnSetTexture(NULL); + HWR_SetCurrentTexture(NULL); } // @@ -881,7 +892,11 @@ static void HWR_LoadMappedPatch(GLMipmap_t *grmip, GLPatch_t *gpatch) Z_Free(patch); } - HWD.pfnSetTexture(grmip); + // If hardware does not have the texture, then call pfnSetTexture to upload it + if (!grmip->downloaded) + HWD.pfnSetTexture(grmip); + + HWR_SetCurrentTexture(grmip); // The system-memory data can be purged now. Z_ChangeTag(grmip->grInfo.data, PU_HWRCACHE_UNLOCKED); @@ -908,7 +923,11 @@ void HWR_GetPatch(GLPatch_t *gpatch) Z_Free(ptr); } - HWD.pfnSetTexture(gpatch->mipmap); + // If hardware does not have the texture, then call pfnSetTexture to upload it + if (!gpatch->mipmap->downloaded) + HWD.pfnSetTexture(gpatch->mipmap); + + HWR_SetCurrentTexture(gpatch->mipmap); // The system-memory patch data can be purged now. 
Z_ChangeTag(gpatch->mipmap->grInfo.data, PU_HWRCACHE_UNLOCKED); diff --git a/src/hardware/hw_drv.h b/src/hardware/hw_drv.h index a09f3f224..6cc45a363 100644 --- a/src/hardware/hw_drv.h +++ b/src/hardware/hw_drv.h @@ -36,6 +36,7 @@ EXPORT void HWRAPI(SetPalette) (RGBA_t *ppal); EXPORT void HWRAPI(FinishUpdate) (INT32 waitvbl); EXPORT void HWRAPI(Draw2DLine) (F2DCoord *v1, F2DCoord *v2, RGBA_t Color); EXPORT void HWRAPI(DrawPolygon) (FSurfaceInfo *pSurf, FOutVector *pOutVerts, FUINT iNumPts, FBITFIELD PolyFlags); +EXPORT void HWRAPI(DrawIndexedTriangles) (FSurfaceInfo *pSurf, FOutVector *pOutVerts, FUINT iNumPts, FBITFIELD PolyFlags, unsigned int *IndexArray); EXPORT void HWRAPI(RenderSkyDome) (INT32 tex, INT32 texture_width, INT32 texture_height, FTransform transform); EXPORT void HWRAPI(SetBlend) (FBITFIELD PolyFlags); EXPORT void HWRAPI(ClearBuffer) (FBOOLEAN ColorMask, FBOOLEAN DepthMask, FRGBAFloat *ClearColor); @@ -89,6 +90,7 @@ struct hwdriver_s FinishUpdate pfnFinishUpdate; Draw2DLine pfnDraw2DLine; DrawPolygon pfnDrawPolygon; + DrawIndexedTriangles pfnDrawIndexedTriangles; RenderSkyDome pfnRenderSkyDome; SetBlend pfnSetBlend; ClearBuffer pfnClearBuffer; diff --git a/src/hardware/hw_main.c b/src/hardware/hw_main.c index 7186f732f..3163b701a 100644 --- a/src/hardware/hw_main.c +++ b/src/hardware/hw_main.c @@ -18,6 +18,7 @@ #include "hw_glob.h" #include "hw_light.h" #include "hw_drv.h" +#include "hw_batching.h" #include "../i_video.h" // for rendermode == render_glide #include "../v_video.h" @@ -150,6 +151,17 @@ int rs_hw_nodedrawtime = 0; int rs_hw_spritesorttime = 0; int rs_hw_spritedrawtime = 0; +// Render stats for batching +int rs_hw_numpolys = 0; +int rs_hw_numverts = 0; +int rs_hw_numcalls = 0; +int rs_hw_numshaders = 0; +int rs_hw_numtextures = 0; +int rs_hw_numpolyflags = 0; +int rs_hw_numcolors = 0; +int rs_hw_batchsorttime = 0; +int rs_hw_batchdrawtime = 0; + // ========================================================================== // Lighting @@ 
-351,6 +363,8 @@ static void HWR_RenderPlane(subsector_t *subsector, extrasubsector_t *xsub, bool static FOutVector *planeVerts = NULL; static UINT16 numAllocedPlaneVerts = 0; + int shader; + // no convex poly were generated for this subsector if (!xsub->planepoly) return; @@ -433,7 +447,7 @@ static void HWR_RenderPlane(subsector_t *subsector, extrasubsector_t *xsub, bool } } else // set no texture - HWD.pfnSetTexture(NULL); + HWR_SetCurrentTexture(NULL); // reference point for flat texture coord for each vertex around the polygon flatxref = (float)(((fixed_t)pv->x & (~flatflag)) / fflatwidth); @@ -543,13 +557,13 @@ static void HWR_RenderPlane(subsector_t *subsector, extrasubsector_t *xsub, bool PolyFlags |= PF_Masked|PF_Modulated; if (PolyFlags & PF_Fog) - HWD.pfnSetShader(6); // fog shader + shader = 6; // fog shader else if (PolyFlags & PF_Ripple) - HWD.pfnSetShader(5); // water shader + shader = 5; // water shader else - HWD.pfnSetShader(1); // floor shader + shader = 1; // floor shader - HWD.pfnDrawPolygon(&Surf, planeVerts, nrPlaneVerts, PolyFlags); + HWR_ProcessPolygon(&Surf, planeVerts, nrPlaneVerts, PolyFlags, shader, false); if (subsector) { @@ -618,7 +632,7 @@ static void HWR_RenderPlane(subsector_t *subsector, extrasubsector_t *xsub, bool horizonpts[4].y = gr_viewz; // Draw - HWD.pfnDrawPolygon(&Surf, horizonpts, 6, PolyFlags); + HWR_ProcessPolygon(&Surf, horizonpts, 6, PolyFlags, shader, true); } } } @@ -780,8 +794,7 @@ static void HWR_ProjectWall(FOutVector *wallVerts, FSurfaceInfo *pSurf, FBITFIEL { HWR_Lighting(pSurf, lightlevel, wallcolormap); - HWD.pfnSetShader(2); // wall shader - HWD.pfnDrawPolygon(pSurf, wallVerts, 4, blendmode|PF_Modulated|PF_Occlude); + HWR_ProcessPolygon(pSurf, wallVerts, 4, blendmode|PF_Modulated|PF_Occlude, 2, false); // wall shader #ifdef WALLSPLATS if (gr_curline->linedef->splats && cv_splats.value) @@ -1009,7 +1022,7 @@ static void HWR_SplitWall(sector_t *sector, FOutVector *wallVerts, INT32 texnum, // Draw walls into 
the depth buffer so that anything behind is culled properly static void HWR_DrawSkyWall(FOutVector *wallVerts, FSurfaceInfo *Surf) { - HWD.pfnSetTexture(NULL); + HWR_SetCurrentTexture(NULL); // no texture wallVerts[3].t = wallVerts[2].t = 0; wallVerts[0].t = wallVerts[1].t = 0; @@ -2739,7 +2752,7 @@ static void HWR_RenderPolyObjectPlane(polyobj_t *polysector, boolean isceiling, } } else // set no texture - HWD.pfnSetTexture(NULL); + HWR_SetCurrentTexture(NULL); // reference point for flat texture coord for each vertex around the polygon flatxref = (float)((polysector->origVerts[0].x & (~flatflag)) / fflatwidth); @@ -2837,8 +2850,7 @@ static void HWR_RenderPolyObjectPlane(polyobj_t *polysector, boolean isceiling, else blendmode |= PF_Masked|PF_Modulated|PF_Clip; - HWD.pfnSetShader(1); // floor shader - HWD.pfnDrawPolygon(&Surf, planeVerts, nrPlaneVerts, blendmode); + HWR_ProcessPolygon(&Surf, planeVerts, nrPlaneVerts, blendmode, 1, false); // floor shader } static void HWR_AddPolyObjectPlanes(void) @@ -3625,8 +3637,7 @@ static void HWR_DrawDropShadow(mobj_t *thing, fixed_t scale) HWR_Lighting(&sSurf, 0, colormap); sSurf.PolyColor.s.alpha = alpha; - HWD.pfnSetShader(3); // sprite shader - HWD.pfnDrawPolygon(&sSurf, shadowVerts, 4, PF_Translucent|PF_Modulated|PF_Clip); + HWR_ProcessPolygon(&sSurf, shadowVerts, 4, PF_Translucent|PF_Modulated|PF_Clip, 3, false); // sprite shader } // This is expecting a pointer to an array containing 4 wallVerts for a sprite @@ -3889,8 +3900,7 @@ static void HWR_SplitSprite(gr_vissprite_t *spr) Surf.PolyColor.s.alpha = alpha; - HWD.pfnSetShader(3); // sprite shader - HWD.pfnDrawPolygon(&Surf, wallVerts, 4, blend|PF_Modulated|PF_Clip); + HWR_ProcessPolygon(&Surf, wallVerts, 4, blend|PF_Modulated|PF_Clip, 3, false); // sprite shader top = bot; endtop = endbot; @@ -3916,8 +3926,7 @@ static void HWR_SplitSprite(gr_vissprite_t *spr) Surf.PolyColor.s.alpha = alpha; - HWD.pfnSetShader(3); // sprite shader - HWD.pfnDrawPolygon(&Surf, wallVerts, 
4, blend|PF_Modulated|PF_Clip); + HWR_ProcessPolygon(&Surf, wallVerts, 4, blend|PF_Modulated|PF_Clip, 3, false); // sprite shader } // -----------------+ @@ -4062,8 +4071,7 @@ static void HWR_DrawSprite(gr_vissprite_t *spr) blend = PF_Translucent|PF_Occlude; } - HWD.pfnSetShader(3); // sprite shader - HWD.pfnDrawPolygon(&Surf, wallVerts, 4, blend|PF_Modulated|PF_Clip); + HWR_ProcessPolygon(&Surf, wallVerts, 4, blend|PF_Modulated|PF_Clip, 3, false); // sprite shader } } @@ -4162,8 +4170,7 @@ static inline void HWR_DrawPrecipitationSprite(gr_vissprite_t *spr) blend = PF_Translucent|PF_Occlude; } - HWD.pfnSetShader(3); // sprite shader - HWD.pfnDrawPolygon(&Surf, wallVerts, 4, blend|PF_Modulated|PF_Clip); + HWR_ProcessPolygon(&Surf, wallVerts, 4, blend|PF_Modulated|PF_Clip, 3, false); // sprite shader } #endif @@ -5450,6 +5457,9 @@ void HWR_RenderSkyboxView(INT32 viewnumber, player_t *player) validcount++; + if (cv_grbatching.value) + HWR_StartBatching(); + HWR_RenderBSPNode((INT32)numnodes-1); #ifndef NEWCLIP @@ -5481,6 +5491,9 @@ void HWR_RenderSkyboxView(INT32 viewnumber, player_t *player) } #endif + if (cv_grbatching.value) + HWR_RenderBatches(); + // Check for new console commands. NetUpdate(); @@ -5661,6 +5674,9 @@ void HWR_RenderPlayerView(INT32 viewnumber, player_t *player) validcount++; + if (cv_grbatching.value) + HWR_StartBatching(); + HWR_RenderBSPNode((INT32)numnodes-1); #ifndef NEWCLIP @@ -5694,6 +5710,9 @@ void HWR_RenderPlayerView(INT32 viewnumber, player_t *player) rs_bsptime = I_GetTimeMicros() - rs_bsptime; + if (cv_grbatching.value) + HWR_RenderBatches(); + // Check for new console commands. 
NetUpdate(); @@ -5785,6 +5804,8 @@ consvar_t cv_granisotropicmode = {"gr_anisotropicmode", "1", CV_CALL, granisotro consvar_t cv_grcorrecttricks = {"gr_correcttricks", "Off", 0, CV_OnOff, NULL, 0, NULL, NULL, 0, 0, NULL}; consvar_t cv_grsolvetjoin = {"gr_solvetjoin", "On", 0, CV_OnOff, NULL, 0, NULL, NULL, 0, 0, NULL}; +consvar_t cv_grbatching = {"gr_batching", "On", 0, CV_OnOff, NULL, 0, NULL, NULL, 0, 0, NULL}; + static void CV_grfiltermode_OnChange(void) { if (rendermode == render_opengl) @@ -5824,6 +5845,7 @@ void HWR_AddCommands(void) CV_RegisterVar(&cv_grsolvetjoin); CV_RegisterVar(&cv_renderstats); + CV_RegisterVar(&cv_grbatching); #ifndef NEWCLIP CV_RegisterVar(&cv_grclipwalls); @@ -5949,12 +5971,14 @@ void HWR_RenderWall(FOutVector *wallVerts, FSurfaceInfo *pSurf, FBITFIELD blend, FBITFIELD blendmode = blend; UINT8 alpha = pSurf->PolyColor.s.alpha; // retain the alpha + int shader; + // Lighting is done here instead so that fog isn't drawn incorrectly on transparent walls after sorting HWR_Lighting(pSurf, lightlevel, wallcolormap); pSurf->PolyColor.s.alpha = alpha; // put the alpha back after lighting - HWD.pfnSetShader(2); // wall shader + shader = 2; // wall shader if (blend & PF_Environment) blendmode |= PF_Occlude; // PF_Occlude must be used for solid objects @@ -5962,12 +5986,12 @@ void HWR_RenderWall(FOutVector *wallVerts, FSurfaceInfo *pSurf, FBITFIELD blend, if (fogwall) { blendmode |= PF_Fog; - HWD.pfnSetShader(6); // fog shader + shader = 6; // fog shader } blendmode |= PF_Modulated; // No PF_Occlude means overlapping (incorrect) transparency - HWD.pfnDrawPolygon(pSurf, wallVerts, 4, blendmode); + HWR_ProcessPolygon(pSurf, wallVerts, 4, blendmode, shader, false); #ifdef WALLSPLATS if (gr_curline->linedef->splats && cv_splats.value) diff --git a/src/hardware/hw_main.h b/src/hardware/hw_main.h index fc49364da..4aadc2c39 100644 --- a/src/hardware/hw_main.h +++ b/src/hardware/hw_main.h @@ -96,6 +96,8 @@ extern consvar_t cv_grskydome; extern consvar_t 
cv_grfakecontrast; extern consvar_t cv_grslopecontrast; +extern consvar_t cv_grbatching; + extern float gr_viewwidth, gr_viewheight, gr_baseviewwindowy; extern float gr_viewwindowx, gr_basewindowcentery; @@ -111,4 +113,15 @@ extern int rs_hw_nodedrawtime; extern int rs_hw_spritesorttime; extern int rs_hw_spritedrawtime; +// Render stats for batching +extern int rs_hw_numpolys; +extern int rs_hw_numverts; +extern int rs_hw_numcalls; +extern int rs_hw_numshaders; +extern int rs_hw_numtextures; +extern int rs_hw_numpolyflags; +extern int rs_hw_numcolors; +extern int rs_hw_batchsorttime; +extern int rs_hw_batchdrawtime; + #endif diff --git a/src/hardware/r_opengl/r_opengl.c b/src/hardware/r_opengl/r_opengl.c index d80de8aad..bbeec6e0b 100644 --- a/src/hardware/r_opengl/r_opengl.c +++ b/src/hardware/r_opengl/r_opengl.c @@ -1945,10 +1945,9 @@ static void Shader_SetUniforms(FSurfaceInfo *Surface, GLRGBAFloat *poly, GLRGBAF #endif } -// -----------------+ -// DrawPolygon : Render a polygon, set the texture, set render mode -// -----------------+ -EXPORT void HWRAPI(DrawPolygon) (FSurfaceInfo *pSurf, FOutVector *pOutVerts, FUINT iNumPts, FBITFIELD PolyFlags) +// code that is common between DrawPolygon and DrawIndexedTriangles +// the corona thing is there too, i have no idea if that stuff works with DrawIndexedTriangles and batching +static void PreparePolygon(FSurfaceInfo *pSurf, FOutVector *pOutVerts, FBITFIELD PolyFlags) { static GLRGBAFloat poly = {0,0,0,0}; static GLRGBAFloat tint = {0,0,0,0}; @@ -2013,10 +2012,10 @@ EXPORT void HWRAPI(DrawPolygon) (FSurfaceInfo *pSurf, FOutVector *pOutVerts, FUI //GL_DBG_Printf("Projection: (%f, %f, %f)\n", px, py, pz); if ((pz < 0.0l) || - (px < -8.0l) || - (py < viewport[1]-8.0l) || - (px > viewport[2]+8.0l) || - (py > viewport[1]+viewport[3]+8.0l)) + (px < -8.0l) || + (py < viewport[1]-8.0l) || + (px > viewport[2]+8.0l) || + (py > viewport[1]+viewport[3]+8.0l)) return; // the damned slow glReadPixels functions :( @@ -2051,6 
+2050,14 @@ EXPORT void HWRAPI(DrawPolygon) (FSurfaceInfo *pSurf, FOutVector *pOutVerts, FUI } Shader_Load(pSurf, &poly, &tint, &fade); +} + +// -----------------+ +// DrawPolygon : Render a polygon, set the texture, set render mode +// -----------------+ +EXPORT void HWRAPI(DrawPolygon) (FSurfaceInfo *pSurf, FOutVector *pOutVerts, FUINT iNumPts, FBITFIELD PolyFlags) +{ + PreparePolygon(pSurf, pOutVerts, PolyFlags); pglVertexPointer(3, GL_FLOAT, sizeof(FOutVector), &pOutVerts[0].x); pglTexCoordPointer(2, GL_FLOAT, sizeof(FOutVector), &pOutVerts[0].s); @@ -2066,6 +2073,17 @@ EXPORT void HWRAPI(DrawPolygon) (FSurfaceInfo *pSurf, FOutVector *pOutVerts, FUI Clamp2D(GL_TEXTURE_WRAP_T); } +EXPORT void HWRAPI(DrawIndexedTriangles) (FSurfaceInfo *pSurf, FOutVector *pOutVerts, FUINT iNumPts, FBITFIELD PolyFlags, unsigned int *IndexArray) +{ + PreparePolygon(pSurf, pOutVerts, PolyFlags); + + pglVertexPointer(3, GL_FLOAT, sizeof(FOutVector), &pOutVerts[0].x); + pglTexCoordPointer(2, GL_FLOAT, sizeof(FOutVector), &pOutVerts[0].s); + pglDrawElements(GL_TRIANGLES, iNumPts, GL_UNSIGNED_INT, IndexArray); + + // the DrawPolygon variant of this has some code about polyflags and wrapping here but havent noticed any problems from omitting it? 
+} + typedef struct vbo_vertex_s { float x, y, z; diff --git a/src/sdl/Srb2SDL-vc10.vcxproj b/src/sdl/Srb2SDL-vc10.vcxproj index 6a55ac2d6..c24104b92 100644 --- a/src/sdl/Srb2SDL-vc10.vcxproj +++ b/src/sdl/Srb2SDL-vc10.vcxproj @@ -220,6 +220,7 @@ + @@ -370,6 +371,7 @@ + diff --git a/src/sdl/Srb2SDL-vc10.vcxproj.filters b/src/sdl/Srb2SDL-vc10.vcxproj.filters index 89ba1b588..2f0aec7ca 100644 --- a/src/sdl/Srb2SDL-vc10.vcxproj.filters +++ b/src/sdl/Srb2SDL-vc10.vcxproj.filters @@ -219,6 +219,9 @@ Hw_Hardware + + Hw_Hardware + Hw_Hardware @@ -636,6 +639,9 @@ Hw_Hardware + + Hw_Hardware + Hw_Hardware diff --git a/src/sdl/hwsym_sdl.c b/src/sdl/hwsym_sdl.c index 974ac5edd..416c8d2f5 100644 --- a/src/sdl/hwsym_sdl.c +++ b/src/sdl/hwsym_sdl.c @@ -80,6 +80,7 @@ void *hwSym(const char *funcName,void *handle) GETFUNC(Init); GETFUNC(Draw2DLine); GETFUNC(DrawPolygon); + GETFUNC(DrawIndexedTriangles); GETFUNC(RenderSkyDome); GETFUNC(SetBlend); GETFUNC(ClearBuffer); diff --git a/src/sdl/i_video.c b/src/sdl/i_video.c index f06e10124..8bff5690f 100644 --- a/src/sdl/i_video.c +++ b/src/sdl/i_video.c @@ -1824,6 +1824,7 @@ void VID_StartupOpenGL(void) HWD.pfnFinishUpdate = NULL; HWD.pfnDraw2DLine = hwSym("Draw2DLine",NULL); HWD.pfnDrawPolygon = hwSym("DrawPolygon",NULL); + HWD.pfnDrawIndexedTriangles = hwSym("DrawIndexedTriangles",NULL); HWD.pfnRenderSkyDome = hwSym("RenderSkyDome",NULL); HWD.pfnSetBlend = hwSym("SetBlend",NULL); HWD.pfnClearBuffer = hwSym("ClearBuffer",NULL);