Whales / Cataclysm2

The sequel to the post-apocalyptic roguelike. Infinitely better.
Other
116 stars 30 forks source link

The game works slowly #48

Open danielrempel opened 10 years ago

danielrempel commented 10 years ago

I've just seen your cata2 and tried it, but it is really slow. When I'm starting the game it takes up to 1.5-2 seconds to make one turn. I'm playing from arch linux, using xfce4 terminal. Maybe I'm missing something? Maybe I could try fixing that? I'll need a bit of guidance:)

Any more specs/info needed?

UPD: The world map works fast, no lags at all. UPD2: in 80x24 mode the game is fast, the problem is with fullscreen mode.

Whales commented 10 years ago

The game isn't really intended to be played fullscreen. Depending on the size of your screen, that could increase the processor demand by anywhere between 500 and 2,000%.

If you compile without modifying the Makefile, you are using a debug build (which slows it down somewhat), and an unoptimized build (which slows it down a LOT). Such a build is intended for testing and catching bugs, NOT for actual gameplay or high performance. Did you edit the Makefile before compiling?

The only fix required here is to limit the size of the map view window - I'll apply that soon. FoV calculations could probably be improved too, to a degree, but that's really a separate issue.

danielrempel commented 10 years ago

I didn't edit the makefile. I'll try that, thank you!:)

tung commented 10 years ago

FoV calculations could probably be improved too, to a degree

A very large degree. I compiled and linked the game with --coverage to figure out why the game ran so slowly in a large window.

Here's the number of times each line of line_of_sight in map.cpp is executed when the game is played with a 24-row terminal, starting with a fresh character and taking 20 steps left (-O0, the first column is the number of times each line is executed, the second is the line number):

    12096: 2469:std::vector<Tripoint> Map::line_of_sight(int x0, int y0, int z0,
        -: 2470:                                         int x1, int y1, int z1)
        -: 2471:{
    12096: 2472:  std::vector<Tripoint>  lines;    // Process many lines at once.
    12096: 2473:  std::vector<std::vector<Tripoint> > return_values;
    12096: 2474:  std::vector<int>    t_values; // T-values for Bresenham lines
        -: 2475:
    12096: 2476:  int dx = x1 - x0, dy = y1 - y0, dz = z1 - z0;
    12096: 2477:  int ax = abs(dx) << 1, ay = abs(dy) << 1;
    12096: 2478:  int sx = (dx < 0 ? -1 : 1), sy = (dy < 0 ? -1 : 1);
    12096: 2479:  int dist = rl_dist(x0, y0, x1, y1);
        -: 2480:  int z_step;
    12096: 2481:  if (dist == 0) {
       21: 2482:    z_step = 0;
        -: 2483:  } else {
    12075: 2484:    z_step = (100 * dz) / dist;
        -: 2485:  }
    12096: 2486:  if (dx == 0) {
      504: 2487:    sx = 0;
        -: 2488:  }
    12096: 2489:  if (dy == 0) {
      504: 2490:    sy = 0;
        -: 2491:  }
        -: 2492:
    12096: 2493:  int min_t = (ax > ay ? ay - ax : ax - ay),
    12096: 2494:      max_t = 0;
    12096: 2495:  if (dx == 0 || dy == 0) {
      987: 2496:    min_t = 0;
        -: 2497:  }
        -: 2498:// Init our "lines"
    12096: 2499:  std::vector<Tripoint> seed;
   109200: 2500:  for (int t = min_t; t <= max_t; t++) {
    97104: 2501:    lines.push_back( Tripoint(x0, y0, z0) );
    97104: 2502:    return_values.push_back(seed);
    97104: 2503:    t_values.push_back(t);
        -: 2504:  }
    12096: 2505:  int z_value = 50; // Each tile is 100 microunits tall, start halfway up
    12096: 2506:  int z_level = z0;
        -: 2507:// Keep going as long as we've got at least one valid line
   108969: 2508:  while (!lines.empty()) {
        -: 2509:// Since we track z_value universally, don't do it inside the for loop below
    96873: 2510:    bool z_stepped = false;
    96873: 2511:    int old_z = z_level;
    96873: 2512:    z_value += z_step;
    96873: 2513:    if (z_value < 0) {
    #####: 2514:      z_level--;
    #####: 2515:      z_value += 100;
    #####: 2516:      z_stepped = true;
    96873: 2517:    } else if (z_value >= 100) {
    #####: 2518:      z_level++;
    #####: 2519:      z_value -= 100;
    #####: 2520:      z_stepped = true;
        -: 2521:    }
   876498: 2522:    for (int i = 0; i < lines.size(); i++) {
   791721: 2523:      lines[i].z = z_level;
   791721: 2524:      if (ax > ay) {
   392952: 2525:        lines[i].x += sx;
   392952: 2526:        if (t_values[i] >= 0) {
   144627: 2527:          lines[i].y += sy;
   144627: 2528:          t_values[i] -= ax;
        -: 2529:        }
   392952: 2530:        t_values[i] += ay;
        -: 2531:      } else {
   398769: 2532:        lines[i].y += sy;
   398769: 2533:        if (t_values[i] >= 0) {
   150444: 2534:          lines[i].x += sx;
   150444: 2535:          t_values[i] -= ay;
        -: 2536:        }
   398769: 2537:        t_values[i] += ax;
        -: 2538:      }
   791721: 2539:      return_values[i].push_back(lines[i]);
        -: 2540:// Don't need to check z, right?
   791721: 2541:      if (lines[i].x == x1 && lines[i].y == y1) {
    12096: 2542:        return return_values[i];
        -: 2543:      }
   779625: 2544:      if (blocks_sense(SENSE_SIGHT, lines[i], z_value) ||
        -: 2545:          (z_stepped &&
    #####: 2546:           blocks_sense(SENSE_SIGHT, lines[i].x, lines[i].y, old_z))) {
    #####: 2547:        lines.erase(lines.begin() + i);
    #####: 2548:        t_values.erase(t_values.begin() + i);
    #####: 2549:        return_values.erase(return_values.begin() + i);
    #####: 2550:        i--;
        -: 2551:      }
        -: 2552:    }
        -: 2553:  }
    #####: 2554:  return std::vector<Tripoint>();
        -: 2555:}

And here's the same thing with a 55-row terminal, again, fresh character and 20 steps left:

    63525: 2469:std::vector<Tripoint> Map::line_of_sight(int x0, int y0, int z0,
        -: 2470:                                         int x1, int y1, int z1)
        -: 2471:{
    63525: 2472:  std::vector<Tripoint>  lines;    // Process many lines at once.
    63525: 2473:  std::vector<std::vector<Tripoint> > return_values;
    63525: 2474:  std::vector<int>    t_values; // T-values for Bresenham lines
        -: 2475:
    63525: 2476:  int dx = x1 - x0, dy = y1 - y0, dz = z1 - z0;
    63525: 2477:  int ax = abs(dx) << 1, ay = abs(dy) << 1;
    63525: 2478:  int sx = (dx < 0 ? -1 : 1), sy = (dy < 0 ? -1 : 1);
    63525: 2479:  int dist = rl_dist(x0, y0, x1, y1);
        -: 2480:  int z_step;
    63525: 2481:  if (dist == 0) {
       21: 2482:    z_step = 0;
        -: 2483:  } else {
    63504: 2484:    z_step = (100 * dz) / dist;
        -: 2485:  }
    63525: 2486:  if (dx == 0) {
     1155: 2487:    sx = 0;
        -: 2488:  }
    63525: 2489:  if (dy == 0) {
     1155: 2490:    sy = 0;
        -: 2491:  }
        -: 2492:
    63525: 2493:  int min_t = (ax > ay ? ay - ax : ax - ay),
    63525: 2494:      max_t = 0;
    63525: 2495:  if (dx == 0 || dy == 0) {
     2289: 2496:    min_t = 0;
        -: 2497:  }
        -: 2498:// Init our "lines"
    63525: 2499:  std::vector<Tripoint> seed;
  1227786: 2500:  for (int t = min_t; t <= max_t; t++) {
  1164261: 2501:    lines.push_back( Tripoint(x0, y0, z0) );
  1164261: 2502:    return_values.push_back(seed);
  1164261: 2503:    t_values.push_back(t);
        -: 2504:  }
    63525: 2505:  int z_value = 50; // Each tile is 100 microunits tall, start halfway up
    63525: 2506:  int z_level = z0;
        -: 2507:// Keep going as long as we've got at least one valid line
  1226862: 2508:  while (!lines.empty()) {
        -: 2509:// Since we track z_value universally, don't do it inside the for loop below
  1162848: 2510:    bool z_stepped = false;
  1162848: 2511:    int old_z = z_level;
  1162848: 2512:    z_value += z_step;
  1162848: 2513:    if (z_value < 0) {
    #####: 2514:      z_level--;
    #####: 2515:      z_value += 100;
    #####: 2516:      z_stepped = true;
  1162848: 2517:    } else if (z_value >= 100) {
    #####: 2518:      z_level++;
    #####: 2519:      z_value -= 100;
    #####: 2520:      z_stepped = true;
        -: 2521:    }
 23984191: 2522:    for (int i = 0; i < lines.size(); i++) {
 22884379: 2523:      lines[i].z = z_level;
 22884379: 2524:      if (ax > ay) {
 11433543: 2525:        lines[i].x += sx;
 11433543: 2526:        if (t_values[i] >= 0) {
  3989092: 2527:          lines[i].y += sy;
  3989092: 2528:          t_values[i] -= ax;
        -: 2529:        }
 11433543: 2530:        t_values[i] += ay;
        -: 2531:      } else {
 11450836: 2532:        lines[i].y += sy;
 11450836: 2533:        if (t_values[i] >= 0) {
  4014695: 2534:          lines[i].x += sx;
  4014695: 2535:          t_values[i] -= ay;
        -: 2536:        }
 11450836: 2537:        t_values[i] += ax;
        -: 2538:      }
 22884379: 2539:      return_values[i].push_back(lines[i]);
        -: 2540:// Don't need to check z, right?
 22884379: 2541:      if (lines[i].x == x1 && lines[i].y == y1) {
    63036: 2542:        return return_values[i];
        -: 2543:      }
 22821343: 2544:      if (blocks_sense(SENSE_SIGHT, lines[i], z_value) ||
        -: 2545:          (z_stepped &&
    #####: 2546:           blocks_sense(SENSE_SIGHT, lines[i].x, lines[i].y, old_z))) {
    12020: 2547:        lines.erase(lines.begin() + i);
    12020: 2548:        t_values.erase(t_values.begin() + i);
    12020: 2549:        return_values.erase(return_values.begin() + i);
    12020: 2550:        i--;
        -: 2551:      }
        -: 2552:    }
        -: 2553:  }
      489: 2554:  return std::vector<Tripoint>();
        -: 2555:}

In other words, the inner loop of this vision-calculating method runs 876,498 times in the 24-row case, and 23,984,191 times (!!!) in the 55-row case: 27.3 times slower despite only 4 times the area being visible!

As you can see, naive ray-casting like this scales really poorly to larger screen sizes; better field-of-view algorithms like recursive shadow casting scale much better (this is what Cataclysm DDA uses to keep the game fast even with huge screen dimensions), but I'd have no idea how to adapt it to a 3D space, which you'd have to do if you wanted to use it for Cataclysm 2.

I also profiled an earlier build of this, and the number one method called was the Tripoint constructor, probably because of all of the vectors of them that you're using for this vision code.

I've made the coverage stats for both cases available alongside the map.cpp source in this gist. I just wanted to note that the performance problem here is the algorithm, not the screen size, so it'd be a shame for the screen size to be clamped down on just because of an implementation detail.

tung commented 10 years ago

Almost forgot, I wasn't able to get these coverage stats because the current build kept segfaulting after suiciding. I had to do this fix it so I get could get meaningful stats, not sure if it's exactly the right fix:

diff --git a/monster_type.cpp b/monster_type.cpp
index 86c0a43..90994bc 100644
--- a/monster_type.cpp
+++ b/monster_type.cpp
@@ -42,8 +42,10 @@ Monster_type::Monster_type()

 Monster_type::~Monster_type()
 {
-  for (int i = 0; i < abilities.size(); i++) {
-    delete abilities[i];
+  if (!abilities_copied_from_genus) {
+    for (int i = 0; i < abilities.size(); i++) {
+      delete abilities[i];
+    }
   }
 }
danielrempel commented 10 years ago

Why does the method return an empty Tripoint vector?

Whales commented 10 years ago

Well, when I said that the problem was the screen size, what I meant was that a high screen size means a lot more calls to the FoV code, which means a lot more time spent processing that. It's a multiplicative issue :)

I've been thinking about a simpler ray-casting algorithm that doesn't actually use Bresenham at all like this old junk does. Two lines (actual lines, not a Bresenham line, which is actually a ton of calculated Tripoints and hence very slow) per tile in view.

I looked at DDA's code and it looks like they're still using the even-worse LoS calculation that I wrote in 2008? Not sure what you're referring to, tung. Shadowcasting concerns me due to the potential gameplay-altering consequences, but I'm sure I could mess around with it and come up with something that works here.

tung, thanks for the bugfix - that's definitely the right fix, oversight on my part. Thanks also for the analysis - it's mostly stuff I already knew, but I haven't done much profiling yet and wasn't aware that the Tripoint constructors were such a bog.

danilrempel, the method returns an empty vector when no line of sight exists between the two points (it has to return SOME vector, and any other result would be misleading). I suppose the return value could be changed to a bool, and then it could take a std::vector reference or pointer as a parameter, in which the actual LoS is stored. If I had better testing support (e.g. an always-the-same map you can start in for testing/benchmarking purposes) I'd do a side-by-side comparison and see if that change made much of a difference, but given tung's analysis above I'm guessing the relatively-rare null return case is negligible.

tung commented 10 years ago

I looked at DDA's code and it looks like they're still using the even-worse LoS calculation that I wrote in 2008? Not sure what you're referring to, tung.

It's implemented in map::build_seen_cache() and map::castLight() DDA's lightmap.cpp which I believe is then used by the vision system itself.

Thanks also for the analysis - it's mostly stuff I already knew, but I haven't done much profiling yet and wasn't aware that the Tripoint constructors were such a bog.

No problem. For what it's worth the Tripoint constructor is probably a hotspot because of how often it's being called rather than being a bottleneck of its own right.

masecknotrealname commented 9 years ago

I have some optimized los code if anybody needs it.

std::vector<Tripoint> Map::line_of_sight(int x0, int y0, int z0,
                                         int x1, int y1, int z1)
{
  const int dx = x1 - x0, dy = y1 - y0, dz = z1 - z0;
  const int ax = abs(dx) << 1, ay = abs(dy) << 1;
  const int sx = dx == 0 ? 0 :
                (dx < 0 ? -1 : 1),
            sy = dy == 0 ? 0 :
                (dy < 0 ? -1 : 1);
  const int dist = rl_dist(x0, y0, x1, y1);
  const int z_step = (dist == 0) ? 0 : ((100 * dz) / dist);

// Could be const, but I'm lazy
  int min_t = (ax > ay ? ay - ax : ax - ay),
      max_t = 0;
  if (dx == 0 || dy == 0) {
    min_t = 0;
  }

/* Is set to the number of t_values that should be skipped after we fail by
   encountering something that blocks our sight.
*/
  int failure_countdown = 0;

  std::vector<Tripoint> return_value;
  for (int t = min_t; t <= max_t; t++) {
    if (failure_countdown > 0) {
      failure_countdown--;
      continue;
    }
    int t_value = t; // t_value for Bresenham line
    int old_t_value = 0;
    Tripoint position = Tripoint(x0, y0, z0);
    int z_value = 50; // Each tile is 100 microunits tall, start halfway up
    int z_level = z0;
// "Inititialized" for every step
    bool z_stepped;
    int old_z;
    do {
// Track z values
      z_stepped = false;
      old_z = z_level;
      z_value += z_step;
      if (z_value < 0) {
        z_level--;
        z_value += 100;
        z_stepped = true;
      } else if (z_value >= 100) {
        z_level++; 
        z_value -= 100;
        z_stepped = true;
      }
// Track x and y values
      old_t_value = t_value;
      if (ax > ay) {
        position.x += sx;
        if (t_value >= 0) {
          position.y += sy;
          t_value -= ax;
        }
        t_value += ay;
      } else {
        position.y += sy;
        if (t_value >= 0) {
          position.x += sx;
          t_value -= ay;
        }
        t_value += ax;
      }
      return_value.push_back(position);
// Don't need to check z, right?
      if (position.x == x1 && position.y == y1)
        return return_value;
    } while ( !(
          (blocks_sense(SENSE_SIGHT, position, z_value) ||
          (z_stepped &&
           blocks_sense(SENSE_SIGHT, position.x, position.y, old_z)))
            ) );
    return_value.clear();
// The number of skipped t_values is equivelent to the number of iterations needed to make us avoid the failed spot
    if (old_t_value >= 0)
    {
// Recalculate the resetting of the t_value
      if(ax > ay)
        old_t_value -= ax; 
      else
        old_t_value -= ay;
    }
// The t_value is negative and the countdown should be positive.
    failure_countdown = -1 * old_t_value;
  }
  return return_value;
}