jegonzal / PowerGraph

PowerGraph: A framework for large-scale machine learning and graph computation.
344 stars 518 forks source link

Double locking results in deadlock #201

Closed developerdong closed 2 years ago

developerdong commented 7 years ago
void perform_scatter_local(lvid_type lvid,
                               vertex_program_type& vprog) {
      local_vertex_type local_vertex(graph.l_vertex(lvid));
      vertex_type vertex(local_vertex);
      context_type context(*this, graph);
      edge_dir_type scatter_dir = vprog.scatter_edges(context, vertex);
      if(scatter_dir == IN_EDGES || scatter_dir == ALL_EDGES) {
        foreach(local_edge_type local_edge, local_vertex.in_edges()) {
          edge_type edge(local_edge);
          lvid_type a = edge.source().local_id(), b = edge.target().local_id();
          vertexlocks[std::min(a,b)].lock();
          vertexlocks[std::max(a,b)].lock();
          vprog.scatter(context, vertex, edge);
          vertexlocks[a].unlock();
          vertexlocks[b].unlock();
    }
 } 
      if(scatter_dir == OUT_EDGES || scatter_dir == ALL_EDGES) {
        foreach(local_edge_type local_edge, local_vertex.out_edges()) {
          edge_type edge(local_edge);
          lvid_type a = edge.source().local_id(), b = edge.target().local_id();
          vertexlocks[std::min(a,b)].lock();
          vertexlocks[std::max(a,b)].lock();
          vprog.scatter(context, vertex, edge);
          vertexlocks[a].unlock();
          vertexlocks[b].unlock();
        }
      } 
      // release locks
      if (!factorized_consistency) {
        cmlocks->philosopher_stops_eating_per_replica(lvid);
      }
    }
void internal_clear_gather_cache(const vertex_type& vertex) {
      const lvid_type lvid = vertex.local_id();
      if(use_cache && has_cache.get(lvid)) {
        vertexlocks[lvid].lock();
        gather_cache[lvid] = gather_type();
        has_cache.clear_bit(lvid);
        vertexlocks[lvid].unlock();
      }
    }
void internal_post_delta(const vertex_type& vertex,
                             const gather_type& delta) {
      if(use_cache) {
        const lvid_type lvid = vertex.local_id();
        vertexlocks[lvid].lock();
        if( has_cache.get(lvid) ) {
          gather_cache[lvid] += delta;
        } else {
          // You cannot add a delta to an empty cache.  A complete
          // gather must have been run.
          // gather_cache[lvid] = delta;
          // has_cache.set_bit(lvid);
        }
        vertexlocks[lvid].unlock();
      }
    }

As shown above, the source and target of edge will be locked before executing scatter function. And if we want to post delta to or clear the gather cache, we should first lock the vertex. My question is: when we change the gather cache in the scatter function, we will try to lock the vertex which has been locked before scatter. Does this behavior always lead to deadlocks? The Pagerank example is shown below.

void scatter(icontext_type& context, const vertex_type& vertex,
               edge_type& edge) const {
    if(USE_DELTA_CACHE) {
      context.post_delta(edge.target(), last_change);
    /*In the post_delta function, we should lock the edge target which has been locked before the scatter fcuntion. Is this behavior safe?*/
    }

    if(last_change > TOLERANCE || last_change < -TOLERANCE) {
        context.signal(edge.target());
    }
  }