scribu / wp-posts-to-posts

Efficient many-to-many connections between posts, pages, custom post types, users.
http://wordpress.org/plugins/posts-to-posts/
974 stars 260 forks source link

SQL performances issues #219

Open herewithme opened 12 years ago

herewithme commented 12 years ago

I meet a different performance issues with your great plugin P2P :

My database :

My current SQL version : 5.5.27-1~dotdeb.0

SQL queries takes an incredible time (9.0331 sec) with this kind of queries : When i display a list of custom post type : http://mywebsite/wp-admin/edit.php?post_type=my_cpt

SELECT wpasc_posts.*, wpasc_p2p.* FROM wpasc_posts INNER JOIN wpasc_p2p WHERE 1=1 AND wpasc_posts.post_type IN ('adherent') AND (wpasc_posts.post_status = 'publish' OR wpasc_posts.post_status = 'future' OR wpasc_posts.post_status = 'draft' OR wpasc_posts.post_status = 'pending' OR wpasc_posts.post_author = 96258 AND wpasc_posts.post_status = 'private') AND (wpasc_p2p.p2p_type = 'member_to_member' AND (
(wpasc_posts.ID = wpasc_p2p.p2p_to AND wpasc_p2p.p2p_from IN (SELECT wpasc_posts.ID FROM wpasc_posts WHERE 1=1 AND wpasc_posts.ID IN (120513,120510,96094,96095,96156,96268,96269,96267,96291,96292,96332,96333,96406,96420,96551,96552,96573,96574,96572,96607) AND wpasc_posts.post_type IN ('adherent') AND (wpasc_posts.post_status = 'publish' OR wpasc_posts.post_status = 'future' OR wpasc_posts.post_status = 'draft' OR wpasc_posts.post_status = 'pending' OR wpasc_posts.post_author = 96258 AND wpasc_posts.post_status = 'private') ORDER BY wpasc_posts.post_date DESC )) OR
(wpasc_posts.ID = wpasc_p2p.p2p_from AND wpasc_p2p.p2p_to IN (SELECT wpasc_posts.ID FROM wpasc_posts WHERE 1=1 AND wpasc_posts.ID IN (120513,120510,96094,96095,96156,96268,96269,96267,96291,96292,96332,96333,96406,96420,96551,96552,96573,96574,96572,96607) AND wpasc_posts.post_type IN ('adherent') AND (wpasc_posts.post_status = 'publish' OR wpasc_posts.post_status = 'future' OR wpasc_posts.post_status = 'draft' OR wpasc_posts.post_status = 'pending' OR wpasc_posts.post_author = 96258 AND wpasc_posts.post_status = 'private') ORDER BY wpasc_posts.post_date DESC ))
)) ORDER BY wpasc_posts.post_date DESC 

Explain results give :

id  select_type table   type    possible_keys   key key_len ref rows    Extra
1   PRIMARY wpasc_posts ref PRIMARY,type_status_date,post_author    type_status_date    62  const   18712   Using where; Using temporary; Using filesort
1   PRIMARY wpasc_p2p   ALL p2p_from,p2p_to,p2p_type    NULL    NULL    NULL    47583   Using where; Using join buffer
3   DEPENDENT SUBQUERY  wpasc_posts unique_subquery PRIMARY,type_status_date,post_author    PRIMARY 8   func    1   Using where
2   DEPENDENT SUBQUERY  wpasc_posts unique_subquery PRIMARY,type_status_date,post_author    PRIMARY 8   func    1   Using where

The first problem is that the request is never cached by MySQL (because too large I think) I made a plugin to simplify the SQL query and finally allow the splitting introduced in WP 3.4. (first get IDS, after get contents with ID

The plugin :

<?php
/*
 Plugin Name: Speedup P2P
 Plugin URI: http://www.beapi.fr
 Description: Change query from P2P plugin, for get ID instead ALL data (*) for posts, make 2 queries instead once !
 Author: BeAPI
 Author URI: http://www.beapi.fr
 Version: 0.1

 ----
 Copyright 2012 Amaury Balmer (amaury@beapi.fr)
 ----
 */

class Speedup_P2P {
    function __construct() {
        add_filter('posts_request', array(&$this, 'posts_request'), 9999999, 2 );
        add_filter('posts_results', array(&$this, 'posts_results'), 1, 2 );
    }

    function posts_request( $request, $query ) {
        global $wpdb;

        $new_request = $request;
        $new_request = str_replace("SELECT SQL_CALC_FOUND_ROWS  $wpdb->posts.*, $wpdb->p2p.* FROM", "SELECT SQL_CALC_FOUND_ROWS $wpdb->posts.ID, $wpdb->p2p.* FROM", $new_request);
        $new_request = str_replace("SELECT   $wpdb->posts.*, $wpdb->p2p.* FROM", "SELECT   $wpdb->posts.ID, $wpdb->p2p.* FROM", $new_request);
        if ( $new_request != $request ) {
            $query->p2p_flag = true;
        }

        return $new_request;
    }

    function posts_results( $posts, $query ) {
        global $wpdb;

        if ( isset($query->p2p_flag) && $query->p2p_flag == true && !empty($posts) ) {
            // Restore flag 
            $query->p2p_flag = false;

            // Get posts IDs for get contents
            $_posts = array();
            foreach( $posts as $post ) {
                $_posts[] = $post->ID;
            }

            // setup posts data
            _prime_post_caches( $_posts, $query->query_vars['update_post_term_cache'], $query->query_vars['update_post_meta_cache'] );
            $_posts = array_map( 'get_post', $_posts );

            // Put ID on key
            foreach( $_posts as $key => $_post ) {
                unset($_posts[$key]);
                $_posts[$_post->ID] = $_post;
            }

            // Merge datas
            $query->posts = array();
            foreach( $posts as $post ) {
                $query->posts[] = (object) array_merge((array) $_posts[$post->ID], (array) $post);
            }

            return $query->posts;
        }

        return $posts;
    }
}
add_action( 'plugins_loaded', create_function('', 'return new Speedup_P2P();') );

The new query :

SELECT wpasc_posts.ID, wpasc_p2p.* FROM wpasc_posts INNER JOIN wpasc_p2p WHERE 1=1 AND wpasc_posts.post_type IN ('adherent') AND (wpasc_posts.post_status = 'publish' OR wpasc_posts.post_status = 'future' OR wpasc_posts.post_status = 'draft' OR wpasc_posts.post_status = 'pending' OR wpasc_posts.post_author = 96258 AND wpasc_posts.post_status = 'private') AND (wpasc_p2p.p2p_type = 'member_to_member' AND (
(wpasc_posts.ID = wpasc_p2p.p2p_to AND wpasc_p2p.p2p_from IN (SELECT wpasc_posts.ID FROM wpasc_posts WHERE 1=1 AND wpasc_posts.ID IN (120513,120510,96094,96095,96156,96268,96269,96267,96291,96292,96332,96333,96406,96420,96551,96552,96573,96574,96572,96607) AND wpasc_posts.post_type IN ('adherent') AND (wpasc_posts.post_status = 'publish' OR wpasc_posts.post_status = 'future' OR wpasc_posts.post_status = 'draft' OR wpasc_posts.post_status = 'pending' OR wpasc_posts.post_author = 96258 AND wpasc_posts.post_status = 'private') ORDER BY wpasc_posts.post_date DESC )) OR
(wpasc_posts.ID = wpasc_p2p.p2p_from AND wpasc_p2p.p2p_to IN (SELECT wpasc_posts.ID FROM wpasc_posts WHERE 1=1 AND wpasc_posts.ID IN (120513,120510,96094,96095,96156,96268,96269,96267,96291,96292,96332,96333,96406,96420,96551,96552,96573,96574,96572,96607) AND wpasc_posts.post_type IN ('adherent') AND (wpasc_posts.post_status = 'publish' OR wpasc_posts.post_status = 'future' OR wpasc_posts.post_status = 'draft' OR wpasc_posts.post_status = 'pending' OR wpasc_posts.post_author = 96258 AND wpasc_posts.post_status = 'private') ORDER BY wpasc_posts.post_date DESC ))
)) ORDER BY wpasc_posts.post_date DESC 

The results of this query are allowed in the cache MySQL queries but are still slow. (1 to 2 seconds)

How can we improve the performance of P2P in a large database? there's there any specific settings to include for mysql?

scribu commented 12 years ago

That plugin looks handy. Could you transform it into a pull request?

herewithme commented 12 years ago

Yes, I can, but the plugin does not completely solve the performance problem. Are you meet that also slow queries with this volume of data?

scribu commented 12 years ago

Well, it's a fairly complex query, so it's not that surprising. Only other optimization I could think of is transforming the inner SELECTs into JOINs, but the SQL transformations get tricky. Would be worth transforming the example query by hand first, to see how much speed is gained.

petergreen commented 10 years ago

I'm running into something similar and I'm wondering if there might yet be a way to remove the subselect and move it into a join. Here's my attempt.

The original query, as it's running on one of our sites, is:

SELECT wpmnn_posts.*, wpmnn_p2p.*
FROM wpmnn_posts
INNER JOIN wpmnn_p2p
WHERE 1=1
AND  wpmnn_posts.post_type = 'news'
AND (wpmnn_posts.post_status = 'publish')
AND (wpmnn_p2p.p2p_type = 'news_to_interviews'
AND  wpmnn_posts.ID = wpmnn_p2p.p2p_from
AND  wpmnn_p2p.p2p_to IN (
  SELECT wpmnn_posts.ID
  FROM wpmnn_posts
  WHERE 1=1
  AND  wpmnn_posts.ID IN (113088)
  AND  wpmnn_posts.post_type = 'interviews'
  AND (wpmnn_posts.post_status = 'publish' OR wpmnn_posts.post_status = 'private')
  ORDER BY wpmnn_posts.menu_order, wpmnn_posts.post_date DESC
))
ORDER BY wpmnn_posts.menu_order, wpmnn_posts.post_date DESC
LIMIT 0, 5

I've attempted to speed that up by changing it to:

SELECT a.*, c.*
FROM wpmnn_posts a, wpmnn_posts b, wpmnn_p2p c
WHERE (a.ID = c.p2p_from
  AND  c.p2p_to = b.ID
  AND  b.ID IN (113088)
  AND  b.post_type = 'interviews'
  AND (b.post_status = 'publish' OR b.post_status = 'private')
)
AND  a.post_type = 'news'
AND (a.post_status = 'publish')
AND (c.p2p_type = 'news_to_interviews')
ORDER BY b.menu_order, b.post_date DESC, a.menu_order, a.post_date DESC
LIMIT 0, 5

Initial tests seem to indicate that the results are functionally the same, but I've seen the threads and pull request that all indicate the problem might be thornier than it first seems. Which means I'm not sure my tests are rigorous enough to call this Correct... so I'm hoping you could take a quick look.

I realize the plugin is no longer supported, but this would be a big win if it works. So I'm hoping you might give this a look and see if it makes sense. (Otherwise I might just patch our version and hope for the best. ;-)

scribu commented 10 years ago

Altering the final SQL is the easy part. :)

Commit it to a branch and open a pull request, so that Travis CI can run the unit tests. If they pass, you're in good shape.

twentyfortysix commented 8 years ago

Hello, here is my result from P3 profiler.. no matter how I like post2post plugin it slows down our website heavily ./ p3 plugin profiler auto mat wordpress

scribu commented 8 years ago

@twentyfortysix Hello,

In order for that report to be of any use, you should provide some more info about it:

First of all, does the P3 profiler measure PHP execution time only, or does it also include SQL execution time?

Then:

twentyfortysix commented 8 years ago

Hello,

I'll try to answer all questions. First of all, does the P3 profiler measure PHP execution time only, or does it also include SQL execution time? p3 plugin profiler auto mat wordpress

how are your connection types set up?

// connection amog pages.. like people to projects etc.
function my_connection_types() {
    p2p_register_connection_type( 
        array(
            'name' => 'page_to_pages',
            'from' => 'page',
            'to' => 'page' ,
            'title' => array(
                'from' => __( 'Přípojené lidi z týmu', 'my-textdomain' ),
                'to' => __( 'Propojen(a) s projektama/kauzama', 'my-textdomain' )
            )
        ) 
    );
    p2p_register_connection_type( 
        array(
            'name' => 'page_to_studie',
            'from' => 'page',
            'to' => 'studie' ,
            'title' => array(
                'from' => __( 'Přípojené studie', 'my-textdomain' ),
                'to' => __( 'Propojené se strankama', 'my-textdomain' )
            )
        ) 
    );
    p2p_register_connection_type( 
        array(
            'name' => 'post_to_page',
            'from' => 'post',
            'to' => 'page' ,
            'title' => array(
                'from' => __( 'Přípojené stránky', 'my-textdomain' ),
                'to' => __( 'Propojené články', 'my-textdomain' )
            )
        ) 
    );
}
add_action( 'p2p_init', 'my_connection_types' );

how many posts do you have of each type? posts: 810 pages: 185 studie: 0

how many connections of each type do you have? page_to_pages Stránky ↔ Stránky 58 page_to_studie Stránky → Studie 0 post_to_page Příspěvky → Stránky 1

twentyfortysix commented 8 years ago

Hello, it turns out that the problem is the second green column next to the posts2posts brown column. The green one is the "Responsive Flicker gallery" plugin. If The plugin runs on a page it not only slows down the page but influences the posts2posts heavily make it slow as well.. God knows why.