humanmade / authorship

A modern approach to author attribution in WordPress.
GNU General Public License v3.0
66 stars 7 forks source link

WP CLI migration command from publishpress authors #100

Closed tomjn closed 2 years ago

tomjn commented 2 years ago

A migration command for taking a publishpress authors site and migrating it to authorship would be very useful.

We had such a command for Siemens Ingenuity but it has issues, and requires additional steps that have been lost to time.

The goal of this issue is:

tomjn commented 2 years ago

This is the command previously used:

   /**
     * Misc commands to migrate data from PPA to authorship
     *
     * @subcommand migrate-authorship-data
     */
    public function migrate_to_authorship() {
        global $wpdb;

        // Migrate PPA terms to have the name and slug as the user id attached to each, to follow authorship conventions
        WP_CLI::line( 'Migrating PPA terms to Authorship terms: Updating name/slug to User ID.' );
        $wpdb->query( 'update  wp_terms as t1,
            ( select t.term_id, tm.meta_value from wp_terms t join wp_term_taxonomy tt on t.term_id = tt.term_id join wp_termmeta tm on t.term_id = tm.term_id and tm.meta_key = "user_id" where tt.taxonomy = "author" ) as t2
            set t1.name = t2.meta_value, t1.slug = t2.meta_value
            where t1.term_id = t2.term_id' );
        WP_CLI::line( 'Done.' );

        // Migrate PPA terms to switch their taxonomy to 'authorship' instead of 'author'
        WP_CLI::line( 'Migrating PPA terms to Authorship terms: Updating taxonomy of terms.' );
        $wpdb->query( 'update wp_term_taxonomy set  taxonomy = "authorship" where taxonomy = "author"' );
        WP_CLI::line( 'Finished the script.' );

        // Remove all term meta associated with previously-PPA terms
        WP_CLI::line( 'Migrating PPA terms to Authorship terms: Removing all related meta entries.' );
        $wpdb->query( 'delete from wp_termmeta where term_id IN ( select term_id from (
                select t.term_id from wp_terms t join wp_term_taxonomy tt on t.term_id = tt.term_id join wp_termmeta tm on t.term_id = tm.term_id and tm.meta_key = "user_id" where tt.taxonomy = "authorship"
            ) as t1 )' );
        WP_CLI::line( 'Done.' );

        // Migrate existing posts to set authorship relations for existing users, moving from using the post.author altogether
        WP_CLI::line( 'Migrating posts without authorship to include at least the main author.' );
        $posts_per_page = 100; // posts per page will be used for ticks
        $query_args = [
            'ep_integrate' => false,
            'post_type' => 'post',
            'post_status' => 'publish',
            'posts_per_page' => $posts_per_page,
            'paged' => 1,
            'tax_query' => [
                [
                    'taxonomy' => 'authorship',
                    'operator' => 'NOT EXISTS',
                ],
            ],
        ];
        $query = new WP_Query( $query_args );

        // New progress bar – provide number of all posts we'll be dealing with as well as a size of a batch processed before the first/next tick will happen
        $progress = \WP_CLI\Utils\make_progress_bar(
            sprintf(
                'Starting the command. Found %d posts',
                $query->found_posts
            ),
            $query->found_posts,
            $posts_per_page
        );

        $progress->display();

        do {
            $offset = $posts_per_page * $query_args['paged'];
            WP_CLI::line( sprintf( 'Processing %d posts at offset of %d of %d total found posts', count( $query->posts ), $offset, $query->found_posts ) );

            foreach ( $query->posts as $post ) {
                wp_set_object_terms( $post->ID, (string) $post->post_author, 'authorship' );
            }

            $progress->tick( $posts_per_page );

            // Free up memory.
            $this->stop_the_insanity();

            $query_args['paged']++;
            sleep( 5 );
            $query = new WP_Query( $query_args );
        } while ( $query->have_posts() );

        $progress->finish();

        WP_CLI::line( 'Finished the script.' );
    }

It has some issues:

My suggestion: