hoblin / x-tracker

Stats analysis tool for selected tweets.
MIT License
12 stars 2 forks source link

Backend tracking #57

Open rpominov opened 11 months ago

rpominov commented 11 months ago

This is not really a feature request; I just want to share my code in case you want to implement tracking internally on the backend instead of via a browser extension.

I've used this code to collect this data: https://rpominov.github.io/twitter-tracking/

The code is based on what happens in the browser when a tweet is opened in incognito mode.

const fs = require("fs");

// Edit this: the list of tweet URLs to track.
// The last "/"-separated segment of each URL is used as the tweet id by
// `track`, and `authenticate` fetches `tweets[0]` to obtain guest credentials.
// Note that `main` shuffles this array in place before every pass.
const tweets = [
  "https://twitter.com/P_Kallioniemi/status/1674360288445964288",
  "https://twitter.com/elonmusk/status/1710538090173837603",
];

// Flip to true to print every HTTP response handled by the script.
const DEBUG = false;

// Prints the URL, status code and headers of a fetch response.
// Does nothing (and does not touch `resp`) unless DEBUG is enabled.
const logResponse = (resp) => {
  if (!DEBUG) {
    return;
  }

  const { url, status } = resp;
  const headers = Object.fromEntries(resp.headers.entries());
  console.log(url, status, headers);
};

// Returns a promise that resolves (with no value) after `seconds` seconds.
const delay = (seconds) => {
  const milliseconds = seconds * 1000;
  return new Promise((done) => {
    setTimeout(done, milliseconds);
  });
};

// Shuffles `array` in place using the Fisher–Yates algorithm and returns
// nothing.
//
// Elements are only ever swapped by index, never used as lookup keys, so
// duplicate values and non-string elements (objects, numbers, ...) are
// shuffled correctly. Empty and single-element arrays are left untouched.
const shuffle = (array) => {
  for (let i = array.length - 1; i > 0; i -= 1) {
    // Pick a random position in the not-yet-fixed prefix [0, i].
    const j = Math.floor(Math.random() * (i + 1));
    [array[i], array[j]] = [array[j], array[i]];
  }
};

// Calls `fn` and resolves with its result. When `fn` throws or rejects, the
// error is logged and `fn` is called again after a 20 second pause, up to
// `retries` additional times. Once the retries are used up, the last error
// is rethrown to the caller.
const retry = async (fn, retries = 5) => {
  for (let remaining = retries; ; remaining -= 1) {
    try {
      return await fn();
    } catch (error) {
      if (!(remaining > 0)) {
        throw error;
      }
      console.log(error, `[Retrying... (${remaining})]`);
      await delay(20);
    }
  }
};

// Bearer token sent in the `authorization` header of the GraphQL request
// made by `track`.
// don't know how to get this programmatically,
// but maybe this one will work forever
const authorizationToken =
  "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA";

// Base URL of the GraphQL endpoint; `track` appends
// `/TweetResultByRestId?...` to it.
// hope this won't change
const apiBase = "https://twitter.com/i/api/graphql/mbnjGF4gOwo5gyp9pe5s4A";

// Guest credentials shared by `authenticate` (writer) and `track` (reader).
// `guestId` is the value of the `guest_id` cookie; `guestToken` is the `gt`
// value extracted from the tweet page and sent as `x-guest-token`.
let guestId = null;
let guestToken = null;

// Obtains guest credentials by requesting the first tweet URL the same way
// a logged-out browser would:
//   1. (only once) read the `guest_id` cookie from the first response;
//   2. request the page again with that cookie and extract the guest token
//      from the `gt=<digits>;` fragment in the response body.
// Both steps go through `retry`. Resolves with no value once `guestToken`
// has been set; rejects if either step keeps failing.
const authenticate = async () => {
  console.log("Authenticating...");

  const tweetUrl = tweets[0];

  // id shouldn't expire, so we fetch it just once
  if (guestId === null) {
    // Assign to the module-level variable. Declaring a new `const guestId`
    // here would shadow it, leaving the outer one null forever and sending
    // `guest_id=null` in the request below.
    guestId = await retry(async () => {
      const resp = await fetch(tweetUrl, { redirect: "manual" });
      logResponse(resp);
      // `headers.get` returns null when the header is missing
      const setCookie = resp.headers.get("set-cookie");
      if (!setCookie?.startsWith("guest_id=")) {
        throw new Error("No guest id cookie");
      }
      // Keep everything between "guest_id=" and the first ";"
      return setCookie.split(";")[0].slice("guest_id=".length);
    });
  }

  return retry(async () => {
    const resp = await fetch(tweetUrl, {
      headers: { cookie: `guest_id=${guestId}` },
    });
    logResponse(resp);

    const text = await resp.text();
    const match = text.match(/gt=(\d+);/);

    if (!match) {
      // Surface the whole body so the failure can be diagnosed from the log
      throw new Error(text);
    }

    guestToken = match[1];
  });
};

// Fetches the `TweetResultByRestId` GraphQL payload for one tweet as a guest
// and writes the raw JSON to `./<tweetId>_<timestamp>.json`.
//
// `tweetUrl` is a tweet URL whose last "/"-separated segment is the tweet id.
// The request goes through `retry`; the returned promise rejects if every
// attempt fails. When the API reports error code 239 (guest token expired)
// the token is cleared and the call resolves WITHOUT writing a file, so the
// tweet is picked up again on the next pass.
const track = async (tweetUrl) => {
  const tweetId = tweetUrl.split("/").at(-1);

  const params = new URLSearchParams();

  params.set(
    "variables",
    JSON.stringify({
      tweetId,
      withCommunity: false,
      includePromotedContent: false,
      withVoice: false,
    })
  );

  params.set(
    "features",
    JSON.stringify({
      creator_subscriptions_tweet_preview_api_enabled: true,
      tweetypie_unmention_optimization_enabled: true,
      responsive_web_edit_tweet_api_enabled: true,
      graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
      view_counts_everywhere_api_enabled: true,
      longform_notetweets_consumption_enabled: true,
      responsive_web_twitter_article_tweet_consumption_enabled: false,
      tweet_awards_web_tipping_enabled: false,
      responsive_web_home_pinned_timelines_enabled: false,
      freedom_of_speech_not_reach_fetch_enabled: true,
      standardized_nudges_misinfo: true,
      tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled: true,
      longform_notetweets_rich_text_read_enabled: true,
      longform_notetweets_inline_media_enabled: true,
      responsive_web_graphql_exclude_directive_enabled: true,
      verified_phone_label_enabled: false,
      responsive_web_media_download_video_enabled: false,
      responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
      responsive_web_graphql_timeline_navigation_enabled: true,
      responsive_web_enhance_cards_enabled: false,
    })
  );

  await retry(async () => {
    // Authenticate inside the retried closure: a previous attempt may have
    // cleared `guestToken` (see below) before failing, and the next attempt
    // must not send a null token.
    if (!guestToken) {
      await authenticate();
    }

    const response = await fetch(`${apiBase}/TweetResultByRestId?${params}`, {
      headers: {
        authorization: `Bearer ${authorizationToken}`,
        "cache-control": "no-cache",
        "content-type": "application/json",
        pragma: "no-cache",
        "x-guest-token": guestToken,
        "x-twitter-active-user": "yes",
        "x-twitter-client-language": "en-GB",
      },
    });
    logResponse(response);

    // Rate limit is almost used up: drop the token so that the next request
    // authenticates again and gets a fresh one.
    if (response.headers.get("x-rate-limit-remaining") === "1") {
      guestToken = null;
    }

    const text = await response.text();

    let json = null;
    try {
      json = JSON.parse(text);
    } catch (e) {
      // Body is not JSON; reported together with the status code below
    }

    if (json === null) {
      throw new Error(`${response.status} ${text}`);
    }

    if (json.errors) {
      // guest token expired, recoverable error
      if (json.errors[0]?.code === 239) {
        guestToken = null;
        return;
      }

      throw new Error(JSON.stringify(json.errors));
    }

    const filename = `./${tweetId}_${new Date().getTime()}.json`;
    fs.writeFileSync(filename, JSON.stringify(json));
    console.log(`Wrote ${filename}`);
  });
};

// True while a pass over `tweets` is in progress; used to keep interval
// ticks from starting a second, overlapping pass.
let isMainRunning = false;

// Runs one tracking pass: shuffles `tweets` in place, then tracks each tweet
// in turn with a random 5-15 second pause after each one.
//
// A failure to track one tweet (after `track` has used up its retries) is
// logged and the pass continues with the next tweet, so the returned promise
// does not reject for tracking errors and the process keeps running.
// If a previous pass has not finished yet, the call returns immediately:
// two passes shuffling and iterating the same array at once would skip or
// repeat tweets.
const main = async () => {
  if (isMainRunning) {
    console.log("Previous run is still in progress, skipping this one");
    return;
  }

  isMainRunning = true;
  try {
    shuffle(tweets);
    for (const tweet of tweets) {
      try {
        await track(tweet);
      } catch (e) {
        console.error(`Failed to track ${tweet}`, e);
      }
      await delay(Math.random() * 10 + 5);
    }
  } catch (e) {
    // Anything unexpected outside of `track`; log instead of leaving an
    // unhandled rejection behind.
    console.error(e);
  } finally {
    isMainRunning = false;
  }
};

// Run one pass right away, then start a new one every 3 minutes.
main();
setInterval(main, 1000 * 60 * 3);

If authorizationToken and apiBase stop working, here's the request I got them from (filter requests by graphql after opening a tweet in incognito):

Screenshot 2023-10-15 at 18 07 10