Open StamosArhon opened 3 months ago
USER_TWEETS
endpoint has a limit as to how far back you can go, since it scrapes from the user's page. For your use case, the recommended endpoint will be TWEET_SEARCH
which lets you search all tweets from a user (or using any other filter). It also lets you search by date ranges. The TWEET_SEARCH
endpoint allows access to all tweets, even the oldest ones.
These are all the search filters TWEET_SEARCH
supports
So when the first json file is filled, the script gets the id of the last saved tweet and creates the next json file to keep saving from the next tweet
In your case, you can use the fromUsers
field combined with sinceId
to get the necessary results.
Thank you very much for the suggestion. I did try to use the TWEET_SEARCH endpoint and created a sample script to just check if it was fetching tweets from a given date range:
const { Rettiwt } = require('rettiwt-api');
require('dotenv').config(); // Load environment variables from .env
/**
 * Fetches the target user's tweets for a fixed date range (2022-01-01 through
 * 2022-12-31, UTC) through rettiwt-api's tweet search, following the
 * pagination cursor from one batch to the next.
 *
 * Pagination stops when a batch has no usable `list`, when a batch is empty,
 * or when the response carries no further cursor. Stopping on an empty batch
 * matters: the search can answer with an empty list *and* a fresh cursor, and
 * looping on the cursor alone would then never terminate.
 *
 * Errors are caught and logged, not rethrown.
 *
 * @returns {Promise<Array>} The tweets collected so far (empty or partial if
 *   an error occurred).
 */
async function fetchTweetsFromDateRange() {
  // Declared outside the try block so partial results survive an error.
  const allTweets = [];

  try {
    console.log("Starting tweet fetch process...");

    const rettiwt = new Rettiwt({
      apiKey: process.env.API_KEY,
      logging: true, // Enable rettiwt-api's internal logging
    });

    const startDate = new Date('2022-01-01T00:00:00.000Z');
    const endDate = new Date('2022-12-31T23:59:59.000Z');
    console.log("Start Date:", startDate);
    console.log("End Date:", endDate);

    const filter = {
      // NOTE(review): the library maintainer indicates this field expects
      // usernames rather than numeric user IDs — confirm and replace the value.
      fromUsers: ['579067205'], // Target user
      startDate: startDate, // Start date for filtering
      endDate: endDate, // End date for filtering
    };

    let cursor;
    do {
      console.log("Fetching tweets with cursor:", cursor);
      const tweets = await rettiwt.tweet.search(filter, 20, cursor);
      console.log("Tweets object received:", tweets);

      // Guard against a missing response as well as a missing list, so that
      // reading `tweets.next` below can never throw.
      if (!tweets || !Array.isArray(tweets.list)) {
        console.warn("Warning: No tweets.list property found in response.");
        break;
      }

      allTweets.push(...tweets.list);
      console.log(`Fetched ${tweets.list.length} tweets in this batch. Total: ${allTweets.length}`);

      // An empty batch is treated as the end of the results, even if a cursor
      // came back with it; otherwise the loop would spin forever.
      if (tweets.list.length === 0) {
        console.log("Received an empty batch; stopping pagination.");
        break;
      }

      cursor = tweets.next ? tweets.next.value : undefined;

      // Throttle only between requests; there is nothing to wait for after
      // the final page.
      if (cursor) {
        await new Promise((resolve) => setTimeout(resolve, 1000)); // Wait for 1 second (adjust if needed)
      }
    } while (cursor);

    console.log(`Final tweet count: ${allTweets.length}`);
    console.log("All fetched tweets:", allTweets);
  } catch (err) {
    console.error("Error fetching tweets:", err);
    if (err.response) {
      console.error("API Response Status:", err.response.status);
      console.error("API Response Data:", err.response.data);
    }
  }

  return allTweets;
}
// Script entry point: start the fetch. The function catches and logs its own
// errors, so the returned promise is intentionally not awaited here.
void fetchTweetsFromDateRange();
This is the log I get from it:
Starting tweet fetch process...
Start Date: 2022-01-01T00:00:00.000Z
End Date: 2022-12-31T23:59:59.000Z
Fetching tweets with cursor: undefined
[Rettiwt-API] [2024-07-28T11:12:13.307Z] [REQUEST] {"resource":"TWEET_SEARCH","args":{"filter":{"fromUsers":["579067205"],"startDate":"2022-01-01T00:00:00.000Z","endDate":"2022-12-31T23:59:59.000Z"},"count":20}}
[Rettiwt-API] [2024-07-28T11:12:13.307Z] [AUTHORIZATION] {"authenticated":true}
[Rettiwt-API] [2024-07-28T11:12:13.307Z] [VALIDATE] {"target":"FETCH_ARGS"}
[Rettiwt-API] [2024-07-28T11:12:13.309Z] [GET] {"target":"HTTPS_AGENT"}
[Rettiwt-API] [2024-07-28T11:12:13.310Z] [GET] {"target":"USER_CREDENTIAL"}
Tweets object received: CursoredData {
list: [],
next: Cursor {
value: 'DAADDAABAAgAAgAAAAIIAAMAAAAACAAEAAAAAAoABRk5H1-wgCcQCgAGGTkfX7B_2PAAAA'
}
}
Fetched 0 tweets in this batch. Total: 0
Fetching tweets with cursor: DAADDAABAAgAAgAAAAIIAAMAAAAACAAEAAAAAAoABRk5H1-wgCcQCgAGGTkfX7B_2PAAAA
[Rettiwt-API] [2024-07-28T11:12:14.675Z] [REQUEST] {"resource":"TWEET_SEARCH","args":{"filter":{"fromUsers":["579067205"],"startDate":"2022-01-01T00:00:00.000Z","endDate":"2022-12-31T23:59:59.000Z"},"count":20,"cursor":"DAADDAABAAgAAgAAAAIIAAMAAAAACAAEAAAAAAoABRk5H1-wgCcQCgAGGTkfX7B_2PAAAA"}}
[Rettiwt-API] [2024-07-28T11:12:14.675Z] [AUTHORIZATION] {"authenticated":true}
[Rettiwt-API] [2024-07-28T11:12:14.675Z] [VALIDATE] {"target":"FETCH_ARGS"}
[Rettiwt-API] [2024-07-28T11:12:14.676Z] [GET] {"target":"HTTPS_AGENT"}
[Rettiwt-API] [2024-07-28T11:12:14.676Z] [GET] {"target":"USER_CREDENTIAL"}
Tweets object received: CursoredData {
list: [],
next: Cursor {
value: 'DAADDAABAAgAAgAAAAIIAAMAAAAACAAEAAAAAQoABRk5H1-wgCcQCgAGGTkfX7B_seAAAA'
}
}
Fetched 0 tweets in this batch. Total: 0
Again, I tried a bunch of variations but I can't seem to get tweets from a given date, much less being able to get them all. I don't know if I am making some obvious mistake, so I'm sorry if it's something trivial that I am not seeing, but in any case thanks for taking the time to help.
fromUsers: ['579067205']
Here, it should be the username and not the ID.
I wrote a script that will get all tweets from a specific public account and save them in a json file in batches of 500, starting from the current day and going back in time until the first tweet. So when the first json file is filled, the script gets the id of the last saved tweet and creates the next json file to keep saving from the next tweet. The issue is that I started from 25th of July 2024 and I can only go back to early June but no further. From a certain point on the new json files opened are starting again from the current day.
This is the script:
I have been racking my brain with this for a week now, trying every workaround I could think of, so any insight would be more than welcome! :)