Open vtempest opened 4 days ago
@vtempest Is there a library for that, or would we need to write it from scratch?
There is a google-api-python-client from google with Apache license. Is it possible to use it?
Yes, Apache should be good to go!
I'm looking for an opportunity to contribute to this project. I have some ideas about how to implement this feature, but I would like to discuss an implementation plan first. Should I open a discussion topic, or can we discuss it right here?
@vodkar thanks for being open to contribute. We can discuss here. Do you have any ideas where you would start from yourself so far?
@vodkar thanks for being open to contribute. We can discuss here. Do you have any ideas where you would start from yourself so far?
I thought about splitting this task into the following subtasks:
Add an InputFormat named GOOGLE_DOC. At this stage, it will be possible to retrieve content from public Google Docs, for example: https://docs.google.com/document/d/e/2PACX-1vRK-fDVc5WzQpAfjoEzDMudsG9GiMNG-LUTgZ6hrKxwTCCRvdUBDYDwpW9MiUOe_C5jryAfCQjgw-Jw/pub
At this stage, I plan simply to convert the input document to PDF and then parse it with the existing StandardPdfPipeline.
` const { google } = require('googleapis'); const express = require('express'); const fs = require('fs').promises; const path = require('path'); const session = require('express-session');
// Express application and the port it listens on (PORT env var, default 3000).
const app = express();
const PORT = process.env.PORT || 3000;

// Configure session middleware.
// NOTE(review): the fallback secret is a placeholder literal; set SESSION_SECRET
// in any real deployment so session cookies cannot be forged.
app.use(session({
  secret: process.env.SESSION_SECRET || 'your-secret-key',
  resave: false,
  saveUninitialized: true,
  // The session cookie is marked secure only when NODE_ENV is 'production'.
  cookie: { secure: process.env.NODE_ENV === 'production' },
}));
// Configure OAuth2 settings.
// OAuth client credentials file, resolved against the current working directory.
const CREDENTIALS_PATH = path.join(process.cwd(), 'credentials.json');
// Directory that holds one `<userId>.token.json` file per user.
const TOKENS_DIR = path.join(process.cwd(), 'tokens');
// Scopes requested when generating the consent URL.
const SCOPES = [
  'https://www.googleapis.com/auth/drive.file',
  'https://www.googleapis.com/auth/documents',
];
/**
 * Wraps a Google OAuth2 client and persists per-user tokens as JSON files
 * under TOKENS_DIR (`<userId>.token.json`).
 *
 * Call `initialize()` once before using any other method.
 *
 * NOTE(review): a single OAuth2 client instance is shared by every user and
 * `getAuthClient` mutates its credentials. Concurrent requests for different
 * users can therefore observe each other's credentials; consider creating one
 * client per user/request.
 */
class GoogleAuthManager {
  constructor() {
    // Set by initialize(); null until then.
    this.oAuth2Client = null;
  }

  /**
   * Reads the client credentials from CREDENTIALS_PATH, builds the OAuth2
   * client, and makes sure TOKENS_DIR exists.
   *
   * @returns {Promise<void>}
   * @throws Rethrows (after logging) any error from reading/parsing the
   *   credentials file or creating the tokens directory.
   */
  async initialize() {
    try {
      const credentials = JSON.parse(await fs.readFile(CREDENTIALS_PATH, 'utf8'));
      const { client_secret, client_id } = credentials.installed;

      this.oAuth2Client = new google.auth.OAuth2(
        client_id,
        client_secret,
        'http://localhost:3000/oauth2callback' // Override redirect_uris with our server URL
      );

      // Ensure tokens directory exists
      await fs.mkdir(TOKENS_DIR, { recursive: true });
    } catch (error) {
      console.error('Error initializing OAuth client:', error);
      throw error;
    }
  }

  /**
   * Builds the path of the token file belonging to `userId`.
   *
   * @param {string} userId
   * @returns {string}
   */
  tokenPathFor(userId) {
    return path.join(TOKENS_DIR, `${userId}.token.json`);
  }

  /**
   * Loads the stored token for `userId`, refreshing and re-saving it when it
   * is expired or about to expire.
   *
   * @param {string} userId
   * @returns {Promise<object|null>} The (shared) OAuth2 client with the
   *   user's credentials set, or null when no usable token is available, in
   *   which case the caller should start the auth flow.
   * @throws {Error} When `initialize()` has not completed successfully.
   */
  async getAuthClient(userId) {
    if (!this.oAuth2Client) {
      throw new Error('OAuth client not initialized');
    }

    try {
      // Try to load existing token
      const token = JSON.parse(await fs.readFile(this.tokenPathFor(userId), 'utf8'));
      this.oAuth2Client.setCredentials(token);

      // Check if token needs refresh
      if (this.isTokenExpired(token)) {
        const newToken = await this.refreshToken(token);
        await this.saveToken(userId, newToken);
        this.oAuth2Client.setCredentials(newToken);
      }

      return this.oAuth2Client;
    } catch (error) {
      // If token doesn't exist or is invalid, return null
      // The application should then redirect to the auth flow
      return null;
    }
  }

  /**
   * Tells whether a token must be refreshed before use.
   *
   * @param {{expiry_date?: number}} token
   * @returns {boolean} True when the token has no `expiry_date` or expires in
   *   less than 5 minutes.
   */
  isTokenExpired(token) {
    if (!token.expiry_date) return true;

    // Refresh slightly early so a token does not expire mid-request.
    const refreshMarginMs = 5 * 60 * 1000;
    return token.expiry_date <= Date.now() + refreshMarginMs;
  }

  /**
   * Exchanges the given token for fresh credentials.
   *
   * @param {object} token
   * @returns {Promise<object>} The refreshed credentials.
   */
  async refreshToken(token) {
    this.oAuth2Client.setCredentials(token);
    const { credentials } = await this.oAuth2Client.refreshAccessToken();
    return credentials;
  }

  /**
   * @returns {string} The consent-screen URL requesting SCOPES with offline access.
   */
  getAuthUrl() {
    return this.oAuth2Client.generateAuthUrl({
      access_type: 'offline',
      scope: SCOPES,
      prompt: 'consent', // Force prompt to ensure we get a refresh token
    });
  }

  /**
   * Exchanges an authorization code for tokens and stores them for `userId`.
   *
   * @param {string} code - Authorization code from the OAuth redirect.
   * @param {string} userId
   * @returns {Promise<object>} The tokens that were saved.
   */
  async handleCallback(code, userId) {
    const { tokens } = await this.oAuth2Client.getToken(code);
    await this.saveToken(userId, tokens);
    return tokens;
  }

  /**
   * Writes `tokens` as JSON to the user's token file.
   *
   * @param {string} userId
   * @param {object} tokens
   * @returns {Promise<void>}
   */
  async saveToken(userId, tokens) {
    await fs.writeFile(this.tokenPathFor(userId), JSON.stringify(tokens));
  }
}
// Create and initialize the auth manager.
// Initialization runs in the background; a failure is only logged here.
const authManager = new GoogleAuthManager();
authManager.initialize().catch(console.error);
// Express routes

// Starts the OAuth flow: ensures the session carries a user ID, then
// redirects the browser to Google's consent screen.
app.get('/auth/google', (req, res) => {
  if (!req.session.userId) {
    // A random UUID avoids the collisions a millisecond timestamp can produce
    // (two sessions created in the same millisecond would share a token file).
    req.session.userId = require('crypto').randomUUID();
  }
  const authUrl = authManager.getAuthUrl();
  res.redirect(authUrl);
});
// OAuth redirect target: exchanges the authorization code for tokens and
// stores them for the user ID held in the session.
app.get('/oauth2callback', async (req, res) => {
  const { code } = req.query;
  const userId = req.session.userId;

  // A repeated `code` query parameter is parsed as an array; only accept a string.
  if (typeof code !== 'string' || !code || !userId) {
    return res.status(400).send('Missing required parameters');
  }

  try {
    await authManager.handleCallback(code, userId);
    res.redirect('/success'); // Redirect to your application's success page
  } catch (error) {
    console.error('Error handling OAuth callback:', error);
    res.status(500).send('Authentication failed');
  }
});
// Example protected route that uses the auth client.
// Responds with the document returned by the Docs API as JSON; sessions
// without a user ID or a usable token are redirected to the auth flow.
app.get('/docs/:docId', async (req, res) => {
  const userId = req.session.userId;
  if (!userId) {
    return res.redirect('/auth/google');
  }

  try {
    const authClient = await authManager.getAuthClient(userId);
    if (!authClient) {
      return res.redirect('/auth/google');
    }

    // Use the auth client to access Google Docs API
    const docs = google.docs({ version: 'v1', auth: authClient });
    const document = await docs.documents.get({
      documentId: req.params.docId,
    });
    res.json(document.data);
  } catch (error) {
    console.error('Error accessing document:', error);
    res.status(500).send('Error accessing document');
  }
});
// Success page shown after the OAuth callback completes.
app.get('/success', (req, res) => {
  res.send('Authentication successful! You can close this window.');
});
// Start the server
app.listen(PORT, () => {
  console.log(`Server running on http://localhost:${PORT}`);
});
// Example of using the auth manager in your application code

/**
 * Returns an authorized OAuth2 client for `userId`.
 *
 * @param {string} userId
 * @returns {Promise<object>} The client returned by `authManager.getAuthClient`.
 * @throws {Error} 'User not authorized' when no client is available for the
 *   user; any error from `authManager.getAuthClient` is logged and rethrown.
 */
async function getAuthorizedClient(userId) {
  try {
    const authClient = await authManager.getAuthClient(userId);
    if (!authClient) {
      // Handle unauthorized state in your application
      throw new Error('User not authorized');
    }
    return authClient;
  } catch (error) {
    console.error('Error getting authorized client:', error);
    throw error;
  }
}
module.exports = { GoogleAuthManager, getAuthorizedClient }
const { google } = require('googleapis'); const fs = require('fs').promises; const path = require('path');
// Configure OAuth2 credentials.
// OAuth client credentials file, resolved against the current working directory.
const CREDENTIALS_PATH = path.join(process.cwd(), 'credentials.json');
// File in which the obtained token is stored and from which it is reloaded.
const TOKEN_PATH = path.join(process.cwd(), 'token.json');
// Scopes requested when generating the consent URL.
const SCOPES = [
  'https://www.googleapis.com/auth/drive.file',
  'https://www.googleapis.com/auth/documents',
];
/**
 * Builds an OAuth2 client from the credentials file and attaches a token.
 *
 * The token stored at TOKEN_PATH is used when it can be read and parsed;
 * otherwise the interactive flow in `getNewToken` is started.
 *
 * @returns {Promise<object>} An OAuth2 client carrying credentials.
 */
async function authorize() {
  const rawCredentials = await fs.readFile(CREDENTIALS_PATH);
  const { installed } = JSON.parse(rawCredentials);
  const { client_secret, client_id, redirect_uris } = installed;

  const client = new google.auth.OAuth2(
    client_id,
    client_secret,
    redirect_uris[0]
  );

  try {
    const rawToken = await fs.readFile(TOKEN_PATH);
    const storedToken = JSON.parse(rawToken);
    client.setCredentials(storedToken);
    return client;
  } catch (error) {
    // No usable stored token: fall back to asking the user to authorize.
    return getNewToken(client);
  }
}
async function getNewToken(oAuth2Client) { const authUrl = oAuth2Client.generateAuthUrl({ access_type: 'offline', scope: SCOPES, });
console.log('Authorize this app by visiting:', authUrl);
// This is a simple example - in production, you'd want to use a proper web server const readline = require('readline'); const rl = readline.createInterface({ input: process.stdin, output: process.stdout, });
const code = await new Promise(resolve => { rl.question('Enter the code from that page here: ', code => { rl.close(); resolve(code); }); });
const { tokens } = await oAuth2Client.getToken(code); oAuth2Client.setCredentials(tokens); await fs.writeFile(TOKEN_PATH, JSON.stringify(tokens)); console.log('Token stored to', TOKEN_PATH); return oAuth2Client; }
// Escapes the characters that would otherwise be parsed as markup in HTML text.
function escapeHtmlText(text) {
  return String(text)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}

/**
 * Fetches a Google Doc and renders its paragraphs as a minimal HTML string.
 *
 * Only paragraph text runs are rendered; bold, italic and underline styles
 * become <strong>, <em> and <u>. All other structural elements are skipped.
 *
 * @param {object} auth - Authorized client passed to `google.docs`.
 * @param {string} documentId - ID of the document to read.
 * @returns {Promise<string>} The document wrapped in `<html><body>…</body></html>`.
 * @throws Rethrows (after logging) any error from the Docs API call.
 */
async function readDocAsHtml(auth, documentId) {
  const docs = google.docs({ version: 'v1', auth });

  try {
    // Get the document content
    const { data } = await docs.documents.get({ documentId: documentId });

    // Convert document structure to HTML
    let html = '<html><body>';

    // Process document elements
    const content = (data.body && data.body.content) || [];
    for (const element of content) {
      if (!element.paragraph) continue;

      html += '<p>';
      for (const paragraphElement of element.paragraph.elements || []) {
        if (!paragraphElement.textRun) continue;

        const style = paragraphElement.textRun.textStyle || {};

        // Document text is untrusted content: escape it so characters such as
        // '<' or '&' cannot be interpreted as markup in the generated HTML.
        let styledText = escapeHtmlText(paragraphElement.textRun.content);

        // Apply basic styling
        if (style.bold) styledText = `<strong>${styledText}</strong>`;
        if (style.italic) styledText = `<em>${styledText}</em>`;
        if (style.underline) styledText = `<u>${styledText}</u>`;

        html += styledText;
      }
      html += '</p>';
    }

    html += '</body></html>';
    return html;
  } catch (error) {
    console.error('Error reading document:', error);
    throw error;
  }
}
/**
 * Inserts the text content of `html` at the beginning of a Google Doc.
 *
 * The HTML is not rendered: tags are stripped and only the remaining text is
 * inserted.
 *
 * @param {object} auth - Authorized client passed to `google.docs`.
 * @param {string} documentId - ID of the document to update.
 * @param {string} html - HTML whose text should be inserted.
 * @returns {Promise<void>}
 * @throws Rethrows (after logging) any error from the Docs API call.
 */
async function writeHtmlToDoc(auth, documentId, html) {
  const docs = google.docs({ version: 'v1', auth });

  // Parse HTML content (you might want to use a proper HTML parser here)
  const cleanHtml = html
    .replace(/<[^>]*>/g, '') // Remove HTML tags for this simple example
    .trim();

  try {
    // Create requests array for document updates
    const requests = [{
      insertText: {
        location: {
          index: 1, // Insert at the beginning of the document
        },
        text: cleanHtml,
      },
    }];

    // Execute the update
    await docs.documents.batchUpdate({
      documentId: documentId,
      requestBody: {
        requests: requests,
      },
    });

    console.log('Document updated successfully');
  } catch (error) {
    console.error('Error updating document:', error);
    throw error;
  }
}
// Example usage

/**
 * Demo entry point: authorizes, prints the document as HTML, then writes a
 * sample HTML snippet back to the same document. Errors are logged, not thrown.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    // Authorize and get client
    const auth = await authorize();

    // Example document ID (from the URL of a Google Doc)
    const documentId = 'YOUR_DOCUMENT_ID';

    // Read document as HTML
    console.log('Reading document...');
    const html = await readDocAsHtml(auth, documentId);
    console.log('Document HTML:', html);

    // Write HTML back to document
    console.log('Writing HTML to document...');
    const newHtml = '<p>Hello from Node.js!</p><p>This is a <strong>test</strong>.</p>';
    await writeHtmlToDoc(auth, documentId, newHtml);
  } catch (error) {
    console.error('Error:', error);
  }
}
main(); `
Requested feature
Use the Google Docs API to fetch the HTML for a Google Docs URL or document ID: directly if the document is public, or after requiring login if it is private.
Alternatives
...