From eb908dd8f00f3a5f44c0e535aa4a8b37ed44373a Mon Sep 17 00:00:00 2001
From: Lightling
Date: Fri, 9 Feb 2024 20:31:21 -0500
Subject: [PATCH] concurrent retrieval with user-limited threadcount

---
 buildDb.js  | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 config.json |  26 +++++++++++
 getUser.js  |  58 +++++++++++++++++++++++++
 3 files changed, 207 insertions(+)
 create mode 100644 buildDb.js
 create mode 100644 config.json
 create mode 100644 getUser.js

diff --git a/buildDb.js b/buildDb.js
new file mode 100644
index 0000000..46fcbc1
--- /dev/null
+++ b/buildDb.js
@@ -0,0 +1,123 @@
+import { readFile } from 'fs/promises';
+
+import { getArg } from './args.js';
+import { getUser } from './getUser.js';
+import { error, log } from './log.js';
+
+const ctx = 'buildDb.js';
+
+/**
+ * Normalizes a requested concurrency limit.
+ *
+ * @param {unknown} raw - value as received (a number or a numeric string).
+ * @returns {number} `raw` as a positive integer, or 1 when it is not one.
+ */
+const parseThreadCount = (raw) => {
+  const parsed = Number.parseInt(String(raw), 10);
+  return Number.isInteger(parsed) && parsed > 0 ? parsed : 1;
+};
+
+/**
+ * Runs getUser() once per entry, keeping at most `threadMax` child processes
+ * alive at a time, and appends each child's stdout chunks to `entry.logs`.
+ *
+ * @param {Object[]} processes - jobs; each needs `user` and a `logs` array.
+ * @param {number|string} threadMax - maximum number of concurrent children.
+ * @param {string} directory - download directory handed to getUser().
+ * @returns {Promise} resolves with no value once every child has closed or
+ *   errored; it never rejects because of a child failure.
+ */
+const getMany = (processes, threadMax, directory) => new Promise((resolve) => {
+  const limit = parseThreadCount(threadMax);
+  let running = 0;
+  let index = 0;
+
+  const launch = (entry) => {
+    // 'error' and 'close' can both fire for one child; free its slot once.
+    let finished = false;
+    const onFinish = () => {
+      if (finished) {
+        return;
+      }
+      finished = true;
+      log(ctx, `Finished ${entry.user}/media`);
+      --running;
+      fillSlots();
+    };
+
+    const proc = getUser(entry.user, directory);
+    proc.stdout.on('data', (data) => {
+      entry.logs.push(data);
+    });
+    // stderr output is not a completion signal, so it must not free the slot.
+    proc.on('close', onFinish);
+    proc.on('error', onFinish);
+  };
+
+  const fillSlots = () => {
+    while (running < limit && index < processes.length) {
+      ++running;
+      launch(processes[index++]);
+    }
+
+    if (running === 0) {
+      resolve();
+    }
+  };
+
+  fillSlots();
+});
+
+/**
+ * Entry point: loads db.json from `path` and fetches /media for every entry.
+ *
+ * Uses getArg() for `path` (required) and `threads` (optional, defaults to
+ * 1). Failures are logged and end the run early.
+ *
+ * @returns {Promise} resolves with no value.
+ */
+const buildDb = async () => {
+  log(ctx, 'Grabbing db');
+
+  let directory;
+  try {
+    directory = getArg('path');
+  } catch (err) {
+    error(ctx, err);
+    return;
+  }
+
+  let threadMax = 1;
+  try {
+    threadMax = parseThreadCount(getArg('threads'));
+    log(ctx, `Using ${threadMax} threads`);
+  } catch (err) {
+    // NOTE(review): presumably getArg() throws when the flag is absent.
+    log(ctx, 'Using 1 thread');
+  }
+
+  let db;
+  try {
+    const file = await readFile(`${directory}/db.json`, { encoding: 'utf8' });
+    db = JSON.parse(file);
+  } catch (err) {
+    error(ctx, err);
+    return;
+  }
+  if (!Array.isArray(db)) {
+    error(ctx, new Error(`${directory}/db.json must contain a JSON array`));
+    return;
+  }
+
+  const processes = db.map((entry) => ({
+    ...entry,
+    logs: [],
+  }));
+
+  log(ctx, `Building db using /media for ${processes.length} users`);
+  await getMany(processes, threadMax, directory);
+
+  log(ctx, 'Building db using /search');
+};
+
+buildDb().catch((err) => error(ctx, err));
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..07259fa
--- /dev/null
+++ b/config.json
@@ -0,0 +1,26 @@
+{
+  "extractor": {
+    "twitter": {
+      "base-directory": ".",
+      "directory": [
+        "{author['name']}"
+      ],
+      "filename": {
+        "locals().get('bitrate', 0) > 0": "{author['name']}-{tweet_id}-{date:Olocal/%Y%m%d_%H%M%S}-vid{num}.{extension}",
+        "locals().get('bitrate') == 0": "{author['name']}-{tweet_id}-{date:Olocal/%Y%m%d_%H%M%S}-gif{num}.{extension}",
+        "": "{author['name']}-{tweet_id}-{date:Olocal/%Y%m%d_%H%M%S}-img{num}.{extension}"
+      },
+      "cookies": "$HOME/.config/gallery-dl/twitter-cookies.txt",
+      "timeline": {
+        "strategy": "media"
+      },
+      "postprocessors": [
+        {
+          "name": "metadata",
+          "event": "post",
+          "filename": "{author[name]}_{tweet_id}-{date:Olocal/%Y%m%d_%H%M%S}.json"
+        }
+      ]
+    }
+  }
+}
diff --git a/getUser.js b/getUser.js
new file mode 100644
index 0000000..74ce257
--- /dev/null
+++ b/getUser.js
@@ -0,0 +1,58 @@
+import { spawn } from 'child_process';
+
+import { getArg } from './args.js';
+import { error, log } from './log.js';
+
+const ctx = 'getUser.js';
+
+/**
+ * Wraps a value in single quotes so a POSIX shell reads it as one literal
+ * word; an embedded single quote is written as '\''.
+ */
+const shellQuote = (value) => `'${String(value).replace(/'/g, "'\\''")}'`;
+
+/**
+ * Spawns gallery-dl for one user's /media timeline.
+ *
+ * @param {string} user - account name, interpolated into the twitter.com URL.
+ * @param {string} path - directory passed to gallery-dl with `-d`.
+ * @returns {import('child_process').ChildProcess} the spawned child, so the
+ *   caller can attach its own listeners.
+ */
+export const getUser = (user, path) => {
+  const url = `https://twitter.com/${user}/media`;
+  let args;
+  try {
+    args = getArg('args');
+  } catch (err) {
+    log(ctx, 'No args being provided to gallery-dl');
+  }
+
+  // Build the command once so the logged line is exactly what gets run.
+  // `args` stays unquoted on purpose: it is an option string that the shell
+  // is expected to split into separate words.
+  const command = [
+    'python3 ~/.local/bin/gallery-dl -c ./config.json',
+    ...(args ? [args] : []),
+    `-d ${shellQuote(path)}`,
+    shellQuote(url),
+  ].join(' ');
+
+  log(ctx, command);
+  const proc = spawn(command, { shell: true });
+
+  // Keep stdout flowing even if the caller adds no 'data' listener, so the
+  // child cannot stall on a full pipe.
+  proc.stdout.resume();
+  proc.stderr.on('data', (data) => {
+    error(ctx, String(data));
+  });
+  proc.on('error', (err) => {
+    error(ctx, err);
+  });
+  proc.on('close', (code) => {
+    log(ctx, `child process exited with code ${code}`);
+  });
+
+  return proc;
+};