1
0
Fork 0

concurrent retrieval with user-limited threadcount

This commit is contained in:
lightling 2024-02-09 20:31:21 -05:00
parent 0a9af1b964
commit eb908dd8f0
3 changed files with 134 additions and 0 deletions

74
buildDb.js Normal file
View file

@ -0,0 +1,74 @@
import { readFile } from 'fs/promises';
import { getArg } from './args.js';
import { getUser } from './getUser.js';
import { error, log } from './log.js';
// Label passed as the first argument to every log()/error() call in this file.
const ctx = 'buildDb.js';
/**
 * Runs `getUser` for every entry in `processes`, keeping at most `threadMax`
 * child processes alive at the same time.
 *
 * @param {Array<{user: string, logs: Array}>} processes - Work list. Each
 *   entry's `user` is handed to `getUser`; every stdout chunk of its child
 *   process is appended to the entry's `logs` array.
 * @param {number|string} threadMax - Maximum number of concurrent child
 *   processes. Numeric strings are accepted; anything that is not a number
 *   >= 1 falls back to 1 so the work list is never silently skipped.
 * @param {string} directory - Passed through to `getUser` unchanged.
 * @returns {Promise<void>} Resolves once every started child has finished
 *   (immediately for an empty work list). Never rejects.
 */
const getMany = (processes, threadMax, directory) => new Promise((resolve) => {
  const requested = Math.floor(Number(threadMax));
  // `NaN >= 1` is false, so non-numeric input also lands on the fallback.
  const limit = requested >= 1 ? requested : 1;

  let running = 0;
  let index = 0;

  const get = () => {
    while (running < limit && index < processes.length) {
      const currentIndex = index++;
      ++running;

      // A child can emit both 'error' and 'close'; the slot must be released
      // exactly once or the concurrency limit is exceeded.
      let finished = false;
      const onFinish = () => {
        if (finished) {
          return;
        }
        finished = true;
        log(ctx, `Finished ${processes[currentIndex].user}/media`);
        --running;
        get();
      };

      const proc = getUser(processes[currentIndex].user, directory);
      proc.stdout.on('data', (data) => {
        processes[currentIndex].logs.push(data);
      });
      // Output on stderr is deliberately NOT treated as completion: the child
      // is still running at that point, so its slot must stay occupied.
      proc.on('close', onFinish);
      proc.on('error', onFinish);
    }

    if (running === 0) {
      resolve();
    }
  };

  get();
});
/**
 * Script entry point: loads `<path>/db.json` and downloads `<user>/media`
 * for every entry in it via `getMany`.
 *
 * Command-line arguments (read through `getArg`):
 * - `path` (required): directory containing `db.json`; also handed to
 *   `getMany` as the download directory. If it cannot be read the error is
 *   logged and the function returns without doing anything.
 * - `threads` (optional): maximum number of concurrent downloads. Must be a
 *   positive integer; a missing or invalid value falls back to 1.
 *
 * `db.json` must contain a JSON array. Each element is copied and given an
 * empty `logs` array before being passed on.
 *
 * @returns {Promise<void>} Resolves when all downloads have finished, or
 *   early after logging if the arguments or the database are unusable.
 */
const buildDb = async () => {
  log(ctx, 'Grabbing db');

  let directory;
  try {
    directory = getArg('path');
  } catch (err) {
    error(ctx, err);
    return;
  }

  let threadMax = 1;
  let rawThreads;
  let hasThreadsArg = true;
  try {
    rawThreads = getArg('threads');
  } catch (err) {
    // The argument is optional; any failure to read it means "use the default".
    hasThreadsArg = false;
  }
  const requestedThreads = Number(rawThreads);
  if (!hasThreadsArg) {
    log(ctx, 'Using 1 thread');
  } else if (Number.isInteger(requestedThreads) && requestedThreads > 0) {
    threadMax = requestedThreads;
    log(ctx, `Using ${threadMax} threads`);
  } else {
    // Passing the raw value on would make the worker loop never start.
    log(ctx, `Ignoring invalid thread count "${rawThreads}", using 1 thread`);
  }

  let db;
  try {
    const file = await readFile(`${directory}/db.json`, { encoding: 'utf8' });
    db = JSON.parse(file);
  } catch (err) {
    error(ctx, err);
    return;
  }
  if (!Array.isArray(db)) {
    error(ctx, new Error(`${directory}/db.json must contain a JSON array`));
    return;
  }

  const processes = db.map((entry) => ({
    ...entry,
    logs: [],
  }));

  log(ctx, `Building db using <user>/media for ${processes.length} users`);
  await getMany(processes, threadMax, directory);
  log(ctx, 'Building db using /search');
};
// Run the script. Anything buildDb does not handle itself is logged and
// reported through a non-zero exit code instead of an unhandled rejection.
buildDb().catch((err) => {
  error(ctx, err);
  process.exitCode = 1;
});

26
config.json Normal file
View file

@ -0,0 +1,26 @@
{
"extractor": {
"twitter": {
"base-directory": ".",
"directory": [
"{author['name']}"
],
"filename": {
"locals().get('bitrate', 0) > 0": "{author['name']}-{tweet_id}-{date:Olocal/%Y%m%d_%H%M%S}-vid{num}.{extension}",
"locals().get('bitrate') == 0": "{author['name']}-{tweet_id}-{date:Olocal/%Y%m%d_%H%M%S}-gif{num}.{extension}",
"": "{author['name']}-{tweet_id}-{date:Olocal/%Y%m%d_%H%M%S}-img{num}.{extension}"
},
"cookies": "$HOME/.config/gallery-dl/twitter-cookies.txt",
"timeline": {
"strategy": "media"
},
"postprocessors": [
{
"name": "metadata",
"event": "post",
"filename": "{author[name]}_{tweet_id}-{date:Olocal/%Y%m%d_%H%M%S}.json"
}
]
}
}
}

34
getUser.js Normal file
View file

@ -0,0 +1,34 @@
import { spawn } from 'child_process';
import { getArg } from './args.js';
import { error, log } from './log.js';
// Label passed as the first argument to every log()/error() call in this file.
const ctx = 'getUser.js';
/**
 * Wraps a value in single quotes for a POSIX shell, escaping any single
 * quotes it contains, so the shell passes it on as one literal argument.
 *
 * @param {*} value - Value to quote; converted with `String()`.
 * @returns {string} The quoted value.
 */
const quoteForShell = (value) => `'${String(value).replace(/'/g, `'\\''`)}'`;

/**
 * Starts gallery-dl for `https://twitter.com/<user>/media`.
 *
 * The command is run through a shell (needed for the `~` in the gallery-dl
 * path). `path` and the URL are shell-quoted so that spaces or shell
 * metacharacters in them cannot split or alter the command. The optional
 * `args` command-line argument is inserted verbatim so that several
 * gallery-dl options can be supplied at once; it is therefore interpreted by
 * the shell.
 *
 * The child's stderr chunks and spawn errors are reported through `error`,
 * and its exit code through `log`.
 *
 * @param {string} user - Account name used to build the media URL.
 * @param {string} path - Value for gallery-dl's `-d` option.
 * @returns {import('child_process').ChildProcess} The spawned child process.
 */
export const getUser = (user, path) => {
  const url = `https://twitter.com/${user}/media`;

  let args;
  try {
    args = getArg('args');
  } catch (err) {
    log(ctx, 'No args being provided to gallery-dl');
  }

  const extraArgs = args ? ` ${args} ` : ' ';
  const command = `python3 ~/.local/bin/gallery-dl -c ./config.json${extraArgs}-d ${quoteForShell(path)} ${quoteForShell(url)}`;
  log(ctx, command);

  const proc = spawn(command, { shell: true });

  // Output is intentionally discarded here, but a 'data' listener is kept so
  // the stdout pipe is always drained even if the caller attaches none.
  proc.stdout.on('data', () => {});
  proc.stderr.on('data', (data) => {
    error(ctx, data);
  });
  proc.on('error', (err) => {
    error(ctx, err);
  });
  proc.on('close', (code) => {
    log(ctx, `child process exited with code ${code}`);
  });

  return proc;
};