concurrent retrieval with user-limited threadcount
This commit is contained in:
parent
0a9af1b964
commit
eb908dd8f0
3 changed files with 134 additions and 0 deletions
74
buildDb.js
Normal file
74
buildDb.js
Normal file
|
@ -0,0 +1,74 @@
|
|||
import { readFile } from 'fs/promises';
|
||||
|
||||
import { getArg } from './args.js';
|
||||
import { getUser } from './getUser.js';
|
||||
import { error, log } from './log.js';
|
||||
|
||||
// Context label passed as the first argument to every log()/error() call in this file.
const ctx = 'buildDb.js';
|
||||
|
||||
/**
 * Runs `getUser` once for every entry of `processes`, keeping at most
 * `threadMax` child processes alive at the same time.
 *
 * Every chunk a child writes to stdout is appended to that entry's `logs` array.
 *
 * @param {Array<{user: string, logs: Array}>} processes - Work items; one child is started per entry, in order.
 * @param {number|string} threadMax - Maximum number of simultaneously running children.
 *   Values that do not convert to a number >= 1 are treated as 1.
 * @param {string} directory - Forwarded unchanged as the second argument of `getUser`.
 * @returns {Promise<void>} Resolves once every started child has finished, or
 *   immediately when `processes` is empty.
 */
const getMany = (processes, threadMax, directory) => new Promise((resolve) => {
  // A limit of 0 / NaN would otherwise skip the loop below and resolve without doing any work.
  const requested = Number(threadMax);
  const limit = requested >= 1 ? Math.floor(requested) : 1;

  let running = 0;
  let index = 0;

  const get = () => {
    while (running < limit && index < processes.length) {
      ++running;
      const entry = processes[index++];
      let finished = false;

      // Node may emit both 'error' and 'close' for the same child, so the slot
      // must be released exactly once or `running` drifts below the real count.
      const onFinish = () => {
        if (finished) {
          return;
        }
        finished = true;
        log(ctx, `Finished ${entry.user}/media`);
        --running;
        get();
      };

      const proc = getUser(entry.user, directory);
      proc.stdout.on('data', (data) => {
        entry.logs.push(data);
      });
      // Output on stderr does not mean the child has ended, so it is not
      // treated as completion.
      proc.on('close', onFinish);
      proc.on('error', onFinish);
    }

    // Nothing running and nothing left to start: all work is done.
    if (running === 0) {
      resolve();
    }
  };

  get();
});
|
||||
|
||||
/**
 * Loads `<path>/db.json` and runs the `<user>/media` retrieval for every entry
 * in it via `getMany`.
 *
 * Options are read through `getArg`:
 * - `path`: directory containing `db.json`; if `getArg` throws, the error is
 *   reported and the run is aborted.
 * - `threads`: concurrency limit; if `getArg` throws or the value does not
 *   parse to an integer >= 1, a single thread is used.
 *
 * Read/parse failures and a `db.json` that is not a JSON array are reported
 * through `error` and abort the run.
 *
 * @returns {Promise<void>}
 */
const buildDb = async () => {
  log(ctx, 'Grabbing db');

  let directory;
  try {
    directory = getArg('path');
  } catch (err) {
    error(ctx, err);
    return;
  }

  let requested = Number.NaN;
  try {
    requested = Number.parseInt(getArg('threads'), 10);
  } catch (err) {
    // Option unavailable: handled below by falling back to one thread.
  }
  let threadMax = 1;
  if (requested >= 1) {
    threadMax = requested;
    log(ctx, `Using ${threadMax} threads`);
  } else {
    log(ctx, 'Using 1 thread');
  }

  let db;
  try {
    const file = await readFile(`${directory}/db.json`, { encoding: 'utf8' });
    db = JSON.parse(file);
    // Valid JSON that is not an array would make the `.map` below throw outside any try block.
    if (!Array.isArray(db)) {
      throw new TypeError(`${directory}/db.json must contain a JSON array`);
    }
  } catch (err) {
    error(ctx, err);
    return;
  }

  // Each work item is a copy of its db entry plus an empty `logs` array for getMany to fill.
  const processes = db.map((entry) => ({
    ...entry,
    logs: [],
  }));

  log(ctx, `Building db using <user>/media for ${processes.length} users`);
  await getMany(processes, threadMax, directory);

  log(ctx, 'Building db using /search');
};
|
||||
|
||||
// Script entry point. buildDb is async, so any rejection that escapes it is
// reported here instead of surfacing as an unhandled promise rejection.
buildDb().catch((err) => error(ctx, err));
|
26
config.json
Normal file
26
config.json
Normal file
|
@ -0,0 +1,26 @@
|
|||
{
|
||||
"extractor": {
|
||||
"twitter": {
|
||||
"base-directory": ".",
|
||||
"directory": [
|
||||
"{author['name']}"
|
||||
],
|
||||
"filename": {
|
||||
"locals().get('bitrate', 0) > 0": "{author['name']}-{tweet_id}-{date:Olocal/%Y%m%d_%H%M%S}-vid{num}.{extension}",
|
||||
"locals().get('bitrate') == 0": "{author['name']}-{tweet_id}-{date:Olocal/%Y%m%d_%H%M%S}-gif{num}.{extension}",
|
||||
"": "{author['name']}-{tweet_id}-{date:Olocal/%Y%m%d_%H%M%S}-img{num}.{extension}"
|
||||
},
|
||||
"cookies": "$HOME/.config/gallery-dl/twitter-cookies.txt",
|
||||
"timeline": {
|
||||
"strategy": "media"
|
||||
},
|
||||
"postprocessors": [
|
||||
{
|
||||
"name": "metadata",
|
||||
"event": "post",
|
||||
"filename": "{author[name]}_{tweet_id}-{date:Olocal/%Y%m%d_%H%M%S}.json"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
34
getUser.js
Normal file
34
getUser.js
Normal file
|
@ -0,0 +1,34 @@
|
|||
import { spawn } from 'child_process';
|
||||
|
||||
import { getArg } from './args.js';
|
||||
import { error, log } from './log.js';
|
||||
|
||||
// Context label passed as the first argument to every log()/error() call in this file.
const ctx = 'getUser.js';
|
||||
|
||||
/**
 * Starts a gallery-dl child process for `https://twitter.com/<user>/media`,
 * passing `path` as gallery-dl's `-d` argument.
 *
 * The command runs through a shell (`shell: true`) so that `~` in the
 * gallery-dl location is expanded. `path` and the URL are single-quoted so
 * that spaces or shell metacharacters in them cannot alter the command.
 * The optional `args` option (read via `getArg('args')`) is inserted verbatim
 * and is therefore still subject to shell word-splitting.
 *
 * stderr chunks and spawn errors are reported through `error`; the exit code
 * is reported through `log`.
 *
 * @param {string} user - Account name placed into the media URL.
 * @param {string} path - Value for gallery-dl's `-d` argument.
 * @returns {import('child_process').ChildProcess} The spawned child process.
 */
export const getUser = (user, path) => {
  // POSIX shell single-quote escaping: close the quote, emit \', reopen.
  const shellQuote = (value) => `'${String(value).replace(/'/g, "'\\''")}'`;

  const url = `https://twitter.com/${user}/media`;

  let args;
  try {
    args = getArg('args');
  } catch (err) {
    log(ctx, 'No args being provided to gallery-dl');
  }

  // Build the command once so the logged text is exactly what gets executed.
  const command = [
    'python3 ~/.local/bin/gallery-dl -c ./config.json',
    ...(args ? [args] : []),
    '-d',
    shellQuote(path),
    shellQuote(url),
  ].join(' ');

  log(ctx, command);
  const proc = spawn(command, { shell: true });

  // Keep stdout in flowing mode even if the caller attaches no listener of its own.
  proc.stdout.on('data', () => {});
  proc.stderr.on('data', (data) => {
    error(ctx, data);
  });
  proc.on('error', (err) => {
    error(ctx, err);
  });
  proc.on('close', (code) => {
    log(ctx, `child process exited with code ${code}`);
  });

  return proc;
};
|
Loading…
Add table
Reference in a new issue