1
0
Fork 0
gallery-dl-archive-manager/lib/dl.js
Lightling 3933505fc6 better error handling
- rate limit is stderr, so need to check to make sure only finishing on other errors
- color errors in log
- log errors to userDb
- use fallback date in case dates weren't logged (e.g. /media failed)
- report on not-found users, which may indicate a username change or deleted account
- report on authorization errors
2024-02-10 15:07:54 -05:00

133 lines
4.9 KiB
JavaScript

import { spawn } from 'child_process';
import { getArg } from './args.js';
import { error, log } from './log.js';
// Context label passed as the first argument to the log()/error() helpers throughout this module.
// NOTE(review): the label reads 'getUser.js' — confirm it still matches this file's actual name.
const ctx = 'getUser.js';
/**
 * Matches logged entries shaped like `<alphanumerics>-<digits>-<yyyymmdd>_<hhmmss>` and captures
 * year, month, day, hour, minute and second (in that order).
 * It is only consumed through `String.prototype.matchAll`, which iterates over a copy of the
 * regex, so the shared global pattern never carries `lastIndex` state between calls.
 */
const loggedDateRegex = /[a-zA-Z0-9]+-[0-9]+-([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]{2})([0-9]{2})([0-9]{2})/gm;

/**
 * Formats a date as zero-padded `yyyy-mm-dd` using its UTC components.
 * @param {Date} date the date to format
 * @returns {string} the formatted date
 */
const formatDateForUrl = (date) => date.toISOString().slice(0, 10);

/**
 * Gets a date formatted as yyyy-mm-dd for use with `/search`
 * from the last logged date (which is assumed to be the oldest retrieved image from a previous run).
 * One day is added to the logged date as a safety margin.
 * When no logged date can be found, today's date (UTC) is returned instead.
 * @param {string[]} logs log entries; nested arrays and non-string chunks are flattened and stringified
 * @returns {string} the date as zero-padded `yyyy-mm-dd`
 */
const getDateUrlFromLog = (logs) => {
  // sometimes the logs get grouped into one single string, while others get individually logged
  // may as well flatten it to a single string and test with regex to make consistent and avoid bugs
  const flat = logs.flat().join('\n');
  const matches = [...flat.matchAll(loggedDateRegex)];
  const lastMatch = matches[matches.length - 1];
  if (lastMatch === undefined) {
    // nothing was logged (e.g. the previous run failed), so fall back to today
    return formatDateForUrl(new Date());
  }
  const [year, month, day, hour, minute, second] = lastMatch.slice(1).map(Number);
  // Date months are 0-indexed while the logged month is 1-indexed, hence `month - 1`.
  // NOTE(review): the logged timestamp is interpreted as UTC; the extra day added below
  // absorbs any timezone offset — confirm which timezone the timestamps are written in.
  const date = new Date(Date.UTC(year, month - 1, day, hour, minute, second));
  // to be safe, spring forward a day
  date.setUTCDate(date.getUTCDate() + 1);
  return formatDateForUrl(date);
};
/**
 * Runs {@link getUser} concurrently for many users.
 *
 * At most `threadMax` child processes are alive at once; whenever one closes, the next
 * pending user is started. While a user's process is alive its entry has `running === true`;
 * the flag is cleared once the process has closed.
 * Everything the process writes to stdout/stderr, any spawn error, and the exit code
 * (or the terminating signal) are appended to the entry's `logs` as strings.
 *
 * @param {{ user: string, logs: string[] }[]} userDb userDb to run {@link getUser} on
 * @param {number} threadMax max number of threads to run concurrently
 * @param {string} directory the directory to save the user media folders in
 * @param {'media' | 'search'} mode whether to retrieve from the `/media` route or `/search` API
 * @returns {Promise<void>} promise which resolves once all threads for every user have finished
 */
export const getMany = (userDb, threadMax, directory, mode) => new Promise((resolve) => {
  // number of child processes currently alive
  let running = 0;
  // index of the next userDb entry to start
  let index = 0;

  // Marks one user as done, reports it, and lets the next pending user start.
  const finish = (entry, type) => {
    entry.running = false;
    const reason = type === 'close' ? '' : ` (Closed due to ${type})`;
    log(ctx, `Finished ${entry.user} under ${mode} mode.${reason}`);
    --running;
    fill();
  };

  // Spawns the download for one user and wires up its bookkeeping.
  const start = (entry) => {
    const modeParams = mode === 'media' ? 'media' : {
      from: '2007-12-31',
      to: getDateUrlFromLog(entry.logs),
    };
    const proc = getUser(entry.user, directory, modeParams);
    entry.running = true;

    // first failure kind seen for this process ('error' or 'exit'), if any
    let failure = null;
    const noteFailure = (type) => {
      if (failure === null) {
        failure = type;
      }
    };

    proc.stdout.on('data', (data) => {
      entry.logs.push(String(data));
    });
    // stderr output alone (e.g. rate limit notices) is only recorded; it never ends the run
    proc.stderr.on('data', (data) => {
      entry.logs.push(String(data));
    });
    proc.on('error', (err) => {
      entry.logs.push(String(err));
      noteFailure('error');
    });
    proc.on('exit', (code, signal) => {
      // code is null when the child was terminated by a signal; record the signal instead
      entry.logs.push(String(code === null ? signal : code));
      if (code !== 0) {
        noteFailure('exit');
      }
    });
    // 'close' is the single completion point: it fires once, after the process has ended and
    // its stdio streams are closed, so `running` is decremented exactly once per process
    proc.on('close', () => finish(entry, failure === null ? 'close' : failure));
  };

  // Starts users until the concurrency limit is reached; resolves once nothing is left running.
  const fill = () => {
    while (running < threadMax && index < userDb.length) {
      ++running;
      start(userDb[index++]);
    }
    if (running === 0) {
      resolve();
    }
  };

  fill();
});
/**
 * Spawns gallery-dl for a single user and streams its output to the module logger.
 * @param {string} user the user to retrieve media from
 * @param {string} path the path to save the user's media folder in
 * @param {'media' | { from: string, to: string }} modeParams selects the URL that is downloaded:
 * - `'media'` uses the user's /media page
 * - an object uses /search?f=live&src=typed_query&q=(from%3A`<USER>`)%20since%3A<FROM|yyyy-mm-dd>%20until%3A`<TO|yyyy-mm-dd>`,
 *   where the `since` / `until` parts are only appended when `from` / `to` are truthy
 * @returns {ChildProcess} the process that was {@link spawn spawned}
 */
export const getUser = (user, path, modeParams) => {
  // Example search URL:
  // "https://twitter.com/search?f=live&q=(from%3Aad_varg)%20until%3A2023-06-30%20since%3A2007-01-01&src=typed_query"
  const buildUrl = () => {
    if (modeParams === 'media') {
      return `https://twitter.com/${user}/media`;
    }
    const pieces = [`https://twitter.com/search?f=live&src=typed_query&q=(from%3A${user})`];
    if (modeParams.from) {
      pieces.push(`%20since%3A${modeParams.from}`);
    }
    if (modeParams.to) {
      pieces.push(`%20until%3A${modeParams.to}`);
    }
    return pieces.join('');
  };
  const url = buildUrl();

  // extra gallery-dl arguments are optional; a failing lookup only produces a log line
  let extraArgs;
  try {
    extraArgs = getArg('args');
  } catch (err) {
    log(ctx, 'No args being provided to gallery-dl');
  }
  const separator = extraArgs ? ' ' + extraArgs + ' ' : ' ';

  // build the command once so the logged line is exactly what gets executed
  const command = `python3 ~/.local/bin/gallery-dl -c ./config.json${separator}-d ${path} "${url}"`;
  log(ctx, command);
  const child = spawn(command, { shell: true });

  child.stdout.on('data', (chunk) => {
    log(ctx, chunk);
  });
  child.stderr.on('data', (chunk) => {
    error(ctx, chunk);
  });
  child.on('error', (failure) => {
    error(ctx, failure);
  });
  child.on('close', (exitCode) => {
    log(ctx, `child process exited with code ${exitCode}`);
  });

  return child;
};