166 lines
6.5 KiB
JavaScript
166 lines
6.5 KiB
JavaScript
import { spawn } from 'child_process';
|
|
|
|
import { getArg, getSiteUrl, SITES } from './args.js';
|
|
import { error, log } from './log.js';
|
|
import { trimNewlinesEnd } from './str.js';
|
|
|
|
const ctx = 'getUser.js';

// Captures the six timestamp fields (year, month, day, hours, minutes, seconds) from logged names
// shaped like `<alphanumeric>-<digits>-<yyyymmdd>_<hhmmss>`
const loggedDateRegex = new RegExp('[a-zA-Z0-9]+\-[0-9]+\-([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]{2})([0-9]{2})([0-9]{2})', 'gm');

/**
 * Gets a date formatted as yyyy-mm-dd for use with `/search`
 * from the last logged date (which is assumed to be the oldest retrieved image from a previous run).
 *
 * The last timestamp found in the logs is moved forward by one day before formatting.
 * When the logs contain no timestamp at all, today's date (UTC) is returned instead.
 *
 * NOTE(review): the logged timestamp is interpreted as UTC so the result does not depend on the
 * timezone of the machine running the script - confirm this matches what gets logged.
 *
 * @param {string[]} logs log entries from a previous run; nested arrays are flattened one level
 * @returns {string} zero-padded `yyyy-mm-dd` date
 */
const getDateUrlFromLog = (logs) => {
  // sometimes the logs get grouped into one single string, while others get individually logged
  // may as well flatten it to a single string and test with regex to make consistent and avoid bugs
  const flat = logs.flat().join('\n');

  // scan with a fresh copy so the shared global regex's `lastIndex` is neither read nor modified
  const matches = [...flat.matchAll(new RegExp(loggedDateRegex))];
  if (matches.length === 0) {
    return new Date().toISOString().slice(0, 10);
  }

  // index 0 of a match is the whole matched text; the capture groups follow it
  const [, year, month, day, hours, minutes, seconds] = matches[matches.length - 1].map(Number);

  // the logged month is 1-based while Date expects a 0-based month
  const date = new Date(Date.UTC(year, month - 1, day, hours, minutes, seconds));

  // to be safe, spring forward a day
  date.setUTCDate(date.getUTCDate() + 1);

  return date.toISOString().slice(0, 10);
};
|
|
|
|
/**
 * Runs {@link getUser} concurrently for many users, keeping at most `threadMax` child processes
 * running at the same time and starting the next user whenever a running one terminates.
 *
 * Each `userDb` entry is mutated while it is processed:
 * - `logs` receives the child's stdout/stderr chunks and its exit code
 * - `running` is set to `mode` while the child runs and to `finished <mode>` afterwards
 * - `shouldSkipSearch` is set to `true` in `media` mode when the second-to-last log entry
 *   (the entry before the pushed exit code) contains `'# '`
 *
 * In `search` mode, entries with a truthy `shouldSkipSearch` or `lastError` are marked as
 * finished without spawning anything.
 *
 * @param {object} params params for the function to run
 * @param {{ user: string, logs: string[] }[]} params.userDb userDb to run {@link getUser} on
 * @param {number} params.threadMax max number of threads to run concurrently
 * @param {string} params.directory the directory to save the user media folders in
 * @param {'media' | 'search'} params.mode whether to retrieve from the `/media` route or `/search` API
 * @param {string} params.site the site to pull from
 * @returns {Promise<void>} promise which resolves once all threads for every user have finished
 */
export const getMany = (params) => new Promise((resolve) => {
  let running = 0;
  let index = 0;
  const { userDb, threadMax, directory, mode, site } = params;
  const finishedState = `finished ${mode}`;

  const get = () => {
    const checkError = (currentIndex, type, codeOrError) => {
      const entry = userDb[currentIndex];

      // `close` and `exit` report a `null` code when the child was killed by a signal,
      // so stringify without calling a method on the value
      entry.logs.push(String(codeOrError));

      const isExitEvent = type === 'close' || type === 'exit';
      const hasTerminated = typeof codeOrError === 'number' || (isExitEvent && codeOrError === null);

      // both `exit` and `close` are wired up below; only the first one to arrive releases the slot
      if (!hasTerminated || entry.running !== mode) {
        return;
      }

      entry.running = finishedState;
      --running;
      log(ctx, `Finished (via ${type}) ${entry.user} under ${mode} mode. ${userDb.filter(elem => elem.running !== finishedState).length} users left.`);

      if (mode === 'media') {
        const logsParsed = entry.logs.map(buf => buf.toString());
        // the last entry is the exit code pushed above, so the one before it is the final output
        if (logsParsed[logsParsed.length - 2]?.includes('# ')) {
          entry.shouldSkipSearch = true;
        }
      }

      get();
    };

    while (running < threadMax && index < userDb.length) {
      const currentIndex = index++;
      const entry = userDb[currentIndex];

      if (mode === 'search') {
        if (entry.shouldSkipSearch) {
          log(ctx, `Skipping ${entry.user} because /media ended with a skipped file`);
          entry.running = finishedState;
          continue;
        } else if (!!entry.lastError) {
          log(ctx, `Skipping ${entry.user} because of error: ${entry.lastError}`);
          entry.running = finishedState;
          continue;
        }
      }

      ++running;

      const modeParams = mode === 'media' ? 'media' : {
        from: '2007-12-31',
        to: getDateUrlFromLog(entry.logs),
      };

      const proc = getUser({
        user: entry.user,
        path: directory,
        modeParams,
        site,
      });
      entry.running = mode;

      proc.stdout.on('data', data => {
        entry.logs.push(trimNewlinesEnd(data));
      });
      proc.stderr.on('data', data => checkError(currentIndex, 'stderr', trimNewlinesEnd(data)));
      proc.on('close', code => checkError(currentIndex, 'close', code));
      proc.on('error', err => checkError(currentIndex, 'error', trimNewlinesEnd(err)));
      proc.on('exit', code => checkError(currentIndex, 'exit', code));
    }

    // nothing is running and nothing could be started: every user has been handled
    if (running === 0) {
      resolve();
    }
  };

  get();
});
|
|
|
|
/**
 * Spawns gallery-dl to download one user's gallery into the given parent path.
 *
 * @param {object} params the params to run the function
 * @param {string} params.user the user to retrieve media from
 * @param {string} params.path the path to save the user's media folder in
 * @param {'media' | { from: string, to: string }} params.modeParams depending on the mode:
 * - if `'media'`, the URL returned by `getSiteUrl(site, user)` is used
 * - if object and the site is `SITES.TWITTER`, a live search URL is built instead:
 *   /search?f=live&src=typed_query&q=(from%3A`<USER>`)%20since%3A`<FROM|yyyy-mm-dd>`%20until%3A`<TO|yyyy-mm-dd>`
 *   (the `since` / `until` parts are only added when `from` / `to` are truthy)
 * @param {string} params.site the site to pull from
 * @returns {ChildProcess} the process that was {@link spawn spawned}
 */
export const getUser = (params) => {
  const { user, path, modeParams, site } = params;
  const isMediaMode = modeParams === 'media';

  let url = getSiteUrl(site, user);
  if (!isMediaMode && site === SITES.TWITTER) {
    const urlParts = [`https://twitter.com/search?f=live&src=typed_query&q=(from%3A${user})`];
    if (modeParams.from) {
      urlParts.push(`%20since%3A${modeParams.from}`);
    }
    if (modeParams.to) {
      urlParts.push(`%20until%3A${modeParams.to}`);
    }
    url = urlParts.join('');
  }

  // `getArg` throwing is treated as "option not provided"
  let skip;
  try {
    skip = getArg(isMediaMode ? 'skipMediaAfter' : 'skipSearchAfter');
    log(ctx, `Aborting after ${skip} skipped media`);
  } catch (err) {
    log(ctx, 'Not aborting after skipped media');
  }

  let args;
  try {
    args = getArg('args');
  } catch (err) {
    log(ctx, 'No args being provided to gallery-dl');
  }

  // NOTE(review): `skip`, `args`, `path` and `url` are interpolated into a shell command unescaped
  const skipPart = skip ? ' -A ' + skip : '';
  const argsPart = args ? ' ' + args : '';
  const command = `python3 ~/.local/bin/gallery-dl -c ./config.json${skipPart}${argsPart} -d ${path} "${url}"`;

  log(ctx, command);
  const proc = spawn(command, { shell: true });

  proc.stdout.on('data', (data) => {
    const text = data.toString();
    // output starting with '# ' is reported as skipped (grey), anything else as downloaded (cyan)
    const message = text.startsWith('# ')
      ? '\x1b[90mSkipped ' + text.trim() + '\x1b[0m'
      : '\x1b[36mDownloaded ' + text.trim() + '\x1b[0m';
    log(ctx, `${message}`);
  });

  proc.stderr.on('data', (data) => {
    error(ctx, data);
  });

  proc.on('error', (err) => {
    error(ctx, err);
  });

  proc.on('close', (code) => {
    log(ctx, `child process exited with code ${code}`);
  });

  return proc;
};
|