import { spawn } from 'child_process';
import { getArg, getSiteUrl, SITES } from './args.js';
import { error, log } from './log.js';
import { trimNewlinesEnd } from './str.js';

const ctx = 'getUser.js';

/**
 * Matches a logged token of the form `<alphanumerics>-<digits>-yyyymmdd_hhmmss`
 * and captures year, month, day, hour, minute and second (groups 1-6).
 */
const loggedDateRegex = new RegExp('[a-zA-Z0-9]+\-[0-9]+\-([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]{2})([0-9]{2})([0-9]{2})', 'gm');

/**
 * Formats the UTC calendar date of `date` as zero-padded `yyyy-mm-dd`.
 * @param {Date} date a valid date
 * @returns {string}
 */
const formatUtcDate = (date) => date.toISOString().slice(0, 10);

/**
 * Gets a date formatted as yyyy-mm-dd for use with `/search`
 * from the last logged date (which is assumed to be the oldest retrieved image from a previous run).
 * One day is added to the logged date as a safety margin. When no date is found in the logs,
 * today's (UTC) date is returned.
 * @param {string[]} logs
 * @returns {string} zero-padded `yyyy-mm-dd`
 */
const getDateUrlFromLog = (logs) => {
    // sometimes the logs get grouped into one single string, while others get individually logged
    // may as well flatten it to a single string and test with regex to make consistent and avoid bugs
    const flat = logs.flat().join('\n');

    // matchAll iterates over a clone of the regex, so the shared global regex keeps no
    // leftover `lastIndex` state between calls
    let lastMatch = null;
    for (const match of flat.matchAll(loggedDateRegex)) {
        lastMatch = match;
    }

    if (!lastMatch) {
        return formatUtcDate(new Date());
    }

    const [year, month, day, hour, minute, second] = lastMatch.slice(1).map(Number);
    // Date months are zero-based while the logged month is 1-12.
    // NOTE(review): the logged timestamp is treated as UTC here — confirm against the log producer.
    const date = new Date(Date.UTC(year, month - 1, day, hour, minute, second));
    // to be safe, spring forward a day
    date.setUTCDate(date.getUTCDate() + 1);
    return formatUtcDate(date);
};

/**
 * Runs {@link getUser} concurrently for many users
 * @param {object} params params for the function to run
 * @param {{ user: string, logs: string[] }[]} params.userDb userDb to run {@link getUser} on;
 *  entries are mutated (`logs`, `running`, `shouldSkipSearch`)
 * @param {number} params.threadMax max number of processes to run concurrently
 * @param {string} params.directory the directory to save the user media folders in
 * @param {'media' | 'search'} params.mode whether to retrieve from the `/media` route or `/search` API
 * @param {string} params.site the site to pull from
 * @returns {Promise} promise which resolves once the processes for every user have finished
 */
export const getMany = (params) => new Promise((resolve) => {
    const { userDb, threadMax, directory, mode, site } = params;
    const finishedState = `finished ${mode}`;
    let running = 0;
    let index = 0;

    /**
     * Records process output/termination for a user and, on termination, frees the slot
     * and schedules the next user.
     */
    const checkError = (currentIndex, type, codeOrError) => {
        const entry = userDb[currentIndex];
        // String() instead of .toString(): the exit code is null when the child was killed by a signal
        entry.logs.push(String(codeOrError));

        const killedBySignal = codeOrError === null && (type === 'close' || type === 'exit');
        const hasTerminated = typeof codeOrError === 'number' || killedBySignal;
        // both 'exit' and 'close' fire for one process; the `running` state check makes sure
        // only the first of them releases the slot
        if (!hasTerminated || entry.running !== mode) {
            return;
        }

        entry.running = finishedState;
        --running;
        const remaining = userDb.filter(elem => elem.running !== finishedState).length;
        log(ctx, `Finished (via ${type}) ${entry.user} under ${mode} mode. ${remaining} users left.`);

        if (mode === 'media') {
            // the last entry is the exit code pushed above, so the one before it is the
            // final output line; a leading-`# ` line is what getUser reports as a skipped file
            const lastOutput = entry.logs[entry.logs.length - 2];
            if (lastOutput?.toString().includes('# ')) {
                entry.shouldSkipSearch = true;
            }
        }

        get();
    };

    /** Starts processes until `threadMax` are running or every user has been scheduled. */
    const get = () => {
        while (running < threadMax && index < userDb.length) {
            const currentIndex = index++;
            const entry = userDb[currentIndex];

            if (mode === 'search') {
                if (entry.shouldSkipSearch) {
                    log(ctx, `Skipping ${entry.user} because /media ended with a skipped file`);
                    entry.running = finishedState;
                    continue;
                }
                if (entry.lastError) {
                    log(ctx, `Skipping ${entry.user} because of error: ${entry.lastError}`);
                    entry.running = finishedState;
                    continue;
                }
            }

            ++running;
            const modeParams = mode === 'media'
                ? 'media'
                : {
                    from: '2007-12-31',
                    to: getDateUrlFromLog(entry.logs),
                };
            const proc = getUser({
                user: entry.user,
                path: directory,
                modeParams,
                site,
            });
            entry.running = mode;

            proc.stdout.on('data', data => {
                entry.logs.push(trimNewlinesEnd(data));
            });
            proc.stderr.on('data', data => checkError(currentIndex, 'stderr', trimNewlinesEnd(data)));
            proc.on('close', code => checkError(currentIndex, 'close', code));
            proc.on('error', err => checkError(currentIndex, 'error', trimNewlinesEnd(err)));
            proc.on('exit', code => checkError(currentIndex, 'exit', code));
        }

        // nothing running and nothing left to start (or everything was skipped)
        if (running === 0) {
            resolve();
        }
    };

    get();
});

/**
 * Retrieves gallery for specified user and saves to the specified parent path
 * @param {object} params the params to run the function
 * @param {string} params.user the user to retrieve media from
 * @param {string} params.path the path to save the user's media folder in
 * @param {'media' | { from: string, to: string }} params.modeParams depending on the mode:
 * - if `'media'`, will retrieve from /media
 * - if object, will retrieve from /search?f=live&src=typed_query&q=(from%3A``)%20since%3A%20until%3A``
 * @param {string} params.site the site to pull from
 * @returns {ChildProcess} the process that was {@link spawn spawned}
 */
export const getUser = (params) => {
    const { user, path, modeParams, site } = params;
    const isMediaMode = modeParams === 'media';

    let url = getSiteUrl(site, user);
    if (!isMediaMode && site === SITES.TWITTER) {
        url = `https://twitter.com/search?f=live&src=typed_query&q=(from%3A${user})`;
        if (modeParams.from) {
            url += `%20since%3A${modeParams.from}`;
        }
        if (modeParams.to) {
            url += `%20until%3A${modeParams.to}`;
        }
    }

    // a failing getArg lookup means the option is simply not applied
    let skip;
    try {
        skip = isMediaMode ? getArg('skipMediaAfter') : getArg('skipSearchAfter');
        log(ctx, `Aborting after ${skip} skipped media`);
    } catch (err) {
        log(ctx, 'Not aborting after skipped media');
    }

    let args;
    try {
        args = getArg('args');
    } catch (err) {
        log(ctx, 'No args being provided to gallery-dl');
    }

    // NOTE(review): the command is run through a shell (needed for `~` expansion and for the
    // free-form `args` string), so `user`, `path` and `args` are interpolated unescaped.
    // Only pass trusted values, or switch to an argv array if these can come from untrusted input.
    const skipOption = skip ? ` -A ${skip}` : '';
    const extraArgs = args ? ` ${args}` : '';
    const command = `python3 ~/.local/bin/gallery-dl -c ./config.json${skipOption}${extraArgs} -d ${path} "${url}"`;

    log(ctx, command);
    const proc = spawn(command, { shell: true });

    proc.stdout.on('data', data => {
        const text = data.toString();
        // output starting with `# ` is reported as a skipped file, anything else as a download
        const message = text.startsWith('# ')
            ? `\x1b[90mSkipped ${text.trim()}\x1b[0m`
            : `\x1b[36mDownloaded ${text.trim()}\x1b[0m`;
        log(ctx, message);
    });
    proc.stderr.on('data', data => {
        error(ctx, data);
    });
    proc.on('error', err => {
        error(ctx, err);
    });
    proc.on('close', code => {
        log(ctx, `child process exited with code ${code}`);
    });

    return proc;
};