diff --git a/lib/args.js b/lib/args.js index e786010..1ccfee6 100644 --- a/lib/args.js +++ b/lib/args.js @@ -1,5 +1,11 @@ let argRegex = new RegExp('(\-\-[a-zA-Z0-9]+)(=)(.*)'); +/** + * Parses args from node process + * @returns { {[ key: string ]: string | string[]} } parsed args array; + * if the arg followed format `--arg=val`, it will be indexed as `args[arg]=val`; + * otherwise it will be pushed to an array under `args['_']` + */ const parseArgs = () => { let _args = { '_': [] }; [...process.argv.slice(2)].forEach(arg => { @@ -16,6 +22,12 @@ const parseArgs = () => { export const args = parseArgs(); +/** + * Gets value of specified arg + * @param {string} argName the name of the arg to retrieve + * @returns {string} value of specified arg + * @throws will throw error if arg was not specified + */ export const getArg = (argName) => { let arg = args[argName]; if (!!arg) { diff --git a/lib/dl.js b/lib/dl.js index 74ce257..36a68bb 100644 --- a/lib/dl.js +++ b/lib/dl.js @@ -5,6 +5,50 @@ import { error, log } from './log.js'; const ctx = 'getUser.js'; +/** + * Runs {@link getUser} concurrently for many users + * @param {{ user: string, logs: string[] }[]} userDb userDb to run {@link getUser} on + * @param {number} threadMax max number of threads to run concurrently + * @param {string} directory the directory to save the user media folders in + * @returns {Promise} promise which resolves once all threads for every user have finished + */ +export const getMany = (userDb, threadMax, directory) => new Promise((resolve, reject) => { + let running = 0; + let index = 0; + + const get = () => { + const onFinish = (currentIndex) => { + log(ctx, `Finished ${userDb[currentIndex].user}/media`); + --running; + get(); + } + + while (running < threadMax && index < userDb.length) { + ++running; + let currentIndex = index++; + + let proc = getUser(userDb[currentIndex].user, directory); + proc.stdout.on('data', data => { + userDb[currentIndex].logs.push(data); + }); + proc.stderr.on('data', _ => onFinish(currentIndex)); + proc.on('close', _ => onFinish(currentIndex)); + proc.on('error', _ => onFinish(currentIndex)); + } + + if (running === 0) { + resolve(); + } + } + get(); +}); + +/** + * Retrieves gallery for specified user and saves to the specified parent path + * @param {string} user the user to retrieve media from + * @param {string} path the path to save the user's media folder in + * @returns {ChildProcess} the process that was {@link spawn spawned} + */ export const getUser = (user, path) => { const url = `https://twitter.com/${user}/media`; let args; @@ -18,7 +62,7 @@ export const getUser = (user, path) => { const proc = spawn(`python3 ~/.local/bin/gallery-dl -c ./config.json${!!args ? ' ' + args + ' ' : ' '}-d ${path} ${url}`, { shell: true }); proc.stdout.on('data', data => { - //log(ctx, data); + log(ctx, data); }); proc.stderr.on('data', data => { error(ctx, data); diff --git a/lib/io.js b/lib/io.js index b62b8f8..270d20b 100644 --- a/lib/io.js +++ b/lib/io.js @@ -1,6 +1,11 @@ import { readdir } from 'fs/promises' -export const getChildren = async source => +/** + * Gets the directories under the specified source directory + * @param {string} source path to the parent directory + * @returns {Promise} the names of the child directories + */ +export const getChildDirectories = async source => (await readdir(source, { withFileTypes: true })) .filter(dirent => dirent.isDirectory()) .map(dirent => dirent.name) diff --git a/lib/log.js b/lib/log.js index 2a13020..0e91329 100644 --- a/lib/log.js +++ b/lib/log.js @@ -2,15 +2,29 @@ const options = { year: 'numeric', day: '2-digit', month: '2-digit', hour: '2-di const osLocale = (process.env.LANG || process.env.LANGUAGE || process.env.LC_ALL || process.env.LC_MESSAGES).split('.')[0].replace('_', '-'); const dtFormat = Intl.DateTimeFormat(osLocale, options); +/** + * Gets formatted timestamp + * @returns {string} timestamp string + */ export const getTime = () => { return dtFormat.format(new Date()); } +/** + * Logs formatted output to the console + * @param {string} src the source script doing the logging + * @param {string} msg the message to log + */ export const log = (src, msg) => { const time = getTime(); console.log(`${time} : ${src} : ${msg}`); }; +/** + * Logs formatted output to the console as an error + * @param {string} src the source script doing the logging + * @param {string} msg the message to log + */ export const error = (src, msg) => { const time = new Date().toISOString(); console.error(`${time} : ${src} : ${msg}`); diff --git a/run-buildDb.js b/run-buildDb.js deleted file mode 100644 index d631924..0000000 --- a/run-buildDb.js +++ /dev/null @@ -1,74 +0,0 @@ -import { readFile } from 'fs/promises'; - -import { getArg } from './lib/args.js'; -import { getUser } from './lib/dl.js'; -import { error, log } from './lib/log.js'; - -const ctx = 'buildDb.js'; - -const getMany = (processes, threadMax, directory) => new Promise((resolve, reject) => { - let running = 0; - let index = 0; - - const get = () => { - const onFinish = (currentIndex) => { - log(ctx, `Finished ${processes[currentIndex].user}/media`); - --running; - get(); - } - - while (running < threadMax && index < processes.length) { - ++running; - let currentIndex = index++; - - let proc = getUser(processes[currentIndex].user, directory); - proc.stdout.on('data', data => { - processes[currentIndex].logs.push(data); - }); - proc.stderr.on('data', _ => onFinish(currentIndex)); - proc.on('close', _ => onFinish(currentIndex)); - proc.on('error', _ => onFinish(currentIndex)); - } - - if (running === 0) { - resolve(); - } - } - get(); -}); - -const buildDb = async () => { - log(ctx, 'Grabbing db'); - let directory = '', threadMax = 1, db; - try { - directory = getArg('path'); - } catch (err) { - error(ctx, err); - return; - } - try { - threadMax = getArg('threads'); - log(ctx, `Using ${threadMax} threads`); - } catch (err) { - log(ctx, 'Using 1 thread'); - } - try { - let file = await readFile(`${directory}/db.json`, { encoding: 'utf8' }); - db = JSON.parse(file); - } catch (err) { - error(ctx, err); - return; - } - - let processes = db.map(entry => ({ - ...entry, - logs: [], - })); - - log(ctx, `Building db using /media for ${processes.length} users`); - await getMany(processes, threadMax, directory); - - log(ctx, 'Building db using /search'); -} - -buildDb(); diff --git a/run-downloadDb.js b/run-downloadDb.js new file mode 100644 index 0000000..ccacb11 --- /dev/null +++ b/run-downloadDb.js @@ -0,0 +1,48 @@ +import { readFile } from 'fs/promises'; + +import { getArg } from './lib/args.js'; +import { getMany } from './lib/dl.js'; +import { error, log } from './lib/log.js'; + +const ctx = 'downloadDb.js'; + +/** + * Downloads all media possible for the users stored in db.json at the specified `--path`. + * Useful for first run or for augmenting existing media + * if it may be only partially archived in an uncertain state. + */ +const downloadDb = async () => { + log(ctx, 'Grabbing db'); + let directory = '', threadMax = 1, db; + try { + directory = getArg('path'); + } catch (err) { + error(ctx, err); + return; + } + try { + threadMax = getArg('threads'); + log(ctx, `Using ${threadMax} threads`); + } catch (err) { + log(ctx, 'Using 1 thread'); + } + try { + let file = await readFile(`${directory}/db.json`, { encoding: 'utf8' }); + db = JSON.parse(file); + } catch (err) { + error(ctx, err); + return; + } + + let processes = db.map(entry => ({ + ...entry, + logs: [], + })); + + log(ctx, `Building db using /media for ${processes.length} users`); + await getMany(processes, threadMax, directory); + + log(ctx, 'Building db using /search'); +} + +downloadDb(); diff --git a/run-initDb.js b/run-initDb.js index 29adb6c..18db851 100644 --- a/run-initDb.js +++ b/run-initDb.js @@ -1,11 +1,16 @@ import { writeFile } from 'fs/promises'; import { getArg } from './lib/args.js'; -import { getChildren } from './lib/io.js'; +import { getChildDirectories } from './lib/io.js'; import { error, log } from './lib/log.js'; const ctx = 'initDb.js'; +/** + * Initializes a user db from a list of existing directories + * at the specified `--path` parameter when executing the command. + * Useful when there is already a collection of folders. + */ const initDb = async () => { log(ctx, 'Grabbing existing directories'); let directory = ''; @@ -15,7 +20,7 @@ const initDb = async () => { error(ctx, err); return; } - const children = await getChildren(directory); + const children = await getChildDirectories(directory); const db = children.map(child => ({ 'user': child, 'lastUpdated': 'never',