From a0f7e16fc367eff6321960e59568eedc9534ee45 Mon Sep 17 00:00:00 2001 From: Lightling Date: Tue, 21 Jan 2025 20:29:38 -0500 Subject: [PATCH] begin to support bsky --- README.md | 1 + lib/args.js | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/dl.js | 38 ++++++++++++++++++++++-------------- 3 files changed, 80 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index cfd9a9b..4e3465d 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ Adds new user(s) to the db and initiate a full download similar to `run-download Args: - `--users={comma,separated,userlist}` +- `--site={"twitter"|"bluesky"}` - `--path={/path/to/your/archive}` - `--threads={#}` - `--args={gallery-dl args}` diff --git a/lib/args.js b/lib/args.js index 1ccfee6..d0e98b6 100644 --- a/lib/args.js +++ b/lib/args.js @@ -1,3 +1,8 @@ +export const SITES = { + TWITTER: 'twitter', + BLUESKY: 'bluesky', +}; + let argRegex = new RegExp('(\-\-[a-zA-Z0-9]+)(=)(.*)'); /** @@ -36,3 +41,53 @@ export const getArg = (argName) => { throw(`${argName} was not supplied!`); } } + +/** + * Determines if the provided arg is a supported site option and maps it to the name used throughout the repo + * @param {string} arg the name of the arg to validate as a site option + * @returns {string} the name of the site used throughout the repo + * @throws general error if site isn't supported + */ +export const validateSiteArg = (arg) => { + if (!arg) { + throw('site arg was not supplied!'); + } + + switch (arg.toLowerCase().trim()) { + case 'bsky': + case 'bluesky': { + return SITES.BLUESKY; + } + case 'twitter': + case 'twr': + case 'x': + case 'xitter': + case 'X, The Everything App (TM)': + case 'shitter': { + return SITES.TWITTER; + } + default: { + throw(`${arg} is not a supported site`); + } + } +}; + +/** + * Grabs the url of the site specified + * @param {string} site the site to get the url from + * @param {string} user the user to retrieve + * @returns {string} the url of the site + */ 
+export const getSiteUrl = (site, user) => { + switch (site) { + case SITES.BLUESKY: { + return `https://bsky.app/profile/${user}`; + } + case SITES.TWITTER: { + return `https://twitter.com/${user}/media`; + } + default: { + throw(`${site} was not valid`); + } + } +}; diff --git a/lib/dl.js b/lib/dl.js index c0bae4b..db1f8da 100644 --- a/lib/dl.js +++ b/lib/dl.js @@ -1,6 +1,6 @@ import { spawn } from 'child_process'; -import { getArg } from './args.js'; +import { getArg, getSiteUrl, SITES } from './args.js'; import { error, log } from './log.js'; import { trimNewlinesEnd } from './str.js'; @@ -35,15 +35,18 @@ const getDateUrlFromLog = (logs) => { /** * Runs {@link getUser} concurrently for many users - * @param {{ user: string, logs: string[] }[]} userDb userDb to run {@link getUser} on - * @param {number} threadMax max number of threads to run concurrently - * @param {string} directory the directory to save the user media folders in - * @param {'media' | 'search'} mode whether to retrieve from the `/media` route or `/search` API + * @param {object} params params for the function to run + * @param {{ user: string, logs: string[] }[]} params.userDb userDb to run {@link getUser} on + * @param {number} params.threadMax max number of threads to run concurrently + * @param {string} params.directory the directory to save the user media folders in + * @param {'media' | 'search'} params.mode whether to retrieve from the `/media` route or `/search` API + * @param {string} params.site the site to pull from * @returns {Promise} promise which resolves once all threads for every user have finished */ -export const getMany = (userDb, threadMax, directory, mode) => new Promise((resolve, reject) => { +export const getMany = (params) => new Promise((resolve, reject) => { let running = 0; let index = 0; + const { userDb, threadMax, directory, mode, site } = params; const get = () => { const checkError = (currentIndex, type, codeOrError) => { @@ -81,7 +84,12 @@ export const getMany = (userDb, 
threadMax, directory, mode) => new Promise((reso to: getDateUrlFromLog(userDb[currentIndex].logs), }; - let proc = getUser(userDb[currentIndex].user, directory, modeParams); + let proc = getUser({ + user: userDb[currentIndex].user, + path: directory, + modeParams, + site, + }); userDb[currentIndex].running = mode; proc.stdout.on('data', data => { userDb[currentIndex].logs.push(trimNewlinesEnd(data)); @@ -101,17 +109,19 @@ export const getMany = (userDb, threadMax, directory, mode) => new Promise((reso /** * Retrieves gallery for specified user and saves to the specified parent path - * @param {string} user the user to retrieve media from - * @param {string} path the path to save the user's media folder in - * @param {'media' | { from: string, to: string }} modeParams depending on the mode: + * @param {object} params the params to run the function + * @param {string} params.user the user to retrieve media from + * @param {string} params.path the path to save the user's media folder in + * @param {'media' | { from: string, to: string }} params.modeParams depending on the mode: * - if `'media'`, will retrieve from /media * - if object, will retrieve from /search?f=live&src=typed_query&q=(from%3A``)%20since%3A%20until%3A`` + * @param {string} params.site the site to pull from * @returns {ChildProcess} the process that was {@link spawn spawned} */ -export const getUser = (user, path, modeParams) => { - //"https://twitter.com/search?f=live&q=(from%3Aad_varg)%20until%3A2023-06-30%20since%3A2007-01-01&src=typed_query" - let url = `https://twitter.com/${user}/media`; - if (modeParams !== 'media') { +export const getUser = (params) => { + const { user, path, modeParams, site } = params; + let url = getSiteUrl(site, user); + if (modeParams !== 'media' && site === SITES.TWITTER) { url = `https://twitter.com/search?f=live&src=typed_query&q=(from%3A${user})`; if (!!modeParams.from) { url += `%20since%3A${modeParams.from}`;