1
0
Fork 0

begin to support bsky

This commit is contained in:
lightling 2025-01-21 20:29:38 -05:00
parent e806dc23ae
commit a0f7e16fc3
3 changed files with 80 additions and 14 deletions

View file

@ -31,6 +31,7 @@ Adds new user(s) to the db and initiate a full download similar to `run-download
Args:
- `--users={comma,separated,userlist}`
- `--site={"twitter"|"bluesky"}`
- `--path={/path/to/your/archive}`
- `--threads={#}`
- `--args={gallery-dl args}`

View file

@ -1,3 +1,8 @@
/**
 * Canonical site names used throughout the repo.
 * Frozen so that no importing module can add, remove or overwrite an entry at runtime.
 */
export const SITES = Object.freeze({
  TWITTER: 'twitter',
  BLUESKY: 'bluesky',
});
// Matches `--name=value` text; captures the `--name` part, the `=` separator, and everything after it.
let argRegex = /(--[a-zA-Z0-9]+)(=)(.*)/;
/**
@ -36,3 +41,53 @@ export const getArg = (argName) => {
throw(`${argName} was not supplied!`);
}
}
/**
 * Determines if the provided arg is a supported site option and maps it to the name used throughout the repo.
 * Matching is case-insensitive and ignores leading/trailing whitespace.
 * @param {string} arg the site value to validate (e.g. `'bsky'`, `'Twitter'`)
 * @returns {string} the matching {@link SITES} value
 * @throws {string} a message string if `arg` is missing/empty or is not a recognised site alias
 */
export const validateSiteArg = (arg) => {
  if (!arg) {
    throw('site arg was not supplied!');
  }

  // The input is lowercased before matching, so every label below must itself be lowercase to be reachable.
  switch (arg.toLowerCase().trim()) {
    case 'bsky':
    case 'bluesky': {
      return SITES.BLUESKY;
    }
    case 'twitter':
    case 'twr':
    case 'x':
    case 'xitter':
    case 'x, the everything app (tm)':
    case 'shitter': {
      return SITES.TWITTER;
    }
    default: {
      // Report the value exactly as the caller supplied it (not the normalised form).
      throw(`${arg} is not a supported site`);
    }
  }
};
/**
 * Builds the url to retrieve a user's content from on the given site
 * @param {string} site one of the {@link SITES} values
 * @param {string} user the user the url should point at
 * @returns {string} the bluesky profile url, or the twitter `/media` url, for `user`
 * @throws {string} a message string when `site` matches no {@link SITES} value
 */
export const getSiteUrl = (site, user) => {
  if (site === SITES.BLUESKY) {
    return `https://bsky.app/profile/${user}`;
  }

  if (site === SITES.TWITTER) {
    return `https://twitter.com/${user}/media`;
  }

  // Nothing matched: reject rather than hand back a url for an unknown site.
  throw(`${site} was not valid`);
};

View file

@ -1,6 +1,6 @@
import { spawn } from 'child_process';
import { getArg } from './args.js';
import { getArg, getSiteUrl, SITES } from './args.js';
import { error, log } from './log.js';
import { trimNewlinesEnd } from './str.js';
@ -35,15 +35,18 @@ const getDateUrlFromLog = (logs) => {
/**
* Runs {@link getUser} concurrently for many users
* @param {{ user: string, logs: string[] }[]} userDb userDb to run {@link getUser} on
* @param {number} threadMax max number of threads to run concurrently
* @param {string} directory the directory to save the user media folders in
* @param {'media' | 'search'} mode whether to retrieve from the `/media` route or `/search` API
* @param {object} params params for the function to run
* @param {{ user: string, logs: string[] }[]} params.userDb userDb to run {@link getUser} on
* @param {number} params.threadMax max number of threads to run concurrently
* @param {string} params.directory the directory to save the user media folders in
* @param {'media' | 'search'} params.mode whether to retrieve from the `/media` route or `/search` API
* @param {string} params.site the site to pull from
* @returns {Promise<void>} promise which resolves once all threads for every user have finished
*/
export const getMany = (userDb, threadMax, directory, mode) => new Promise((resolve, reject) => {
export const getMany = (params) => new Promise((resolve, reject) => {
let running = 0;
let index = 0;
const { userDb, threadMax, directory, mode, site } = params;
const get = () => {
const checkError = (currentIndex, type, codeOrError) => {
@ -81,7 +84,12 @@ export const getMany = (userDb, threadMax, directory, mode) => new Promise((reso
to: getDateUrlFromLog(userDb[currentIndex].logs),
};
let proc = getUser(userDb[currentIndex].user, directory, modeParams);
let proc = getUser({
user: userDb[currentIndex].user,
path: directory,
modeParams,
site,
});
userDb[currentIndex].running = mode;
proc.stdout.on('data', data => {
userDb[currentIndex].logs.push(trimNewlinesEnd(data));
@ -101,17 +109,19 @@ export const getMany = (userDb, threadMax, directory, mode) => new Promise((reso
/**
* Retrieves gallery for specified user and saves to the specified parent path
* @param {string} user the user to retrieve media from
* @param {string} path the path to save the user's media folder in
* @param {'media' | { from: string, to: string }} modeParams depending on the mode:
* @param {object} params the params to run the function
* @param {string} params.user the user to retrieve media from
* @param {string} params.path the path to save the user's media folder in
* @param {'media' | { from: string, to: string }} params.modeParams depending on the mode:
* - if `'media'`, will retrieve from /media
* - if object, will retrieve from /search?f=live&src=typed_query&q=(from%3A`<USER>`)%20since%3A<FROM|yyyy-mm-dd>%20until%3A`<TO|yyyy-mm-dd>`
* @param {string} params.site the site to pull from
* @returns {ChildProcess} the process that was {@link spawn spawned}
*/
export const getUser = (user, path, modeParams) => {
//"https://twitter.com/search?f=live&q=(from%3Aad_varg)%20until%3A2023-06-30%20since%3A2007-01-01&src=typed_query"
let url = `https://twitter.com/${user}/media`;
if (modeParams !== 'media') {
export const getUser = (params) => {
const { user, path, modeParams, site } = params;
let url = getSiteUrl(site, user);
if (modeParams !== 'media' && site === SITES.TWITTER) {
url = `https://twitter.com/search?f=live&src=typed_query&q=(from%3A${user})`;
if (!!modeParams.from) {
url += `%20since%3A${modeParams.from}`;