From e3af35a68e32e7e5017f0d602b0cdb8d00352daa Mon Sep 17 00:00:00 2001 From: ewin Date: Thu, 27 Mar 2025 10:48:33 -0400 Subject: [PATCH] break things up, do some proper mediawiki API stuff to support editing --- edb-id-bot.mjs | 130 ++++++++--------------------------- lodestone.mjs | 32 +++++++++ mediawiki.mjs | 179 +++++++++++++++++++++++++++++++++++++++++++++++++ util.mjs | 20 ++++++ 4 files changed, 260 insertions(+), 101 deletions(-) create mode 100644 lodestone.mjs create mode 100644 mediawiki.mjs create mode 100644 util.mjs diff --git a/edb-id-bot.mjs b/edb-id-bot.mjs index 86427bb..8d9a3b3 100755 --- a/edb-id-bot.mjs +++ b/edb-id-bot.mjs @@ -1,52 +1,8 @@ #!/usr/bin/env node -import {execSync} from 'node:child_process'; -import makeFetchCookie from 'fetch-cookie'; - -const fetchWithCookies = makeFetchCookie(fetch); - -/** - * @see https://stackoverflow.com/a/6969486 - * @param {string} - * @returns {string} - */ -const regExpEscape = str => str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); - -/** - * Creates a regular expression that matches a link to the named item and - * captures its EDB ID from the matched link's `href` attribute. - * @param {string} name - * @returns {RegExp} - */ -const itemLinkRegExp = name => new RegExp(`]*>(?${regExpEscape(name)})`, 'i'); - -/** - * Gets the ID of the named item in Eorzea Database. - * @param {string} name - * @returns {Promise} - */ -async function findItemID (name) { - // execute a search for the item's name - const searchURL = `https://na.finalfantasyxiv.com/lodestone/playguide/db/item/?q=${encodeURIComponent(name)}`; - const response = await fetchWithCookies(searchURL); - const body = await response.text(); - // find an `` in the HTML response whose text exactly matches the name - const match = body.match(itemLinkRegExp(name)); - // return the ID parsed from the URL in the `href` attribute - return match?.groups.id; -} - -/** - * Gets the current contents of the named item's wiki page and returns the - * contents with the infobox updated to use the given EDB item ID. - * @param {string} name - * @returns {Promise} - */ -async function getWikiPageContents (name) { - const response = await fetchWithCookies(`https://ffxiv.consolegameswiki.com/mediawiki/index.php?action=raw&title=${encodeURIComponent(name)}`); - const rawContents = await response.text(); - return rawContents; -} +import {findItemEDBID} from './lodestone.mjs'; +import {MediaWikiClient} from './mediawiki.mjs'; +import {diff} from './util.mjs'; /** * Matches an empty `id-edb` infobox parameter which can just have a value @@ -84,78 +40,50 @@ function insertInfoboxEDBID (pageContent, edbID) { throw new Error('Dunno how to insert the parameter into this page'); } -const wikiAPI = 'https://ffxiv.consolegameswiki.com/mediawiki/api.php'; +// Log into our wiki client +const mw = new MediaWikiClient('https://ffxiv.consolegameswiki.com/mediawiki'); +await mw.login(process.env.MW_USERNAME, process.env.MW_PASSWORD); -/** - * Gets the list of wiki pages from "Category:Missing EDB ID". - * @returns {Promise<{pageid: number; title: string}[]>} - */ -async function getItemPagesWithNoEDBID () { - const response = await fetchWithCookies(`https://ffxiv.consolegameswiki.com/mediawiki/api.php?${new URLSearchParams({ - action: 'query', - list: 'categorymembers', - cmlimit: 500, - cmtitle: 'Category:Missing EDB ID', - format: 'json', - })}`); - const body = await response.json(); - if (body.error) { - throw new Error(`[${body.error.code}] ${body.error.info}`); - } - return body.query.categorymembers; -} +// Get pages in the "Missing EDB ID" category +const itemPagesWithoutEDBIDs = (await mw.listCategoryPages('Category:Missing EDB ID', 500)).slice(345, 346); +console.log('Processing', itemPagesWithoutEDBIDs.length, 'item pages from [[Category:Missing EDB ID]]\n'); -/** terrible terrible terrible string diff helper for debugging */ -function diff (a, b) { - // base64 input strings before passing to shell to avoid escaping issues - // https://stackoverflow.com/a/60221847 - // also use `|| true` to not throw an error when `diff` returns non-zero - execSync(`bash -c ' - diff --color -u <(echo ${btoa(a)} | base64 -d) <(echo ${btoa(b)} | base64 -d) - ' || true`, { - // display result directly in terminal - stdio: 'inherit', - }); -} +for (const {title} of itemPagesWithoutEDBIDs) { + // this runs serially with an artificial delay between requests to decrease + // the chance of sqenix sending ninjas to my house + await new Promise(resolve => setTimeout(resolve, 5000)); -/** - * Given an item name, looks up its EDB ID and edits its wiki page to include - * that ID in the item infobox if it doesn't already. - */ -async function processItem (name) { - console.log('Page:', name); - const edbID = await findItemID(name); + console.log('Page:', title); + // look up on EDB + const edbID = await findItemEDBID(title); if (!edbID) { console.log('No EDB ID found for this item, skipping'); - return; + continue; } console.log('EDB ID:', edbID, `(https://na.finalfantasyxiv.com/lodestone/playguide/db/item/${encodeURIComponent(edbID)})`); + // rewrite wiki page to include id-edb infobox parameter let updatedText; try { - const originalText = await getWikiPageContents(name); + const originalText = await mw.readPage(title); updatedText = insertInfoboxEDBID(originalText, edbID); diff(originalText, updatedText); } catch (error) { console.log(error); console.log('not doing anything with this item'); - return; + continue; } - // TODO: actually submit wiki edit -} - -const itemPagesWithoutEDBIDs = await getItemPagesWithNoEDBID(); -console.log('Looking up EDB IDs of', itemPagesWithoutEDBIDs.length, 'items\n'); - -for (const {title} of itemPagesWithoutEDBIDs) { - await processItem(title); - + // write the new stuff back to the wiki + try { + // await mw.editPage(title); + } catch (error) { + console.error(error); + console.error('writes should not fail, this seems bad, dying now'); + process.exit(1); + } + console.log('Written.'); console.log(); - - // this runs serially with an artificial delay between requests to decrease - // the chance of sqenix sending ninjas to my house - await new Promise(resolve => setTimeout(resolve, 5000)); } console.log('done!'); diff --git a/lodestone.mjs b/lodestone.mjs new file mode 100644 index 0000000..034c6fc --- /dev/null +++ b/lodestone.mjs @@ -0,0 +1,32 @@ +// Utilities for scraping data from the Lodestone + +/** + * @see https://stackoverflow.com/a/6969486 + * @param {string} + * @returns {string} + */ +const regExpEscape = str => str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + +/** + * Creates a regular expression that matches a link to the named item and + * captures its EDB ID from the matched link's `href` attribute. + * @param {string} name + * @returns {RegExp} + */ +const itemLinkRegExp = name => new RegExp(`]*>(?${regExpEscape(name)})`, 'i'); + +/** + * Gets the ID of the named item in Eorzea Database. + * @param {string} name + * @returns {Promise} + */ +export async function findItemEDBID (name) { + // execute a search for the item's name + const searchURL = `https://na.finalfantasyxiv.com/lodestone/playguide/db/item/?q=${encodeURIComponent(name)}`; + const response = await fetch(searchURL); + const body = await response.text(); + // find an `` in the HTML response whose text exactly matches the name + const match = body.match(itemLinkRegExp(name)); + // return the ID parsed from the URL in the `href` attribute + return match?.groups.id; +} diff --git a/mediawiki.mjs b/mediawiki.mjs new file mode 100644 index 0000000..73f1ed3 --- /dev/null +++ b/mediawiki.mjs @@ -0,0 +1,179 @@ +// Extremely basic API client for MediaWiki + +import makeFetchCookie from 'fetch-cookie'; + +function formDataBody (entries) { + let data = new FormData(); + for (const [key, value] of Object.entries(entries)) { + if (value != null && value != false) { + data.set(key, value); + } + } + return data; +} + +export class MediaWikiClient { + /** + * Creates a new client. Remember to also call `.login()`. + * @param {string} wikiURL Target wiki's MediaWiki path (i.e. the path that + * contains `index.php` and `api.php`) without a trailing slash. For example + * for English Wikipedia this would be `'https://en.wikipedia.org/w'`. + */ + constructor (wikiURL) { + this.wikiURL = wikiURL; + this.fetch = makeFetchCookie(fetch); + } + + /** + * Makes a GET request against `index.php`. + * @param {Record} params Query string parameters + * @param {RequestInit} [options] Additional fetch options + * @returns {Promise} + */ + fetchIndexGet (params, options = {}) { + return this.fetch(`${this.wikiURL}/index.php?${new URLSearchParams(params)}`, { + ...options, + method: 'GET', + }); + } + + /** + * Makes a JSON GET request against `api.php`. + * @param {Record} params Query string parameters + * @param {RequestInit} [options] Additional fetch options + * @returns {Promise} + */ + async fetchApiGet (params, options = {}) { + const response = await this.fetch(`${this.wikiURL}/api.php?${new URLSearchParams({ + ...params, + format: 'json', + })}`, { + ...options, + method: 'GET', + }); + const body = await response.json(); + if (body.error) { + throw new Error(`[${body.error.code}] ${body.error.info}`); + } + return body; + } + + /** + * Makes a JSON POST request against `api.php`. + * @param {Record} params Form data body parameters + * @param {RequestInit} [options] Additional fetch options + * @returns {Promise} + */ + async fetchApiPost (params, options = {}) { + const response = await this.fetch(`${this.wikiURL}/api.php`, { + ...options, + method: 'POST', + body: formDataBody({ + ...params, + format: 'json', + }), + }); + return response.json(); + } + + /** + * Obtains a login token for authenticating. + * @returns {Promise} + */ + async getLoginToken () { + const body = await this.fetchApiGet({ + action: 'query', + meta: 'tokens', + type: 'login', + }); + return body.query.tokens.logintoken; + } + + /** + * Obtains a CSRF token for making edits. + * @returns {Promise} + */ + async getCSRFToken () { + const body = await this.fetchApiGet({ + action: 'query', + meta: 'tokens', + }); + return body.query.tokens.csrftoken; + } + + /** + * Logs in with the given bot credentials. + * @param {string} username + * @param {string} password + * @returns {Promise} + */ + async login (username, password) { + const loginToken = await this.getLoginToken(); + const body = await this.fetchApiPost({ + action: 'login', + lgname: username, + lgpassword: password, + lgtoken: loginToken, + }); + if (body.login.result === 'Failed') { + throw new Error(body.login.reason); + } + } + + /** + * Gets the current contents of the named item's wiki page and returns the + * contents with the infobox updated to use the given EDB item ID. + * @param {string} name + * @returns {Promise} + */ + async readPage (title) { + const response = await this.fetchIndexGet({ + action: 'raw', + title, + }); + return response.text(); + } + + /** + * Updates the named page to the given text. + * @param {string} title + * @param {string} text + * @param {string} summary Edit summary + * @param {boolean} [minor] If true, this is a minor edit + * @returns {Promise} + */ + async editPage (title, text, summary, minor = false) { + const csrfToken = await this.getCSRFToken(); + const body = await this.fetchApiPost({ + action: 'edit', + title, + text, + summary, + minor, + bot: true, + watchlist: 'nochange', + token: csrfToken, + format: 'json', + }); + // TODO: error handling + console.log(body); + return body; + } + + /** + * Gets the list of wiki pages that belong to the given category. + * @param {string} name Category name including the `Category:` namespace. + * @param {string} limit Maximum number of items to return. Must be 500 or + * less. I'm lazy and not supporting API paging so deal with it. + * @returns {Promise<{pageid: number; title: string}[]>} + */ + async listCategoryPages (name, limit = 50) { + const body = await this.fetchApiGet({ + action: 'query', + list: 'categorymembers', + cmtitle: name, + cmlimit: limit, + }); + return body.query.categorymembers; + } +} diff --git a/util.mjs b/util.mjs new file mode 100644 index 0000000..12bd843 --- /dev/null +++ b/util.mjs @@ -0,0 +1,20 @@ +// misc helpers + +import {execSync} from 'node:child_process'; + +/** + * terrible terrible terrible string diff helper for debugging + * @param {string} a + * @param {string} b + */ +export function diff (a, b) { + // base64 input strings before passing to shell to avoid escaping issues + // https://stackoverflow.com/a/60221847 + // also use `|| true` to not throw an error when `diff` returns non-zero + execSync(`bash -c ' + diff --color -u <(echo ${btoa(a)} | base64 -d) <(echo ${btoa(b)} | base64 -d) + ' || true`, { + // display result directly in terminal + stdio: 'inherit', + }); +}