diff --git a/edb-id-bot.mjs b/edb-id-bot.mjs
index 86427bb..8d9a3b3 100755
--- a/edb-id-bot.mjs
+++ b/edb-id-bot.mjs
@@ -1,52 +1,8 @@
#!/usr/bin/env node
-import {execSync} from 'node:child_process';
-import makeFetchCookie from 'fetch-cookie';
-
-const fetchWithCookies = makeFetchCookie(fetch);
-
-/**
- * @see https://stackoverflow.com/a/6969486
- * @param {string}
- * @returns {string}
- */
-const regExpEscape = str => str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
-
-/**
- * Creates a regular expression that matches a link to the named item and
- * captures its EDB ID from the matched link's `href` attribute.
- * @param {string} name
- * @returns {RegExp}
- */
-const itemLinkRegExp = name => new RegExp(`]*>(?${regExpEscape(name)})`, 'i');
-
-/**
- * Gets the ID of the named item in Eorzea Database.
- * @param {string} name
- * @returns {Promise}
- */
-async function findItemID (name) {
- // execute a search for the item's name
- const searchURL = `https://na.finalfantasyxiv.com/lodestone/playguide/db/item/?q=${encodeURIComponent(name)}`;
- const response = await fetchWithCookies(searchURL);
- const body = await response.text();
- // find an `` in the HTML response whose text exactly matches the name
- const match = body.match(itemLinkRegExp(name));
- // return the ID parsed from the URL in the `href` attribute
- return match?.groups.id;
-}
-
-/**
- * Gets the current contents of the named item's wiki page and returns the
- * contents with the infobox updated to use the given EDB item ID.
- * @param {string} name
- * @returns {Promise}
- */
-async function getWikiPageContents (name) {
- const response = await fetchWithCookies(`https://ffxiv.consolegameswiki.com/mediawiki/index.php?action=raw&title=${encodeURIComponent(name)}`);
- const rawContents = await response.text();
- return rawContents;
-}
+import {findItemEDBID} from './lodestone.mjs';
+import {MediaWikiClient} from './mediawiki.mjs';
+import {diff} from './util.mjs';
/**
* Matches an empty `id-edb` infobox parameter which can just have a value
@@ -84,78 +40,50 @@ function insertInfoboxEDBID (pageContent, edbID) {
throw new Error('Dunno how to insert the parameter into this page');
}
-const wikiAPI = 'https://ffxiv.consolegameswiki.com/mediawiki/api.php';
+// Log into our wiki client
+const mw = new MediaWikiClient('https://ffxiv.consolegameswiki.com/mediawiki');
+await mw.login(process.env.MW_USERNAME, process.env.MW_PASSWORD);
-/**
- * Gets the list of wiki pages from "Category:Missing EDB ID".
- * @returns {Promise<{pageid: number; title: string}[]>}
- */
-async function getItemPagesWithNoEDBID () {
- const response = await fetchWithCookies(`https://ffxiv.consolegameswiki.com/mediawiki/api.php?${new URLSearchParams({
- action: 'query',
- list: 'categorymembers',
- cmlimit: 500,
- cmtitle: 'Category:Missing EDB ID',
- format: 'json',
- })}`);
- const body = await response.json();
- if (body.error) {
- throw new Error(`[${body.error.code}] ${body.error.info}`);
- }
- return body.query.categorymembers;
-}
+// Get every page in the "Missing EDB ID" category (capped at 500, no paging)
+const itemPagesWithoutEDBIDs = await mw.listCategoryPages('Category:Missing EDB ID', 500);
+console.log('Processing', itemPagesWithoutEDBIDs.length, 'item pages from [[Category:Missing EDB ID]]\n');
-/** terrible terrible terrible string diff helper for debugging */
-function diff (a, b) {
- // base64 input strings before passing to shell to avoid escaping issues
- // https://stackoverflow.com/a/60221847
- // also use `|| true` to not throw an error when `diff` returns non-zero
- execSync(`bash -c '
- diff --color -u <(echo ${btoa(a)} | base64 -d) <(echo ${btoa(b)} | base64 -d)
- ' || true`, {
- // display result directly in terminal
- stdio: 'inherit',
- });
-}
+for (const {title} of itemPagesWithoutEDBIDs) {
+ // this runs serially with an artificial delay between requests to decrease
+ // the chance of sqenix sending ninjas to my house
+ await new Promise(resolve => setTimeout(resolve, 5000));
-/**
- * Given an item name, looks up its EDB ID and edits its wiki page to include
- * that ID in the item infobox if it doesn't already.
- */
-async function processItem (name) {
- console.log('Page:', name);
- const edbID = await findItemID(name);
+ console.log('Page:', title);
+ // look up on EDB
+ const edbID = await findItemEDBID(title);
if (!edbID) {
console.log('No EDB ID found for this item, skipping');
- return;
+ continue;
}
console.log('EDB ID:', edbID, `(https://na.finalfantasyxiv.com/lodestone/playguide/db/item/${encodeURIComponent(edbID)})`);
+ // rewrite wiki page to include id-edb infobox parameter
let updatedText;
try {
- const originalText = await getWikiPageContents(name);
+ const originalText = await mw.readPage(title);
updatedText = insertInfoboxEDBID(originalText, edbID);
diff(originalText, updatedText);
} catch (error) {
console.log(error);
console.log('not doing anything with this item');
- return;
+ continue;
}
- // TODO: actually submit wiki edit
-}
-
-const itemPagesWithoutEDBIDs = await getItemPagesWithNoEDBID();
-console.log('Looking up EDB IDs of', itemPagesWithoutEDBIDs.length, 'items\n');
-
-for (const {title} of itemPagesWithoutEDBIDs) {
- await processItem(title);
-
+ // write the new stuff back to the wiki
+ try {
+ // await mw.editPage(title);
+ } catch (error) {
+ console.error(error);
+ console.error('writes should not fail, this seems bad, dying now');
+ process.exit(1);
+ }
+ console.log('Written.');
console.log();
-
- // this runs serially with an artificial delay between requests to decrease
- // the chance of sqenix sending ninjas to my house
- await new Promise(resolve => setTimeout(resolve, 5000));
}
console.log('done!');
diff --git a/lodestone.mjs b/lodestone.mjs
new file mode 100644
index 0000000..034c6fc
--- /dev/null
+++ b/lodestone.mjs
@@ -0,0 +1,32 @@
+// Utilities for scraping data from the Lodestone
+
+/**
+ * Backslash-escapes RegExp metacharacters (see https://stackoverflow.com/a/6969486).
+ * @param {string} str Text that should be matched literally
+ * @returns {string}
+ */
+const regExpEscape = str => str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+
+/**
+ * Creates a case-insensitive regular expression that matches an `<a>` whose
+ * text is the item name and captures the EDB ID from its `href` as group `id`.
+ * @param {string} name NOTE(review): the href path shape is assumed — confirm.
+ * @returns {RegExp}
+ */
+const itemLinkRegExp = name => new RegExp(`<a href="/lodestone/playguide/db/item/(?<id>[^/"]+)/?[^"]*"[^>]*>${regExpEscape(name)}</a>`, 'i');
+
+/**
+ * Gets the ID of the named item in Eorzea Database.
+ * @param {string} name
+ * @returns {Promise<string | undefined>} `undefined` when no link matches
+ */
+export async function findItemEDBID (name) {
+  // execute a search for the item's name
+  const searchURL = `https://na.finalfantasyxiv.com/lodestone/playguide/db/item/?q=${encodeURIComponent(name)}`;
+  const response = await fetch(searchURL);
+  // an HTTP error page must not be mistaken for "item not found"
+  if (!response.ok) throw new Error(`Lodestone search failed: HTTP ${response.status}`);
+  // find the `<a>` whose text matches the name; its `href` carries the ID
+  const match = (await response.text()).match(itemLinkRegExp(name));
+  return match?.groups.id;
+}
diff --git a/mediawiki.mjs b/mediawiki.mjs
new file mode 100644
index 0000000..73f1ed3
--- /dev/null
+++ b/mediawiki.mjs
@@ -0,0 +1,179 @@
+// Extremely basic API client for MediaWiki
+
+import makeFetchCookie from 'fetch-cookie';
+
+function formDataBody (entries) {
+  // Skip only null/undefined and literal `false` (a boolean API flag counts as
+  // set whenever it is present); keep '' and 0, which loose `!= false` dropped.
+  const data = new FormData();
+  for (const [key, value] of Object.entries(entries)) {
+    if (value != null && value !== false) data.set(key, value);
+  }
+  return data;
+}
+
+export class MediaWikiClient {
+  /**
+   * Creates a new client. Remember to also call `.login()`.
+   * @param {string} wikiURL Target wiki's MediaWiki path (i.e. the path that
+   * contains `index.php` and `api.php`) without a trailing slash. For example
+   * for English Wikipedia this would be `'https://en.wikipedia.org/w'`.
+   */
+  constructor (wikiURL) {
+    this.wikiURL = wikiURL;
+    this.fetch = makeFetchCookie(fetch);
+  }
+
+  /**
+   * Makes a GET request against `index.php`.
+   * @param {Record<string, any>} params Query string parameters
+   * @param {RequestInit} [options] Additional fetch options
+   * @returns {Promise<Response>}
+   */
+  fetchIndexGet (params, options = {}) {
+    return this.fetch(`${this.wikiURL}/index.php?${new URLSearchParams(params)}`, {
+      ...options,
+      method: 'GET',
+    });
+  }
+
+  /**
+   * Makes a JSON GET request against `api.php`.
+   * @param {Record<string, any>} params Query string parameters
+   * @param {RequestInit} [options] Additional fetch options
+   * @returns {Promise<any>} Parsed JSON body; rejects if it contains `error`
+   */
+  async fetchApiGet (params, options = {}) {
+    const response = await this.fetch(`${this.wikiURL}/api.php?${new URLSearchParams({
+      ...params,
+      format: 'json',
+    })}`, {
+      ...options,
+      method: 'GET',
+    });
+    const body = await response.json();
+    if (body.error) {
+      throw new Error(`[${body.error.code}] ${body.error.info}`);
+    }
+    return body;
+  }
+
+  /**
+   * Makes a JSON POST request against `api.php`.
+   * @param {Record<string, any>} params Form data body parameters
+   * @param {RequestInit} [options] Additional fetch options
+   * @returns {Promise<any>} Parsed JSON body; rejects if it contains `error`
+   */
+  async fetchApiPost (params, options = {}) {
+    const response = await this.fetch(`${this.wikiURL}/api.php`, {
+      ...options,
+      method: 'POST',
+      body: formDataBody({...params, format: 'json'}),
+    });
+    const body = await response.json();
+    // surface API-level failures the same way `fetchApiGet` does
+    if (body.error) throw new Error(`[${body.error.code}] ${body.error.info}`);
+    return body;
+  }
+
+  /**
+   * Obtains a login token for authenticating.
+   * @returns {Promise<string>}
+   */
+  async getLoginToken () {
+    const body = await this.fetchApiGet({
+      action: 'query',
+      meta: 'tokens',
+      type: 'login',
+    });
+    return body.query.tokens.logintoken;
+  }
+
+  /**
+   * Obtains a CSRF token for making edits.
+   * @returns {Promise<string>}
+   */
+  async getCSRFToken () {
+    const body = await this.fetchApiGet({
+      action: 'query',
+      meta: 'tokens',
+    });
+    return body.query.tokens.csrftoken;
+  }
+
+  /**
+   * Logs in with the given bot credentials.
+   * @param {string} username
+   * @param {string} password
+   * @returns {Promise<void>} Rejects unless the API reports `Success`
+   */
+  async login (username, password) {
+    const loginToken = await this.getLoginToken();
+    const body = await this.fetchApiPost({
+      action: 'login',
+      lgname: username,
+      lgpassword: password,
+      lgtoken: loginToken,
+    });
+    if (body.login?.result !== 'Success') {
+      throw new Error(body.login?.reason ?? `Login failed: ${body.login?.result}`);
+    }
+  }
+
+  /**
+   * Gets the current raw wikitext of the named page via `index.php?action=raw`.
+   * Rejects on a non-2xx status instead of returning an error body as wikitext.
+   * @param {string} title
+   * @returns {Promise<string>}
+   */
+  async readPage (title) {
+    const response = await this.fetchIndexGet({action: 'raw', title});
+    if (!response.ok) {
+      throw new Error(`Could not read "${title}": HTTP ${response.status}`);
+    }
+    return response.text();
+  }
+
+  /**
+   * Updates the named page to the given text.
+   * @param {string} title
+   * @param {string} text
+   * @param {string} summary Edit summary
+   * @param {boolean} [minor] If true, this is a minor edit
+   * @returns {Promise<any>} The API response; rejects if the edit was not saved
+   */
+  async editPage (title, text, summary, minor = false) {
+    const csrfToken = await this.getCSRFToken();
+    const body = await this.fetchApiPost({
+      action: 'edit',
+      title,
+      text,
+      summary,
+      minor,
+      bot: true,
+      watchlist: 'nochange',
+      token: csrfToken,
+    });
+    // keep the raw response visible, then refuse to report an unsaved edit as done
+    console.log(body);
+    if (body.edit?.result !== 'Success') throw new Error(`Edit of "${title}" was not saved`);
+    return body;
+  }
+
+  /**
+   * Gets the list of wiki pages that belong to the given category.
+   * @param {string} name Category name including the `Category:` namespace.
+   * @param {number} [limit] Maximum number of items to return. Must be 500 or
+   * less. I'm lazy and not supporting API paging so deal with it.
+   * @returns {Promise<{pageid: number; title: string}[]>}
+   */
+  async listCategoryPages (name, limit = 50) {
+    const body = await this.fetchApiGet({
+      action: 'query',
+      list: 'categorymembers',
+      cmtitle: name,
+      cmlimit: limit,
+    });
+    return body.query.categorymembers;
+  }
+}
diff --git a/util.mjs b/util.mjs
new file mode 100644
index 0000000..12bd843
--- /dev/null
+++ b/util.mjs
@@ -0,0 +1,20 @@
+// misc helpers
+
+import {execSync} from 'node:child_process';
+
+/**
+ * terrible terrible terrible string diff helper for debugging
+ * @param {string} a
+ * @param {string} b
+ */
+export function diff (a, b) {
+  // base64 the inputs (as UTF-8; `btoa` throws outside Latin-1) so the shell
+  // needs no escaping (https://stackoverflow.com/a/60221847); `|| true` keeps
+  // a non-zero exit status from `diff` from making `execSync` throw
+  const b64 = text => Buffer.from(text, 'utf8').toString('base64');
+  execSync(`bash -c '
+    diff --color -u <(echo ${b64(a)} | base64 -d) <(echo ${b64(b)} | base64 -d)
+  ' || true`, {
+    stdio: 'inherit', // display result directly in terminal
+  });
+}