break things up, do some proper mediawiki API stuff to support editing
This commit is contained in:
parent
64eb4da894
commit
e3af35a68e
4 changed files with 260 additions and 101 deletions
130
edb-id-bot.mjs
130
edb-id-bot.mjs
|
|
@ -1,52 +1,8 @@
|
||||||
#!/usr/bin/env node
|
#!/usr/bin/env node
|
||||||
|
|
||||||
import {execSync} from 'node:child_process';
|
import {findItemEDBID} from './lodestone.mjs';
|
||||||
import makeFetchCookie from 'fetch-cookie';
|
import {MediaWikiClient} from './mediawiki.mjs';
|
||||||
|
import {diff} from './util.mjs';
|
||||||
const fetchWithCookies = makeFetchCookie(fetch);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @see https://stackoverflow.com/a/6969486
|
|
||||||
* @param {string} str
|
|
||||||
* @returns {string}
|
|
||||||
*/
|
|
||||||
const regExpEscape = function (str) {
	// Put a backslash in front of every regex metacharacter so the text can be
	// embedded in a pattern and matched literally.
	return str.replace(/[.*+?^${}()|[\]\\]/g, metaChar => `\\${metaChar}`);
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates a regular expression that matches a link to the named item and
|
|
||||||
* captures its EDB ID from the matched link's `href` attribute.
|
|
||||||
* @param {string} name
|
|
||||||
* @returns {RegExp}
|
|
||||||
*/
|
|
||||||
const itemLinkRegExp = name => {
	// escape the name so it is matched as literal text inside the pattern
	const escapedName = regExpEscape(name);
	const pattern = `<a href="/lodestone/playguide/db/item/(?<id>[a-z0-9]+)[^"]+"[^>]*>(?<name>${escapedName})</a>`;
	// `i` flag: the whole link, including the name, is matched case-insensitively
	return new RegExp(pattern, 'i');
};
|
|
||||||
|
|
||||||
/**
 * Searches Eorzea Database for an item by name and extracts its ID from the
 * search results page.
 * @param {string} name Item name to search for
 * @returns {Promise<string | undefined>} The ID captured from the matching
 * result link's `href`, or `undefined` when no link matches the name
 */
async function findItemID (name) {
	// run the name through the Lodestone item search
	const query = encodeURIComponent(name);
	const searchResponse = await fetchWithCookies(`https://na.finalfantasyxiv.com/lodestone/playguide/db/item/?q=${query}`);
	const html = await searchResponse.text();

	// look for a result link whose text is the item name
	const linkMatch = itemLinkRegExp(name).exec(html);
	if (linkMatch === null) {
		return undefined;
	}

	// the ID is the path segment captured from the link's `href`
	return linkMatch.groups.id;
}
|
|
||||||
|
|
||||||
/**
 * Fetches the raw contents of the named wiki page through
 * `index.php?action=raw`.
 * @param {string} name Title of the page to read
 * @returns {Promise<string>} The response body as text
 */
async function getWikiPageContents (name) {
	const encodedTitle = encodeURIComponent(name);
	const rawPage = await fetchWithCookies(`https://ffxiv.consolegameswiki.com/mediawiki/index.php?action=raw&title=${encodedTitle}`);
	return rawPage.text();
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Matches an empty `id-edb` infobox parameter which can just have a value
|
* Matches an empty `id-edb` infobox parameter which can just have a value
|
||||||
|
|
@ -84,78 +40,50 @@ function insertInfoboxEDBID (pageContent, edbID) {
|
||||||
throw new Error('Dunno how to insert the parameter into this page');
|
throw new Error('Dunno how to insert the parameter into this page');
|
||||||
}
|
}
|
||||||
|
|
||||||
const wikiAPI = 'https://ffxiv.consolegameswiki.com/mediawiki/api.php';
|
// Log into our wiki client
|
||||||
|
const mw = new MediaWikiClient('https://ffxiv.consolegameswiki.com/mediawiki');
|
||||||
|
await mw.login(process.env.MW_USERNAME, process.env.MW_PASSWORD);
|
||||||
|
|
||||||
/**
|
// Get pages in the "Missing EDB ID" category
|
||||||
* Gets the list of wiki pages from "Category:Missing EDB ID".
|
const itemPagesWithoutEDBIDs = (await mw.listCategoryPages('Category:Missing EDB ID', 500)).slice(345, 346);
|
||||||
* @returns {Promise<{pageid: number; title: string}[]>}
|
console.log('Processing', itemPagesWithoutEDBIDs.length, 'item pages from [[Category:Missing EDB ID]]\n');
|
||||||
*/
|
|
||||||
async function getItemPagesWithNoEDBID () {
|
|
||||||
const response = await fetchWithCookies(`https://ffxiv.consolegameswiki.com/mediawiki/api.php?${new URLSearchParams({
|
|
||||||
action: 'query',
|
|
||||||
list: 'categorymembers',
|
|
||||||
cmlimit: 500,
|
|
||||||
cmtitle: 'Category:Missing EDB ID',
|
|
||||||
format: 'json',
|
|
||||||
})}`);
|
|
||||||
const body = await response.json();
|
|
||||||
if (body.error) {
|
|
||||||
throw new Error(`[${body.error.code}] ${body.error.info}`);
|
|
||||||
}
|
|
||||||
return body.query.categorymembers;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** terrible terrible terrible string diff helper for debugging */
|
for (const {title} of itemPagesWithoutEDBIDs) {
|
||||||
function diff (a, b) {
|
// this runs serially with an artificial delay between requests to decrease
|
||||||
// base64 input strings before passing to shell to avoid escaping issues
|
// the chance of sqenix sending ninjas to my house
|
||||||
// https://stackoverflow.com/a/60221847
|
await new Promise(resolve => setTimeout(resolve, 5000));
|
||||||
// also use `|| true` to not throw an error when `diff` returns non-zero
|
|
||||||
execSync(`bash -c '
|
|
||||||
diff --color -u <(echo ${btoa(a)} | base64 -d) <(echo ${btoa(b)} | base64 -d)
|
|
||||||
' || true`, {
|
|
||||||
// display result directly in terminal
|
|
||||||
stdio: 'inherit',
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
console.log('Page:', title);
|
||||||
* Given an item name, looks up its EDB ID and edits its wiki page to include
|
// look up on EDB
|
||||||
* that ID in the item infobox if it doesn't already.
|
const edbID = await findItemEDBID(title);
|
||||||
*/
|
|
||||||
async function processItem (name) {
|
|
||||||
console.log('Page:', name);
|
|
||||||
const edbID = await findItemID(name);
|
|
||||||
if (!edbID) {
|
if (!edbID) {
|
||||||
console.log('No EDB ID found for this item, skipping');
|
console.log('No EDB ID found for this item, skipping');
|
||||||
return;
|
continue;
|
||||||
}
|
}
|
||||||
console.log('EDB ID:', edbID, `(https://na.finalfantasyxiv.com/lodestone/playguide/db/item/${encodeURIComponent(edbID)})`);
|
console.log('EDB ID:', edbID, `(https://na.finalfantasyxiv.com/lodestone/playguide/db/item/${encodeURIComponent(edbID)})`);
|
||||||
|
|
||||||
|
// rewrite wiki page to include id-edb infobox parameter
|
||||||
let updatedText;
|
let updatedText;
|
||||||
try {
|
try {
|
||||||
const originalText = await getWikiPageContents(name);
|
const originalText = await mw.readPage(title);
|
||||||
updatedText = insertInfoboxEDBID(originalText, edbID);
|
updatedText = insertInfoboxEDBID(originalText, edbID);
|
||||||
diff(originalText, updatedText);
|
diff(originalText, updatedText);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.log(error);
|
console.log(error);
|
||||||
console.log('not doing anything with this item');
|
console.log('not doing anything with this item');
|
||||||
return;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: actually submit wiki edit
|
// write the new stuff back to the wiki
|
||||||
}
|
try {
|
||||||
|
// await mw.editPage(title);
|
||||||
const itemPagesWithoutEDBIDs = await getItemPagesWithNoEDBID();
|
} catch (error) {
|
||||||
console.log('Looking up EDB IDs of', itemPagesWithoutEDBIDs.length, 'items\n');
|
console.error(error);
|
||||||
|
console.error('writes should not fail, this seems bad, dying now');
|
||||||
for (const {title} of itemPagesWithoutEDBIDs) {
|
process.exit(1);
|
||||||
await processItem(title);
|
}
|
||||||
|
console.log('Written.');
|
||||||
console.log();
|
console.log();
|
||||||
|
|
||||||
// this runs serially with an artificial delay between requests to decrease
|
|
||||||
// the chance of sqenix sending ninjas to my house
|
|
||||||
await new Promise(resolve => setTimeout(resolve, 5000));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log('done!');
|
console.log('done!');
|
||||||
|
|
|
||||||
32
lodestone.mjs
Normal file
32
lodestone.mjs
Normal file
|
|
@ -0,0 +1,32 @@
|
||||||
|
// Utilities for scraping data from the Lodestone
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @see https://stackoverflow.com/a/6969486
|
||||||
|
* @param {string} str
|
||||||
|
* @returns {string}
|
||||||
|
*/
|
||||||
|
const regExpEscape = function (str) {
	// Put a backslash in front of every regex metacharacter so the text can be
	// embedded in a pattern and matched literally.
	return str.replace(/[.*+?^${}()|[\]\\]/g, metaChar => `\\${metaChar}`);
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a regular expression that matches a link to the named item and
|
||||||
|
* captures its EDB ID from the matched link's `href` attribute.
|
||||||
|
* @param {string} name
|
||||||
|
* @returns {RegExp}
|
||||||
|
*/
|
||||||
|
const itemLinkRegExp = name => {
	// escape the name so it is matched as literal text inside the pattern
	const escapedName = regExpEscape(name);
	const pattern = `<a href="/lodestone/playguide/db/item/(?<id>[a-z0-9]+)[^"]+"[^>]*>(?<name>${escapedName})</a>`;
	// `i` flag: the whole link, including the name, is matched case-insensitively
	return new RegExp(pattern, 'i');
};
|
||||||
|
|
||||||
|
/**
 * Looks up an item's Eorzea Database ID by name.
 *
 * Runs a Lodestone item search for `name` and scans the returned HTML for a
 * result link whose text is the name (compared case-insensitively).
 * @param {string} name Item name to search for
 * @returns {Promise<string | undefined>} The ID captured from the matching
 * link's `href`, or `undefined` when no link matches the name
 */
export async function findItemEDBID (name) {
	// run the name through the Lodestone item search
	const query = encodeURIComponent(name);
	const searchResponse = await fetch(`https://na.finalfantasyxiv.com/lodestone/playguide/db/item/?q=${query}`);
	const html = await searchResponse.text();

	// look for a result link whose text is the item name
	const linkMatch = itemLinkRegExp(name).exec(html);
	if (linkMatch === null) {
		return undefined;
	}

	// the ID is the path segment captured from the link's `href`
	return linkMatch.groups.id;
}
|
||||||
179
mediawiki.mjs
Normal file
179
mediawiki.mjs
Normal file
|
|
@ -0,0 +1,179 @@
|
||||||
|
// Extremely basic API client for MediaWiki
|
||||||
|
|
||||||
|
import makeFetchCookie from 'fetch-cookie';
|
||||||
|
|
||||||
|
/**
 * Builds a `FormData` request body from a plain object of parameters.
 *
 * Entries whose value is `null`, `undefined` or exactly `false` are left out.
 * Every other value is kept, including `0`, `'0'` and the empty string.
 * @param {Record<string, unknown>} entries Parameter names mapped to values
 * @returns {FormData}
 */
function formDataBody (entries) {
	const data = new FormData();
	for (const [key, value] of Object.entries(entries)) {
		// Strict comparison on purpose: a loose `value != false` also discards
		// `0`, `''` and `'0'`, which are legitimate values (e.g. page text).
		if (value == null || value === false) {
			continue;
		}
		data.set(key, value);
	}
	return data;
}
|
||||||
|
|
||||||
|
/**
 * Throws if a parsed `api.php` response carries a top-level `error` object;
 * otherwise hands the response back untouched.
 * @param {any} body Parsed JSON response
 * @returns {any} The same `body`
 * @throws {Error} `[code] info` built from `body.error`
 */
function throwOnApiError (body) {
	if (body.error) {
		throw new Error(`[${body.error.code}] ${body.error.info}`);
	}
	return body;
}

export class MediaWikiClient {
	/**
	 * Creates a new client. Remember to also call `.login()`.
	 * @param {string} wikiURL Target wiki's MediaWiki path (i.e. the path that
	 * contains `index.php` and `api.php`) without a trailing slash. For example
	 * for English Wikipedia this would be `'https://en.wikipedia.org/w'`.
	 */
	constructor (wikiURL) {
		this.wikiURL = wikiURL;
		// cookie-aware fetch, so cookies set by one request are sent with later ones
		this.fetch = makeFetchCookie(fetch);
	}

	/**
	 * Makes a GET request against `index.php`.
	 * @param {Record<string, string>} params Query string parameters
	 * @param {RequestInit} [options] Additional fetch options
	 * @returns {Promise<Response>}
	 */
	fetchIndexGet (params, options = {}) {
		return this.fetch(`${this.wikiURL}/index.php?${new URLSearchParams(params)}`, {
			...options,
			method: 'GET',
		});
	}

	/**
	 * Makes a JSON GET request against `api.php`.
	 * @param {Record<string, string | number>} params Query string parameters
	 * @param {RequestInit} [options] Additional fetch options
	 * @returns {Promise<any>} Parsed JSON response
	 * @throws {Error} If the response contains a top-level `error`
	 */
	async fetchApiGet (params, options = {}) {
		const response = await this.fetch(`${this.wikiURL}/api.php?${new URLSearchParams({
			...params,
			format: 'json',
		})}`, {
			...options,
			method: 'GET',
		});
		return throwOnApiError(await response.json());
	}

	/**
	 * Makes a JSON POST request against `api.php`.
	 * @param {Record<string, unknown>} params Form data body parameters
	 * @param {RequestInit} [options] Additional fetch options
	 * @returns {Promise<any>} Parsed JSON response
	 * @throws {Error} If the response contains a top-level `error`
	 */
	async fetchApiPost (params, options = {}) {
		const response = await this.fetch(`${this.wikiURL}/api.php`, {
			...options,
			method: 'POST',
			body: formDataBody({
				...params,
				format: 'json',
			}),
		});
		// same error contract as `fetchApiGet`, so failed writes cannot pass silently
		return throwOnApiError(await response.json());
	}

	/**
	 * Obtains a login token for authenticating.
	 * @returns {Promise<string>}
	 */
	async getLoginToken () {
		const body = await this.fetchApiGet({
			action: 'query',
			meta: 'tokens',
			type: 'login',
		});
		return body.query.tokens.logintoken;
	}

	/**
	 * Obtains a CSRF token for making edits.
	 * @returns {Promise<string>}
	 */
	async getCSRFToken () {
		const body = await this.fetchApiGet({
			action: 'query',
			meta: 'tokens',
		});
		return body.query.tokens.csrftoken;
	}

	/**
	 * Logs in with the given bot credentials.
	 * @param {string} username
	 * @param {string} password
	 * @returns {Promise<void>}
	 * @throws {Error} If the API reports any login result other than `Success`
	 */
	async login (username, password) {
		const loginToken = await this.getLoginToken();
		const body = await this.fetchApiPost({
			action: 'login',
			lgname: username,
			lgpassword: password,
			lgtoken: loginToken,
		});
		// Anything but `Success` means we are not logged in; checking only for
		// `Failed` would let other unsuccessful results through.
		const {result, reason} = body.login ?? {};
		if (result !== 'Success') {
			throw new Error(reason ?? `Login did not succeed (result: ${result})`);
		}
	}

	/**
	 * Gets the current raw contents of the named page via
	 * `index.php?action=raw`.
	 * @param {string} title Title of the page to read
	 * @returns {Promise<string>} The response body as text
	 * @throws {Error} If the server answers with a non-2xx status
	 */
	async readPage (title) {
		const response = await this.fetchIndexGet({
			action: 'raw',
			title,
		});
		// don't hand an error page's body to the caller as if it were page content
		if (!response.ok) {
			throw new Error(`Failed to read page "${title}": HTTP ${response.status}`);
		}
		return response.text();
	}

	/**
	 * Updates the named page to the given text. The edit is flagged as a bot
	 * edit and leaves the watchlist unchanged.
	 * @param {string} title
	 * @param {string} text
	 * @param {string} summary Edit summary
	 * @param {boolean} [minor] If true, this is a minor edit
	 * @returns {Promise<any>} Parsed API response of the successful edit
	 * @throws {Error} If the API returns an error or the edit result is not
	 * `Success`
	 */
	async editPage (title, text, summary, minor = false) {
		const csrfToken = await this.getCSRFToken();
		// `format` is added by `fetchApiPost`
		const body = await this.fetchApiPost({
			action: 'edit',
			title,
			text,
			summary,
			minor,
			bot: true,
			watchlist: 'nochange',
			token: csrfToken,
		});
		// an edit can be rejected without a top-level `error`, so check the result too
		if (body.edit?.result !== 'Success') {
			throw new Error(`Edit to "${title}" was not saved: ${JSON.stringify(body.edit ?? body)}`);
		}
		return body;
	}

	/**
	 * Gets the list of wiki pages that belong to the given category.
	 * @param {string} name Category name including the `Category:` namespace.
	 * @param {number} [limit] Maximum number of items to return. Must be 500 or
	 * less. I'm lazy and not supporting API paging so deal with it.
	 * @returns {Promise<{pageid: number; title: string}[]>}
	 */
	async listCategoryPages (name, limit = 50) {
		const body = await this.fetchApiGet({
			action: 'query',
			list: 'categorymembers',
			cmtitle: name,
			cmlimit: limit,
		});
		return body.query.categorymembers;
	}
}
|
||||||
20
util.mjs
Normal file
20
util.mjs
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
// misc helpers
|
||||||
|
|
||||||
|
import {execSync} from 'node:child_process';
|
||||||
|
|
||||||
|
/**
 * terrible terrible terrible string diff helper for debugging
 *
 * Shells out to `diff --color -u` through bash and lets it print straight to
 * the terminal. Nothing is returned.
 * @param {string} a Original text
 * @param {string} b Updated text
 * @returns {void}
 */
export function diff (a, b) {
	// base64 input strings before passing to shell to avoid escaping issues
	// https://stackoverflow.com/a/60221847
	// Encode the UTF-8 bytes via Buffer rather than `btoa()`: `btoa()` throws on
	// any character outside Latin-1 and encodes Latin-1 characters as single
	// bytes, so `base64 -d` would not reproduce the original text.
	const toBase64 = text => Buffer.from(text, 'utf8').toString('base64');

	// also use `|| true` to not throw an error when `diff` returns non-zero
	execSync(`bash -c '
		diff --color -u <(echo ${toBase64(a)} | base64 -d) <(echo ${toBase64(b)} | base64 -d)
	' || true`, {
		// display result directly in terminal
		stdio: 'inherit',
	});
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue