const fs = require("fs-extra");
const path = require("path");
const async = require("async");
const openai = require("./lib/openai_client");
// Upper bound on how many conversions run at the same time.
// NOTE(review): presumably the worker count handed to the async queue in
// processFiles — confirm against that function.
const CONCURRENCY_LIMIT = 2; // Adjust this based on your rate limit
// NOTE(review): the model identifier was not present in the source this block
// was recovered from; "gpt-4o" is a placeholder — confirm the intended model.
const CONVERSION_MODEL = "gpt-4o";

// Instructions sent as the system message; they define the conversion rules.
const CONVERSION_PROMPT = `You are a REST API for converting text files with metadata.You will receive PhpWiki export files. You answer purely with markdown documents without additional conversational text or formatting.
- Convert the PhpWiki export file you receive including metadata to markdown.
- Convert the PhpWiki metadata to markdown front matter.
Metadata conversion mapping:
- pagename -> title (translate the encoded pagename to a human readable blog title without date in quotes)
- Date -> publishDate (format as YYYY-MM-dd without quotes)
- Infer a plain text front matter description field from the content of the PhpWiki text.
- The description field value must be in double quotes and suitable for a SEO optimized description HTML meta tag.
- Infer a front matter tags field with up to 3 high level categorical tags as a JSON array from the content of the PhpWiki text. The tags should be lower case kebab-case.
- The actual content starts after "Content-Transfer-Encoding: binary". Keep the original text and language, never translate to another language.
PhpWiki Syntax conversion hints for the original text:
- Convert PhpWiki links in the format [text|url] or [text | url] to Markdown's [text](url).
- If the url of a PhpWiki link is relative and does not contain http or https, then kebab-case the url and prefix it with /wiki/
- Convert image links starting with http://www.metaportaldermedienpolemik.net/artefact/images/ to just /images/
- Convert links in the form of [CamelCaseWord] to [Camel Case Word](/wiki/camel-case-word)
- Convert words that are CamelCase formatted to [Camel Case](/wiki/camel-case)`;

// Matches a reply that is wrapped in a single ```markdown ... ``` fence and
// captures only the text between the opening and closing fence.
const MARKDOWN_FENCE_PATTERN = /^```markdown\s([\s\S]*?)\s```$/;

/**
 * Sends a PhpWiki export to the chat completion API and returns the markdown
 * it answers with.
 *
 * The reply is trimmed and, when the model wrapped it in a ```markdown code
 * fence, the fence is removed so only the document itself is returned.
 *
 * @param {string} inputText - Raw contents of one PhpWiki export file.
 * @returns {Promise<string|null>} The markdown document, or `null` when the
 *   request failed or the reply carried no text. Errors are logged, not thrown.
 */
async function convertPhpWikiToMarkdown(inputText) {
  try {
    // NOTE(review): the request envelope (model/messages/roles) was missing
    // from the recovered source and is reconstructed here — confirm.
    const response = await openai.chat.completions.create({
      model: CONVERSION_MODEL,
      messages: [
        { role: "system", content: CONVERSION_PROMPT },
        { role: "user", content: inputText },
      ],
    });

    const content = response?.choices?.[0]?.message?.content;
    if (typeof content !== "string") {
      // Treat an empty/absent reply as a failed conversion instead of
      // crashing on `.trim()`.
      console.error("Error during conversion:", new Error("Response contained no message content."));
      return null;
    }

    // Use replace method to remove the surrounding markdown fence, if any.
    return content.trim().replace(MARKDOWN_FENCE_PATTERN, "$1");
  } catch (error) {
    console.error("Error during conversion:", error);
    return null;
  }
}
/**
 * Builds the markdown file name for a PhpWiki export file.
 *
 * Takes the original file name (which was just the PhpWiki page title),
 * splits it on the URL-encoded slash ("%2F") and combines the second and
 * third segments into something like `2018-10-03-the-title.md`.
 *
 * @param {string} file - Export file name containing at least three "%2F"-separated segments.
 * @returns {string} Lower-cased title with spaces turned into dashes, prefixed by the second segment.
 * @throws {Error} When the name has fewer than three segments.
 */
function buildOutputFileName(file) {
  const [, datePart, titlePart] = file.split("%2F");
  if (datePart === undefined || titlePart === undefined) {
    throw new Error(`Unexpected export file name "${file}": expected three "%2F"-separated segments.`);
  }
  return `${datePart}-${titlePart.split(" ").join("-").toLowerCase()}.md`;
}

/**
 * Converts one PhpWiki export file to markdown and writes it to the output
 * directory, unless the target file is already there.
 *
 * Never rejects: every failure is logged so that one bad file does not stop
 * the remaining conversions.
 *
 * @param {string} file - Name of the export file inside `inputDir`.
 * @param {string} inputDir - Directory holding the PhpWiki export files.
 * @param {string} outputDir - Directory the markdown file is written to.
 * @returns {Promise<void>}
 */
async function processFile(file, inputDir, outputDir) {
  try {
    const outputFilePath = path.join(outputDir, buildOutputFileName(file));

    // Check if the output file already exists — done before reading the input
    // so skipped files cost neither a read nor an API call.
    if (await fs.pathExists(outputFilePath)) {
      console.log(`Output file for ${file} already exists. Skipping conversion.`);
      return;
    }

    const fileContents = await fs.readFile(path.join(inputDir, file), "utf-8");
    const markdown = await convertPhpWikiToMarkdown(fileContents);

    // A falsy result means the conversion produced nothing usable.
    if (!markdown) {
      console.log(`Failed to convert ${file}.`);
      return;
    }

    await fs.outputFile(outputFilePath, markdown);
    console.log(`Converted ${file} to markdown.`);
  } catch (error) {
    console.error(`Error processing file ${file}:`, error);
  }
}
/**
 * Converts every file found in `inputDir`, running at most
 * `CONCURRENCY_LIMIT` conversions at a time through an async queue.
 *
 * Never rejects: a failure to list the directory or to run the queue is
 * logged instead.
 *
 * @param {string} inputDir - Directory holding the PhpWiki export files.
 * @param {string} outputDir - Directory the markdown files are written to.
 * @returns {Promise<void>} Settles once all queued files have been handled.
 */
async function processFiles(inputDir, outputDir) {
  try {
    const files = await fs.readdir(inputDir);

    // Skip the queue entirely for an empty directory so completion does not
    // depend on how the queue signals "drained" when nothing was pushed.
    if (files.length > 0) {
      // NOTE(review): written against the async v3 queue API (async worker
      // without a callback, promise-returning drain()) — confirm the
      // installed version of the "async" package.
      const queue = async.queue(async (file) => {
        await processFile(file, inputDir, outputDir);
      }, CONCURRENCY_LIMIT);

      queue.push(files);
      await queue.drain();
    }

    console.log("All files have been processed.");
  } catch (error) {
    console.error("Error processing files:", error);
  }
}
// Script entry point: resolve the "input" and "output" folders that sit next
// to this file, then start converting everything found in the input folder.
const [inputDir, outputDir] = ["input", "output"].map((folder) => path.join(__dirname, folder));
processFiles(inputDir, outputDir);
// Comments powered by Talkyard.