add: export docx with math
This commit is contained in:
parent
6e3094c69c
commit
622cef7cf3
|
|
@ -6,6 +6,10 @@
|
|||
src="chrome://__addonRef__/content/scripts/docxWorker.js"
|
||||
type="application/javascript"
|
||||
></script>
|
||||
<script
|
||||
src="chrome://__addonRef__/content/lib/js/SaxonJS2.rt.js"
|
||||
type="application/javascript"
|
||||
></script>
|
||||
</head>
|
||||
<body></body>
|
||||
</html>
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
|
@ -98,6 +98,7 @@
|
|||
"release-it": "^16.1.5",
|
||||
"replace-in-file": "^7.0.1",
|
||||
"typescript": "^5.2.2",
|
||||
"xslt3": "^2.6.0",
|
||||
"zotero-types": "^1.3.5"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
npx xslt3 -t -xsl:scripts/docx/mml2omml.xsl -export:addon/chrome/content/lib/js/mml2omml.sef.json -nogo -relocate:on -ns:##html5
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -1,13 +1,32 @@
|
|||
// @ts-ignore
|
||||
import { config } from "../../package.json";
|
||||
|
||||
// @ts-ignore defined by html-docx-js
|
||||
import htmlDocx from "html-docx-js/dist/html-docx";
|
||||
|
||||
const XSL_PATH = `chrome://${config.addonRef}/content/lib/js/mml2omml.sef.json`;
|
||||
|
||||
// This runs in an iframe: it accepts an input message
|
||||
// and returns an output message
|
||||
onmessage = ({ data: { type, jobId, message } }) => {
|
||||
onmessage = async ({ data: { type, jobID, message } }) => {
|
||||
if (type === "parseDocx") {
|
||||
console.log("DOCX Worker", type, jobId, message);
|
||||
console.log("DOCX Worker", type, jobID, message);
|
||||
const blob = htmlDocx.asBlob(message);
|
||||
console.log("DOCX Worker", blob);
|
||||
postMessage({ type: "parseDocxReturn", jobId, message: blob }, "*");
|
||||
postMessage({ type: "parseDocxReturn", jobID, message: blob }, "*");
|
||||
} else if (type === "parseMML") {
|
||||
console.log("MML Worker", type, jobID, message);
|
||||
// @ts-ignore defined by SaxonJS
|
||||
const result = await SaxonJS.transform(
|
||||
{
|
||||
stylesheetLocation: XSL_PATH,
|
||||
sourceType: "xml",
|
||||
sourceText: message,
|
||||
destination: "serialized",
|
||||
},
|
||||
"async",
|
||||
);
|
||||
postMessage(
|
||||
{ type: "parseMMLReturn", jobID, message: result.principalResult },
|
||||
"*",
|
||||
);
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -13,8 +13,11 @@ export async function saveDocx(filename: string, noteId: number) {
|
|||
}
|
||||
|
||||
async function note2docx(noteItem: Zotero.Item) {
|
||||
const renderedContent = parseDocxCitationFields(
|
||||
const worker = await getWorker();
|
||||
|
||||
const renderedContent = await parseDocxFields(
|
||||
await renderNoteHTML(noteItem),
|
||||
worker,
|
||||
);
|
||||
let htmlDoc =
|
||||
'<!DOCTYPE html>\n<html lang="en"><head><meta charset="UTF-8"></head>\n';
|
||||
|
|
@ -23,36 +26,46 @@ async function note2docx(noteItem: Zotero.Item) {
|
|||
|
||||
ztoolkit.log(`[Note2DOCX] ${htmlDoc}`);
|
||||
|
||||
let blob: ArrayBufferLike;
|
||||
const lock = Zotero.Promise.defer();
|
||||
const jobId = randomString(6, new Date().toUTCString());
|
||||
const listener = (ev: MessageEvent) => {
|
||||
if (ev.data.type === "parseDocxReturn" && ev.data.jobId === jobId) {
|
||||
blob = ev.data.message;
|
||||
lock.resolve();
|
||||
}
|
||||
};
|
||||
const worker = await getWorker();
|
||||
worker.contentWindow?.addEventListener("message", listener);
|
||||
worker.contentWindow?.postMessage(
|
||||
{
|
||||
type: "parseDocx",
|
||||
jobId,
|
||||
message: htmlDoc,
|
||||
},
|
||||
"*",
|
||||
);
|
||||
await lock.promise;
|
||||
worker.contentWindow?.removeEventListener("message", listener);
|
||||
const blob = await sendWorkerTask(worker, "parseDocx", htmlDoc);
|
||||
destroyWorker(worker);
|
||||
return blob!;
|
||||
}
|
||||
|
||||
type CitationCache = Record<string, { field: string; text: string }>;
|
||||
|
||||
function parseDocxCitationFields(html: string) {
|
||||
async function parseDocxFields(html: string, worker: HTMLIFrameElement) {
|
||||
const parser = new DOMParser();
|
||||
const doc = parser.parseFromString(html, "text/html");
|
||||
|
||||
// Remove katex html elements to prevent duplicate rendering
|
||||
doc.querySelectorAll(".katex-html").forEach((elem) => {
|
||||
elem.remove();
|
||||
});
|
||||
|
||||
const mathCache = {} as MathCache;
|
||||
|
||||
for (const elem of Array.from(doc.querySelectorAll("math"))) {
|
||||
let str = (await sendWorkerTask(
|
||||
worker,
|
||||
"parseMML",
|
||||
elem.outerHTML,
|
||||
)) as string;
|
||||
if (!str) {
|
||||
continue;
|
||||
}
|
||||
str = str.replaceAll('<?xml version="1.0" encoding="UTF-8"?>', "");
|
||||
if (elem.getAttribute("display") === "block") {
|
||||
str = `<m:oMathPara xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math">${str}</m:oMathPara>`;
|
||||
}
|
||||
const newElem = doc.createElement("span");
|
||||
const mathID = getCacheID(mathCache, {
|
||||
math: "",
|
||||
});
|
||||
mathCache[mathID].math = str;
|
||||
newElem.setAttribute("data-bn-math-index", mathID);
|
||||
elem.parentNode!.replaceChild(newElem, elem);
|
||||
}
|
||||
|
||||
const citationCache = {} as CitationCache;
|
||||
/*
|
||||
[
|
||||
|
|
@ -110,7 +123,10 @@ function parseDocxCitationFields(html: string) {
|
|||
properties.formattedCitation = formattedCitation;
|
||||
properties.plainCitation = formattedCitation + " ";
|
||||
properties.noteIndex = 0;
|
||||
const citationID = getCitationID(citationCache);
|
||||
const citationID = getCacheID(citationCache, {
|
||||
field: "",
|
||||
text: "",
|
||||
});
|
||||
|
||||
const csl = {
|
||||
citationID,
|
||||
|
|
@ -171,11 +187,22 @@ function parseDocxCitationFields(html: string) {
|
|||
*/
|
||||
}
|
||||
|
||||
const str = doc.body.innerHTML;
|
||||
let str = doc.body.innerHTML;
|
||||
|
||||
// Replace all <span data-bn-math-index="T21wEH05"></span> with <!--[if gte msEquation 12]><m:oMath...</m:oMath><![endif]-->
|
||||
const mathRegexp = /<span data-bn-math-index="([^"]+)"><\/span>/g;
|
||||
str = str.replace(mathRegexp, (match, p1) => {
|
||||
return `<!--[if gte msEquation 12]>${mathCache[p1].math}<![endif]-->`;
|
||||
});
|
||||
|
||||
str = str.replaceAll(
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/math",
|
||||
"http://schemas.microsoft.com/office/2004/12/omml",
|
||||
);
|
||||
|
||||
// Replace all <span data-bn-citation-index="T21wEH05"></span> with ADDIN ZOTERO_ITEM CSL_CITATION {...}
|
||||
const re = /<span data-bn-citation-index="([^"]+)"><\/span>/g;
|
||||
let parsed = str.replace(re, (match, p1) => {
|
||||
const citationRegexp = /<span data-bn-citation-index="([^"]+)"><\/span>/g;
|
||||
str = str.replace(citationRegexp, (match, p1) => {
|
||||
return generateDocxField(
|
||||
`ADDIN ZOTERO_ITEM CSL_CITATION ${htmlEscape(
|
||||
doc,
|
||||
|
|
@ -185,24 +212,23 @@ function parseDocxCitationFields(html: string) {
|
|||
);
|
||||
});
|
||||
|
||||
parsed += generateDocxField(
|
||||
`ADDIN ZOTERO_BIBL {"uncited":[],"omitted":[],"custom":[]} CSL_BIBLIOGRAPHY`,
|
||||
"[BIBLIOGRAPHY] Please click Zotero - Refresh in Word/LibreOffice to update all fields",
|
||||
);
|
||||
if (Object.keys(citationCache).length > 0) {
|
||||
str += generateDocxField(
|
||||
`ADDIN ZOTERO_BIBL {"uncited":[],"omitted":[],"custom":[]} CSL_BIBLIOGRAPHY`,
|
||||
"[BIBLIOGRAPHY] Please click Zotero - Refresh in Word/LibreOffice to update all fields",
|
||||
);
|
||||
}
|
||||
|
||||
return parsed;
|
||||
return str;
|
||||
}
|
||||
|
||||
function getCitationID(citationCache: CitationCache) {
|
||||
let citationID = Zotero.Utilities.randomString();
|
||||
while (citationID in citationCache) {
|
||||
citationID = Zotero.Utilities.randomString();
|
||||
function getCacheID(cache: Record<string, any>, defaultValue: any) {
|
||||
let id = Zotero.Utilities.randomString();
|
||||
while (id in cache) {
|
||||
id = Zotero.Utilities.randomString();
|
||||
}
|
||||
citationCache[citationID] = {
|
||||
field: "",
|
||||
text: "",
|
||||
};
|
||||
return citationID;
|
||||
cache[id] = defaultValue;
|
||||
return id;
|
||||
}
|
||||
|
||||
function generateDocxField(fieldCode: string, text: string) {
|
||||
|
|
@ -218,6 +244,8 @@ ${text}
|
|||
<![endif]-->`;
|
||||
}
|
||||
|
||||
type MathCache = Record<string, { math: string }>;
|
||||
|
||||
async function getWorker(): Promise<HTMLIFrameElement> {
|
||||
const worker = ztoolkit.UI.createElement(document, "iframe", {
|
||||
properties: {
|
||||
|
|
@ -235,6 +263,34 @@ async function getWorker(): Promise<HTMLIFrameElement> {
|
|||
return worker;
|
||||
}
|
||||
|
||||
async function sendWorkerTask(
|
||||
worker: HTMLIFrameElement,
|
||||
type: string,
|
||||
message: any,
|
||||
): Promise<any> {
|
||||
const jobID = randomString(6, new Date().toUTCString());
|
||||
const lock = Zotero.Promise.defer();
|
||||
let retMessage: any;
|
||||
const listener = (ev: MessageEvent) => {
|
||||
if (ev.data.type === `${type}Return` && ev.data.jobID === jobID) {
|
||||
retMessage = ev.data.message;
|
||||
lock.resolve();
|
||||
}
|
||||
};
|
||||
worker.contentWindow?.addEventListener("message", listener);
|
||||
worker.contentWindow?.postMessage(
|
||||
{
|
||||
type,
|
||||
jobID,
|
||||
message,
|
||||
},
|
||||
"*",
|
||||
);
|
||||
await lock.promise;
|
||||
worker.contentWindow?.removeEventListener("message", listener);
|
||||
return retMessage;
|
||||
}
|
||||
|
||||
function destroyWorker(worker: any) {
|
||||
worker.parentNode.removeChild(worker);
|
||||
worker = null;
|
||||
|
|
|
|||
|
|
@ -256,10 +256,13 @@ async function renderNoteHTML(
|
|||
|
||||
const mathDelimiterRegex = /^\$+|\$+$/g;
|
||||
doc.querySelectorAll(".math").forEach((node) => {
|
||||
const displayMode = node.innerHTML.startsWith("$$");
|
||||
node.innerHTML = katex.renderToString(
|
||||
node.innerHTML.replace(mathDelimiterRegex, ""),
|
||||
{
|
||||
throwOnError: false,
|
||||
// output: "mathml",
|
||||
displayMode,
|
||||
},
|
||||
);
|
||||
});
|
||||
|
|
|
|||
Loading…
Reference in New Issue