add: export docx with math
This commit is contained in:
parent
6e3094c69c
commit
622cef7cf3
|
|
@ -6,6 +6,10 @@
|
||||||
src="chrome://__addonRef__/content/scripts/docxWorker.js"
|
src="chrome://__addonRef__/content/scripts/docxWorker.js"
|
||||||
type="application/javascript"
|
type="application/javascript"
|
||||||
></script>
|
></script>
|
||||||
|
<script
|
||||||
|
src="chrome://__addonRef__/content/lib/js/SaxonJS2.rt.js"
|
||||||
|
type="application/javascript"
|
||||||
|
></script>
|
||||||
</head>
|
</head>
|
||||||
<body></body>
|
<body></body>
|
||||||
</html>
|
</html>
|
||||||
|
|
|
||||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
|
@ -98,6 +98,7 @@
|
||||||
"release-it": "^16.1.5",
|
"release-it": "^16.1.5",
|
||||||
"replace-in-file": "^7.0.1",
|
"replace-in-file": "^7.0.1",
|
||||||
"typescript": "^5.2.2",
|
"typescript": "^5.2.2",
|
||||||
|
"xslt3": "^2.6.0",
|
||||||
"zotero-types": "^1.3.5"
|
"zotero-types": "^1.3.5"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
npx xslt3 -t -xsl:scripts/docx/mml2omml.xsl -export:addon/chrome/content/lib/js/mml2omml.sef.json -nogo -relocate:on -ns:##html5
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -1,13 +1,32 @@
|
||||||
// @ts-ignore
|
import { config } from "../../package.json";
|
||||||
|
|
||||||
|
// @ts-ignore defined by html-docx-js
|
||||||
import htmlDocx from "html-docx-js/dist/html-docx";
|
import htmlDocx from "html-docx-js/dist/html-docx";
|
||||||
|
|
||||||
|
const XSL_PATH = `chrome://${config.addonRef}/content/lib/js/mml2omml.sef.json`;
|
||||||
|
|
||||||
// This script runs inside an iframe. It accepts request messages shaped like
// `{ type, jobID, message }` and answers each handled request with a message
// whose type is `${type}Return` and whose jobID matches the request.
/**
 * Message handler of the DOCX worker frame.
 *
 * Supported request types:
 * - `"parseDocx"`: `message` is passed to `htmlDocx.asBlob`; the result is
 *   returned as the reply's `message`.
 * - `"parseMML"`: `message` is used as the XML source text of a SaxonJS
 *   transform with the stylesheet at `XSL_PATH`; the serialized principal
 *   result is returned as the reply's `message`.
 *
 * Messages with any other `type` (including the replies this handler posts to
 * its own window) are ignored.
 *
 * If a conversion throws, the error is logged and a reply is still posted with
 * `message: undefined` plus an `error` string, so the requester waiting for the
 * matching `jobID` is not left pending forever.
 */
onmessage = async ({ data }) => {
  // Tolerate events without a payload instead of throwing while destructuring.
  const { type, jobID, message } = data ?? {};
  if (type !== "parseDocx" && type !== "parseMML") {
    return;
  }
  try {
    if (type === "parseDocx") {
      console.log("DOCX Worker", type, jobID, message);
      const blob = htmlDocx.asBlob(message);
      postMessage({ type: "parseDocxReturn", jobID, message: blob }, "*");
    } else {
      console.log("MML Worker", type, jobID, message);
      // @ts-ignore defined by SaxonJS
      const result = await SaxonJS.transform(
        {
          stylesheetLocation: XSL_PATH,
          sourceType: "xml",
          sourceText: message,
          destination: "serialized",
        },
        "async",
      );
      postMessage(
        { type: "parseMMLReturn", jobID, message: result.principalResult },
        "*",
      );
    }
  } catch (e) {
    console.error("DOCX Worker failed", type, jobID, e);
    // Always answer the job; the extra `error` field is ignored by consumers
    // that only read `message`.
    postMessage(
      { type: `${type}Return`, jobID, message: undefined, error: String(e) },
      "*",
    );
  }
};
|
||||||
|
|
|
||||||
|
|
@ -13,8 +13,11 @@ export async function saveDocx(filename: string, noteId: number) {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function note2docx(noteItem: Zotero.Item) {
|
async function note2docx(noteItem: Zotero.Item) {
|
||||||
const renderedContent = parseDocxCitationFields(
|
const worker = await getWorker();
|
||||||
|
|
||||||
|
const renderedContent = await parseDocxFields(
|
||||||
await renderNoteHTML(noteItem),
|
await renderNoteHTML(noteItem),
|
||||||
|
worker,
|
||||||
);
|
);
|
||||||
let htmlDoc =
|
let htmlDoc =
|
||||||
'<!DOCTYPE html>\n<html lang="en"><head><meta charset="UTF-8"></head>\n';
|
'<!DOCTYPE html>\n<html lang="en"><head><meta charset="UTF-8"></head>\n';
|
||||||
|
|
@ -23,36 +26,46 @@ async function note2docx(noteItem: Zotero.Item) {
|
||||||
|
|
||||||
ztoolkit.log(`[Note2DOCX] ${htmlDoc}`);
|
ztoolkit.log(`[Note2DOCX] ${htmlDoc}`);
|
||||||
|
|
||||||
let blob: ArrayBufferLike;
|
const blob = await sendWorkerTask(worker, "parseDocx", htmlDoc);
|
||||||
const lock = Zotero.Promise.defer();
|
|
||||||
const jobId = randomString(6, new Date().toUTCString());
|
|
||||||
const listener = (ev: MessageEvent) => {
|
|
||||||
if (ev.data.type === "parseDocxReturn" && ev.data.jobId === jobId) {
|
|
||||||
blob = ev.data.message;
|
|
||||||
lock.resolve();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
const worker = await getWorker();
|
|
||||||
worker.contentWindow?.addEventListener("message", listener);
|
|
||||||
worker.contentWindow?.postMessage(
|
|
||||||
{
|
|
||||||
type: "parseDocx",
|
|
||||||
jobId,
|
|
||||||
message: htmlDoc,
|
|
||||||
},
|
|
||||||
"*",
|
|
||||||
);
|
|
||||||
await lock.promise;
|
|
||||||
worker.contentWindow?.removeEventListener("message", listener);
|
|
||||||
destroyWorker(worker);
|
destroyWorker(worker);
|
||||||
return blob!;
|
return blob!;
|
||||||
}
|
}
|
||||||
|
|
||||||
type CitationCache = Record<string, { field: string; text: string }>;
|
type CitationCache = Record<string, { field: string; text: string }>;
|
||||||
|
|
||||||
function parseDocxCitationFields(html: string) {
|
async function parseDocxFields(html: string, worker: HTMLIFrameElement) {
|
||||||
const parser = new DOMParser();
|
const parser = new DOMParser();
|
||||||
const doc = parser.parseFromString(html, "text/html");
|
const doc = parser.parseFromString(html, "text/html");
|
||||||
|
|
||||||
|
// Remove katex html elements to prevent duplicate rendering
|
||||||
|
doc.querySelectorAll(".katex-html").forEach((elem) => {
|
||||||
|
elem.remove();
|
||||||
|
});
|
||||||
|
|
||||||
|
const mathCache = {} as MathCache;
|
||||||
|
|
||||||
|
for (const elem of Array.from(doc.querySelectorAll("math"))) {
|
||||||
|
let str = (await sendWorkerTask(
|
||||||
|
worker,
|
||||||
|
"parseMML",
|
||||||
|
elem.outerHTML,
|
||||||
|
)) as string;
|
||||||
|
if (!str) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
str = str.replaceAll('<?xml version="1.0" encoding="UTF-8"?>', "");
|
||||||
|
if (elem.getAttribute("display") === "block") {
|
||||||
|
str = `<m:oMathPara xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math">${str}</m:oMathPara>`;
|
||||||
|
}
|
||||||
|
const newElem = doc.createElement("span");
|
||||||
|
const mathID = getCacheID(mathCache, {
|
||||||
|
math: "",
|
||||||
|
});
|
||||||
|
mathCache[mathID].math = str;
|
||||||
|
newElem.setAttribute("data-bn-math-index", mathID);
|
||||||
|
elem.parentNode!.replaceChild(newElem, elem);
|
||||||
|
}
|
||||||
|
|
||||||
const citationCache = {} as CitationCache;
|
const citationCache = {} as CitationCache;
|
||||||
/*
|
/*
|
||||||
[
|
[
|
||||||
|
|
@ -110,7 +123,10 @@ function parseDocxCitationFields(html: string) {
|
||||||
properties.formattedCitation = formattedCitation;
|
properties.formattedCitation = formattedCitation;
|
||||||
properties.plainCitation = formattedCitation + " ";
|
properties.plainCitation = formattedCitation + " ";
|
||||||
properties.noteIndex = 0;
|
properties.noteIndex = 0;
|
||||||
const citationID = getCitationID(citationCache);
|
const citationID = getCacheID(citationCache, {
|
||||||
|
field: "",
|
||||||
|
text: "",
|
||||||
|
});
|
||||||
|
|
||||||
const csl = {
|
const csl = {
|
||||||
citationID,
|
citationID,
|
||||||
|
|
@ -171,11 +187,22 @@ function parseDocxCitationFields(html: string) {
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
const str = doc.body.innerHTML;
|
let str = doc.body.innerHTML;
|
||||||
|
|
||||||
|
// Replace all <span data-bn-math-index="T21wEH05"></span> with <!--[if gte msEquation 12]><m:oMath...</m:oMath><![endif]-->
|
||||||
|
const mathRegexp = /<span data-bn-math-index="([^"]+)"><\/span>/g;
|
||||||
|
str = str.replace(mathRegexp, (match, p1) => {
|
||||||
|
return `<!--[if gte msEquation 12]>${mathCache[p1].math}<![endif]-->`;
|
||||||
|
});
|
||||||
|
|
||||||
|
str = str.replaceAll(
|
||||||
|
"http://schemas.openxmlformats.org/officeDocument/2006/math",
|
||||||
|
"http://schemas.microsoft.com/office/2004/12/omml",
|
||||||
|
);
|
||||||
|
|
||||||
// Replace all <span data-bn-citation-index="T21wEH05"></span> with ADDIN ZOTERO_ITEM CSL_CITATION {...}
|
// Replace all <span data-bn-citation-index="T21wEH05"></span> with ADDIN ZOTERO_ITEM CSL_CITATION {...}
|
||||||
const re = /<span data-bn-citation-index="([^"]+)"><\/span>/g;
|
const citationRegexp = /<span data-bn-citation-index="([^"]+)"><\/span>/g;
|
||||||
let parsed = str.replace(re, (match, p1) => {
|
str = str.replace(citationRegexp, (match, p1) => {
|
||||||
return generateDocxField(
|
return generateDocxField(
|
||||||
`ADDIN ZOTERO_ITEM CSL_CITATION ${htmlEscape(
|
`ADDIN ZOTERO_ITEM CSL_CITATION ${htmlEscape(
|
||||||
doc,
|
doc,
|
||||||
|
|
@ -185,24 +212,23 @@ function parseDocxCitationFields(html: string) {
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
parsed += generateDocxField(
|
if (Object.keys(citationCache).length > 0) {
|
||||||
`ADDIN ZOTERO_BIBL {"uncited":[],"omitted":[],"custom":[]} CSL_BIBLIOGRAPHY`,
|
str += generateDocxField(
|
||||||
"[BIBLIOGRAPHY] Please click Zotero - Refresh in Word/LibreOffice to update all fields",
|
`ADDIN ZOTERO_BIBL {"uncited":[],"omitted":[],"custom":[]} CSL_BIBLIOGRAPHY`,
|
||||||
);
|
"[BIBLIOGRAPHY] Please click Zotero - Refresh in Word/LibreOffice to update all fields",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
return parsed;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Reserve a new random key in `cache` and store `defaultValue` under it.
 *
 * Keys come from `Zotero.Utilities.randomString()`; a candidate is redrawn
 * while it is already present in `cache` (checked with the `in` operator).
 *
 * @param cache - Object used as the lookup table; it is mutated in place.
 * @param defaultValue - Value stored under the new key. It is stored by
 *   reference, so pass a fresh object for every call.
 * @returns The key that was added to `cache`.
 */
function getCacheID<T>(cache: Record<string, T>, defaultValue: T): string {
  let id: string;
  do {
    id = Zotero.Utilities.randomString();
  } while (id in cache);
  cache[id] = defaultValue;
  return id;
}
|
||||||
|
|
||||||
function generateDocxField(fieldCode: string, text: string) {
|
function generateDocxField(fieldCode: string, text: string) {
|
||||||
|
|
@ -218,6 +244,8 @@ ${text}
|
||||||
<![endif]-->`;
|
<![endif]-->`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type MathCache = Record<string, { math: string }>;
|
||||||
|
|
||||||
async function getWorker(): Promise<HTMLIFrameElement> {
|
async function getWorker(): Promise<HTMLIFrameElement> {
|
||||||
const worker = ztoolkit.UI.createElement(document, "iframe", {
|
const worker = ztoolkit.UI.createElement(document, "iframe", {
|
||||||
properties: {
|
properties: {
|
||||||
|
|
@ -235,6 +263,34 @@ async function getWorker(): Promise<HTMLIFrameElement> {
|
||||||
return worker;
|
return worker;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Post one job to the worker iframe and wait for the matching reply.
 *
 * A request `{ type, jobID, message }` is posted to the iframe's
 * `contentWindow`. The call resolves with the `message` field of the first
 * `message` event seen on that window whose `data.type` is `${type}Return`
 * and whose `data.jobID` equals the generated job ID.
 *
 * @param worker - The iframe hosting the worker script.
 * @param type - Request type; the reply type is derived as `${type}Return`.
 * @param message - Payload forwarded to the worker unchanged.
 * @returns The `message` field of the matching reply.
 * @throws Error if the iframe has no `contentWindow`. Previously the optional
 *   chaining skipped the post silently and the call never settled.
 */
async function sendWorkerTask(
  worker: HTMLIFrameElement,
  type: string,
  message: any,
): Promise<any> {
  const target = worker.contentWindow;
  if (!target) {
    throw new Error(
      `Worker iframe has no contentWindow; cannot run task "${type}"`,
    );
  }

  const jobID = randomString(6, new Date().toUTCString());
  const expectedType = `${type}Return`;
  let listener: ((ev: MessageEvent) => void) | undefined;

  try {
    return await new Promise<any>((resolve) => {
      listener = (ev: MessageEvent) => {
        // Unrelated events (other jobs, the request itself, events without a
        // payload) are ignored.
        if (ev.data?.type === expectedType && ev.data.jobID === jobID) {
          resolve(ev.data.message);
        }
      };
      // Subscribe before posting so the reply cannot be missed.
      target.addEventListener("message", listener);
      target.postMessage({ type, jobID, message }, "*");
    });
  } finally {
    // Runs on success and when posting throws, so the listener never leaks.
    if (listener) {
      target.removeEventListener("message", listener);
    }
  }
}
|
||||||
|
|
||||||
function destroyWorker(worker: any) {
|
function destroyWorker(worker: any) {
|
||||||
worker.parentNode.removeChild(worker);
|
worker.parentNode.removeChild(worker);
|
||||||
worker = null;
|
worker = null;
|
||||||
|
|
|
||||||
|
|
@ -256,10 +256,13 @@ async function renderNoteHTML(
|
||||||
|
|
||||||
const mathDelimiterRegex = /^\$+|\$+$/g;
|
const mathDelimiterRegex = /^\$+|\$+$/g;
|
||||||
doc.querySelectorAll(".math").forEach((node) => {
|
doc.querySelectorAll(".math").forEach((node) => {
|
||||||
|
const displayMode = node.innerHTML.startsWith("$$");
|
||||||
node.innerHTML = katex.renderToString(
|
node.innerHTML = katex.renderToString(
|
||||||
node.innerHTML.replace(mathDelimiterRegex, ""),
|
node.innerHTML.replace(mathDelimiterRegex, ""),
|
||||||
{
|
{
|
||||||
throwOnError: false,
|
throwOnError: false,
|
||||||
|
// output: "mathml",
|
||||||
|
displayMode,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue