commit 96dc4b2b2a71d3a66e295830f67dea4734b69153
parent f8d42155194f2184dadc2e433661b8a4310d81be
Author: Frédéric Wang <fred.wang@free.fr>
Date: Thu, 20 Dec 2012 17:41:40 +0100
Make toMathML.js handle non-BMP characters properly. Fix issue 361.
Diffstat:
1 file changed, 20 insertions(+), 5 deletions(-)
diff --git a/unpacked/extensions/toMathML.js b/unpacked/extensions/toMathML.js
@@ -1,3 +1,5 @@
+/* -*- Mode: Javascript; indent-tabs-mode:nil; js-indent-level: 2 -*- */
+/* vim: set ts=2 et sw=2 tw=80: */
/*************************************************************
*
* MathJax/extensions/toMathML.js
@@ -106,11 +108,24 @@ MathJax.Hub.Register.LoadHook("[MathJax]/jax/element/mml/jax.js",function () {
string = String(string).split("");
for (var i = 0, m = string.length; i < m; i++) {
var n = string[i].charCodeAt(0);
- if (n < 0x20 || n > 0x7E) {
- string[i] = "&#x"+n.toString(16).toUpperCase()+";";
- } else {
- var c = {'&':'&amp;', '<':'&lt;', '>':'&gt;', '"':'&quot;'}[string[i]];
- if (c) {string[i] = c}
+ if (n <= 0xD7FF || 0xE000 <= n) {
+ // Code points U+0000 to U+D7FF and U+E000 to U+FFFF.
+ // They are directly represented by n.
+ if (n < 0x20 || n > 0x7E) {
+ string[i] = "&#x"+n.toString(16).toUpperCase()+";";
+ } else {
+ var c =
+ {'&':'&amp;', '<':'&lt;', '>':'&gt;', '"':'&quot;'}[string[i]];
+ if (c) {string[i] = c}
+ }
+ } else if (i+1 < m) {
+ // Code points U+10000 to U+10FFFF.
+ // n is the lead surrogate, let's read the trail surrogate.
+ var trailSurrogate = string[i+1].charCodeAt(0);
+ var codePoint = (((n-0xD800)*0x400)+(trailSurrogate-0xDC00)+0x10000);
+ string[i] = "&#x"+codePoint.toString(16).toUpperCase()+";";
+ string[i+1] = "";
+ i++;
}
}
return string.join("");