modules/arabic.js

  1. /**
  2. * @license
  3. * Copyright (c) 2017 Aras Abbasi
  4. *
  5. * Licensed under the MIT License.
  6. * http://opensource.org/licenses/mit-license
  7. */
  8. import { jsPDF } from "../jspdf.js";
  9. /**
  10. * jsPDF arabic parser PlugIn
  11. *
  12. * @name arabic
  13. * @module
  14. */
  15. (function(jsPDFAPI) {
  16. "use strict";
  /**
   * Arabic shape substitution table.
   *
   * Maps the char code of a base letter to the char codes of its presentation
   * forms, stored in the order [isolated, final, initial, medial]. An entry
   * holds one form (isolated only), two forms (isolated and final) or all
   * four. The rest of this module uses the array length to decide how a
   * letter may join with its neighbours.
   *
   * Trailing comments give the letter name with the common
   * "ARABIC LETTER" prefix omitted.
   */
  var arabicSubstitionA = {
    // Letters 0x0621 - 0x064a
    0x0621: [0xfe80], // HAMZA
    0x0622: [0xfe81, 0xfe82], // ALEF WITH MADDA ABOVE
    0x0623: [0xfe83, 0xfe84], // ALEF WITH HAMZA ABOVE
    0x0624: [0xfe85, 0xfe86], // WAW WITH HAMZA ABOVE
    0x0625: [0xfe87, 0xfe88], // ALEF WITH HAMZA BELOW
    0x0626: [0xfe89, 0xfe8a, 0xfe8b, 0xfe8c], // YEH WITH HAMZA ABOVE
    0x0627: [0xfe8d, 0xfe8e], // ALEF
    0x0628: [0xfe8f, 0xfe90, 0xfe91, 0xfe92], // BEH
    0x0629: [0xfe93, 0xfe94], // TEH MARBUTA
    0x062a: [0xfe95, 0xfe96, 0xfe97, 0xfe98], // TEH
    0x062b: [0xfe99, 0xfe9a, 0xfe9b, 0xfe9c], // THEH
    0x062c: [0xfe9d, 0xfe9e, 0xfe9f, 0xfea0], // JEEM
    0x062d: [0xfea1, 0xfea2, 0xfea3, 0xfea4], // HAH
    0x062e: [0xfea5, 0xfea6, 0xfea7, 0xfea8], // KHAH
    0x062f: [0xfea9, 0xfeaa], // DAL
    0x0630: [0xfeab, 0xfeac], // THAL
    0x0631: [0xfead, 0xfeae], // REH
    0x0632: [0xfeaf, 0xfeb0], // ZAIN
    0x0633: [0xfeb1, 0xfeb2, 0xfeb3, 0xfeb4], // SEEN
    0x0634: [0xfeb5, 0xfeb6, 0xfeb7, 0xfeb8], // SHEEN
    0x0635: [0xfeb9, 0xfeba, 0xfebb, 0xfebc], // SAD
    0x0636: [0xfebd, 0xfebe, 0xfebf, 0xfec0], // DAD
    0x0637: [0xfec1, 0xfec2, 0xfec3, 0xfec4], // TAH
    0x0638: [0xfec5, 0xfec6, 0xfec7, 0xfec8], // ZAH
    0x0639: [0xfec9, 0xfeca, 0xfecb, 0xfecc], // AIN
    0x063a: [0xfecd, 0xfece, 0xfecf, 0xfed0], // GHAIN
    0x0641: [0xfed1, 0xfed2, 0xfed3, 0xfed4], // FEH
    0x0642: [0xfed5, 0xfed6, 0xfed7, 0xfed8], // QAF
    0x0643: [0xfed9, 0xfeda, 0xfedb, 0xfedc], // KAF
    0x0644: [0xfedd, 0xfede, 0xfedf, 0xfee0], // LAM
    0x0645: [0xfee1, 0xfee2, 0xfee3, 0xfee4], // MEEM
    0x0646: [0xfee5, 0xfee6, 0xfee7, 0xfee8], // NOON
    0x0647: [0xfee9, 0xfeea, 0xfeeb, 0xfeec], // HEH
    0x0648: [0xfeed, 0xfeee], // WAW
    0x0649: [0xfeef, 0xfef0, 0xfbe8, 0xfbe9], // ALEF MAKSURA
    0x064a: [0xfef1, 0xfef2, 0xfef3, 0xfef4], // YEH

    // Letters 0x0671 - 0x06d3
    0x0671: [0xfb50, 0xfb51], // ALEF WASLA
    0x0677: [0xfbdd], // U WITH HAMZA ABOVE
    0x0679: [0xfb66, 0xfb67, 0xfb68, 0xfb69], // TTEH
    0x067a: [0xfb5e, 0xfb5f, 0xfb60, 0xfb61], // TTEHEH
    0x067b: [0xfb52, 0xfb53, 0xfb54, 0xfb55], // BEEH
    0x067e: [0xfb56, 0xfb57, 0xfb58, 0xfb59], // PEH
    0x067f: [0xfb62, 0xfb63, 0xfb64, 0xfb65], // TEHEH
    0x0680: [0xfb5a, 0xfb5b, 0xfb5c, 0xfb5d], // BEHEH
    0x0683: [0xfb76, 0xfb77, 0xfb78, 0xfb79], // NYEH
    0x0684: [0xfb72, 0xfb73, 0xfb74, 0xfb75], // DYEH
    0x0686: [0xfb7a, 0xfb7b, 0xfb7c, 0xfb7d], // TCHEH
    0x0687: [0xfb7e, 0xfb7f, 0xfb80, 0xfb81], // TCHEHEH
    0x0688: [0xfb88, 0xfb89], // DDAL
    0x068c: [0xfb84, 0xfb85], // DAHAL
    0x068d: [0xfb82, 0xfb83], // DDAHAL
    0x068e: [0xfb86, 0xfb87], // DUL
    0x0691: [0xfb8c, 0xfb8d], // RREH
    0x0698: [0xfb8a, 0xfb8b], // JEH
    0x06a4: [0xfb6a, 0xfb6b, 0xfb6c, 0xfb6d], // VEH
    0x06a6: [0xfb6e, 0xfb6f, 0xfb70, 0xfb71], // PEHEH
    0x06a9: [0xfb8e, 0xfb8f, 0xfb90, 0xfb91], // KEHEH
    0x06ad: [0xfbd3, 0xfbd4, 0xfbd5, 0xfbd6], // NG
    0x06af: [0xfb92, 0xfb93, 0xfb94, 0xfb95], // GAF
    0x06b1: [0xfb9a, 0xfb9b, 0xfb9c, 0xfb9d], // NGOEH
    0x06b3: [0xfb96, 0xfb97, 0xfb98, 0xfb99], // GUEH
    0x06ba: [0xfb9e, 0xfb9f], // NOON GHUNNA
    0x06bb: [0xfba0, 0xfba1, 0xfba2, 0xfba3], // RNOON
    0x06be: [0xfbaa, 0xfbab, 0xfbac, 0xfbad], // HEH DOACHASHMEE
    0x06c0: [0xfba4, 0xfba5], // HEH WITH YEH ABOVE
    0x06c1: [0xfba6, 0xfba7, 0xfba8, 0xfba9], // HEH GOAL
    0x06c5: [0xfbe0, 0xfbe1], // KIRGHIZ OE
    0x06c6: [0xfbd9, 0xfbda], // OE
    0x06c7: [0xfbd7, 0xfbd8], // U
    0x06c8: [0xfbdb, 0xfbdc], // YU
    0x06c9: [0xfbe2, 0xfbe3], // KIRGHIZ YU
    0x06cb: [0xfbde, 0xfbdf], // VE
    0x06cc: [0xfbfc, 0xfbfd, 0xfbfe, 0xfbff], // FARSI YEH
    0x06d0: [0xfbe4, 0xfbe5, 0xfbe6, 0xfbe7], // E
    0x06d2: [0xfbae, 0xfbaf], // YEH BARREE
    0x06d3: [0xfbb0, 0xfbb1] // YEH BARREE WITH HAMZA ABOVE
  };
  99. /*
  100. var ligaturesSubstitutionA = {
  101. 0xFBEA: []// ARABIC LIGATURE YEH WITH HAMZA ABOVE WITH ALEF ISOLATED FORM
  102. };
  103. */
  /**
   * Ligature lookup trie.
   *
   * Keys on every level are char codes. A nested object means "keep matching
   * with the next character", a number is the char code of the ligature that
   * replaces the whole matched sequence. The keys are the codes found in the
   * text once letters have been replaced by their presentation forms.
   */
  var ligatures = (function() {
    // Follow-ups for 0xfedf, the third (initial) form listed for LAM.
    var afterInitialLam = {
      0xfe82: 0xfef5, // ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM
      0xfe84: 0xfef7, // ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM
      0xfe88: 0xfef9, // ARABIC LIGATURE LAM WITH ALEF WITH HAMZA BELOW ISOLATED FORM
      0xfe8e: 0xfefb // ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM
    };

    // Follow-ups for 0xfee0, the fourth (medial) form listed for LAM.
    var afterMedialLam = {
      0xfe82: 0xfef6, // ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM
      0xfe84: 0xfef8, // ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM
      0xfe88: 0xfefa, // ARABIC LIGATURE LAM WITH ALEF WITH HAMZA BELOW FINAL FORM
      0xfe8e: 0xfefc // ARABIC LIGATURE LAM WITH ALEF FINAL FORM
    };

    // Four-character sequence 0xfe8d 0xfedf 0xfee0 0xfeea => 0xfdf2 (ALLAH).
    var afterIsolatedAlef = { 0xfedf: { 0xfee0: { 0xfeea: 0xfdf2 } } };

    // Marks that combine with a preceding SHADDA (0x0651).
    var afterShadda = {
      0x064c: 0xfc5e, // Shadda + Dammatan
      0x064d: 0xfc5f, // Shadda + Kasratan
      0x064e: 0xfc60, // Shadda + Fatha
      0x064f: 0xfc61, // Shadda + Damma
      0x0650: 0xfc62 // Shadda + Kasra
    };

    return {
      0xfedf: afterInitialLam,
      0xfee0: afterMedialLam,
      0xfe8d: afterIsolatedAlef,
      0x0651: afterShadda
    };
  })();
  /**
   * Marks that have a combined "Shadda + mark" code: mark char code =>
   * combined char code. Within this module the table is only consulted for
   * key membership (see isArabicDiacritic).
   */
  var arabic_diacritics = {
    0x064c: 0xfc5e, // Shadda + Dammatan
    0x064d: 0xfc5f, // Shadda + Kasratan
    0x064e: 0xfc60, // Shadda + Fatha
    0x064f: 0xfc61, // Shadda + Damma
    0x0650: 0xfc62 // Shadda + Kasra
  };

  // Char codes of the ALEF variants recognised by isArabicAlfLetter.
  var alfletter = [
    0x0622, // ALEF WITH MADDA ABOVE
    0x0623, // ALEF WITH HAMZA ABOVE
    0x0625, // ALEF WITH HAMZA BELOW
    0x0627 // ALEF
  ];

  // Result codes of getCorrectForm. The non-negative values double as the
  // index into a letter's entry in arabicSubstitionA.
  var noChangeInForm = -1; // keep the character as it is
  var isolatedForm = 0;
  var finalForm = 1;
  var initialForm = 2;
  var medialForm = 3;
  // Namespace object on which the parser helpers below are exposed.
  jsPDFAPI.__arabicParser__ = {};

  /**
   * Tells whether the first character of `letter` has an entry in the
   * arabicSubstitionA shape table.
   *
   * Non-string input (undefined, null, numbers, ...) yields `false` instead
   * of throwing, in line with isArabicLetter and isArabicDiacritic, which
   * also tolerate missing input.
   *
   * @name isInArabicSubstitutionA
   * @function
   * @param {string} letter only the first UTF-16 code unit is inspected
   * @returns {boolean}
   */
  var isInArabicSubstitutionA = (jsPDFAPI.__arabicParser__.isInArabicSubstitutionA = function(
    letter
  ) {
    if (typeof letter !== "string") {
      return false;
    }
    // "".charCodeAt(0) is NaN, which is never a key of the table.
    return typeof arabicSubstitionA[letter.charCodeAt(0)] !== "undefined";
  });
  /**
   * Tells whether `letter` is a non-empty string consisting only of
   * characters from the ranges U+0600-06FF, U+0750-077F, U+08A0-08FF,
   * U+FB50-FDFF and U+FE70-FEFF. Anything that is not a string yields
   * `false`.
   *
   * The ranges are matched wholesale, so every code point inside them
   * counts, not only letters.
   *
   * @name isArabicLetter
   * @function
   * @param {string} letter
   * @returns {boolean}
   */
  var isArabicLetter = (jsPDFAPI.__arabicParser__.isArabicLetter = function(
    letter
  ) {
    if (typeof letter !== "string") {
      return false;
    }
    return /^[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]+$/.test(
      letter
    );
  });
  /**
   * Tells whether `letter` is a shape-table letter with at most two forms
   * (isolated, or isolated and final), i.e. one that has no initial or
   * medial form.
   *
   * @name isArabicEndLetter
   * @function
   * @param {string} letter
   * @returns {boolean}
   */
  var isArabicEndLetter = (jsPDFAPI.__arabicParser__.isArabicEndLetter = function(
    letter
  ) {
    if (!isArabicLetter(letter) || !isInArabicSubstitutionA(letter)) {
      return false;
    }
    var forms = arabicSubstitionA[letter.charCodeAt(0)];
    return forms.length <= 2;
  });
  /**
   * Tells whether `letter` starts with one of the ALEF variants listed in
   * `alfletter`.
   *
   * @name isArabicAlfLetter
   * @function
   * @param {string} letter
   * @returns {boolean}
   */
  var isArabicAlfLetter = (jsPDFAPI.__arabicParser__.isArabicAlfLetter = function(
    letter
  ) {
    if (!isArabicLetter(letter)) {
      return false;
    }
    var code = letter.charCodeAt(0);
    return alfletter.indexOf(code) !== -1;
  });
  /**
   * Tells whether the shape table lists at least one form (the isolated
   * one) for `letter`.
   *
   * @name arabicLetterHasIsolatedForm
   * @function
   * @param {string} letter
   * @returns {boolean}
   */
  jsPDFAPI.__arabicParser__.arabicLetterHasIsolatedForm = function(letter) {
    if (!isArabicLetter(letter) || !isInArabicSubstitutionA(letter)) {
      return false;
    }
    var forms = arabicSubstitionA[letter.charCodeAt(0)];
    return forms.length >= 1;
  };
  /**
   * Tells whether the shape table lists at least two forms for `letter`,
   * which means a final form is available.
   *
   * @name arabicLetterHasFinalForm
   * @function
   * @param {string} letter
   * @returns {boolean}
   */
  var arabicLetterHasFinalForm = (jsPDFAPI.__arabicParser__.arabicLetterHasFinalForm = function(
    letter
  ) {
    if (!isArabicLetter(letter) || !isInArabicSubstitutionA(letter)) {
      return false;
    }
    var forms = arabicSubstitionA[letter.charCodeAt(0)];
    return forms.length >= 2;
  });
  /**
   * Tells whether the shape table lists at least three forms for `letter`,
   * which means an initial form is available.
   *
   * @name arabicLetterHasInitialForm
   * @function
   * @param {string} letter
   * @returns {boolean}
   */
  jsPDFAPI.__arabicParser__.arabicLetterHasInitialForm = function(letter) {
    if (!isArabicLetter(letter) || !isInArabicSubstitutionA(letter)) {
      return false;
    }
    var forms = arabicSubstitionA[letter.charCodeAt(0)];
    return forms.length >= 3;
  };
  /**
   * Tells whether the shape table lists all four forms for `letter`, which
   * means a medial form is available.
   *
   * @name arabicLetterHasMedialForm
   * @function
   * @param {string} letter
   * @returns {boolean}
   */
  var arabicLetterHasMedialForm = (jsPDFAPI.__arabicParser__.arabicLetterHasMedialForm = function(
    letter
  ) {
    if (!isArabicLetter(letter) || !isInArabicSubstitutionA(letter)) {
      return false;
    }
    var forms = arabicSubstitionA[letter.charCodeAt(0)];
    return forms.length === 4;
  });
  /**
   * Replaces every character sequence that forms a complete path through
   * the `ligatures` trie with the single ligature character stored at the
   * end of that path.
   *
   * The string is scanned left to right. At each position the trie is walked
   * as far as the following characters allow. If the walk ends on a number,
   * the matched characters are replaced by that code and scanning continues
   * behind them. Otherwise (mismatch, or the string ends in the middle of a
   * path) the character at the current position is copied unchanged and
   * scanning continues with the next character.
   *
   * @name resolveLigatures
   * @function
   * @param {string} letters text whose letters were already replaced by
   * their presentation forms
   * @returns {string}
   */
  var resolveLigatures = (jsPDFAPI.__arabicParser__.resolveLigatures = function(
    letters
  ) {
    var output = "";
    var start = 0;
    var cursor;
    var node;
    var ligatureCode;

    while (start < letters.length) {
      node = ligatures;
      ligatureCode = -1;

      // Try to walk a complete trie path beginning at `start`.
      for (cursor = start; cursor < letters.length; cursor += 1) {
        node = node[letters.charCodeAt(cursor)];
        if (typeof node === "undefined") {
          break;
        }
        if (typeof node === "number") {
          ligatureCode = node;
          break;
        }
      }

      if (ligatureCode !== -1) {
        // `cursor` points at the last character of the matched sequence.
        output += String.fromCharCode(ligatureCode);
        start = cursor + 1;
      } else {
        output += letters.charAt(start);
        start += 1;
      }
    }

    return output;
  });
  /**
   * Tells whether the first character of `letter` is one of the marks
   * listed in `arabic_diacritics`. `undefined` yields `false`.
   *
   * @name isArabicDiacritic
   * @function
   * @param {string} [letter]
   * @returns {boolean}
   */
  jsPDFAPI.__arabicParser__.isArabicDiacritic = function(letter) {
    if (letter === undefined) {
      return false;
    }
    var code = letter.charCodeAt(0);
    return arabic_diacritics[code] !== undefined;
  };
  /**
   * Chooses the presentation form of `currentChar` from its neighbours.
   *
   * @name getCorrectForm
   * @function
   * @param {string} currentChar character to shape
   * @param {string} [beforeChar] character preceding it, if any
   * @param {string} [nextChar] character following it, if any
   * @returns {number} `noChangeInForm` (-1) when the character is not Arabic
   * or has no entry in the shape table, otherwise one of `isolatedForm` (0),
   * `finalForm` (1), `initialForm` (2) or `medialForm` (3)
   */
  var getCorrectForm = (jsPDFAPI.__arabicParser__.getCorrectForm = function(
    currentChar,
    beforeChar,
    nextChar
  ) {
    if (!isArabicLetter(currentChar) || !isInArabicSubstitutionA(currentChar)) {
      return noChangeInForm;
    }

    var prevIsArabic = isArabicLetter(beforeChar);
    var nextIsArabic = isArabicLetter(nextChar);
    // "End" letters have no initial/medial form (see isArabicEndLetter).
    var prevIsEndLetter = isArabicEndLetter(beforeChar);
    var currentIsEndLetter = isArabicEndLetter(currentChar);

    var standsAlone =
      !arabicLetterHasFinalForm(currentChar) ||
      (!prevIsArabic && !nextIsArabic) ||
      (!nextIsArabic && prevIsEndLetter) ||
      (currentIsEndLetter &&
        (!prevIsArabic || isArabicAlfLetter(beforeChar) || prevIsEndLetter));
    if (standsAlone) {
      return isolatedForm;
    }

    var joinsOnBothSides =
      arabicLetterHasMedialForm(currentChar) &&
      prevIsArabic &&
      !prevIsEndLetter &&
      nextIsArabic &&
      arabicLetterHasFinalForm(nextChar);
    if (joinsOnBothSides) {
      return medialForm;
    }

    return currentIsEndLetter || !nextIsArabic ? finalForm : initialForm;
  });
  /**
   * Replaces every Arabic letter of `text` by the presentation form that
   * fits its neighbours and then merges known ligature sequences.
   *
   * The text is processed as one piece and all whitespace is kept exactly as
   * given. Splitting into words is not needed: whitespace is not matched by
   * isArabicLetter, so getCorrectForm already treats it as a word boundary.
   * (An earlier revision called `text.split("\\s+")`. A string separator is
   * matched literally, so that call never split on whitespace. Its only
   * effect was to replace a literal backslash-s-plus sequence in the text by
   * a single space.)
   *
   * @name parseArabic
   * @function
   * @private
   * @param {string} text falsy values are treated as the empty string, other
   * non-string values are converted with String()
   * @returns {string}
   */
  var parseArabic = function(text) {
    var source = typeof text === "string" ? text : String(text || "");
    var shaped = "";
    var currentLetter;
    var position;
    var i;

    for (i = 0; i < source.length; i += 1) {
      currentLetter = source.charAt(i);
      // Index access yields undefined beyond either end of the string, which
      // getCorrectForm treats as "no Arabic neighbour".
      position = isArabicLetter(currentLetter)
        ? getCorrectForm(currentLetter, source[i - 1], source[i + 1])
        : noChangeInForm;

      if (position !== noChangeInForm) {
        shaped += String.fromCharCode(
          arabicSubstitionA[currentLetter.charCodeAt(0)][position]
        );
      } else {
        shaped += currentLetter;
      }
    }

    return resolveLigatures(shaped);
  };
  /**
   * Entry point of the plug-in, registered below as a "preProcessText"
   * handler.
   *
   * Accepts either a plain string or an object carrying the text in its
   * `text` property. The text itself may be a string or an array. Array
   * items that are arrays themselves have their first element shaped and
   * their second and third element carried over; any other item is shaped
   * and wrapped in a one-element array.
   *
   * @name processArabic
   * @function
   * @param {string|Object} args text, or an object with a `text` property
   * @returns {string|Object} the shaped string when called with a string,
   * otherwise the same object with its `text` property replaced
   */
  var processArabic = (jsPDFAPI.__arabicParser__.processArabic = jsPDFAPI.processArabic = function() {
    var payload = arguments[0];
    var calledWithString = typeof payload === "string";
    var text = calledWithString ? payload : payload.text;
    var shaped;

    if (Array.isArray(text)) {
      shaped = [];
      for (var k = 0; k < text.length; k += 1) {
        var entry = text[k];
        shaped.push(
          Array.isArray(entry)
            ? [parseArabic(entry[0]), entry[1], entry[2]]
            : [parseArabic(entry)]
        );
      }
    } else {
      shaped = parseArabic(text);
    }

    if (calledWithString) {
      return shaped;
    }
    payload.text = shaped;
    return payload;
  });

  jsPDFAPI.events.push(["preProcessText", processArabic]);
  344. })(jsPDF.API);