hangul.js (5586B)
/* Many of the calculations used within this file can be referenced back to
 * http://www.kfunigraz.ac.at/~katzer/korean_hangul_unicode.html
 *
 * Unfortunately, this site is now dead. I've gone ahead and archived a version
 * of it at ./resources/korean_hangul_syllabary_in_unicode_archive.html for
 * future reference.
 */

/** First and last code points of the precomposed Hangul syllable block. */
export const hangulUnicodeRange = {
  lower: 0xAC00,
  upper: 0xD7A3,
};

// Code points that relative jamo indexes are added to / subtracted from.
// Leads and tails use the conjoining jamo block; vowels use the
// compatibility jamo block (U+314F is "ㅏ").
const hangulTailUnicodeOffset = 0x11A8;
const hangulVowelUnicodeOffset = 0x314f;
const hangulLeadUnicodeOffset = 0x1100;
// Code point produced by the tail arithmetic when a syllable has no padchim.
const hangulEmptyConsonant = 0x11A7;

// Layout of the syllable block as used by the formulas below:
// each vowel owns 28 consecutive tail slots (slot 0 = no padchim) and
// each lead owns 588 (= 21 vowels * 28 tails) consecutive syllables.
const hangulTailSlots = 28;
const hangulSyllablesPerLead = 588;

/* Used to track modifications that have been made to characters. It keeps
 * track of characters' original padchims (for ㄷ -> ㄹ irregulars) and if the
 * character has no padchim but should be treated as if it does (for ㅅ
 * irregulars). When substrings are extracted the Geulja class keeps these
 * markers for the last character only.
 *
 * Instances are wrapped in a Proxy so that `geulja[i]` returns a new Geulja
 * for the UTF-16 code unit at index `i` (or `undefined` when out of range).
 * The string-like methods (slice, substring, split, ...) delegate to the
 * wrapped string and return plain strings / arrays, not Geulja instances.
 */
export class Geulja {
  /**
   * @param {string} value - The text to wrap.
   */
  constructor(value) {
    this.value = value;
    this.length = value.length;
    this.hidden_padchim = false;
    this.original_padchim = null;
    return new Proxy(this, {
      get: (target, key) => {
        if (typeof key === "string") {
          const index = Number(key);
          // Only canonical integer keys ("0", "12", "-1") are indexes.
          // Number("") and Number(" ") are 0, so without the round-trip
          // check `geulja[""]` would wrongly return the first character.
          if (Number.isInteger(index) && String(index) === key) {
            if (index < 0 || index >= target.value.length) {
              return undefined;
            }
            return target.getItem(index);
          }
        }
        return target[key];
      },
    });
  }

  /** @returns {string} The wrapped string. */
  toString() {
    return this.value;
  }

  /** Delegates to String.prototype.slice on the wrapped string. */
  slice(start, end) {
    return this.value.slice(start, end);
  }

  /** Delegates to String.prototype.substring on the wrapped string. */
  substring(start, end) {
    return this.value.substring(start, end);
  }

  /** Delegates to String.prototype.split on the wrapped string. */
  split(separator, limit) {
    return this.value.split(separator, limit);
  }

  /** Delegates to String.prototype.endsWith on the wrapped string. */
  endsWith(searchString) {
    return this.value.endsWith(searchString);
  }

  /** Delegates to String.prototype.startsWith on the wrapped string. */
  startsWith(searchString, position) {
    return this.value.startsWith(searchString, position);
  }

  /**
   * Wraps the code unit at `index` in a new Geulja.
   *
   * @param {number} index - Position within the wrapped string.
   * @returns {Geulja} A new instance; it inherits the padchim markers only
   *   when `index` is the last position.
   */
  getItem(index) {
    const item = new Geulja(this.value.charAt(index));
    // Only keep the hidden padchim marker for the last item.
    const isLast = index === this.value.length - 1;
    if (isLast) {
      item.hidden_padchim = this.hidden_padchim;
      item.original_padchim = this.original_padchim;
    }
    return item;
  }
}

/**
 * Tests whether a single character lies in the precomposed Hangul block.
 *
 * @param {string} character - Must have a length of exactly 1.
 * @returns {boolean} True when the code unit is within hangulUnicodeRange.
 * @throws {Error} When `character.length` is not 1.
 */
export function isHangul(character) {
  if (character.length !== 1) {
    throw new Error("isHangeul only checks characters with a length of 1");
  }
  const code = character.charCodeAt(0);
  return code >= hangulUnicodeRange.lower && code <= hangulUnicodeRange.upper;
}

/**
 * Chooses between "아" and "어" by scanning the text from its last character
 * backwards.
 *
 * The text is converted to a plain string first, so Geulja padchim markers
 * are not consulted here.
 *
 * @param {Geulja|string} geulja - Text to inspect.
 * @returns {string} "아" or "어"; "어" when no character decides.
 */
export function findVowelToAppend(geulja) {
  const characters = [...`${geulja}`].reverse();
  for (const char of characters) {
    if (["뜨", "쓰", "트"].includes(char)) {
      return "어";
    }
    // A syllable with vowel ㅡ and no padchim does not decide; look at the
    // character before it.
    if (getVowel(char) === "ㅡ" && !getPadchim(char)) {
      continue;
    }
    return ["ㅗ", "ㅏ", "ㅑ"].includes(getVowel(char)) ? "아" : "어";
  }
  return "어";
}

/**
 * Assemble a set of Jamo characters into one precomposed syllable.
 *
 * @param {string} lead - Lead consonant, as offset from U+1100.
 * @param {string} vowel - Vowel, as offset from U+314F.
 * @param {?string} [padchim=null] - Tail consonant, as offset from U+11A8;
 *   any falsy value means "no padchim".
 * @returns {string} The composed character.
 */
export function join(lead, vowel, padchim = null) {
  const leadIndex = lead.charCodeAt(0) - hangulLeadUnicodeOffset;
  const vowelIndex = vowel.charCodeAt(0) - hangulVowelUnicodeOffset;
  // Tail slot 0 is "no padchim"; the first real tail occupies slot 1.
  const tailIndex = padchim
    ? padchim.charCodeAt(0) - hangulTailUnicodeOffset + 1
    : 0;
  return String.fromCharCode(
    hangulUnicodeRange.lower +
      leadIndex * hangulSyllablesPerLead +
      vowelIndex * hangulTailSlots +
      tailIndex,
  );
}

/**
 * Extracts the lead consonant from the first code unit of the text.
 *
 * @param {Geulja|string} geulja - Text whose first character is decomposed.
 * @returns {string} The lead as a jamo offset from U+1100.
 */
export function getLead(geulja) {
  const syllableIndex = `${geulja}`.charCodeAt(0) - hangulUnicodeRange.lower;
  const leadIndex = Math.floor(syllableIndex / hangulSyllablesPerLead);
  return String.fromCharCode(leadIndex + hangulLeadUnicodeOffset);
}

/**
 * Extracts the vowel from the first code unit of the text.
 *
 * The vowel is derived from the encoded syllable alone. It deliberately does
 * not go through getPadchim(): that may return a Geulja's `original_padchim`
 * override instead of the encoded tail, and subtracting an offset based on
 * the override could shift the result by one vowel (or below the vowel range
 * because `%` keeps the sign of a negative dividend).
 *
 * Results for characters outside hangulUnicodeRange are not meaningful.
 *
 * @param {Geulja|string} geulja - Text whose first character is decomposed.
 * @returns {string} The vowel as a jamo offset from U+314F.
 */
export function getVowel(geulja) {
  const syllableIndex = `${geulja}`.charCodeAt(0) - hangulUnicodeRange.lower;
  const vowelIndex = Math.floor(
    (syllableIndex % hangulSyllablesPerLead) / hangulTailSlots,
  );
  return String.fromCharCode(vowelIndex + hangulVowelUnicodeOffset);
}

/**
 * Reports whether the value is a Geulja flagged with a hidden padchim.
 *
 * @param {Geulja|string} geulja - Value to inspect.
 * @returns {boolean} True only for Geulja instances with a truthy
 *   `hidden_padchim`.
 */
export function hasHiddenPadchim(geulja) {
  return geulja instanceof Geulja && Boolean(geulja.hidden_padchim);
}

/**
 * Returns the padchim (tail consonant) of the text.
 *
 * A Geulja with a truthy `original_padchim` returns that override; otherwise
 * the tail is decoded from the first code unit of the text.
 *
 * @param {Geulja|string} geulja - Text to inspect.
 * @returns {?string} The tail as a jamo offset from U+11A8, or null when the
 *   syllable has no padchim.
 */
export function getPadchim(geulja) {
  if (geulja instanceof Geulja && geulja.original_padchim) {
    return geulja.original_padchim;
  }
  const syllableIndex = `${geulja}`.charCodeAt(0) - hangulUnicodeRange.lower;
  // Slot 0 maps onto hangulEmptyConsonant (one below the first real tail).
  const tailCode = (syllableIndex % hangulTailSlots) + hangulTailUnicodeOffset - 1;
  if (tailCode === hangulEmptyConsonant) {
    return null;
  }
  return String.fromCharCode(tailCode);
}

/**
 * Checks a character against optional lead / vowel / padchim constraints.
 * A constraint left `undefined` always matches.
 *
 * For the padchim constraint the compared value is `true` when the Geulja
 * has a hidden padchim, otherwise the result of getPadchim() (a jamo or
 * null). So `padchim = null` matches only syllables with neither an encoded
 * nor a hidden padchim.
 *
 * @param {Geulja|string} geulja - Character to test.
 * @param {string} [lead] - Required lead jamo.
 * @param {string} [vowel] - Required vowel jamo.
 * @param {?(string|boolean)} [padchim] - Required padchim value.
 * @returns {boolean} True when every supplied constraint matches.
 */
export function matchGeulja(geulja, lead = undefined, vowel = undefined, padchim = undefined) {
  const leadMatches = lead === undefined || getLead(geulja) === lead;
  const vowelMatches = vowel === undefined || getVowel(geulja) === vowel;
  const padchimMatches =
    padchim === undefined ||
    (hasHiddenPadchim(geulja) || getPadchim(geulja)) === padchim;
  return leadMatches && vowelMatches && padchimMatches;
}