Closed charlesLoder closed 7 months ago
I tried with tiberian schema (hebrew-transliteration/dist/schemas/tiberianKhan.js
), still working on it, much to do:
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.tiberianKhan = void 0;
const additionalFeatureTransliteration = require("../rules").additionalFeatureTransliteration;
exports.tiberianKhan = {
VOCAL_SHEVA: "ǝ",
HATAF_SEGOL: "ɛ",
HATAF_PATAH: "a",
HATAF_QAMATS: "o",
HIRIQ: "i",
TSERE: "e",
SEGOL: "ɛ",
PATAH: "a",
QAMATS: "ɔ",
HOLAM: "o",
QUBUTS: "u",
DAGESH: "",
DAGESH_CHAZAQ: true,
MAQAF: "-",
PASEQ: "",
SOF_PASUQ: "",
QAMATS_QATAN: "ɔ",
FURTIVE_PATAH: "a",
HIRIQ_YOD: "i:",
TSERE_YOD: "e:",
SEGOL_YOD: "ɛ:",
SHUREQ: "u:",
HOLAM_VAV: "o:",
QAMATS_HE: "ɔ:",
SEGOL_HE: "ɛ:",
TSERE_HE: "e:",
MS_SUFX: "ɔw",
ALEF: "ʔ",
BET: "v",
BET_DAGESH: "b",
GIMEL: "ʁ",
GIMEL_DAGESH: "g",
DALET: "ð",
DALET_DAGESH: "d",
HE: "h",
VAV: "v",
ZAYIN: "z",
HET: "ħ",
TET: "tˁ",
YOD: "j",
FINAL_KAF: "χ",
KAF: "χ",
KAF_DAGESH: "kʰ",
LAMED: "l",
FINAL_MEM: "m",
MEM: "m",
FINAL_NUN: "n",
NUN: "n",
SAMEKH: "s",
AYIN: "ʕ",
FINAL_PE: "f",
PE: "f",
PE_DAGESH: "pʰ",
FINAL_TSADI: "sˁ",
TSADI: "sˁ",
QOF: "q̟",
RESH: "ʀ̟",
SHIN: "ʃ",
SIN: "s",
TAV: "θ",
TAV_DAGESH: "tʰ",
DIVINE_NAME: "yhwh",
STRESS_MARKER: { location: "before-syllable", mark: "ˈ" },
/*ADDITIONAL_FEATURES: [
{ FEATURE: "syllable", HEBREW: "[\u05D0]$", TRANSLITERATION: "" },
//{ FEATURE: "syllable", HEBREW: "[\u05B4]$", TRANSLITERATION: "i:" },
//{ FEATURE: "syllable", HEBREW: "[\u05B5]$", TRANSLITERATION: "e:" },
//{ FEATURE: "syllable", HEBREW: "[\u05B6]$", TRANSLITERATION: "ɛ:" },
//{ FEATURE: "syllable", HEBREW: "[\u05B7]$", TRANSLITERATION: "a:" },
//{ FEATURE: "syllable", HEBREW: "[\u05B8]$", TRANSLITERATION: "ɔ:" },
//{ FEATURE: "syllable", HEBREW: "[\u05B9]$", TRANSLITERATION: "o:" },
//{ FEATURE: "syllable", HEBREW: "[\u05BB]$", TRANSLITERATION: "u:" },
/////{ FEATURE: "cluster", HEBREW: "[\u05B1]", TRANSLITERATION: "ɛ" },
{ FEATURE: "word", HEBREW: "\u05B0\u05D0\u05B4", TRANSLITERATION: "iʔi" },
{ FEATURE: "word", HEBREW: "\u05B0\u05D0\u05B5", TRANSLITERATION: "eʔe" },
//{ FEATURE: "word", HEBREW: "\u05B0\u05D0\u05B6", TRANSLITERATION: "ɛʔɛ" }, // !!! //
//{ FEATURE: "word", HEBREW: "\u05B0\u05D0\u05B7", TRANSLITERATION: "aʔa" }, // !!! //
{ FEATURE: "word", HEBREW: "\u05B0\u05D0\u05B8", TRANSLITERATION: "ɔʔɔ" },
{ FEATURE: "word", HEBREW: "\u05B0\u05D0\u05B9", TRANSLITERATION: "oʔo" },
{ FEATURE: "word", HEBREW: "\u05B0\u05D0\u05BB", TRANSLITERATION: "uʔu" },
{ FEATURE: "word", HEBREW: "\u05B0\u05D4\u05B4", TRANSLITERATION: "ihi" },
{ FEATURE: "word", HEBREW: "\u05B0\u05D4\u05B5", TRANSLITERATION: "ehe" },
{ FEATURE: "word", HEBREW: "\u05B0\u05D4\u05B6", TRANSLITERATION: "ɛhɛ" },
{ FEATURE: "word", HEBREW: "\u05B0\u05D4\u05B7", TRANSLITERATION: "aha" },
{ FEATURE: "word", HEBREW: "\u05B0\u05D4\u05B8", TRANSLITERATION: "ɔhɔ" },
{ FEATURE: "word", HEBREW: "\u05B0\u05D4\u05B9", TRANSLITERATION: "oho" },
{ FEATURE: "word", HEBREW: "\u05B0\u05D4\u05BB", TRANSLITERATION: "uhu" },
{ FEATURE: "word", HEBREW: "\u05B0\u05D7\u05B4", TRANSLITERATION: "iħi" },
{ FEATURE: "word", HEBREW: "\u05B0\u05D7\u05B5", TRANSLITERATION: "eħe" },
{ FEATURE: "word", HEBREW: "\u05B0\u05D7\u05B6", TRANSLITERATION: "ɛħɛ" },
{ FEATURE: "word", HEBREW: "\u05B0\u05D7\u05B7", TRANSLITERATION: "aħa" },
{ FEATURE: "word", HEBREW: "\u05B0\u05D7\u05B8", TRANSLITERATION: "ɔħɔ" },
{ FEATURE: "word", HEBREW: "\u05B0\u05D7\u05B9", TRANSLITERATION: "oħo" },
{ FEATURE: "word", HEBREW: "\u05B0\u05D7\u05BB", TRANSLITERATION: "uħu" },
{ FEATURE: "word", HEBREW: "\u05B0\u05E2\u05B4", TRANSLITERATION: "iʕi" },
{ FEATURE: "word", HEBREW: "\u05B0\u05E2\u05B5", TRANSLITERATION: "eʕe" },
{ FEATURE: "word", HEBREW: "\u05B0\u05E2\u05B6", TRANSLITERATION: "ɛʕɛ" },
{ FEATURE: "word", HEBREW: "\u05B0\u05E2\u05B7", TRANSLITERATION: "aʕa" },
{ FEATURE: "word", HEBREW: "\u05B0\u05E2\u05B8", TRANSLITERATION: "ɔʕɔ" },
{ FEATURE: "word", HEBREW: "\u05B0\u05E2\u05B9", TRANSLITERATION: "oʕo" },
{ FEATURE: "word", HEBREW: "\u05B0\u05E2\u05BB", TRANSLITERATION: "uʕu" },
{ FEATURE: "word", HEBREW: "\u05B0\u05D9", TRANSLITERATION: "i:" }
],*/
ADDITIONAL_FEATURES: [
{
FEATURE: "cluster",
HEBREW: "\u05B0",
TRANSLITERATION: (cluster, transliteration, schema) => {
const shewa = new RegExp(transliteration, "u");
const clusterText = cluster.text;
/**
* @type {Cluster}
*/
const next = cluster.next;
const gutturalYodVowel = /[אהחעי]([\u{05B1}-\u{05BB}\u{05C7}])/u;
const match = next.text.match(gutturalYodVowel);
if (shewa.test(clusterText) && match) {
return additionalFeatureTransliteration(clusterText, shewa, match[1], schema);
}
return clusterText;
}
}
],
longVowels: false,
qametsQatan: false,
sqnmlvy: true,
wawShureq: false,
article: true,
allowNoNiqqud: false,
strict: true
};
Sample for what we should accomplish: Genesis 1:1-4
@johnlockejrr
Thanks for all this!
In the branch with the new callback function for additional features, the callback gives access to the Word
, Syllable
, or Cluster
objects and their newly added properties in v0.13.x.
Right now, I'm running into a bit of a wall. Calling something like syllable.vowelName
could return something that matches a schema property. I was envisioning it being used like this:
{
FEATURE: "syllable",
HEBREW: "\u{05B0}",
TRANSLITERATION: (syllable, hebrew, schema) => {
const next = syllable.next;
if(next && next.vowelName) {
// renamed function below from additionalFeatureTransliteration
return replaceAndTransliterate(syllable.text, new Regex(hebrew, "u"), schema[next.vowelName], schema);
}
}
}
The problem, however, is this schema[next.vowelName]
which lacks type safety...
Not totally sure how to resolve other than merging these two packages into a monorepo or heavily refactoring the schema interface — probably the latter
Probably the latter I think too.
Refactoring allows for something a little more elegant:
const heb = require("./dist/index");
const rules = require("./dist/rules");

const result = heb.transliterate("בְּרֵאשִׁ֖ית וַיַּבְדֵּל", {
  ADDITIONAL_FEATURES: [
    {
      // matches any sheva in a syllable that is NOT preceded by a vowel character
      HEBREW: "(?<![\u{05B1}-\u{05BB}\u{05C7}].*)\u{05B0}",
      FEATURE: "syllable",
      /**
       * Replaces the sheva in the syllable with the schema value for the next
       * syllable's vowel.
       *
       * @param syllable - the syllable that matched HEBREW
       * @param _hebrew - unused; the sheva is matched with a fixed pattern below
       * @param schema - schema object looked up by vowel name
       * @returns the helper's result, or the syllable text unchanged when there
       *   is no next syllable or it has no vowel name
       */
      TRANSLITERATION: function (syllable, _hebrew, schema) {
        const next = syllable.next;
        // Check `next` before reading any property from it; reading
        // `next.vowelName` first would throw when there is no next syllable.
        if (!next) {
          return syllable.text;
        }
        // discrepancy here: in havarotjs SHEVA is simply the character
        // whereas transliteration is concerned with a specific sheva, a vocal sheva
        const nextVowel = next.vowelName === "SHEVA" ? "VOCAL_SHEVA" : next.vowelName;
        if (!nextVowel) {
          return syllable.text;
        }
        const vowel = schema[nextVowel] || "";
        // replaceAndTransliterate is an internal helper function
        return rules.replaceAndTransliterate(syllable.text, new RegExp("\u{05B0}", "u"), vowel, schema);
      }
    }
  ]
});
// bērēʾšît wayyabdēl
Though the regex is a little more complicated, it ensures that the sheva being matched is likely a vocal one.
thinking out loud: the ADDITIONAL_FEATURES
property was originally designed with orthographic features in mind. Perhaps an ADDITIONAL_RULES
could be a possible future property where the rule could match on something simpler like
syl.vowelName === "SHEVA"
bērēʾšît wayyabdēl
would be wrong because shewa is a short vowel and the b
in the second word is spirantized to v
, in Tiberian transcription proposed by Khan we should have baʀ̟eːˈʃiːiθ waɟɟav'deːel
or if you want something like barē'šît wayyav'dēl
@johnlockejrr
Checkout this branch for tiberian.
If you could look through the tests, and let me know what is incorrect.
Feel free to push changes or just comment here
Ok. I'll do that
On Thu, 16 Feb 2023 at 03:27, Charles Loder @.***> wrote:
@johnlockejrr https://github.com/johnlockejrr
Checkout this branch for tiberian https://github.com/charlesLoder/hebrew-transliteration/tree/tiberian.
If you could look through the tests, and let me know what is incorrect.
Feel free to push changes or just comment here
— Reply to this email directly, view it on GitHub https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1432382166, or unsubscribe https://github.com/notifications/unsubscribe-auth/AD44GHVWAGUJDXVGCI6ZOU3WXWGAHANCNFSM6AAAAAAUDGEC2M . You are receiving this because you were mentioned.Message ID: @.***>
Quite close!!! Need some more little work but we are almost there:
hebrew-transliteration output:
Geoffrey Khan:
NOTES:
We should:
amend in the schema VOCAL_SHEVA: "ǝ"
to VOCAL_SHEVA: "a"
(my bad!)
YOD with DAGESH is pronounced ɟɟ
and not jj
בְּרֵאשִׁית
as בַּרֵאשִׁית
but with the rule of vowel
lengthening that would have given the reciter something like ba:ʀ̟eːˈʃiːiθ
with long PATACH in open syllable, with SHEVA we
have baʀ̟eːˈʃiːiθ
.iːi
in ʔɛloːˈhiːim
or eːe
in ˈʔeːeθ
), in Khan's words "when a long vowel occurs in a closed syllable, an epenthetic vowel is inserted after the long vowel before the syllable final consonant", e.g. דָּבָר [dɔːˈvɔ:ɔʀ̟], [ʃɔːˈmɑːɑʀ̟].
quiescent ALEPH in bǝʀ̟eʔʃi:θ bɔˈʀ̟ɔʔ
(and elsewhere) should be dropped.The shewa (שְׁוָא) sign (אְ) in the Tiberian vocalization system was read either as a vowel or as zero When shewa was read as vocalic, its quality in the Tiberian tradition was by default the same as that of the pataḥ vowel sign, i.e., the maximally low vowel [a] e.g. תְּכַסֶּה [tʰaχasˈsɛː] "you (ms) cover" מְדַּבְּרִים [maðabbaˈʀ̟iːim] "speaking (mpl)"
In the Tiberian tradition, when vocalic shewa occurs before a guttural consonant or the letter yod, it was realized with a different quality through an assimilatory process (i) before a guttural (אהחע) it was realized as a short vowel with the quality of the vowel on the guttural e.g. בְּעֶרְכְּךָ [bɛʕɛʀ̟kʰaˈχɔː] "by your evaluation" וְהָיָה [vɔhɔːˈjɔː] "and it became" בְּאֵר [beˈʔeːeʀ̟] "well" מְאוֹד [moˈʔoːoð] "very" מְחִיר [miˈħiːiʀ̟] "price" מְעוּכָה [muʕuːˈχɔː] "pressed" (ii) before yod, it was realized as a short vowel with the quality of short ḥireq [i] e.g. בְּיוֹם [biˈjoːom] "on the day" לְיִשְׂרָאֵל [lijisrˁɔːˈʔeːel] "to Israel" תְּדַמְּיוּן [tʰaðammiˈjuːun] "you liken (mpl)"
The shewa sign is combined with some of the basic vowel signs to form the so-called ḥaṭef signs (i) ḥaṭef pataḥ (אֲ) [a] (ii) ḥaṭef segol (אֱ) [ɛ] (iii) ḥaṭef qameṣ (אֳ) [ɔ] In such signs the vocalic reading of the shewa is made explicit and also its quality The default pronunciation of vocalic shewa with the quality of [a] was equivalent to that of the ḥaṭef pataḥ sign (אֲ) Both the vocalic shewa and the vowels expressed by ḥaṭef signs were short vowels that, in principle, had the same quantity as short vowels in closed unstressed syllables, which were represented in standard Tiberian vocalization by a simple vowel sign.
Let me take these a little at a time.
amend in the schema VOCAL_SHEVA: "ǝ" to VOCAL_SHEVA: "a" (my bad!)
Ok, that one is easy.
YOD with DAGGESH is pronounced ɟɟ and not jj
I think I got this correct, see test
quiescent ALEPH in bǝʀ̟eʔʃi:θ bɔˈʀ̟ɔʔ (and elsewhere) should be dropped.
That makes sense. See tests on the following lines, and let me know if they're correct at least in regards to the aleph:
and
The rest will take a little more time to get to.
Yes, you are correct!
Forgot about DIVINE_NAME: "yhwh"
, it was pronounced according to the vowels written:
יֱהוִה֙ [ʔɛloːˈhiːim]
יְהוָֹ֤ה [ʔaðo:ˈnɔ:j]
the epenthetic vowel in glide was pronounced like: רוּחַ [ˈʀ̟uːwaħ], שִׂיחַ [ˈsiːjaħ] etc.
See test:
Forgot about DIVINE_NAME: "yhwh", it was pronounced according to the vowels written:
That one is easy enough:
Still have to work on the long vowels and sheva.
Had a baby a few months ago, hence the stop-and-go work on this
Splendid! Now we are even closer. Good work @charlesLoder
Congratulations on the baby!
Just realizing I forgot to add a test for שִׂיחַ [ˈsiːjaħ]
Take a look at all these, and let me know if I'm missing something.
What about a vav/yod before a he (not even sure if that happens)?
All seem right, besides the long vowels of course. גָּבֹ֗הַּ gɔˈvo:ah כִּשְׁמֹ֤עַ kʰiʃˈmo:aʕ נֹ֖חַ ˈno:aħ
Summary:
What about a vav/yod before a he (not even sure if that happens)?
Not sure I'm following.
NOTE:
Many words carry a secondary stress in addition to the main stress (fortunately this is noted with the cantillation marks), e.g. הָ֣אָדָ֔ם [ˌhɔːʔɔːˈðɔːm] ‘the man’ (Gen. 2.19), נִֽתְחַכְּמָ֖ה [ˌniːθḥakkaˈmɔː] ‘let us deal wisely’ (Exod. 1.10).
What about a vav/yod before a he (not even sure if that happens)? Not sure I'm following.
The furtive patach tests have a vav or yod before a chet or ayin. I'm trying to think if there are any words with a furtive patach before a he (e.g. גָּבֹ֗הַּ), where the he is preceded by a vav or yod.
Many words carry a secondary stress in addition to the main stress (fortunately this is noted with the cantillation marks), e.g. הָ֣אָדָ֔ם [ˌhɔːʔɔːˈðɔːm] ‘the man’ (Gen. 2.19), נִֽתְחַכְּמָ֖ה [ˌniːθḥakkaˈmɔː] ‘let us deal wisely’ (Exod. 1.10).
This would be a feature to build out. I also really need to update the isAccented
property on the Syllable
object.
Will look at vowel length next
What about a vav/yod before a he (not even sure if that happens)?
It happens: מַגְבִּ֥יהַּ תַּגְבִּ֣יהַּ יַגִּ֥יהַּ יַגְבִּ֣יהַּ אֱלֹ֨והַּ
I will try to find with vav too, I think there are. EDIT: found in BHS only אֱלֹ֨והַּ.
Other patach furtives: מָנֹ֜וחַ לָשׂ֥וּחַ יֵשׁ֡וּעַ אֲבִישׁ֥וּעַ וּמַלְכִּישׁ֑וּעַ שְׁלִ֔יחַ רֵ֣יח
Also, don't forget about the SHEVA rules when you got time.
In the Tiberian tradition, when vocalic shewa occurs before a guttural consonant or the letter yod, it was realized with a different quality through an assimilatory process (i) before a guttural (אהחע) it was realized as a short vowel with the quality of the vowel on the guttural e.g. בְּעֶרְכְּךָ [bɛʕɛʀ̟kʰaˈχɔː] "by your evaluation" וְהָיָה [vɔhɔːˈjɔː] "and it became" בְּאֵר [beˈʔeːeʀ̟] "well" מְאוֹד [moˈʔoːoð] "very" מְחִיר [miˈħiːiʀ̟] "price" מְעוּכָה [muʕuːˈχɔː] "pressed" (ii) before yod, it was realized as a short vowel with the quality of short ḥireq [i] e.g. בְּיוֹם [biˈjoːom] "on the day" לְיִשְׂרָאֵל [lijisrˁɔːˈʔeːel] "to Israel" תְּדַמְּיוּן [tʰaðammiˈjuːun] "you liken (mpl)"
@johnlockejrr
Another round of work.
More furtive tests Take a look at these. They should be correct in terms of being preceded by a vav or yod. The long vowels aren't correct in this commit https://github.com/charlesLoder/hebrew-transliteration/commit/db97c62c3c693dc26d4e26dc36b98eaa762292bb
Epenthetic vowel
When a long vowel occurs in a closed syllable, an epenthetic vowel is inserted after the long vowel before the syllable final consonant
These long vowels are going to be tricky....
See the updated tests here https://github.com/charlesLoder/hebrew-transliteration/commit/37bbdc1c04148a8625d84dbcc46190e98c222f2b
Could you comment on each line whether it is correct or not. A simple 👍 if it's correct, and if it's not correct, then comment with the correct value.
I have commented on not correct ones, I hope I didn't make any mistakes, I could ask Khan to correct but maybe a little later.
What's the latest branch with Tiberian Schema?
Tried on the latest. Genesis 1
Khan:
@johnlockejrr
Just updated the branch.
I'm struggling a bit with the vowel length stuff.
The most recent commit for Gen 1:1-5 produces:
In some ways it's closer, in other ways it's way off
Yes, way closer! We are on the right path :)
Same branch gave me this for Gen 1:1-5:
One note (or two), the prolonged vowel appears only in accented closed syllable so not in hɔːʔɔːˈʀ̟ɛːɛsˁ
that should be hɔː'ʔɔːʀ̟ɛsˁ
, in bɔːˈʀ̟ɔːɔ
we should have only bɔːˈʀ̟ɔː
because Aleph is quiescent so it doesn't prolong the already long vowel.
We should also get rid of the Yod as mater e.g. uːˈveːejn
that should be wuˈveːen
or ʔɛːloːˈhiːijm
that should be ʔɛloːˈhiːim
etc.
Also the quality of the Sheva before gutturals and Yod: not vaˈħoːʃɛχ
but voˈħoːʃɛχ
, not vaˈʔeːeθ
but veˈʔeːeθ
etc.
Ok, some more progress is being made, but now I'm hitting up against some deeper issues related to the syllabification package:
And some other issues I'm still trying to figure out.
I'm going to remove this from the v2.4.0
milestone so I can create another release and update the site.
Once I make more substantial changes to the syllabification package, I'll return to this.
It is, however, getting much closer! For Gen 1:1-5 I'm seeing a lot of the same issues occur, so much of it should be resolved soon.
I'm also working on a book project soon so that may take time away from this (too many irons in the fire! 🔥 )
@johnlockejrr
I'm confused by "הָיְתָ֥ה" which is transcribed as "hɔːɔjˈθɔː". He says regarding the first vowel:
Insertion of epenthetic in closed syllable with a long vowel: §I.2.4.
In that section he says:
When long vowels with the main stress occur in closed syllables, there is evidence that an epenthetic with the same quality as that of the long vowel occurred before the final consonant in its phonetic realization
The first vowel, however, does not take the main stress.
Thoughts?
I'm confused too, I will ask Khan, for me, if I'm not mistaken should be "hɔjˈθɔː" the first vowel should be short because is in closed unstressed syllable… what do you think?
On Thu, 27 Apr 2023 at 22:20, Charles Loder @.***> wrote:
@johnlockejrr https://github.com/johnlockejrr
I'm confused by "הָיְתָ֥ה" which is transcribed as "hɔːɔjˈθɔː". He says regarding the first vowel:
Insertion of epenthetic in closed syllable with a long vowel: §I.2.4.
In that section he says:
When long vowels with the main stress occur in closed syllables, there is evidence that an epenthetic with the same quality as that of the long vowel occurred before the final consonant in its phonetic realization
The first vowel, however, does not take the main stress.
Thoughts?
— Reply to this email directly, view it on GitHub https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1526368498, or unsubscribe https://github.com/notifications/unsubscribe-auth/AD44GHVUXFDK6ZPFP6FGQ63XDLIJVANCNFSM6AAAAAAUDGEC2M . You are receiving this because you were mentioned.Message ID: @.***>
The latest commit produces "hɔjˈθɔː", which I think it should be
@johnlockejrr
Any response from Khan?
I've locally been slowly updating the tests to match the output, but only when I can infer that they are correct.
Finishing some projects this next week, then I can shift some attention back to this
Can you refresh my memory?
On Sat, 3 Jun 2023 at 13:56, Charles Loder @.***> wrote:
@johnlockejrr https://github.com/johnlockejrr
Any response from Khan?
I've locally been slowly updating the tests to match the output, but only when I can infer that they are correct.
Finishing some projects this next week, then I can shift some attention back to this
— Reply to this email directly, view it on GitHub https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1574891954, or unsubscribe https://github.com/notifications/unsubscribe-auth/AD44GHQPIVFXRF4B3P7MJNLXJMQ5ZANCNFSM6AAAAAAUDGEC2M . You are receiving this because you were mentioned.Message ID: @.***>
@johnlockejrr ,
This issue is "הָיְתָ֥ה" which is transcribed as "hɔːɔjˈθɔː" in the book, but definitely seems like it should be "hɔjˈθɔː"
I'm 100% sure it is an error or a typo in the book.
On Sat, 10 Jun 2023 at 19:56, Charles Loder @.***> wrote:
@johnlockejrr https://github.com/johnlockejrr ,
This issue is "הָיְתָ֥ה" which is transcribed as "hɔːɔjˈθɔː" in the book, but definitely seems like it should be "hɔjˈθɔː"
— Reply to this email directly, view it on GitHub https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1585754955, or unsubscribe https://github.com/notifications/unsubscribe-auth/AD44GHVSCSNE6C3XDH45VMLXKSYNTANCNFSM6AAAAAAUDGEC2M . You are receiving this because you were mentioned.Message ID: @.***>
Ok, I'll keep updating the tests
@johnlockejrr
I made some updates to the tests, primarily with regard to vowel length.
There are, of course, some remaining issues:
וַיֹּ֥אמֶר
it is transliterated as vaˈɟɟoː֥mɛʀ̟
vaɟˈɟoːmɛʀ̟
הָיְתָ֥ה
being transliterated hɔjˈθɔː
instead of the transcribed hɔːɔjˈθɔː
וַֽיְהִי
being transliterated as vaːjihiː
instead of the transcribed vaˑjhiː
קָ֣רָא
being transliterated as ˈq̟ɔːʀ̟ɔː
instead of the transcribed ˈq̟ɔʀ̟ɔː
Other than that, it's getting close!
Yes, very close! I will test them, very nice job!
On Sun, 25 Jun 2023 at 20:16, Charles Loder @.***> wrote:
@johnlockejrr https://github.com/johnlockejrr
I made some updates to the tests, primarily with regard to vowel length.
There are, of course, some remaining issues:
- stress markers in wrong place with doubled consonants
- for a word like וַיֹּ֥אמֶר it is transliterated as vaˈɟɟoː֥mɛʀ̟
- it should be vaɟˈɟoːmɛʀ̟
- a fix is possible, I just haven't quite figured it out
- possible typos in transcriptions
- we've already discussed הָיְתָ֥ה being transliterated hɔjˈθɔː instead of the transcribed hɔːɔjˈθɔː
- another is וַֽיְהִי being transliterated as vaːjihiː instead of the transcribed vaˑjhiː
- both of these issues contain a yod, so I'll research more
- another is קָ֣רָא being transliterated as ˈq̟ɔːʀ̟ɔː instead of the transcribed ˈq̟ɔʀ̟ɔː
- the resh may be an exception
Other than that, it's getting close!
— Reply to this email directly, view it on GitHub https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1606208849, or unsubscribe https://github.com/notifications/unsubscribe-auth/AD44GHRQCJSDWSUH6WFY37LXNB57VANCNFSM6AAAAAAUDGEC2M . You are receiving this because you were mentioned.Message ID: @.***>
At the root of the repo, I have a test file like this:
const heb = require("./dist/index");
const tiberian = require("./dist/schemas/tiberian").tiberian;

/**
 * Splits every verse on single spaces and returns all words as one flat list.
 * Words joined by a hyphen/maqaf stay together as one entry.
 *
 * @param {string[]} verses
 * @returns {string[]}
 */
const toWords = (verses) => verses.flatMap((verse) => verse.split(" "));

// the first 5 verses of Gen 1 — expected transcriptions, one entry per word
const khan = toWords([
  "baʀ̟eːˈʃiːiθ bɔːˈʀ̟ɔː ʔɛloːˈhiːim ˈʔeːeθ haʃʃɔːˈmaːjim veˈʔeːeθ hɔːˈʔɔːʀ̟ɛsˁ",
  "vɔhɔːˈʔɔːʀ̟ɛsˁ hɔːɔjˈθɔː ˈθoːhuː vɔːˈvoːhuː voˈħoːʃɛχ ʕal-pʰaˈneː θoˈhoːom vaˈʀ̟uːwaħ ʔɛloːˈhiːim maʀ̟aːˈħɛːfɛθ ʕal-pʰaˈneː hamˈmɔːjim",
  // final resh carries the same diacritic as the preceding ˈʔoːoʀ̟
  "vaɟˈɟoːmɛʀ̟ ʔɛloːˈhiːim jiˈhiː ˈʔoːoʀ̟ ˌvaˑjhiː-ˈʔoːoʀ̟",
  "vaɟˈɟaːaʀ̟ ʔɛloːˈhiːim ʔɛθ-hɔːˈʔoːoʀ̟ kʰiː-ˈtˁoːov vaɟɟavˈdeːel ʔɛloːˈhiːim beːen hɔːˈʔoːoʀ̟ wuˈveːen haːˈħoːʃɛχ",
  // length is written with the IPA length mark (ː) throughout, not an ASCII colon
  "vaɟɟiqˈʀ̟ɔː ʔɛloːˈhiːim lɔːˈʔoːoʀ̟ ˈjoːom valaːˈħoːʃɛχ ˈq̟ɔʀ̟ɔː ˈlɔːɔjlɔː ˌvaˑjhiː-ˈʕɛːʀ̟ɛv ˌvaˑjhiː-ˈvoːq̟ɛʀ̟ ˈjoːom ʔɛːˈħɔːɔð"
]);

const inputs = toWords([
  "בְּרֵאשִׁ֖ית בָּרָ֣א אֱלֹהִ֑ים אֵ֥ת הַשָּׁמַ֖יִם וְאֵ֥ת הָאָֽרֶץ׃",
  "וְהָאָ֗רֶץ הָיְתָ֥ה תֹ֨הוּ֙ וָבֹ֔הוּ וְחֹ֖שֶׁךְ עַל־פְּנֵ֣י תְה֑וֹם וְר֣וּחַ אֱלֹהִ֔ים מְרַחֶ֖פֶת עַל־פְּנֵ֥י הַמָּֽיִם׃",
  "וַיֹּ֥אמֶר אֱלֹהִ֖ים יְהִ֣י א֑וֹר וַֽיְהִי־אֽוֹר׃",
  "וַיַּ֧רְא אֱלֹהִ֛ים אֶת־הָא֖וֹר כִּי־ט֑וֹב וַיַּבְדֵּ֣ל אֱלֹהִ֔ים בֵּ֥ין הָא֖וֹר וּבֵ֥ין הַחֹֽשֶׁךְ׃",
  "וַיִּקְרָ֨א אֱלֹהִ֤ים לָאוֹר֙ י֔וֹם וְלַחֹ֖שֶׁךְ קָ֣רָא לָ֑יְלָה וַֽיְהִי־עֶ֥רֶב וַֽיְהִי־בֹ֖קֶר י֥וֹם אֶחָֽד׃"
]);

// The comparison pairs words by index, so a missing or extra word would shift
// every later pair and report bogus mismatches. Fail loudly instead.
if (khan.length !== inputs.length) {
  throw new Error(`word count mismatch: ${khan.length} expected transcriptions for ${inputs.length} input words`);
}

// Keep only the words whose transliteration differs from the expected transcription.
const results = inputs
  .map((text, i) => ({
    text,
    expected: khan[i],
    received: heb.transliterate(text, tiberian)
  }))
  .filter(({ expected, received }) => received !== expected);

console.log(results);
Which is helpful for finding all the incorrect ones:
[
{ text: 'הָיְתָ֥ה', expected: 'hɔːɔjˈθɔː', received: 'hɔjˈθɔː' },
{ text: 'תֹ֨הוּ֙', expected: 'ˈθoːhuː', received: 'ˈθoːˈhuː' },
{
text: 'הַמָּֽיִם׃',
expected: 'hamˈmɔːjim',
received: 'haˈmmɔːjim'
},
// ... etc.
]
I think קָ֣רָא should be transliterated as q̟ɔː'ʀ̟ɔː
On Sun, 25 Jun 2023 at 20:16, Charles Loder @.***> wrote:
@johnlockejrr https://github.com/johnlockejrr
I made some updates to the tests, primarily with regard to vowel length.
There are, of course, some remaining issues:
- stress markers in wrong place with doubled consonants
- for a word like וַיֹּ֥אמֶר it is transliterated as vaˈɟɟoː֥mɛʀ̟
- it should be vaɟˈɟoːmɛʀ̟
- a fix is possible, I just haven't quite figured it out
- possible typos in transcriptions
- we've already discussed הָיְתָ֥ה being transliterated hɔjˈθɔː instead of the transcribed hɔːɔjˈθɔː
- another is וַֽיְהִי being transliterated as vaːjihiː instead of the transcribed vaˑjhiː
- both of these issues contain a yod, so I'll research more
- another is קָ֣רָא being transliterated as ˈq̟ɔːʀ̟ɔː instead of the transcribed ˈq̟ɔʀ̟ɔː
- the resh may be an exception
Other than that, it's getting close!
— Reply to this email directly, view it on GitHub https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1606208849, or unsubscribe https://github.com/notifications/unsubscribe-auth/AD44GHRQCJSDWSUH6WFY37LXNB57VANCNFSM6AAAAAAUDGEC2M . You are receiving this because you were mentioned.Message ID: @.***>
I thought that too, but ˈq̟ɔʀ̟ɔː
is the transcription at the end of the book...
I may reach out to Khan soon once I try to tackle the stress marker
I'm sure it is a typo, because any open syllable lengthens the vowel; otherwise it should be a shewa. We should consult Khan anyway.
On Sun, 25 Jun 2023 at 23:51, Charles Loder @.***> wrote:
I thought that too, but ˈq̟ɔʀ̟ɔː is the transcription at the end of the book...
I may reach out to Khan soon once I try to tackle the stress marker
— Reply to this email directly, view it on GitHub https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1606273743, or unsubscribe https://github.com/notifications/unsubscribe-auth/AD44GHV44UZUIA2E36AKSHTXNCXGPANCNFSM6AAAAAAUDGEC2M . You are receiving this because you were mentioned.Message ID: @.***>
@johnlockejrr
Ok, so huge progress!
Using the script above, there are only a handful of different ones!:
[
{ text: 'הָיְתָ֥ה', expected: 'hɔːɔjˈθɔː', received: 'hɔjˈθɔː' },
{ text: 'תֹ֨הוּ֙', expected: 'ˈθoːhuː', received: 'ˈθoːˈhuː' },
{ text: 'בֵּ֥ין', expected: 'beːen', received: 'ˈbeːen' },
{ text: 'קָ֣רָא', expected: 'ˈq̟ɔʀ̟ɔː', received: 'ˈq̟ɔːʀ̟ɔː' }
]
There are 2 categories.
We've already discussed them, but הָיְתָ֥ה and קָ֣רָא may potentially be typos in TPTBH and the results from the package are actually correct.
The havarotjs package doesn't mark stress in the most rigorous way. If the syllable has a taam, it is marked as accented (i.e. stressed). This is why תֹ֨הוּ֙ has two stress markers as ˈθoːˈhuː and why בֵּ֥ין has a stress marker.
I'm willing to let the accents be less than perfect for now.
I'll test a little more, push a dev release of this, make a preview site, and ask Khan for some feedback.
May actually be able to fix the "תֹ֨הוּ֙" in this
Wonderful! I'll do the tests myself too. Very good work @charlesLoder!
On Sun, 2 Jul 2023 at 02:48, Charles Loder @.***> wrote:
@johnlockejrr https://github.com/johnlockejrr
Ok, so huge progress!
Using the script above https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1606222671 , there are only a handful of different ones!:
[ { text: 'הָיְתָ֥ה', expected: 'hɔːɔjˈθɔː', received: 'hɔjˈθɔː' }, { text: 'תֹ֨הוּ֙', expected: 'ˈθoːhuː', received: 'ˈθoːˈhuː' }, { text: 'בֵּ֥ין', expected: 'beːen', received: 'ˈbeːen' }, { text: 'קָ֣רָא', expected: 'ˈq̟ɔʀ̟ɔː', received: 'ˈq̟ɔːʀ̟ɔː' }]
There are 2 categories. potential typos
We've already discussed them, but הָיְתָ֥ה and קָ֣רָא may potentially be typos in TPTBH and the results from the package are actually correct. accents
The havarotjs package doesn't mark stress in the most rigorous way. If the syllable has a taam, it is marked as accented (i.e. stressed). This is why תֹ֨הוּ֙ has two stress markers as ˈθoːˈhuː and why בֵּ֥ין has a stress marker. next steps
I'm willing to let the accents be less than perfect for now.
I'll test a little more, push a dev release of this, make a preview site, and ask Khan for some feedback.
— Reply to this email directly, view it on GitHub https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1616243154, or unsubscribe https://github.com/notifications/unsubscribe-auth/AD44GHUWLSJYR2ALNU5AL5LXODANRANCNFSM6AAAAAAUDGEC2M . You are receiving this because you were mentioned.Message ID: @.***>
The next major hurdle has been the resh. There are two realizations — the standard "ʀ̟" and the pharyngealized "rˁ".
I have the pharyngealized almost figured out, except for one case when the resh
is in the same syllable, or at least the same foot, as a preceding alveolar p229
He does not give an example, however, of a word like תְּפַר where the resh is in a different syllable but same foot as an alveolar, but the resh is not directly preceded by an alveolar. I believe this should be a ʀ̟ but I'm awaiting confirmation
I'm awaiting confirmation
They said they were unaware.
Given the examples and explanations in the book, I'm assuming he means that the resh must be in direct contact with the alveolar. I'll research a little more, but working off the assumption it should be a ʀ̟ for now
Yes, I'm aware of those two realisations of resh, but I'm completely puzzled about it because I didn't find anything like that in other Semitic languages (other than normal R and “emphatic” R in some contexts).
On Tue, 11 Jul 2023 at 04:23, Charles Loder @.***> wrote:
The next major hurdle has been the resh. There are two realizations — the standard "ʀ̟" and the pharyngealized "rˁ".
I have the pharyngealized almost figured out, except for one case when the resh
is in the same syllable, or at least the same foot, as a preceding alveolar p229
He does not give an example, however, of a word like תְּפַר where the resh is in a different syllable but same foot as an alveolar, but the resh is not directly preceded by an alveolar. I believe this should be a ʀ̟ but I'm awaiting confirmation
— Reply to this email directly, view it on GitHub https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1629953188, or unsubscribe https://github.com/notifications/unsubscribe-auth/AD44GHUDQZAXOL5QIZN3MC3XPSTKDANCNFSM6AAAAAAUDGEC2M . You are receiving this because you were mentioned.Message ID: @.***>
Ok, so I got the resh figured out.
The next step is figuring out the patach (I.2.1.2), which is very confusing. He says:
The back quality [ɑ] would have been induced in particular by the environment of consonants involving retraction of the tongue root, especially pharyngeals and pharyngealized consonants.
which makes sense at first glance.
But then on p249, he gives the transcription of [ˈbɑːʕaˌra] for בָּעֲרָ֥ה where the qamets, not patach, is transcribed with [ɑ]. Perhaps another typo? Are those transcriptions from Morag?
Then in a similar word on p621, בַּעֲצַת, he transcribes it as [baːʕɑˈsˁɑːɑθ] with the hatef-patach as [ɑ]. Is the ayin or the tsade causing the conditioned realization?
For the transcription of מַזְרִ֣יעַ on p618–19, he has [mɑzˈrˁiːjaʕ], with a note:
Pataḥ is pronounced as back [ɑ] in the environment of pharyngealized consonants: §I.2.1.3.
The first patach isn't even in the same syllable or foot as the resh, so why not [a], and the second patach precedes a pharyngeal, so why not [ɑ]???
I'm completely lost on this one...
I'm sure the second example is a typo.
On Sun, 16 Jul 2023 at 03:19, Charles Loder @.***> wrote:
Ok, so I got the resh figured out.
The next step is figuring out the patach (I.2.1.2), which is very confusing. He says:
The back quality [ɑ] would have been induced in particular by the environment of consonants involving retraction of the tongue root, especially pharyngeals and pharyngealized consonants.
which makes sense at first glance.
But then on p249, he gives the transcription of [ˈbɑːʕaˌra] for בָּעֲרָ֥ה where the qamets, not patach, is transcribed with [ɑ]. Perhaps another typo? Are those transcriptions from Morag?
For the transcription of מַזְרִ֣יעַ on p618–19, he has [mɑzˈrˁiːjaʕ], with a note:
Pataḥ is pronounced as back [ɑ] in the environment of pharyngealized consonants: §I.2.1.3.
The first patach isn't even in the same syllable or foot as the resh, so why not [a], and the second patach precedes a pharyngeal, so why not [ɑ]???
I'm completely lost on this one...
— Reply to this email directly, view it on GitHub https://github.com/charlesLoder/hebrew-transliteration/issues/45#issuecomment-1636926379, or unsubscribe https://github.com/notifications/unsubscribe-auth/AD44GHTC2OZIVETPKAUSXJTXQMXP3ANCNFSM6AAAAAAUDGEC2M . You are receiving this because you were mentioned.Message ID: @.***>
So Khan said:
If I transcribed בָּעֲרָ֥ה as [ˈbɑːʕaˌra], this is a mistake. The transcription should be [bɔːʕaˌrɔː].
Which makes sense with regard to the qamets, but I still would have expected an [ɑ].
I asked a follow up regarding מַזְרִ֣יעַ but have not received an answer back.
See discussion here
Will definitely need a test under
test/schemas
.