diff --git a/TeXmacs/tests/tmu/204_1.tmu b/TeXmacs/tests/tmu/204_1.tmu new file mode 100644 index 0000000000000000000000000000000000000000..540eeb83097e80297962c70b44ae0c4c9fe3539a --- /dev/null +++ b/TeXmacs/tests/tmu/204_1.tmu @@ -0,0 +1,82 @@ +> + +> + +<\body> + MMMMMM + + (行首和行位对于夹注符号0.5字宽,多打一些字测试行尾的情,测测测测测测测测测测测测测)测)测测测测测测测测测测测测测测 + + 【行首和行位对于夹注符号0.5字宽,多打一些字测试行尾的情,测测测测测测测测测测测测测】测)测测测测测测测测测测测测测测 + + “行首和行位对于夹注符号0.5字宽,多打一些字测试行尾的情况,测测测测测测测测测测测测”测测)测测测测测测测测测测测测测测 + + MMMMMM + + 。(右测有调整空间的普通符号后面跟左夹注符号,普通符号右侧调整0.25字宽,左夹注符号左侧调整0.25字宽,总共1.5字宽 + + ,【右测有调整空间的普通符号后面跟左夹注符号,普通符号右侧调整0.25字宽,左夹注符号左侧调整0.25字宽,总共1.5字宽 + + ;【右测有调整空间的普通符号后面跟左夹注符号,普通符号右侧调整0.25字宽,左夹注符号左侧调整0.25字宽,总共1.5字宽 + + :《右测有调整空间的普通符号后面跟左夹注符号,普通符号右侧调整0.25字宽,左夹注符号左侧调整0.25字宽,总共1.5字宽 + + 、「右测有调整空间的普通符号后面跟左夹注符号,普通符号右侧调整0.25字宽,左夹注符号左侧调整0.25字宽,总共1.5字宽 + + MMMMMMMMM + + )。右侧夹注符号后面跟普通符号,右侧夹注符号右侧调整0.5字宽,普通符号不做调整,总共1.5字宽 + + 】。右侧夹注符号后面跟普通符号,右侧夹注符号右侧调整0.5字宽,普通符号不做调整,总共1.5字宽 + + 》。右侧夹注符号后面跟普通符号,右侧夹注符号右侧调整0.5字宽,普通符号不做调整,总共1.5字宽 + + ”。右侧夹注符号后面跟普通符号,右侧夹注符号右侧调整0.5字宽,普通符号不做调整,总共1.5字宽 + + 」。右侧夹注符号后面跟普通符号,右侧夹注符号右侧调整0.5字宽,普通符号不做调整,总共1.5字宽 + + )?右侧夹注符号后面跟普通符号,右侧夹注符号右侧调整0.5字宽,普通符号不做调整,总共1.5字宽 + + MMMMMMMMMMM + + 】】右侧夹注符号碰到右侧夹注符号,前面的右侧夹注符号缩减0.5字宽,后面的不做调整,总共1.5字宽 + + 】】】】】如果是多个连续的右侧夹注符号,前面的全部缩减0.5字宽,最后一个不做调整,前面任意两个1字宽,最后两个1.5字宽 + + 】)》」”不同的连续右侧夹注符号处理方式和上面一样 + + MMMMMMMMMMMMMMMM + + 】(右侧夹注符号碰到左侧夹注符号,右夹注符号右侧缩减0.25字宽,左夹注符号左侧缩减0.25字宽,总共1.5字宽 + + 】“右侧夹注符号碰到左侧夹注符号,右夹注符号右侧缩减0.25字宽,左夹注符号左侧缩减0.25字宽,总共1.5字宽 + + 】《右侧夹注符号碰到左侧夹注符号,右夹注符号右侧缩减0.25字宽,左夹注符号左侧缩减0.25字宽,总共1.5字宽 + + \; + + MMMMMMM + + M《「“左侧夹注符号碰到左侧夹注符号,后面的左侧夹注符号左侧缩减0.5字宽,最前面的不做调整,总共1.5字宽 + + MMMMMMMM + + M(。左夹注符号跟普通符号,中间没有调整空间,不做调整 + + MMMMMMMMMMMMMM + + ,。对于没有夹注符号的连续符号,不做处理 + + MMMMMMMMMMMMMMMMMMMM + + 连续的符号测试「」「」「」「」「《》《》《》、??《》》》》???》」 + + \; + + +<\initial> + <\collection> + + + + diff --git a/devel/204_1.md b/devel/204_1.md new file mode 100644 index 0000000000000000000000000000000000000000..25ce3d9d7b40d86b44b5e1f35802790f17ea5877 --- /dev/null +++ b/devel/204_1.md @@ -0,0 +1,46 @@ 
+# 204_1 CJK标点符号间距调整功能实现 + +## 2025/01/17 + +### Why +在TeXmacs中处理中日韩(CJK)文本时,标点符号的间距调整是一个重要的排版需求。特别是对于夹注符号(如括号)和普通标点符号之间的间距,需要根据不同的上下文进行智能调整,以符合中文排版的规范和美观要求。现有的自动间距功能无法满足这种精细的标点符号间距控制需求。参考 W3C《中文排版需求》6.3.2 标点符号的宽度调整:https://www.w3.org/TR/clreq/#punctuation_width_adjustment + +### How +分为以下几类情况: +1. 右侧有调整空间的符号(不包括右夹注符号),下一个(右侧)符号是夹注符号,调小右侧空间 + 1.1 如果下一个夹注符号是右夹注符号,那么调小右侧空间 0.5 字宽 + 1.2 如果下一个夹注符号是左夹注符号,那么调小右侧空间 0.25 字宽 +2. 右夹注符号,只有右侧有调整空间,下一个(右侧)是符号,调小右侧空间 + 2.1 如果下一个符号是左侧有调整空间的符号(只有左夹注符号),那么调小右侧空间 0.25 字宽 + 2.2 如果下一个符号是其他符号,那么调小右侧空间 0.5 字宽 + +3. 左夹注符号(左侧可调整符号只有左夹注符号),只有左侧有调整空间,上一个(左侧)是符号,调小左侧空间 + 3.1 如果上一个符号是右侧有调整空间的符号(包括右夹注符号),那么调小左侧空间 0.25 字宽 + 3.2 如果上一个符号是其他符号,那么调小左侧空间 0.5 字宽 + +### What +本次修改实现的功能及其影响如下: + +#### 解决的问题 +1. **CJK标点符号间距智能调整**:解决了中文排版中标点符号间距不符合规范的问题 +2. **夹注符号特殊处理**:针对括号等夹注符号提供了专门的间距调整逻辑 +3. **上下文感知调整**:根据符号在行中的位置(行首、行尾、中间)和相邻符号类型进行差异化调整 + +#### 系统影响范围 +1. **Typeset模块扩展**:扩展了box基类接口,影响所有box子类 +2. **段落处理增强**:增强了lazy_paragraph的处理能力,增加了新的数据结构和算法 +3. **文本框功能增强**:为text_box添加了kerning收缩功能 + +#### 测试验证 +添加了测试文件`TeXmacs/tests/tmu/204_1.tmu`,包含以下测试场景: +- 行首和行尾夹注符号的0.5字宽调整 +- 普通符号后跟左夹注符号的1.5字宽调整 +- 夹注符号后跟普通符号的1.5字宽调整 +- 连续符号的混合字宽调整(连续右夹注符号中,前面任意相邻两个共1字宽,最后两个共1.5字宽) + +#### 兼容性考虑 +1. **向后兼容**:新添加的虚函数在基类中提供默认实现,不影响现有代码 +2. **性能影响**:新增的标点符号识别和调整逻辑仅在CJK语言环境下启用 +3.
**可扩展性**:通过哈希集合设计,便于后续添加新的标点符号类型 + +这次实现为TeXmacs的CJK排版功能提供了重要的增强,特别是在标点符号间距的精细控制方面,为用户提供了更加专业和美观的中文排版体验。 \ No newline at end of file diff --git a/src/Graphics/Fonts/font_protrusion.cpp b/src/Graphics/Fonts/font_protrusion.cpp index 16f7e69d2c7270cb40b3909d6899959189ccafc4..eff117729a3f98005da8e6e099813e337b8fa574 100644 --- a/src/Graphics/Fonts/font_protrusion.cpp +++ b/src/Graphics/Fonts/font_protrusion.cpp @@ -147,30 +147,38 @@ add_western (hashmap& t, string font_name, bool right) { void add_cjk_left_protrusion (hashmap& t) { + t ("<#2018>")= 0.5; /* U+2018 left single quotation mark */ + t ("<#201C>")= 0.5; /* U+201C left double quotation mark */ t ("<#3008>")= 0.5; t ("<#300A>")= 0.5; t ("<#300C>")= 0.5; t ("<#300E>")= 0.5; + t ("<#3010>")= 0.5; /* U+3010 left black lenticular bracket */ t ("<#3016>")= 0.5; t ("<#3018>")= 0.5; t ("<#301A>")= 0.5; t ("<#301D>")= 0.5; + t ("<#FF08>")= 0.5; /* U+FF08 fullwidth left parenthesis */ } void add_cjk_right_protrusion (hashmap& t) { + t ("<#2019>")= 0.5; /* U+2019 right single quotation mark */ + t ("<#201D>")= 0.5; /* U+201D right double quotation mark */ t ("<#3001>")= 0.5; t ("<#3002>")= 0.5; t ("<#3009>")= 0.5; t ("<#300B>")= 0.5; t ("<#300D>")= 0.5; t ("<#300F>")= 0.5; + t ("<#3011>")= 0.5; /* U+3011 right black lenticular bracket */ t ("<#3017>")= 0.5; t ("<#3019>")= 0.5; t ("<#301B>")= 0.5; t ("<#301E>")= 0.5; t ("<#301F>")= 0.5; t ("<#FF01>")= 0.5; + t ("<#FF09>")= 0.5; /* U+FF09 fullwidth right parenthesis */ t ("<#FF0C>")= 0.5; t ("<#FF0E>")= 0.5; t ("<#FF1A>")= 0.5; diff --git a/src/Typeset/Boxes/Basic/boxes.cpp b/src/Typeset/Boxes/Basic/boxes.cpp index 906b5365ed208d69f58fa03e794406432c411179..330f9379ae363d18b1760245d0dc8e26094bf052 100644 --- a/src/Typeset/Boxes/Basic/boxes.cpp +++ b/src/Typeset/Boxes/Basic/boxes.cpp @@ -168,6 +168,16 @@ box_rep::right_auto_spacing (SI size) { return this; }; +box +box_rep::left_contract_kerning (double factor) { /* Base-class default: returns this box unchanged. NOTE(review): factor is unused here; expand_glyphs below silences the same situation with a (void) cast. */ + return this; +}; + +box +box_rep::right_contract_kerning (double factor) { /* Base-class default: returns this box unchanged; factor is unused (see left_contract_kerning). */ + return this; +}; + box box_rep::expand_glyphs (int mode, double factor) { (void) mode; diff --git a/src/Typeset/Boxes/Basic/text_boxes.cpp b/src/Typeset/Boxes/Basic/text_boxes.cpp index 1860ec1ef26f69a588704d40aac27f970aa631d4..11603284dc2467d670e16a577e02cdd16a1e9b61 100644 ---
a/src/Typeset/Boxes/Basic/text_boxes.cpp +++ b/src/Typeset/Boxes/Basic/text_boxes.cpp @@ -14,6 +14,7 @@ #include "boxes.hpp" #include "cork.hpp" #include "font.hpp" +#include "tm_debug.hpp" /* NOTE(review): the code this patch adds to text_boxes.cpp (the two *_contract_kerning overrides, which build a new box from the same ip/pos/str/fn/pen with xk->right or xk->left reduced by factor * fn->wfn) shows no debug or logging calls - confirm this include is needed and not a debugging leftover. */ /****************************************************************************** * Text boxes @@ -89,6 +90,8 @@ struct text_box_rep : public box_rep { SI get_leaf_offset (string search); box left_auto_spacing (SI size); box right_auto_spacing (SI size); + box left_contract_kerning (double factor); + box right_contract_kerning (double factor); }; /****************************************************************************** @@ -139,6 +142,33 @@ text_box_rep::adjust_kerning (int mode, double factor) { return tm_new (ip, pos, str, fn, pen, nxk); } +box +text_box_rep::right_contract_kerning (double factor) { + SI pad= (SI) tm_round (factor * fn->wfn); + xkerning nxk (0, 0, 0); + if (!is_nil_or_zero (xk)) { + nxk->left = xk->left; + nxk->right = xk->right; + nxk->padding= xk->padding; + } + nxk->right-= pad; + box result= tm_new (ip, pos, str, fn, pen, nxk); + return result; +} + +box +text_box_rep::left_contract_kerning (double factor) { + SI pad= (SI) tm_round (factor * fn->wfn); + xkerning nxk (0, 0, 0); + if (!is_nil_or_zero (xk)) { + nxk->left = xk->left; + nxk->right = xk->right; + nxk->padding= xk->padding; + } + nxk->left-= pad; + return tm_new (ip, pos, str, fn, pen, nxk); +} + box text_box_rep::right_auto_spacing (SI size) { xkerning nxk (0, 0, 0); diff --git a/src/Typeset/Line/lazy_paragraph.cpp b/src/Typeset/Line/lazy_paragraph.cpp index 6577ee881f13962bc0c5327d670a3e87d0b84582..a46ebefc19d4d31ef953b014305ad14709fdb9e4 100644 --- a/src/Typeset/Line/lazy_paragraph.cpp +++ b/src/Typeset/Line/lazy_paragraph.cpp @@ -17,6 +17,7 @@ #include "analyze.hpp" #include "array.hpp" #include "converter.hpp" +#include "tm_debug.hpp" #include @@ -127,6 +128,33 @@ lazy_paragraph_rep::lazy_paragraph_rep (edit_env env2, path ip) init_decs= env->read (ATOM_DECORATIONS); + array
left_annotation_puncts= array (); /* Opening (left) annotation marks; each is converted with utf8_to_cork before insertion into cjk_left_annotation_puncts. */ + left_annotation_puncts << string ("『") << string ("「") << string ("(") + << string ("【") << string ("《") << string ("〈"); + for (string punct : left_annotation_puncts) { + // Marks with adjustable space on the left and left annotation marks are the same set + cjk_left_annotation_puncts->insert (utf8_to_cork (punct)); + } + cjk_left_annotation_puncts->insert ("<#2018>"); + cjk_left_annotation_puncts->insert ("<#201C>"); + + array right_annotation_puncts= array (); /* Closing (right) annotation marks, handled the same way. */ + right_annotation_puncts << string ("』") << string ("」") << string (")") + << string ("】") << string ("》") << string ("〉"); + for (string punct : right_annotation_puncts) { + cjk_right_annotation_puncts->insert (utf8_to_cork (punct)); + } + cjk_right_annotation_puncts->insert ("<#2019>"); + cjk_right_annotation_puncts->insert ("<#201D>"); + + array right_adjustable_puncts= array (); /* Marks that are not right annotations but are listed as having adjustable space on their right. */ + right_adjustable_puncts << string ("。") << string (",") << string (":") + << string (";") << string ("!") << string ("?") + << string ("、"); + for (string punct : right_adjustable_puncts) { + cjk_right_adjustable_puncts->insert (utf8_to_cork (punct)); + } /* NOTE(review): these sets are refilled from the same literals on every lazy_paragraph_rep construction - consider building them once. */ + array puncts= array (); puncts << string ("。") << string (",") << string (":") << string (";") << string ("!") << string ("?") << string ("、") << string ("~") @@ -134,11 +162,13 @@ lazy_paragraph_rep::lazy_paragraph_rep (edit_env env2, path ip) << string ("》") << string ("〉"); puncts << string ("『") << string ("「") << string ("(") << string ("【") << string ("《") << string ("〈"); - puncts << string ("“") << string ("”"); + for (string punct : puncts) { cjk_puncts->insert (utf8_to_cork (punct)); } - cjk_puncts->insert ("`"); + cjk_puncts->insert ("<#201C>"); + cjk_puncts->insert ("<#201D>"); + cjk_puncts->insert ("<#2018>"); cjk_puncts->insert ("<#2019>"); } @@ -174,6 +204,34 @@ is_cjk_language (language lan) { lan->lan_name == "japanese" || lan->lan_name == "taiwanese"; } +bool +lazy_paragraph_rep::is_cjk_puncts (line_item item) { /* True when item passes is_text and its leaf string is contained in cjk_puncts. */ + if (!is_text (item)) return false; + string text=
item->b->get_leaf_string (); + return cjk_puncts->contains (text); +} + +bool +lazy_paragraph_rep::is_cjk_left_annotation_puncts (line_item item) { /* True when item passes is_text and its leaf string is in cjk_left_annotation_puncts. */ + if (!is_text (item)) return false; + string text= item->b->get_leaf_string (); + return cjk_left_annotation_puncts->contains (text); +} + +bool +lazy_paragraph_rep::is_cjk_right_annotation_puncts (line_item item) { /* True when item passes is_text and its leaf string is in cjk_right_annotation_puncts. */ + if (!is_text (item)) return false; + string text= item->b->get_leaf_string (); + return cjk_right_annotation_puncts->contains (text); +} + +bool +lazy_paragraph_rep::is_cjk_right_adjustable_puncts (line_item item) { /* True when item passes is_text and its leaf string is in cjk_right_adjustable_puncts. */ + if (!is_text (item)) return false; + string text= item->b->get_leaf_string (); + return cjk_right_adjustable_puncts->contains (text); +} + void lazy_paragraph_rep::line_print (line_item item) { // cout << "Printing: " << item << "\n"; @@ -219,6 +277,10 @@ lazy_paragraph_rep::line_print (line_item item) { // cout << "line item: " << item << LF; items_box << is_not_skip (item); items_cjk_text << is_cjk_text (item); + items_cjk_puncts << is_cjk_puncts (item); + items_cjk_left_annotation_puncts << is_cjk_left_annotation_puncts (item); + items_cjk_right_annotation_puncts << is_cjk_right_annotation_puncts (item); + items_cjk_right_adjustable_puncts << is_cjk_right_adjustable_puncts (item); } item->b->x0= cur_w->def; item->b->y0= 0; @@ -287,6 +349,78 @@ lazy_paragraph_rep::protrude (bool lf, bool rf) { } } +// Helper: contract the kerning of items[index] on the left (is_left) or right side, add the resulting width change to cur_w and store the new box back in items +void +lazy_paragraph_rep::contract_kerning (int index, double factor, bool is_left) { + box b = items[index]; + SI old_width = b->w (); + box nb = is_left ?
b->left_contract_kerning (factor) + : b->right_contract_kerning (factor); + SI new_width = nb->w (); + SI width_diff= new_width - old_width; + cur_w+= width_diff; + items[index]= nb; +} + +void +lazy_paragraph_rep::adjust_consecutive_puncts () { /* Contracts kerning between neighbouring CJK punctuation items with index cur_start .. N(items)-1. Factors passed to contract_kerning: (1) right-adjustable mark followed by an annotation mark: right side, 0.5 if the next is a right annotation, 0.25 if it is a left annotation; (2) right annotation mark followed by any CJK punctuation: right side, 0.25 if the next is a left annotation, else 0.5; (3) left annotation mark preceded by CJK punctuation: left side, 0.25 if the previous is a right annotation or right-adjustable mark, else 0.5. The item equal to last (from find_first_last_text) is never contracted on the right and the one equal to first never on the left. Returns early when the range holds no CJK punctuation. */ + int items_N= N (items); + ASSERT (N (items) == N (items_cjk_puncts) && + N (items) == N (items_cjk_left_annotation_puncts) && + N (items) == N (items_cjk_right_annotation_puncts) && + N (items) == N (items_cjk_right_adjustable_puncts) && + N (items) == N (items_left) && N (items) == N (items_right) && + N (items) == N (items_cjk_text), + "length of items must match") /* NOTE(review): no ';' after ASSERT (...) - presumably the macro expands to a complete statement; confirm. */ + + bool no_cjk_puncts_flag= true; + for (int i= cur_start; i < items_N; i++) { + if (items_cjk_puncts[i]) { + no_cjk_puncts_flag= false; + break; + } + } + if (no_cjk_puncts_flag) return; + + int first, last; + find_first_last_text (first, last); + + for (int i= cur_start; i < items_N; i++) { + if (items_cjk_puncts[i]) { + // First handle marks with adjustable space on the right (not right annotations), or right annotation marks + if (i != last && + (items_cjk_right_adjustable_puncts[i] || + items_cjk_right_annotation_puncts[i]) && + items_right[i] != -1 && items_cjk_puncts[items_right[i]]) { + // Handle marks with adjustable space on the right + if (items_cjk_right_adjustable_puncts[i] && + (items_cjk_right_annotation_puncts[items_right[i]] || + items_cjk_left_annotation_puncts[items_right[i]])) { + double factor= + items_cjk_right_annotation_puncts[items_right[i]] ? 0.5 : 0.25; + contract_kerning (i, factor, false); + } + // Handle right annotation marks (NOTE(review): items_cjk_puncts[items_right[i]] was already checked by the enclosing if) + if (items_cjk_right_annotation_puncts[i] && + items_cjk_puncts[items_right[i]]) { + double factor= + items_cjk_left_annotation_puncts[items_right[i]] ? 0.25 : 0.5; + contract_kerning (i, factor, false); + } + } + // Handle left annotation marks + else if (i != first && items_cjk_left_annotation_puncts[i] && + items_left[i] != -1 && items_cjk_puncts[items_left[i]]) { + double factor= (items_cjk_right_annotation_puncts[items_left[i]] || + items_cjk_right_adjustable_puncts[items_left[i]]) + ?
0.25 + : 0.5; + contract_kerning (i, factor, true); + } + } + } +} + void lazy_paragraph_rep::cjk_auto_spacing () { int prev = -1; @@ -570,6 +704,7 @@ lazy_paragraph_rep::make_unit (string mode, SI the_width, bool break_flag) { if (is_cjk_language (env->lan)) { cjk_auto_spacing (); + adjust_consecutive_puncts (); /* runs right after cjk_auto_spacing, only when is_cjk_language (env->lan) holds */ } // stretching case @@ -734,15 +869,19 @@ lazy_paragraph_rep::handle_decorations () { void lazy_paragraph_rep::line_start () { - items = array (); - items_sp = array (); - items_box = array (); - items_cjk_text= array (); - items_left = array (); - items_right = array (); - spcs = array (); - fl = array (); - notes = array (); + items = array (); + items_sp = array (); + items_box = array (); + items_cjk_text = array (); + items_cjk_puncts = array (); + items_cjk_left_annotation_puncts = array (); + items_cjk_right_annotation_puncts= array (); + items_cjk_right_adjustable_puncts= array (); /* the four CJK punctuation flag arrays are reset together with items; adjust_consecutive_puncts asserts that their lengths match N (items) */ + items_left = array (); + items_right = array (); + spcs = array (); + fl = array (); + notes = array (); cur_r = 0; cur_start= 0; diff --git a/src/Typeset/Line/lazy_paragraph.hpp b/src/Typeset/Line/lazy_paragraph.hpp index b364a568be342aff1f9cead6cd2a9a025baa3292..707d369bdf408d2f1885108305e5d9fefe3a1959 100644 --- a/src/Typeset/Line/lazy_paragraph.hpp +++ b/src/Typeset/Line/lazy_paragraph.hpp @@ -34,13 +34,29 @@ protected: array items_left; // the index of the previous non-empty text box item array items_right; // the index of the next non-empty text box item array items_box; // is non-empty text box item: true / false - array items_cjk_text; // is non-empty text box item: true / false - hashset cjk_puncts; - array spcs; // the spacing between the boxes of items - array fl; // floating insertions - array notes; // line notes - array tabs; // tabulations - array decs; // decorations of the boxes on lines + array items_cjk_text; // is_cjk_text (item) result per item: true / false + array items_cjk_puncts; // is_cjk_puncts (item) result per item: true / false + array
items_cjk_left_annotation_puncts; // per item: is a CJK left annotation + // punctuation mark (true / false) + array items_cjk_right_annotation_puncts; // per item: is a CJK right annotation + // punctuation mark (true / false) + array items_cjk_right_adjustable_puncts; // per item: is a CJK right adjustable + // punctuation mark (true / false) + + hashset cjk_puncts; // CJK punctuation set (Cork-encoded strings) + hashset + cjk_left_annotation_puncts; // CJK left annotation punctuation set + hashset + cjk_left_adjustable_puncts; // CJK left adjustable punctuation set; NOTE(review): not filled or read anywhere in this patch - confirm it is needed + hashset + cjk_right_annotation_puncts; // CJK right annotation punctuation set + hashset + cjk_right_adjustable_puncts; // CJK right adjustable punctuation set (excludes right annotation marks) + array spcs; // the spacing between the boxes of items + array fl; // floating insertions + array notes; // line notes + array tabs; // tabulations + array decs; // decorations of the boxes on lines SI cur_r; // the current right offset of the last line unit space cur_w; // the current width of the line unit int cur_start; // index of the start of the line unit @@ -72,6 +88,10 @@ protected: array swell; // swell properties for lines with large height tree init_decs; // initial decorations + bool is_cjk_puncts (line_item item); // text item whose leaf string is in cjk_puncts + bool is_cjk_left_annotation_puncts (line_item item); // same test against cjk_left_annotation_puncts + bool is_cjk_right_annotation_puncts (line_item item); // same test against cjk_right_annotation_puncts + bool is_cjk_right_adjustable_puncts (line_item item); // same test against cjk_right_adjustable_puncts void line_print (line_item item); void line_print (line_item item, path start, path end); void line_print (path start, path end); @@ -79,6 +99,8 @@ protected: void find_first_last_text (int& first, int& last); void protrude (bool lf, bool rf); void cjk_auto_spacing (); + void contract_kerning (int index, double factor, bool is_left); // replace items[index] by its left/right contracted box and update cur_w + void adjust_consecutive_puncts (); // contract kerning between neighbouring CJK punctuation items array adjusted (double factor, int first, int last); void increase_kerning (SI dw, SI the_width); void decrease_kerning (SI dw, SI the_width); diff --git a/src/Typeset/boxes.hpp b/src/Typeset/boxes.hpp index
9650c3810a4e3482cba2e20c741ac865e4aa95de..7a8b00f5487a4dba7edc17426d0fa37b29481691 100644 --- a/src/Typeset/boxes.hpp +++ b/src/Typeset/boxes.hpp @@ -185,6 +185,8 @@ public: virtual box adjust_kerning (int mode, double factor); virtual box left_auto_spacing (SI size); virtual box right_auto_spacing (SI size); + virtual box left_contract_kerning (double factor); /* returns a box with its left kerning contracted; the box_rep default returns the box unchanged, text_box_rep subtracts factor * fn->wfn (rounded) */ + virtual box right_contract_kerning (double factor); /* same as left_contract_kerning, applied to the right kerning */ virtual box expand_glyphs (int mode, double factor); virtual void get_cell_extents (SI& l, SI& r); virtual box adjust_cell_geometry (SI dx, SI dl, SI dr);