From 0b5c968595d780a6dd681f37443bd413d29ab839 Mon Sep 17 00:00:00 2001 From: zhangdd_ewan Date: Fri, 5 Sep 2025 17:29:39 +0800 Subject: [PATCH 1/3] =?UTF-8?q?genrb=E7=BC=96=E8=AF=91=E6=9C=9F=E6=A0=88?= =?UTF-8?q?=E6=BA=A2=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: zhangdd_ewan --- icu4c/source/tools/genrb/parse.cpp | 50 +++++++++++++++++------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/icu4c/source/tools/genrb/parse.cpp b/icu4c/source/tools/genrb/parse.cpp index 2c5d4952..0ba0a16c 100644 --- a/icu4c/source/tools/genrb/parse.cpp +++ b/icu4c/source/tools/genrb/parse.cpp @@ -1153,7 +1153,7 @@ addCollation(ParseState* state, TableResource *result, const char *collationTyp struct UString *tokenValue; struct UString comment; enum ETokenType token; - char subtag[1024]; + CharString subtag; UnicodeString rules; UBool haveRules = false; UVersionInfo version; @@ -1189,15 +1189,15 @@ addCollation(ParseState* state, TableResource *result, const char *collationTyp return NULL; } - u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); - + subtag.clear(); + subtag.appendInvariantChars(tokenValue->fChars, u_strlen(tokenValue->fChars), *status); if (U_FAILURE(*status)) { res_close(result); return NULL; } - member = parseResource(state, subtag, NULL, status); + member = parseResource(state, subtag.data(), nullptr, status); if (U_FAILURE(*status)) { @@ -1208,7 +1208,7 @@ addCollation(ParseState* state, TableResource *result, const char *collationTyp { // Ignore the parsed resources, continue parsing. 
} - else if (uprv_strcmp(subtag, "Version") == 0 && member->isString()) + else if (uprv_strcmp(subtag.data(), "Version") == 0 && member->isString()) { StringResource *sr = static_cast<StringResource *>(member); char ver[40]; @@ -1225,11 +1225,11 @@ addCollation(ParseState* state, TableResource *result, const char *collationTyp result->add(member, line, *status); member = NULL; } - else if(uprv_strcmp(subtag, "%%CollationBin")==0) + else if(uprv_strcmp(subtag.data(), "%%CollationBin")==0) { /* discard duplicate %%CollationBin if any*/ } - else if (uprv_strcmp(subtag, "Sequence") == 0 && member->isString()) + else if (uprv_strcmp(subtag.data(), "Sequence") == 0 && member->isString()) { StringResource *sr = static_cast<StringResource *>(member); rules = sr->fString; @@ -1395,7 +1395,7 @@ parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool n struct UString *tokenValue; struct UString comment; enum ETokenType token; - char subtag[1024], typeKeyword[1024]; + CharString subtag, typeKeyword; uint32_t line; result = table_open(state->bundle, tag, NULL, status); @@ -1437,17 +1437,17 @@ parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool n return NULL; } - u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); - + subtag.clear(); + subtag.appendInvariantChars(tokenValue->fChars, u_strlen(tokenValue->fChars), *status); if (U_FAILURE(*status)) { res_close(result); return NULL; } - if (uprv_strcmp(subtag, "default") == 0) + if (uprv_strcmp(subtag.data(), "default") == 0) { - member = parseResource(state, subtag, NULL, status); + member = parseResource(state, subtag.data(), nullptr, status); if (U_FAILURE(*status)) { @@ -1466,22 +1466,29 @@ parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool n if(token == TOK_OPEN_BRACE) { token = getToken(state, &tokenValue, &comment, &line, status); TableResource *collationRes; - if (keepCollationType(subtag)) { - collationRes = table_open(state->bundle, subtag, NULL, status); 
+ if (keepCollationType(subtag.data())) { + collationRes = table_open(state->bundle, subtag.data(), nullptr, status); } else { collationRes = NULL; } // need to parse the collation data regardless - collationRes = addCollation(state, collationRes, subtag, startline, status); + collationRes = addCollation(state, collationRes, subtag.data(), startline, status); if (collationRes != NULL) { result->add(collationRes, startline, *status); } } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */ /* we could have a table too */ token = peekToken(state, 1, &tokenValue, &line, &comment, status); - u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1); - if(uprv_strcmp(typeKeyword, "alias") == 0) { - member = parseResource(state, subtag, NULL, status); + typeKeyword.clear(); + typeKeyword.appendInvariantChars(tokenValue->fChars, u_strlen(tokenValue->fChars), *status); + if (U_FAILURE(*status)) + { + res_close(result); + return nullptr; + } + + if(uprv_strcmp(typeKeyword.data(), "alias") == 0) { + member = parseResource(state, subtag.data(), nullptr, status); if (U_FAILURE(*status)) { res_close(result); @@ -1523,7 +1530,7 @@ realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t star struct UString *tokenValue=NULL; struct UString comment; enum ETokenType token; - char subtag[1024]; + CharString subtag; uint32_t line; UBool readToken = false; @@ -1562,7 +1569,8 @@ realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t star } if(uprv_isInvariantUString(tokenValue->fChars, -1)) { - u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); + subtag.clear(); + subtag.appendInvariantChars(tokenValue->fChars, u_strlen(tokenValue->fChars), *status); } else { *status = U_INVALID_FORMAT_ERROR; error(line, "invariant characters required for table keys"); @@ -1575,7 +1583,7 @@ realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t star return 
NULL; } - member = parseResource(state, subtag, &comment, status); + member = parseResource(state, subtag.data(), &comment, status); if (member == NULL || U_FAILURE(*status)) { -- Gitee From e89c85878670e7f4c9f4f69eb01625b5c30bc064 Mon Sep 17 00:00:00 2001 From: zhangdd_ewan Date: Mon, 8 Sep 2025 15:47:41 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E6=BC=8F=E6=B4=9E=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: zhangdd_ewan --- icu4c/source/tools/genrb/parse.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/icu4c/source/tools/genrb/parse.cpp b/icu4c/source/tools/genrb/parse.cpp index 0ba0a16c..3d40278a 100644 --- a/icu4c/source/tools/genrb/parse.cpp +++ b/icu4c/source/tools/genrb/parse.cpp @@ -1439,6 +1439,7 @@ parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool n subtag.clear(); subtag.appendInvariantChars(tokenValue->fChars, u_strlen(tokenValue->fChars), *status); + if (U_FAILURE(*status)) { res_close(result); -- Gitee From 4a4f947373a8ed566021b920f14dbc1c4aa370d2 Mon Sep 17 00:00:00 2001 From: zhangdd_ewan Date: Fri, 12 Sep 2025 16:34:14 +0800 Subject: [PATCH 3/3] =?UTF-8?q?=E6=BC=8F=E6=B4=9E=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: zhangdd_ewan --- icu4c/source/tools/genrb/parse.cpp | 38 +++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/icu4c/source/tools/genrb/parse.cpp b/icu4c/source/tools/genrb/parse.cpp index 3d40278a..9f5a184b 100644 --- a/icu4c/source/tools/genrb/parse.cpp +++ b/icu4c/source/tools/genrb/parse.cpp @@ -1153,7 +1153,9 @@ addCollation(ParseState* state, TableResource *result, const char *collationTyp struct UString *tokenValue; struct UString comment; enum ETokenType token; + /* 20250912 begin */ CharString subtag; + /* 20250912 end */ UnicodeString rules; UBool haveRules = false; UVersionInfo version; @@ -1188,17 
+1190,18 @@ addCollation(ParseState* state, TableResource *result, const char *collationTyp return NULL; } - + /* 20250912 begin */ subtag.clear(); subtag.appendInvariantChars(tokenValue->fChars, u_strlen(tokenValue->fChars), *status); + /* 20250912 end */ if (U_FAILURE(*status)) { res_close(result); return NULL; } - + /* 20250912 begin */ member = parseResource(state, subtag.data(), nullptr, status); - + /* 20250912 end */ if (U_FAILURE(*status)) { res_close(result); @@ -1208,7 +1211,9 @@ addCollation(ParseState* state, TableResource *result, const char *collationTyp { // Ignore the parsed resources, continue parsing. } + /* 20250912 begin */ else if (uprv_strcmp(subtag.data(), "Version") == 0 && member->isString()) + /* 20250912 end */ { StringResource *sr = static_cast<StringResource *>(member); char ver[40]; @@ -1225,11 +1230,15 @@ addCollation(ParseState* state, TableResource *result, const char *collationTyp result->add(member, line, *status); member = NULL; } + /* 20250912 begin */ else if(uprv_strcmp(subtag.data(), "%%CollationBin")==0) + /* 20250912 end */ { /* discard duplicate %%CollationBin if any*/ } + /* 20250912 begin */ else if (uprv_strcmp(subtag.data(), "Sequence") == 0 && member->isString()) + /* 20250912 end */ { StringResource *sr = static_cast<StringResource *>(member); rules = sr->fString; @@ -1395,7 +1404,9 @@ parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool n struct UString *tokenValue; struct UString comment; enum ETokenType token; + /* 20250912 begin */ CharString subtag, typeKeyword; + /* 20250912 end */ uint32_t line; result = table_open(state->bundle, tag, NULL, status); @@ -1436,19 +1447,20 @@ parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool n return NULL; } - + /* 20250912 begin */ subtag.clear(); subtag.appendInvariantChars(tokenValue->fChars, u_strlen(tokenValue->fChars), *status); - + /* 20250912 end */ if (U_FAILURE(*status)) { res_close(result); return NULL; } - + /* 20250912 begin */ if 
(uprv_strcmp(subtag.data(), "default") == 0) { member = parseResource(state, subtag.data(), nullptr, status); + /* 20250912 end */ if (U_FAILURE(*status)) { @@ -1467,19 +1479,24 @@ parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool n if(token == TOK_OPEN_BRACE) { token = getToken(state, &tokenValue, &comment, &line, status); TableResource *collationRes; + /* 20250912 begin */ if (keepCollationType(subtag.data())) { collationRes = table_open(state->bundle, subtag.data(), nullptr, status); + /* 20250912 end */ } else { collationRes = NULL; } // need to parse the collation data regardless + /* 20250912 begin */ collationRes = addCollation(state, collationRes, subtag.data(), startline, status); + /* 20250912 end */ if (collationRes != NULL) { result->add(collationRes, startline, *status); } } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */ /* we could have a table too */ token = peekToken(state, 1, &tokenValue, &line, &comment, status); + /* 20250912 begin */ typeKeyword.clear(); typeKeyword.appendInvariantChars(tokenValue->fChars, u_strlen(tokenValue->fChars), *status); if (U_FAILURE(*status)) @@ -1490,6 +1507,7 @@ parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool n if(uprv_strcmp(typeKeyword.data(), "alias") == 0) { member = parseResource(state, subtag.data(), nullptr, status); + /* 20250912 end */ if (U_FAILURE(*status)) { res_close(result); @@ -1531,7 +1549,9 @@ realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t star struct UString *tokenValue=NULL; struct UString comment; enum ETokenType token; + /* 20250912 begin */ CharString subtag; + /* 20250912 end */ uint32_t line; UBool readToken = false; @@ -1570,8 +1590,10 @@ realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t star } if(uprv_isInvariantUString(tokenValue->fChars, -1)) { + /* 20250912 begin */ subtag.clear(); subtag.appendInvariantChars(tokenValue->fChars, 
u_strlen(tokenValue->fChars), *status); + /* 20250912 end */ } else { *status = U_INVALID_FORMAT_ERROR; error(line, "invariant characters required for table keys"); @@ -1583,9 +1605,9 @@ realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t star error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status)); return NULL; } - + /* 20250912 begin */ member = parseResource(state, subtag.data(), &comment, status); - + /* 20250912 end */ if (member == NULL || U_FAILURE(*status)) { error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status)); -- Gitee