To: vim_dev@googlegroups.com Subject: Patch 8.2.1933 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 8.2.1933 Problem: Cannot sort using locale ordering. Solution: Add a flag for :sort and sort() to use the locale. (Dominique Pellé, closes #7237) Files: runtime/doc/change.txt, runtime/doc/eval.txt, src/ex_cmds.c, src/list.c, src/testdir/test_sort.vim *** ../vim-8.2.1932/runtime/doc/change.txt 2020-06-04 18:21:56.046395485 +0200 --- runtime/doc/change.txt 2020-11-01 13:53:59.996703330 +0100 *************** *** 1799,1805 **** found here: |sort()|, |uniq()|. *:sor* *:sort* ! :[range]sor[t][!] [b][f][i][n][o][r][u][x] [/{pattern}/] Sort lines in [range]. When no range is given all lines are sorted. --- 1801,1807 ---- found here: |sort()|, |uniq()|. *:sor* *:sort* ! :[range]sor[t][!] [b][f][i][l][n][o][r][u][x] [/{pattern}/] Sort lines in [range]. When no range is given all lines are sorted. *************** *** 1807,1812 **** --- 1809,1822 ---- With [i] case is ignored. + With [l] sort uses the current locale. See + `language collate` to check or set the locale used + for ordering. For example, with "en_US.UTF8", + Ö will be ordered after O and before P, + whereas with the Swedish locale "sv_SE.UTF8", + it will be after Z. + Case is typically ignored by the locale. + Options [n][f][x][o][b] are mutually exclusive. With [n] sorting is done on the first decimal number *************** *** 1873,1880 **** Note that using `:sort` with `:global` doesn't sort the matching lines, it's quite useless. ! The details about sorting depend on the library function used. There is no ! guarantee that sorting obeys the current locale. You will have to try it out. Vim does do a "stable" sort. The sorting can be interrupted, but if you interrupt it too late in the --- 1883,1889 ---- Note that using `:sort` with `:global` doesn't sort the matching lines, it's quite useless. ! `:sort` does not use the current locale unless the l flag is used. Vim does do a "stable" sort. The sorting can be interrupted, but if you interrupt it too late in the *** ../vim-8.2.1932/runtime/doc/eval.txt 2020-10-23 16:49:30.112311448 +0200 --- runtime/doc/eval.txt 2020-11-01 13:54:00.000703318 +0100 *************** *** 9632,9637 **** --- 9700,9712 ---- When {func} is given and it is '1' or 'i' then case is ignored. + When {func} is given and it is 'l' then the current locale + is used for ordering. See `language collate` to check or set + the locale used for ordering. For example, with "en_US.UTF8", + Ö will be ordered after O and before P, whereas with the + Swedish locale "sv_SE.UTF8", it will be after Z. + Case is typically ignored by the locale. + When {func} is given and it is 'n' then all items will be sorted numerical (Implementation detail: This uses the strtod() function to parse numbers, Strings, Lists, Dicts and *** ../vim-8.2.1932/src/ex_cmds.c 2020-10-25 17:09:46.217011625 +0100 --- src/ex_cmds.c 2020-11-01 13:54:00.000703318 +0100 *************** *** 277,282 **** --- 277,283 ---- static char_u *sortbuf1; static char_u *sortbuf2; + static int sort_lc; // sort using locale static int sort_ic; // ignore case static int sort_nr; // sort on number static int sort_rx; // sort on regex instead of skipping it *************** *** 307,313 **** } st_u; } sorti_T; ! static int sort_compare(const void *s1, const void *s2); static int sort_compare(const void *s1, const void *s2) --- 308,320 ---- } st_u; } sorti_T; ! static int ! string_compare(const void *s1, const void *s2) ! { ! if (sort_lc) ! return strcoll((char *)s1, (char *)s2); ! return sort_ic ? STRICMP(s1, s2) : STRCMP(s1, s2); ! } static int sort_compare(const void *s1, const void *s2) *************** *** 350,357 **** l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr + 1); sortbuf2[l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr] = 0; ! result = sort_ic ? STRICMP(sortbuf1, sortbuf2) ! : STRCMP(sortbuf1, sortbuf2); } // If two lines have the same value, preserve the original line order. --- 357,363 ---- l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr + 1); sortbuf2[l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr] = 0; ! result = string_compare(sortbuf1, sortbuf2); } // If two lines have the same value, preserve the original line order. *************** *** 398,404 **** if (nrs == NULL) goto sortend; ! sort_abort = sort_ic = sort_rx = sort_nr = 0; #ifdef FEAT_FLOAT sort_flt = 0; #endif --- 404,410 ---- if (nrs == NULL) goto sortend; ! sort_abort = sort_ic = sort_lc = sort_rx = sort_nr = 0; #ifdef FEAT_FLOAT sort_flt = 0; #endif *************** *** 409,414 **** --- 415,422 ---- ; else if (*p == 'i') sort_ic = TRUE; + else if (*p == 'l') + sort_lc = TRUE; else if (*p == 'r') sort_rx = TRUE; else if (*p == 'n') *************** *** 614,621 **** change_occurred = TRUE; s = ml_get(get_lnum); ! if (!unique || i == 0 ! || (sort_ic ? STRICMP(s, sortbuf1) : STRCMP(s, sortbuf1)) != 0) { // Copy the line into a buffer, it may become invalid in // ml_append(). And it's needed for "unique". --- 622,628 ---- change_occurred = TRUE; s = ml_get(get_lnum); ! if (!unique || i == 0 || string_compare(s, sortbuf1) != 0) { // Copy the line into a buffer, it may become invalid in // ml_append(). And it's needed for "unique". *** ../vim-8.2.1932/src/list.c 2020-10-15 22:29:13.566726912 +0200 --- src/list.c 2020-11-01 13:54:00.000703318 +0100 *************** *** 1516,1521 **** --- 1516,1522 ---- typedef struct { int item_compare_ic; + int item_compare_lc; int item_compare_numeric; int item_compare_numbers; #ifdef FEAT_FLOAT *************** *** 1594,1603 **** p2 = (char_u *)""; if (!sortinfo->item_compare_numeric) { ! if (sortinfo->item_compare_ic) ! res = STRICMP(p1, p2); else ! res = STRCMP(p1, p2); } else { --- 1595,1604 ---- p2 = (char_u *)""; if (!sortinfo->item_compare_numeric) { ! if (sortinfo->item_compare_lc) ! res = strcoll((char *)p1, (char *)p2); else ! res = sortinfo->item_compare_ic ? STRICMP(p1, p2): STRCMP(p1, p2); } else { *************** *** 1706,1711 **** --- 1707,1713 ---- goto theend; // short list sorts pretty quickly info.item_compare_ic = FALSE; + info.item_compare_lc = FALSE; info.item_compare_numeric = FALSE; info.item_compare_numbers = FALSE; #ifdef FEAT_FLOAT *************** *** 1773,1778 **** --- 1775,1785 ---- info.item_compare_func = NULL; info.item_compare_ic = TRUE; } + else if (STRCMP(info.item_compare_func, "l") == 0) + { + info.item_compare_func = NULL; + info.item_compare_lc = TRUE; + } } } *** ../vim-8.2.1932/src/testdir/test_sort.vim 2020-09-23 22:38:01.507927503 +0200 --- src/testdir/test_sort.vim 2020-11-01 13:54:00.000703318 +0100 *************** *** 15,20 **** --- 15,39 ---- " numbers compared as strings call assert_equal([1, 2, 3], sort([3, 2, 1])) call assert_equal([13, 28, 3], sort([3, 28, 13])) + + call assert_equal(['A', 'O', 'P', 'a', 'o', 'p', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'], + \ sort(['A', 'O', 'P', 'a', 'o', 'p', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'])) + + call assert_equal(['A', 'a', 'o', 'O', 'p', 'P', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'], + \ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'i')) + + let lc = execute('language collate') + " With the following locales, the accentuated letters are ordered + " similarly to the non-accentuated letters... + if lc =~? '"\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8"' + call assert_equal(['a', 'A', 'ä', 'Ä', 'o', 'O', 'ô', 'Ô', 'œ', 'œ', 'p', 'P'], + \ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'l')) + " ... whereas with a Swedish locale, the accentuated letters are ordered + " after Z. + elseif lc =~? '"sv.*utf-\?8"' + call assert_equal(['a', 'A', 'o', 'O', 'p', 'P', 'ä', 'Ä', 'œ', 'œ', 'ô', 'Ô'], + \ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'l')) + endif endfunc func Test_sort_numeric() *************** *** 1204,1209 **** --- 1223,1279 ---- \ }, \ ] + " With the following locales, the accentuated letters are ordered + " similarly to the non-accentuated letters... + let lc = execute('language collate') + if lc =~? '"\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8"' + let tests += [ + \ { + \ 'name' : 'sort with locale', + \ 'cmd' : '%sort l', + \ 'input' : [ + \ 'A', + \ 'E', + \ 'O', + \ 'À', + \ 'È', + \ 'É', + \ 'Ô', + \ 'Œ', + \ 'Z', + \ 'a', + \ 'e', + \ 'o', + \ 'à', + \ 'è', + \ 'é', + \ 'ô', + \ 'œ', + \ 'z' + \ ], + \ 'expected' : [ + \ 'a', + \ 'A', + \ 'à', + \ 'À', + \ 'e', + \ 'E', + \ 'é', + \ 'É', + \ 'è', + \ 'È', + \ 'o', + \ 'O', + \ 'ô', + \ 'Ô', + \ 'œ', + \ 'Œ', + \ 'z', + \ 'Z' + \ ] + \ }, + \ ] + endif if has('float') let tests += [ \ { *** ../vim-8.2.1932/src/version.c 2020-11-01 13:33:44.496700978 +0100 --- src/version.c 2020-11-01 13:55:51.496337061 +0100 *************** *** 752,753 **** --- 752,755 ---- { /* Add new patch number below this line */ + /**/ + 1933, /**/ -- hundred-and-one symptoms of being an internet addict: 171. You invent another person and chat with yourself in empty chat rooms. /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///