Busybox支持中文的解决办法

转载：http://blog.csdn.net/wavemcu/article/details/7202908

***************************************************************************************************************************
作者：EasyWave 时间：2012.01.15

类别：linux驱动开发　　声明：转载，请保留链接

***************************************************************************************************************************

在嵌入式linux系统中，busybox是最常用的文件系统构建工具。可是从busybox 1.17.0开始，如果不做修改，ls命令是无法显示中文的。即使内核已经配置了中文支持，在shell下用ls命令也无法显示中文，这是因为busybox 1.17.0及以后的版本对中文的支持进行了限制。下面就来讲讲如何修改才能让busybox 1.17.0及以上版本支持中文。需要修改两个文件：printable_string.c 和 unicode.c。先来分析为什么ls命令无法显示中文，请看printable_string.c未修改过的代码：

const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str) /* Return str itself when every byte is in ' '..0x7e, otherwise a sanitized copy. */
{ /* When stats is non-NULL it receives the byte/character/column counts of the returned string. */
    static char *saved[4]; /* the most recently returned copies; a slot is freed when it is reused */
    static unsigned cur_saved; /* = 0 */

    char *dst;
    const char *s;

    s = str;
    while (1) { /* fast path: look for the first byte that needs conversion */
        unsigned char c = *s;
        if (c == '\0') {
            /* 99+% of inputs do not need conversion */
            if (stats) {
                stats->byte_count = (s - str);
                stats->unicode_count = (s - str);
                stats->unicode_width = (s - str);
            }
            return str;
        }
        if (c < ' ') /* control byte below space */
            break;
        if (c >= 0x7f) /* DEL (0x7f) and every byte >= 0x80, i.e. any byte of non-ASCII (multibyte) text */
            break;
        s++;
    }

#if ENABLE_UNICODE_SUPPORT
    dst = unicode_conv_to_printable(stats, str); /* conversion is delegated to the Unicode helper */
#else
    {
        char *d = dst = xstrdup(str); /* edit a duplicate, not the caller's string (assumes strdup-like xstrdup) */
        while (1) {
            unsigned char c = *d;
            if (c == '\0')
                break;
                if (c < ' ' || c >= 0x7f) /* same test as the scan loop above */
                   *d = '?'; /* one '?' per offending byte */
            d++;
        }
        if (stats) { /* d stopped on the terminator, so d - dst is the length in bytes */
            stats->byte_count = (d - dst);
            stats->unicode_count = (d - dst);
            stats->unicode_width = (d - dst);
        }
    }
#endif

    free(saved[cur_saved]); /* drop the copy that previously occupied this slot */
    saved[cur_saved] = dst;
    cur_saved = (cur_saved + 1) & (ARRAY_SIZE(saved)-1); /* advance with wraparound; the mask needs a power-of-two slot count (presumably ARRAY_SIZE yields 4 here) */

    return dst; /* freed by a later call once this slot comes around again */
}

从上面代码的第23、24行以及第37、38行可以看出：大于等于0x7F的字节要么使扫描循环直接break（转入转换流程），要么直接被“?”代替。所以就算linux内核设置了支持中文，中文也无法显示出来，而是被“?”代替。修改后的代码如下（原文中红色加粗的部分，即被注释掉的 c >= 0x7f 判断）：

const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str) /* Patched variant: only bytes below ' ' trigger conversion; bytes >= 0x7f pass through. */
{ /* When stats is non-NULL it receives the byte/character/column counts of the returned string. */
    static char *saved[4]; /* the most recently returned copies; a slot is freed when it is reused */
    static unsigned cur_saved; /* = 0 */

    char *dst;
    const char *s;

    s = str;
    while (1) { /* fast path: look for the first byte that needs conversion */
        unsigned char c = *s;
        if (c == '\0') {
            /* 99+% of inputs do not need conversion */
            if (stats) { /* all three counts are byte counts here, even for multibyte text */
                stats->byte_count = (s - str);
                stats->unicode_count = (s - str);
                stats->unicode_width = (s - str);
            }
            return str;
        }
        if (c < ' ') /* control byte below space: still needs conversion */
            break;
    /* Disabled so bytes >= 0x7f (e.g. multibyte Chinese text) stay on the fast path. NOTE(review): this also lets DEL (0x7f) through.
        if (c >= 0x7f)
            break;
    */
        s++;
    }

#if ENABLE_UNICODE_SUPPORT
    dst = unicode_conv_to_printable(stats, str); /* conversion is delegated to the Unicode helper */
#else
    {
        char *d = dst = xstrdup(str); /* edit a duplicate, not the caller's string (assumes strdup-like xstrdup) */
        while (1) {
            unsigned char c = *d;
            if (c == '\0')
                break;
            if (c < ' ' /*|| c >= 0x7f */) /* only bytes below ' ' are replaced now */
                *d = '?';
            d++;
        }
        if (stats) { /* d stopped on the terminator, so d - dst is the length in bytes */
            stats->byte_count = (d - dst);
            stats->unicode_count = (d - dst);
            stats->unicode_width = (d - dst);
        }
    }
#endif

    free(saved[cur_saved]); /* drop the copy that previously occupied this slot */
    saved[cur_saved] = dst;
    cur_saved = (cur_saved + 1) & (ARRAY_SIZE(saved)-1); /* advance with wraparound; the mask needs a power-of-two slot count (presumably ARRAY_SIZE yields 4 here) */

    return dst; /* freed by a later call once this slot comes around again */
}

经过以上修改之后，如果配置busybox 1.17.0时没有选中 [ ] Support Unicode，那么用ls命令就可以看到中文了，这一点我已经亲自测试过。可是还有一种情况：配置busybox 1.17.0时选中了 [*] Support Unicode，见下：

在配置里，有Support Unicode选上的： 
Busybox Settings->General Configuration->
   │ │[ ] Enable locale support (system needs locale for this to work)     │ │  
   │ │[*] Support Unicode                                                  │ │  
   │ │[*] Support for --long-options                                       │ │

那么这样还需要修改一个文件，这个文件就是：unicode.c。如果不修改这个文件，ls命令也是无法显示出中文的。见下未修改的代码：

static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags) /* Build a printable copy of src limited to width columns; UNI_FLAG_PAD pads it with spaces to exactly width. */
{ /* When stats is non-NULL it receives the byte, character and column counts of the result. */
    char *dst;
    unsigned dst_len; /* bytes written to dst so far */
    unsigned uni_count; /* characters emitted */
    unsigned uni_width; /* display columns emitted */

    if (unicode_status != UNICODE_ON) { /* byte-wise path: every byte counts as one character of width 1 */
        char *d;
        if (flags & UNI_FLAG_PAD) { /* result is exactly width bytes: filtered source bytes, then spaces */
            d = dst = xmalloc(width + 1);
            while ((int)--width >= 0) {
                unsigned char c = *src;
                if (c == '\0') {
                    do /* source exhausted: fill the remaining columns with spaces */
                        *d++ = ' ';
                    while ((int)--width >= 0);
                    break;
                }
                *d++ = (c >= ' ' && c < 0x7f) ? c : '?'; /* anything outside ' '..0x7e, including every non-ASCII byte, becomes '?' */
                src++;
            }
            *d = '\0';
        } else {
            d = dst = xstrndup(src, width); /* copy of at most width bytes (assumes strndup-like xstrndup) */
            while (*d) {
                unsigned char c = *d;
                if (c < ' ' || c >= 0x7f) /* same filter as the padded branch */
                    *d = '?';
                d++;
            }
        }
        if (stats) { /* d is on the terminator in both branches, so each count is the byte length */
            stats->byte_count = (d - dst);
            stats->unicode_count = (d - dst);
            stats->unicode_width = (d - dst);
        }
        return dst;
    }

    dst = NULL; /* grown with xrealloc as characters are appended */
    uni_count = uni_width = 0;
    dst_len = 0;
    while (1) { /* decode one wide character per iteration */
        int w; /* display width of wc */
        wchar_t wc;

#if ENABLE_UNICODE_USING_LOCALE
        {
            mbstate_t mbst = { 0 };
            ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
            /* If invalid sequence is seen: -1 is returned,
             * src points to the invalid sequence, errno = EILSEQ.
             * Else number of wchars (excluding terminating L'\0')
             * written to dest is returned.
             * If len (here: 1) non-L'\0' wchars stored at dest,
             * src points to the next char to be converted.
             * If string is completely converted: src = NULL.
             */
            if (rc == 0) /* end-of-string */
                break;
            if (rc < 0) { /* error */
                src++; /* skip one byte of the invalid sequence before substituting */
                goto subst;
            }
            if (!iswprint(wc))
                goto subst;
        }
#else
        src = mbstowc_internal(&wc, src);
        /* src is advanced to next mb char
         * wc == ERROR_WCHAR: invalid sequence is seen
         * else: wc is set
         */
        if (wc == ERROR_WCHAR) /* error */
            goto subst;
        if (wc == 0) /* end-of-string */
            break;
#endif
        if (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR) /* a nonzero limit rejects code points above it */
            goto subst;
        w = wcwidth(wc);
        if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */
         || (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0)
         || (!ENABLE_UNICODE_WIDE_WCHARS && w > 1)
        ) {
 subst:
            wc = CONFIG_SUBST_WCHAR; /* emit the configured substitute instead, counted as one column */
            w = 1;
        }
        width -= w; /* width now holds the columns still available */
        /* Note: if width == 0, we still may add more chars,
         * they may be zero-width or combining ones */
        if ((int)width < 0) {
            /* can't add this wc, string would become longer than width */
            width += w;
            break;
        }

        uni_count++;
        uni_width += w;
        dst = xrealloc(dst, dst_len + MB_CUR_MAX); /* make room for one more encoded character */
#if ENABLE_UNICODE_USING_LOCALE
        {
            mbstate_t mbst = { 0 };
            dst_len += wcrtomb(&dst[dst_len], wc, &mbst);
        }
#else
        dst_len += wcrtomb_internal(&dst[dst_len], wc);
#endif
    }

    /* Pad to remaining width */
    if (flags & UNI_FLAG_PAD) {
        dst = xrealloc(dst, dst_len + width + 1);
        uni_count += width; /* each padding space is one character and one column */
        uni_width += width;
        while ((int)--width >= 0) {
            dst[dst_len++] = ' ';
        }
    }
    dst[dst_len] = '\0'; /* NOTE(review): dst is still NULL here if the loop appended nothing and UNI_FLAG_PAD is clear - confirm callers cannot reach that */
    if (stats) {
        stats->byte_count = dst_len;
        stats->unicode_count = uni_count;
        stats->unicode_width = uni_width;
    }

    return dst;
}

上面代码第20行和第28行对0x7f的判断需要修改，修改后的代码见下：

static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags) /* Patched variant: the byte-wise path no longer replaces bytes >= 0x7f with '?'. */
{ /* Builds a printable copy of src limited to width columns (space-padded under UNI_FLAG_PAD); stats, if non-NULL, receives the counts. */
    char *dst;
    unsigned dst_len; /* bytes written to dst so far */
    unsigned uni_count; /* characters emitted */
    unsigned uni_width; /* display columns emitted */

    if (unicode_status != UNICODE_ON) { /* byte-wise path; both patched lines live in this branch only */
        char *d;
        if (flags & UNI_FLAG_PAD) { /* result is exactly width bytes: filtered source bytes, then spaces */
            d = dst = xmalloc(width + 1);
            while ((int)--width >= 0) {
                unsigned char c = *src;
                if (c == '\0') {
                    do /* source exhausted: fill the remaining columns with spaces */
                        *d++ = ' ';
                    while ((int)--width >= 0);
                    break;
                }
                *d++ = (c >= ' '/* && c < 0x7f */) ? c : '?'; /* upper bound dropped: bytes >= 0x7f are copied as-is, only bytes below ' ' become '?' */
                src++;
            }
            *d = '\0';
        } else {
            d = dst = xstrndup(src, width); /* copy of at most width bytes (assumes strndup-like xstrndup) */
            while (*d) {
                unsigned char c = *d;
                if (c < ' '/* || c >= 0x7f */) /* only bytes below ' ' are replaced now */
                    *d = '?';
                d++;
            }
        }
        if (stats) { /* still byte counts: a multibyte character is counted once per byte here */
            stats->byte_count = (d - dst);
            stats->unicode_count = (d - dst);
            stats->unicode_width = (d - dst);
        }
        return dst;
    }

    dst = NULL; /* grown with xrealloc as characters are appended */
    uni_count = uni_width = 0;
    dst_len = 0;
    while (1) { /* decode one wide character per iteration */
        int w; /* display width of wc */
        wchar_t wc;

#if ENABLE_UNICODE_USING_LOCALE
        {
            mbstate_t mbst = { 0 };
            ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
            /* If invalid sequence is seen: -1 is returned,
             * src points to the invalid sequence, errno = EILSEQ.
             * Else number of wchars (excluding terminating L'\0')
             * written to dest is returned.
             * If len (here: 1) non-L'\0' wchars stored at dest,
             * src points to the next char to be converted.
             * If string is completely converted: src = NULL.
             */
            if (rc == 0) /* end-of-string */
                break;
            if (rc < 0) { /* error */
                src++; /* skip one byte of the invalid sequence before substituting */
                goto subst;
            }
            if (!iswprint(wc))
                goto subst;
        }
#else
        src = mbstowc_internal(&wc, src);
        /* src is advanced to next mb char
         * wc == ERROR_WCHAR: invalid sequence is seen
         * else: wc is set
         */
        if (wc == ERROR_WCHAR) /* error */
            goto subst;
        if (wc == 0) /* end-of-string */
            break;
#endif
        if (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR) /* a nonzero limit rejects code points above it */
            goto subst;
        w = wcwidth(wc);
        if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */
         || (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0)
         || (!ENABLE_UNICODE_WIDE_WCHARS && w > 1)
        ) {
 subst:
            wc = CONFIG_SUBST_WCHAR; /* emit the configured substitute instead, counted as one column */
            w = 1;
        }
        width -= w; /* width now holds the columns still available */
        /* Note: if width == 0, we still may add more chars,
         * they may be zero-width or combining ones */
        if ((int)width < 0) {
            /* can't add this wc, string would become longer than width */
            width += w;
            break;
        }

        uni_count++;
        uni_width += w;
        dst = xrealloc(dst, dst_len + MB_CUR_MAX); /* make room for one more encoded character */
#if ENABLE_UNICODE_USING_LOCALE
        {
            mbstate_t mbst = { 0 };
            dst_len += wcrtomb(&dst[dst_len], wc, &mbst);
        }
#else
        dst_len += wcrtomb_internal(&dst[dst_len], wc);
#endif
    }

    /* Pad to remaining width */
    if (flags & UNI_FLAG_PAD) {
        dst = xrealloc(dst, dst_len + width + 1);
        uni_count += width; /* each padding space is one character and one column */
        uni_width += width;
        while ((int)--width >= 0) {
            dst[dst_len++] = ' ';
        }
    }
    dst[dst_len] = '\0'; /* NOTE(review): dst is still NULL here if the loop appended nothing and UNI_FLAG_PAD is clear - confirm callers cannot reach that */
    if (stats) {
        stats->byte_count = dst_len;
        stats->unicode_count = uni_count;
        stats->unicode_width = uni_width;
    }

    return dst;
}

经过以上修改之后，就算配置了支持Unicode，ls命令也可以显示中文，同时也可以进入中文名称的目录（文件夹）。需要注意：unicode.c中的这两处修改都位于 unicode_status != UNICODE_ON 的分支内，因此只在该分支被执行时才起作用。

posted @ 2014-03-28 19:38 dolinux 阅读(1212) 评论(0) 收藏举报

刷新页面返回顶部

dolinux

Linux内核工程师，计算机底层技术爱好者

Busybox支持中文的解决办法

公告