c中实现utf8和gbk的互转

wusiqi111 2017-02-14

展开全文

c中实现utf8和gbk的互转

博客分类：

c&c++
linux

C代码  
#include <iconv.h>  
#include <stdlib.h>  
#include <stdio.h>  
#include <unistd.h>  
#include <fcntl.h>  
#include <string.h>  
#include <sys/stat.h>  
  
/**
 * Convert a byte sequence from one character set to another using iconv.
 *
 * @param from_charset  iconv name of the source encoding
 * @param to_charset    iconv name of the destination encoding
 * @param inbuf         input bytes to convert
 * @param inlen         number of input bytes
 * @param outbuf        destination buffer; always NUL-terminated when the
 *                      conversion descriptor could be opened
 * @param outlen        size of outbuf in bytes; one byte is reserved for the
 *                      terminating NUL
 * @return 0 on success, -1 on bad arguments, unsupported conversion,
 *         invalid input, or insufficient output space
 */
int code_convert(char *from_charset, char *to_charset, char *inbuf, size_t inlen,
        char *outbuf, size_t outlen) {
    if (from_charset == NULL || to_charset == NULL || inbuf == NULL
            || outbuf == NULL || outlen == 0) {
        return -1;
    }

    /* iconv_open reports failure with (iconv_t)-1, not 0. */
    iconv_t cd = iconv_open(to_charset, from_charset);
    if (cd == (iconv_t) -1) {
        return -1;
    }

    memset(outbuf, 0, outlen);

    /* iconv advances these cursors; work on copies so the roles stay clear. */
    char *in_cursor = inbuf;
    char *out_cursor = outbuf;
    size_t in_left = inlen;
    size_t out_left = outlen - 1; /* keep room for the terminating NUL */
    int rc = 0;

    if (iconv(cd, &in_cursor, &in_left, &out_cursor, &out_left) == (size_t) -1) {
        rc = -1;
    } else if (iconv(cd, NULL, NULL, &out_cursor, &out_left) == (size_t) -1) {
        /* Flushing any pending shift sequence failed (stateful encodings). */
        rc = -1;
    }

    /* Terminate at the write position; the reserved byte guarantees room. */
    *out_cursor = '\0';

    /* Release the descriptor on every path, including conversion failure. */
    iconv_close(cd);

    return rc;
}
  
/*
 * Convert inlen bytes at inbuf from the "utf-8" charset to the "gb2312"
 * charset, storing the result in outbuf (capacity outlen bytes).
 * Returns whatever code_convert returns.
 */
int u2g(char *inbuf, size_t inlen, char *outbuf, size_t outlen) {
    int status;

    status = code_convert("utf-8", "gb2312", inbuf, inlen, outbuf, outlen);
    return status;
}
  
/*
 * Convert inlen bytes at inbuf from the "gb2312" charset to the "utf-8"
 * charset, storing the result in outbuf (capacity outlen bytes).
 * Returns whatever code_convert returns.
 */
int g2u(char *inbuf, size_t inlen, char *outbuf, size_t outlen) {
    int status;

    status = code_convert("gb2312", "utf-8", inbuf, inlen, outbuf, outlen);
    return status;
}
  
/*
 * Write exactly len bytes of data to the file at path, creating it if needed
 * and truncating any previous content so stale bytes cannot remain.
 * Returns 0 on success, -1 on failure.
 */
static int save_bytes(const char *path, const char *data, size_t len) {
    int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
    if (fd < 0) {
        perror(path);
        return -1;
    }

    int rc = 0;
    ssize_t written = write(fd, data, len);
    if (written < 0) {
        perror(path);
        rc = -1;
    } else if ((size_t) written != len) {
        fprintf(stderr, "%s: short write\n", path);
        rc = -1;
    }

    if (close(fd) != 0) {
        perror(path);
        rc = -1;
    }
    return rc;
}

/*
 * Demo: convert a UTF-8 literal to GB2312 and save it to "test.txt", then
 * convert it back to UTF-8 and save that to "test.txt2".
 * Exits with 0 on success and 1 on any failure.
 */
int main(void) {
    char *s = "中国";
    char buf[10];
    char buf2[10];

    /* Only use buf if the conversion actually succeeded. */
    if (u2g(s, strlen(s), buf, sizeof(buf)) != 0) {
        fprintf(stderr, "utf-8 -> gb2312 conversion failed\n");
        return 1;
    }
    if (save_bytes("test.txt", buf, strlen(buf)) != 0) {
        return 1;
    }

    if (g2u(buf, strlen(buf), buf2, sizeof(buf2)) != 0) {
        fprintf(stderr, "gb2312 -> utf-8 conversion failed\n");
        return 1;
    }
    if (save_bytes("test.txt2", buf2, strlen(buf2)) != 0) {
        return 1;
    }

    return 0;
}

上面是方式一：使用 iconv 函数。

方式二：使用如下两个函数

mbstowcs 将多字节字符串转换为宽字符（wchar_t）字符串

wcstombs 将宽字符（wchar_t）字符串转换为多字节字符串

注意：这种方式需要系统 locale 的支持，可以通过 locale -a 查看系统已支持的 locale。若不支持 zh_CN.gbk，则需要先安装。例如，在 ubuntu 上的安装步骤如下：

编辑

$sudo vi /var/lib/locales/supported.d/zh-hans

更新成

zh_CN.UTF-8 UTF-8
zh_SG.UTF-8 UTF-8
zh_CN.GBK GBK
zh_CN.GB18030 GB18030

// 更新
$ sudo locale-gen

// 查看
$ locale -a
C
POSIX
zh_CN.gb18030
zh_CN.gbk
zh_CN.utf8
zh_SG.utf8

C代码  
#include <stdlib.h>  
#include <stdio.h>  
#include <string.h>  
#include <unistd.h>  
#include <fcntl.h>  
#include <sys/stat.h>  
#include <locale.h>  
  
/**
 * Convert a UTF-8 string to GBK by way of an intermediate wide-character
 * string (mbstowcs under a UTF-8 locale, then wcstombs under a GBK locale).
 *
 * Side effect: the process-wide locale (LC_ALL) is switched to "zh_CN.utf8"
 * and then to "zh_CN.gbk", and is not restored. Both locales must be
 * installed on the system.
 *
 * @param gbkStr        destination buffer; receives the NUL-terminated result
 * @param srcStr        NUL-terminated string to convert
 * @param maxGbkStrlen  size of gbkStr in bytes, including the terminator
 * @return number of bytes stored in gbkStr (excluding the terminator, > 0)
 *         on success; -1 on failure (bad argument, missing locale,
 *         unconvertible or empty input, out of memory, or buffer too small)
 */
int utf82gbk(char *gbkStr, const char *srcStr, int maxGbkStrlen) {
    wchar_t *unicodeStr = NULL;
    int result = -1;

    if (NULL == srcStr || NULL == gbkStr || maxGbkStrlen <= 0) {
        printf("Bad Parameter\n");
        return -1;
    }

    /* Step 1: decode the multibyte input under a UTF-8 locale. */
    if (NULL == setlocale(LC_ALL, "zh_CN.utf8")) {
        printf("Bad Parameter\n");
        return -1;
    }

    /* With a NULL destination mbstowcs only reports the required length. */
    size_t unicodeLen = mbstowcs(NULL, srcStr, 0);
    if (unicodeLen == (size_t) -1 || unicodeLen == 0) {
        printf("Can not Transfer!!!\n");
        return -1;
    }

    unicodeStr = calloc(unicodeLen + 1, sizeof *unicodeStr);
    if (NULL == unicodeStr) {
        printf("Can not Transfer!!!\n");
        return -1;
    }
    /* Capacity is unicodeLen + 1 so the terminating L'\0' is stored too. */
    if (mbstowcs(unicodeStr, srcStr, unicodeLen + 1) == (size_t) -1) {
        printf("Can not Transfer!!!\n");
        goto done;
    }

    /* Step 2: encode the wide string under a GBK locale. */
    if (NULL == setlocale(LC_ALL, "zh_CN.gbk")) {
        printf("Bad Parameter\n");
        goto done;
    }

    size_t gbkLen = wcstombs(NULL, unicodeStr, 0);
    if (gbkLen == (size_t) -1 || gbkLen == 0) {
        printf("Can not Transfer!!!\n");
        goto done;
    }
    if (gbkLen >= (size_t) maxGbkStrlen) {
        /* Need gbkLen bytes plus one for the terminator. */
        printf("Dst Str memory not enough\n");
        goto done;
    }

    if (wcstombs(gbkStr, unicodeStr, gbkLen) == (size_t) -1) {
        printf("Can not Transfer!!!\n");
        goto done;
    }
    gbkStr[gbkLen] = 0;
    result = (int) gbkLen; /* safe: gbkLen < maxGbkStrlen, which is an int */

done:
    /* Single exit so the intermediate buffer is released on every path. */
    free(unicodeStr);
    return result;
}
  
/*
 * Demo: convert a UTF-8 literal to GBK with utf82gbk and save the bytes to
 * "test.txt". Exits with 0 on success and 1 on any failure.
 */
int main(void) {
    char *s = "中国";
    char buf[10];

    /* buf is only valid when the conversion succeeded. */
    int len = utf82gbk(buf, s, sizeof(buf));
    if (len < 0) {
        fprintf(stderr, "utf8 -> gbk conversion failed\n");
        return 1;
    }

    /* O_TRUNC: do not leave stale bytes behind in an existing file. */
    int fd = open("test.txt", O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
    if (fd < 0) {
        perror("test.txt");
        return 1;
    }

    int rc = 0;
    ssize_t written = write(fd, buf, (size_t) len);
    if (written < 0) {
        perror("test.txt");
        rc = 1;
    } else if (written != (ssize_t) len) {
        fprintf(stderr, "test.txt: short write\n");
        rc = 1;
    }

    if (close(fd) != 0) {
        perror("test.txt");
        rc = 1;
    }

    return rc;
}