2012-03-05 18:33
764人阅读
评论(9)
收藏
举报
使用cell.Value2输出中文内容时总是乱码。怀疑是utf-8的原因,转换后结果仍然是乱码。自己再写个转换的再测试,依然是乱码,莫非有BUG!?
下载LuaCOM的源码,查看函数tLuaCOMTypeHandler::com2lua和tStringBuffer tUtil::bstr2string。整个过程看起来都OK,但再次测试时发现,结果字符串少了一个字节(byte)。
在tLuaCOMTypeHandler::com2lua的VT_BSTR分支中,压入Lua的字符串长度恰好被减去了1,于是将
lua_pushlstring(L, str, str.getSize()-1);
修改为
lua_pushlstring(L, str, str.getSize());
重新编译LuaCOM,再看cell.Value2的输出结果,终于正确了。由于没有进行全面测试,不知道此修改会不会引入其他错误。资源里有一个已经编译好的版本。
--lc是从网上抄来的unicode utf-8 ansi相互转换的函数
-- Demo: write a Chinese string into an Excel cell through LuaCOM and read it
-- back, dumping the bytes at every conversion step (ANSI -> UTF-16 -> UTF-8,
-- then UTF-8 -> UTF-16 -> ANSI).
package.cpath=[[C:\Program Files\Lua\5.1\clibs\?.dll;d:\loonlib\sample\lc\?.dll]]
require "luacom"
require "lc"

-- Debug helper: print every key/value pair of a table.
function print_table(t)
  for key, value in pairs(t) do
    print(key, value)
  end
end

-- Print `label` followed by the hex dump of the first `count` bytes of `bytes`.
local function dump(label, bytes, count)
  print(label .. lc.bstr(bytes, count))
end

excel = luacom.CreateObject("Excel.Application")
excel.Visible = true
excel.Workbooks:Add()
--luacom.ViewTypeLib(excel);
sheet = excel.Sheets(1)
local cell = sheet:Range("E6")

local ansiText = "严中"

-- ANSI -> UTF-16. Author's note on the expected dump: 0x25 0x4e 0x2d 0x4e 0x00 0x00 6
ws, s2 = lc.a2w(ansiText)
dump("unicode : ", ws, s2)

-- UTF-16 -> UTF-8. Author's note on the expected dump: 0xe4 0xb8 0xa5 0xe4 0xb8 0xad 0x00 0x00 8
us, s2 = lc.w2u(ws, s2)
dump("utf8 : ", us, s2)

-- Round trip through Excel: store the UTF-8 text, then read the cell back.
cell.Value2 = us
ws, s2 = lc.u2w(cell.Value2, s2)
dump("unicode : ", ws, s2)

-- UTF-16 -> ANSI so the console can display the result.
as, s2 = lc.w2a(ws, s2)
dump("ansi : ", as, s2)
print(as)
lc.def
- LIBRARY "lc"
-
- EXPORTS
- luaopen_lc
lc.h
- extern "C" {
- #include "lua.h"
- #include "lualib.h"
- #include "lauxlib.h"
- int luaopen_local(lua_State* L);
- }
-
- #include <locale.h>
- #include <cstring>
- #ifdef WIN32
- #include <windows.h>
- #include <winnls.h>
- #else
- #include <cstdlib>
- #endif
-
-
- #define LN_lc "lc"
- int lua_a2w(lua_State* L);
- int lua_u2w(lua_State* L);
- int lua_w2a(lua_State* L);
- int lua_w2u(lua_State* L);
- int lua_u2a(lua_State* L);
- int lua_a2u(lua_State* L);
- int lua_bstr(lua_State* L);
-
- int lua_help(lua_State* L);
- wchar_t* mb2wc(const char* mbstr, int& s2, int cp);
- char* wc2mb(const wchar_t* wcstr, int& s2, int cp);
lc.cpp
- #include "lc.h"
- //g++ -shared -s -o lc.dll -O3 lc.cpp lc.def -llua5.1 -DWIN32 -I%loon%/lua/src -L%loon%/lib/gcc_dll/debug -Wl,--out-implib,liblc.a
-
- int lua_bstr(lua_State* L) {
- const char* s = luaL_optstring(L, 1, "");
- int len = luaL_optnumber(L, 2, 0);
- if (strcmp(s, "")==0 || 0==len) {
- lua_pushstring(L, s);
- } else {
- luaL_Buffer b;
- luaL_buffinit(L, &b);
- char* byte = (char*)malloc(64);
- for (int i=0; i<len; ++i) {
- sprintf(byte, "0x%02x ", (unsigned char)*s++);
- luaL_addstring(&b, byte);
- }
- free(byte);
- luaL_pushresult(&b);
- }
- return 1;
- }
-
- int lua_u2w(lua_State* L) {
- int result = 0;
- size_t len = 0;
- const char* mbstr = lua_tolstring(L, 1, &len);
- if (mbstr && len>0) {
- int s2 = 0;
- wchar_t* wcstr = mb2wc(mbstr, s2, CP_UTF8);
- if (wcstr) {
- lua_pushlstring(L, (const char*)wcstr, s2);
- lua_pushnumber(L, s2);
- delete[] wcstr;
- result = 2;
- }
- }
- return result;
- }
-
- int lua_a2w(lua_State* L) {
- int result = 0;
- size_t len = 0;
- const char* mbstr = lua_tolstring(L, 1, &len);
- if (mbstr && len>0) {
- int s2 = 0;
- wchar_t* wcstr = mb2wc(mbstr, s2, CP_ACP);
- if (wcstr) {
- lua_pushlstring(L, (const char*)wcstr, s2);
- lua_pushnumber(L, s2);
- delete[] wcstr;
- result = 2;
- }
- }
- return result;
- }
-
- int lua_w2a(lua_State* L) {
- int result = 0;
- size_t len = 0;
- const char* wcstr = lua_tolstring(L, 1, &len);
- if (wcstr && len>0) {
- int s2 = 0;
- char* mbstr = wc2mb((wchar_t*)wcstr, s2, CP_ACP);
- if (mbstr) {
- lua_pushlstring(L, mbstr, s2);
- lua_pushnumber(L, s2);
- delete[] mbstr;
- result = 2;
- }
- }
- return result;
- }
-
- int lua_w2u(lua_State* L) {
- int result = 0;
- size_t len = 0;
- const char* wcstr = lua_tolstring(L, 1, &len);
- if (wcstr && len>0) {
- int s2 = 0;
- char* mbstr = wc2mb((wchar_t*)wcstr, s2, CP_UTF8);
- if (mbstr) {
- lua_pushlstring(L, mbstr, s2);
- lua_pushnumber(L, s2);
- delete[] mbstr;
- result = 2;
- }
- }
- return result;
- }
-
- int lua_u2a(lua_State* L) {
- int result = 0;
- size_t len = 0;
- const char* mbstr = lua_tolstring(L, 1, &len);
- if (mbstr && len>0) {
- int s2 = 0;
- wchar_t* wcstr = mb2wc(mbstr, s2, CP_UTF8);
- if (wcstr) {
- char* nmbstr = wc2mb(wcstr, s2, CP_ACP);
- if (nmbstr) {
- lua_pushlstring(L, nmbstr, s2);
- lua_pushnumber(L, s2);
- result = 2;
- delete[] nmbstr;
- }
- delete[] wcstr;
- }
- }
- return result;
- }
-
- int lua_a2u(lua_State* L) {
- int result = 0;
- size_t len = 0;
- const char* mbstr = lua_tolstring(L, 1, &len);
- if (mbstr && len>0) {
- int s2 = 0;
- wchar_t* wcstr = mb2wc(mbstr, s2, CP_ACP);
- if (wcstr) {
- char* nmbstr = wc2mb(wcstr, s2, CP_UTF8);
- if (nmbstr) {
- lua_pushlstring(L, nmbstr, s2);
- lua_pushnumber(L, s2);
- result = 2;
- delete[] nmbstr;
- }
- delete[] wcstr;
- }
- }
- return result;
- }
-
// Converts a NUL-terminated multibyte string to a wide-character string.
//
//   mbstr  NUL-terminated input; NULL yields NULL.
//   s2     out: size of the returned buffer in BYTES, including the wide
//          terminator. Left untouched on failure.
//   cp     Windows code page of the input (CP_ACP, CP_UTF8, ...). Ignored on
//          non-Windows builds, where mbstowcs uses the current C locale.
//
// Returns a new[]-allocated, NUL-terminated buffer (release with delete[]),
// or NULL if the input cannot be converted.
//
// Fixes relative to the previous version:
//   * the portable branch allocated `size` elements but let mbstowcs write
//     `size + 1` (one-element heap overflow);
//   * the (size_t)-1 error result of the sizing call was never checked;
//   * the byte count was computed as 2*size, which is only right for a
//     2-byte wchar_t, and excluded the terminator on non-Windows builds while
//     including it on Windows.
wchar_t* mb2wc(const char* mbstr, int& s2, int cp) {
    if (mbstr == NULL) {
        return NULL;
    }

#ifdef WIN32
    // With cbMultiByte == -1 the returned count includes the terminator.
    const int size = MultiByteToWideChar(cp, 0, mbstr, -1, NULL, 0);
    if (size <= 0) {
        return NULL;
    }
    const size_t count = (size_t)size;
    wchar_t* wcstr = new wchar_t[count];
    memset(wcstr, 0, count * sizeof(wchar_t));
    if (MultiByteToWideChar(cp, 0, mbstr, -1, wcstr, size) == 0) {
        // MultiByteToWideChar returns 0 if it does not succeed.
        delete[] wcstr;
        return NULL;
    }
#else
    (void)cp;
    // mbstowcs reports the length WITHOUT the terminator, or (size_t)-1 when
    // the input contains an invalid multibyte sequence.
    const size_t chars = mbstowcs(NULL, mbstr, 0);
    if (chars == (size_t)-1) {
        return NULL;
    }
    const size_t count = chars + 1;
    wchar_t* wcstr = new wchar_t[count];
    memset(wcstr, 0, count * sizeof(wchar_t));
    if (mbstowcs(wcstr, mbstr, count) == (size_t)-1) {
        delete[] wcstr;
        return NULL;
    }
#endif

    s2 = (int)(count * sizeof(wchar_t));
    return wcstr;
}
-
// Converts a NUL-terminated wide-character string to a multibyte string.
//
//   wcstr  NUL-terminated wide input; NULL yields NULL.
//   s2     out: size of the returned buffer in bytes, including the
//          terminator. Left untouched on failure.
//   cp     Windows code page of the output (CP_ACP, CP_UTF8, ...). Ignored on
//          non-Windows builds, where wcstombs uses the current C locale.
//
// Returns a new[]-allocated, NUL-terminated buffer (release with delete[]),
// or NULL if the input cannot be converted.
//
// Fixes relative to the previous version:
//   * the portable branch allocated `size` bytes but let wcstombs write
//     `size + 1` (one-byte heap overflow);
//   * the (size_t)-1 error result of the sizing call was never checked;
//   * the reported size excluded the terminator on non-Windows builds while
//     including it on Windows.
char* wc2mb(const wchar_t* wcstr, int& s2, int cp) {
    if (wcstr == NULL) {
        return NULL;
    }

#ifdef WIN32
    // With cchWideChar == -1 the returned count includes the terminator.
    const int size = WideCharToMultiByte(cp, 0, wcstr, -1, NULL, 0, NULL, NULL);
    if (size <= 0) {
        return NULL;
    }
    const size_t count = (size_t)size;
    char* mbstr = new char[count];
    memset(mbstr, 0, count);
    if (WideCharToMultiByte(cp, 0, wcstr, -1, mbstr, size, NULL, NULL) == 0) {
        // WideCharToMultiByte returns 0 if it does not succeed.
        delete[] mbstr;
        return NULL;
    }
#else
    (void)cp;
    // wcstombs reports the length WITHOUT the terminator, or (size_t)-1 when
    // a wide character has no multibyte representation.
    const size_t len = wcstombs(NULL, wcstr, 0);
    if (len == (size_t)-1) {
        return NULL;
    }
    const size_t count = len + 1;
    char* mbstr = new char[count];
    memset(mbstr, 0, count);
    if (wcstombs(mbstr, wcstr, count) == (size_t)-1) {
        delete[] mbstr;
        return NULL;
    }
#endif

    s2 = (int)count;
    return mbstr;
}
-
- int lua_help(lua_State* L) {
- const char* s=
- "Simple Characters Transformation\n"
- " a2w(ansi to unicode)\n"
- " u2w(utf8 to unicode)\n"
- " w2a(unicode to ansi)\n"
- " w2u(unicode to utf8)\n"
- " u2a(utf8 to ansi)\n"
- " a2u(ansi to utf8)\n"
- " bstr(bytes of str)\n"
- " help(show this)\n\n"
- " example :\n"
- " local s = \"I like lua\"\n"
- " print(lc.bstr(s, string.len(s)+1))\n"
- " local ws, s2 = lc.a2w(s)\n"
- "wunoman@qq.com 2012/03/06\n"
- ;
- lua_pushstring(L, s);
-
- return 1;
- }
-
- luaL_reg lrg_lc[] = {
- {"a2w", lua_a2w},
- {"u2w", lua_u2w},
- {"w2a", lua_w2a},
- {"w2u", lua_w2u},
- {"u2a", lua_u2a},
- {"a2u", lua_a2u},
- {"bstr", lua_bstr},
- {"help", lua_help},
- {NULL, NULL}
- };
-
- extern "C" int luaopen_lc(lua_State* L) {
- luaL_register(L, LN_lc, lrg_lc);
- return 1;
- }
- 4楼 windtailljj 2012-05-12 22:19发表 [回复]
- 我看了下你说的那个函数,貌似没有问题,因为上面的bstr2string()函数有一个默认参数是nullterminated=true,返回的字符串中带有一个'\0',而他使用的pushlstring(而不是pushstring),确实应该 -1 的啊,你这个修改应该是碰巧解决了问题吧。
- Re: alga_1 2012-06-02 14:52发表 [回复]
- 回复windtailljj:很可能是巧合。
- 3楼 danninggao 2012-04-24 14:44发表 [回复]
- 虽然不知道是什么原因,但是胡乱尝试了一下,好像解决问题了:在获得EXCEL表中的值时 local sRet = oExcel.Activesheet.Cells(Row, Column).Value2 ,此时为UTF-8,然后将字符串长度减2,local nTemp = string.sub(sRet,1,string.len(sRet)-2),再将utf-8转为gbk,这时候就正常了
- 2楼 danninggao 2012-04-18 14:41发表 [回复]
- 测试了下 发现当从EXCEL中读的汉字为4个的时候 显示有点不正常
- Re: alga_1 2012-04-22 10:54发表 [回复]
- 回复danninggao:出现乱码?之前试了一下姓名都是三个字以下的。
- Re: danninggao 2012-04-24 12:24发表 [回复]
- 回复alga_1:又试了下 比如EXCEL中四个汉字"朋友朋友" local sRet = oExcel.Activesheet.Cells(Row, Column).Value2 获取到值后,然后通过函数conver将utf-8转成gbk,print(conver(sRet)) 输出结果为“朋友朋友 4” 如果我加个赋值过程local temp = conver(sRet) print(temp) 然后输出就正常。还发现一个新问题,读取temp的值为"朋友" string.len(temp) 长度为6,而实际上长度为4 ,就导致读出来的变量无法进行操作,如print(temp.."测试") 输出结果还是为"朋友"
- Re: alga_1 2012-04-30 20:29发表 [回复]
- 回复danninggao:我是这样试的:
package.cpath=[[?.dll]]
require "luacom"
require "lc"
function print_table(t) for k,v in pairs(t) do print(k,v) end end
excel = luacom.CreateObject("Excel.Application")
excel.Visible = true
excel.Workbooks:Add();
sheet=excel.Sheets(1);
local r=sheet:Range("E6");
local s = "严中严中";
ws, s2=lc.a2w(s);
print("unicode : " .. lc.bstr(ws, s2));
us, s2=lc.w2u(ws, s2);
print("utf8 : " .. lc.bstr(us, s2));
r.Value2=us;
ws, s2=lc.u2w(r.Value2, s2);
print("unicode : " .. lc.bstr(ws, s2));
as, s2=lc.w2a(ws, s2);
print("ansi : " .. lc.bstr(as, s2));
print(as);
输出结果:
C:\WINDOWS\system32\cmd.exe /c lua libexcel.lua
unicode : 0x25 0x4e 0x2d 0x4e 0x25 0x4e 0x2d 0x4e 0x00 0x00
utf8 : 0xe4 0xb8 0xa5 0xe4 0xb8 0xad 0xe4 0xb8 0xa5 0xe4 0xb8 0xad 0x00
unicode : 0x25 0x4e 0x2d 0x4e 0x25 0x4e 0x2d 0x4e 0x00 0x00
ansi : 0xd1 0xcf 0xd6 0xd0 0xd1 0xcf 0xd6 0xd0 0x00
严中严中
Hit any key to close this window...
输入是4个汉字,数了一下,输出为9个byte,其中包含了结束符0x00,结果算是正确的。
- 1楼 quosin 2012-03-21 18:08发表 [回复]
- 你好,我也遇到这个问题,急需解决,可不可以把你改好的库给我用一下!qq号 674438136 拜托啦
- Re: alga_1 2012-04-21 20:07发表 [回复]
- 回复qushiheng:改好的库放在链接里,上面“资源”点一下
|