/* GBK -> UTF-8 encoding conversion helpers. */
#ifndef GBK_ENCODING_H
|
||
#define GBK_ENCODING_H
|
||
|
||
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <esp_log.h>

#include "gbk_map.h"  // mapping table
|
||
|
||
#define GBK_UTIL_TAG "GBK_ENCODING"
|
||
|
||
#ifdef __cplusplus
|
||
extern "C" {
|
||
#endif
|
||
|
||
/**
 * @brief Convert a GBK-encoded string to UTF-8.
 *
 * @param gbk_str  Input string in GBK encoding.
 * @param utf8_buf Output buffer that receives the UTF-8 string.
 * @param buf_size Size of the output buffer.
 * @return size_t Length of the converted string, or 0 on failure.
 */
|
||
size_t gbk_to_utf8(const char* gbk_str, char* utf8_buf, size_t buf_size);
|
||
|
||
/**
 * @brief Get the buffer size required to convert a GBK string to UTF-8.
 *
 * @param gbk_str Input string in GBK encoding.
 * @return size_t Required size of the UTF-8 buffer.
 */
|
||
size_t gbk_to_utf8_buffer_size(const char* gbk_str);
|
||
|
||
/**
 * @brief Convert a GBK-encoded string to UTF-8 in newly allocated memory.
 *
 * @param gbk_str Input string in GBK encoding.
 * @return char* Newly allocated UTF-8 string; the caller must free() it after use.
 */
|
||
char* gbk_to_utf8_alloc(const char* gbk_str);
|
||
|
||
/**
 * @brief Initialize the GBK encoding conversion table.
 *
 * This function loads the encoding conversion table into memory.
 */
|
||
void gbk_encoding_init(void);
|
||
|
||
/*
 * GBK -> Unicode mapping table.
 *
 * NOTE(review): only the first eight entries are present; the original
 * comment marks the remainder as a placeholder for the full table. None of
 * the functions visible in this header reference this array.
 */
static const uint16_t gbk_to_unicode_map[] = {
    0x4E02, 0x4E04, 0x4E05, 0x4E06,
    0x4E0F, 0x4E12, 0x4E17, 0x4E1F,
    /* ... the complete mapping table belongs here ... */
};
|
||
|
||
// GBK到Unicode的转换函数
|
||
static inline uint16_t gbk_to_unicode(uint8_t ch, uint8_t cl) {
|
||
if (ch <= 0x7F) {
|
||
return ch; // ASCII字符
|
||
}
|
||
|
||
// GBK区域判断
|
||
if (ch >= 0x81 && ch <= 0xFE) {
|
||
if (cl >= 0x40 && cl <= 0x7E || cl >= 0x80 && cl <= 0xFE) {
|
||
uint32_t gbk = (ch << 8) | cl;
|
||
|
||
// GBK-1区域 (0xB0A1-0xF7FE)
|
||
if (gbk >= 0xB0A1 && gbk <= 0xF7FE) {
|
||
uint32_t offset = ((ch - 0xB0) * 94 + (cl - 0xA1));
|
||
return 0x4E00 + offset; // 基本汉字区
|
||
}
|
||
|
||
// GBK-2区域 (0x8140-0xA0FE)
|
||
if (gbk >= 0x8140 && gbk <= 0xA0FE) {
|
||
uint32_t offset = ((ch - 0x81) * 190 + (cl - (cl >= 0x80 ? 0x41 : 0x40)));
|
||
return 0x3000 + offset; // 符号区
|
||
}
|
||
|
||
// GBK-3区域 (0xAA40-0xFEA0)
|
||
if (gbk >= 0xAA40 && gbk <= 0xFEA0) {
|
||
uint32_t offset = ((ch - 0xAA) * 96 + (cl - 0x40));
|
||
return 0x4E00 + 6768 + offset; // 扩展汉字区
|
||
}
|
||
}
|
||
}
|
||
|
||
ESP_LOGW(GBK_UTIL_TAG, "未找到映射的GBK编码: 0x%04X [高字节:0x%02X, 低字节:0x%02X]",
|
||
(ch << 8) | cl, ch, cl);
|
||
return 0x3F; // 返回'?'的Unicode编码
|
||
}
|
||
|
||
/**
 * @brief Encode one 16-bit Unicode code point as UTF-8.
 *
 * Surrogate values (0xD800..0xDFFF) are not valid scalar values and must not
 * appear in UTF-8, so they are encoded as U+FFFD (replacement character).
 *
 * @param uni  Code point to encode.
 * @param utf8 Destination; must have room for at least 3 bytes. No terminator
 *             is written.
 * @return Number of bytes written (1 to 3), or 0 when utf8 is NULL.
 */
static inline int unicode_to_utf8(uint16_t uni, uint8_t *utf8) {
    if (utf8 == NULL) {
        return 0;
    }

    if (uni >= 0xD800 && uni <= 0xDFFF) {
        uni = 0xFFFD;  // lone surrogate -> replacement character
    }

    if (uni <= 0x7F) {
        // 0xxxxxxx
        utf8[0] = (uint8_t)uni;
        return 1;
    }

    if (uni <= 0x7FF) {
        // 110xxxxx 10xxxxxx
        utf8[0] = (uint8_t)(0xC0 | ((uni >> 6) & 0x1F));
        utf8[1] = (uint8_t)(0x80 | (uni & 0x3F));
        return 2;
    }

    // 1110xxxx 10xxxxxx 10xxxxxx
    utf8[0] = (uint8_t)(0xE0 | ((uni >> 12) & 0x0F));
    utf8[1] = (uint8_t)(0x80 | ((uni >> 6) & 0x3F));
    utf8[2] = (uint8_t)(0x80 | (uni & 0x3F));
    return 3;
}
|
||
|
||
/**
 * @brief Convert len bytes of GBK text into a NUL-terminated UTF-8 string.
 *
 * Bytes <= 0x7F are copied unchanged; any other byte is taken as the lead
 * byte of a two-byte pair and converted via gbk_to_unicode() and
 * unicode_to_utf8(). A lead byte with no following byte inside len is
 * dropped. The input is not scanned for a terminator; exactly len bytes are
 * considered.
 *
 * The output is NOT bounds-checked: the caller must provide at least
 * (len / 2) * 3 + (len % 2) + 1 bytes in utf8 (each pair expands to at most
 * 3 bytes, plus the terminator).
 *
 * NOTE(review): this header also declares a non-static
 * `size_t gbk_to_utf8(const char*, char*, size_t)`; in C the two declarations
 * conflict. Both signatures are left as they are here - one of them needs to
 * be renamed by the owner of the API.
 *
 * @param gbk  Input bytes in GBK encoding; may be NULL (treated as empty).
 * @param utf8 Destination buffer; when NULL nothing is written.
 * @param len  Number of input bytes to convert; values <= 0 convert nothing.
 * @return Number of UTF-8 bytes written, excluding the terminator.
 */
static inline int gbk_to_utf8(const char* gbk, char* utf8, int len) {
    if (utf8 == NULL) {
        return 0;
    }

    int out = 0;
    if (gbk != NULL) {
        int pos = 0;
        while (pos < len) {
            const uint8_t lead = (uint8_t)gbk[pos];

            if (lead <= 0x7F) {
                // ASCII passes straight through.
                utf8[out++] = (char)lead;
                pos += 1;
                continue;
            }

            if (pos + 1 >= len) {
                break;  // dangling lead byte at the end of the input
            }

            const uint8_t trail = (uint8_t)gbk[pos + 1];
            const uint16_t code = gbk_to_unicode(lead, trail);
            out += unicode_to_utf8(code, (uint8_t*)&utf8[out]);
            pos += 2;
        }
    }

    utf8[out] = '\0';
    return out;
}
|
||
|
||
/**
 * @brief Convert a GBK-encoded file name to UTF-8 without overrunning the
 *        destination buffer.
 *
 * max_len is interpreted as the total capacity of utf8_filename in bytes,
 * terminator included. The input is walked once to find how many leading
 * bytes can be converted safely, budgeting 1 output byte per ASCII byte and
 * 3 per two-byte pair (the most unicode_to_utf8() can emit); only that prefix
 * is handed to gbk_to_utf8(), which itself performs no bounds checking. A
 * name that does not fit is truncated at a character boundary.
 *
 * @param filename      NUL-terminated GBK file name; NULL yields an empty
 *                      result.
 * @param utf8_filename Destination buffer; left untouched when NULL or when
 *                      max_len <= 0.
 * @param max_len       Capacity of utf8_filename in bytes.
 */
static inline void process_filename(const char* filename, char* utf8_filename, int max_len) {
    if (utf8_filename == NULL || max_len <= 0) {
        return;
    }
    utf8_filename[0] = '\0';
    if (filename == NULL) {
        return;
    }

    const size_t in_len = strlen(filename);
    const size_t budget = (size_t)max_len - 1;  // keep one byte for the terminator
    size_t used = 0;  // worst-case output bytes for the accepted prefix
    size_t take = 0;  // input bytes accepted so far

    while (take < in_len) {
        const int is_pair = ((uint8_t)filename[take] > 0x7F);
        if (is_pair && take + 1 >= in_len) {
            break;  // dangling lead byte; the converter would drop it anyway
        }

        const size_t cost = is_pair ? 3u : 1u;
        if (used + cost > budget) {
            break;  // next character might not fit
        }

        used += cost;
        take += is_pair ? 2u : 1u;
    }

    // take <= budget < max_len, so the narrowing conversion cannot overflow.
    gbk_to_utf8(filename, utf8_filename, (int)take);
}
|
||
|
||
#ifdef __cplusplus
|
||
}
|
||
#endif
|
||
|
||
#endif /* GBK_ENCODING_H */ |