爱包子的鱼

Open Source, Open Mind,
Open Sight, Open Future!

文本编码转换python脚本

《30天自制操作系统》附带的源码使用了CP932(日文 Shift_JIS 的扩展)字符集,用普通的编辑器打开时,注释都是乱码。以下脚本可以将指定目录下特定后缀的CP932文本文件转为UTF-8编码。注意:脚本会直接覆盖原文件,运行前请先备份。

import os


def conv(file_path):
    """Convert the file at *file_path* from CP932 to UTF-8, in place.

    The file is processed as raw bytes, so its line endings (``\\n`` or
    ``\\r\\n``) are kept exactly as they were; only the character encoding
    changes.

    The file is left untouched when:

    * it is empty;
    * its content is already valid UTF-8 (this includes pure ASCII, for
      which the conversion would be a no-op). Skipping these makes the
      function safe to run repeatedly: re-decoding UTF-8 bytes as CP932
      frequently "succeeds" and would write mojibake;
    * it cannot be decoded as CP932, in which case the path and the
      decoding error are printed.

    Args:
        file_path: Path of the file to convert.

    Returns:
        None.

    Raises:
        OSError: If the file cannot be opened, read or written.
    """
    with open(file_path, "rb") as f:
        raw = f.read()
    if not raw:
        return

    # Already UTF-8 (or plain ASCII): nothing to do, and converting again
    # would corrupt the text.
    try:
        raw.decode("utf-8")
    except UnicodeDecodeError:
        pass
    else:
        return

    try:
        text = raw.decode("cp932")
    except UnicodeDecodeError as e:
        print(file_path)
        print(e)
        return

    # Binary mode avoids any platform-dependent newline translation.
    with open(file_path, "wb") as nf:
        nf.write(text.encode("utf-8"))


# File-name suffixes selecting the files to convert. A broader selection,
# kept here for reference: ['.bat', '.nas', '.txt', '.h', '.c'] together
# with the exact file name 'Makefile'.
exts = [".txt", ".h", ".c"]
# Exact file names to convert regardless of suffix (none at the moment).
full_names = []


def need_convert(file_name):
    """Return True when *file_name* is selected for conversion.

    Names starting with a dot are never selected. Any other name is
    selected when it ends with one of the suffixes in the module-level
    ``exts`` list or is equal to an entry of the module-level
    ``full_names`` list.
    """
    if file_name.startswith('.'):
        return False
    # str.endswith accepts a tuple and is True if any suffix matches.
    return file_name.endswith(tuple(exts)) or file_name in full_names


if __name__ == '__main__':
    import sys

    # Directory tree to convert: the first command-line argument when one
    # is given, otherwise the original hard-coded location.
    work_dir = sys.argv[1] if len(sys.argv) > 1 else '/xx/30天自制操作系统光盘'
    # os.walk yields nothing for a missing directory, so without this check
    # a wrong path would look like a successful run.
    if not os.path.isdir(work_dir):
        sys.exit("not a directory: " + work_dir)

    # NOTE: followlinks=True also descends into symlinked directories; a
    # link pointing back to a parent directory would make the walk loop.
    for parent, _dir_names, filenames in os.walk(work_dir, followlinks=True):
        for filename in filenames:
            if need_convert(filename):
                conv(os.path.join(parent, filename))


标题:文本编码转换python脚本
作者:JoeysCat
地址:http://oook.fun/articles/2021/02/04/1612370834029.html

0 浏览