python基础(6)文件IO操作 open函数返回一个文件对象 mode定义文件的读写模式 mode 作用 'r' 以

文件IO操作

open(file, mode='r', buffering=-1, encoding=None, errors=None, newline=None, closefd=True, opener=None)

open函数返回一个文件对象
mode定义文件的读写模式

mode	作用
'r'	以只读的模式打开文件，不能写
'w'	以只写的模式打开文件，不能读，会覆盖已有的内容
'a'	追加模式打开文件，只能写，追加内容
'x'	打开的文件必须是不存在的，文件存在报错,只写
'+'	补充其他模式缺少的操作，比如：'r+'可以读写文件
't'	以文本(字符)的方式打开文件，默认值
'b'	以二进制(字节)的方式打开文件

buffering定义缓冲策略，0是不使用缓冲(仅在二进制模式下有效)，1是使用line buffering(仅在文本模式下有效)，大于1是定义字节数的chunk buffer，如果没有定义(默认-1)，二进制文件使用固定大小的缓冲(大小参考io.DEFAULT_BUFFER_SIZE)，交互式的文本文件(isatty()为True)使用line buffering
encoding：定义字符编码，写入和读取的字符编码不同会报错
errors：当encode和decode错误时的处理，有兴趣的去看下

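newline控制universal newlines模式(仅在文本模式下有效)，取值可以是None、''、'\n'、'\r'、'\r\n'。读取时，newline=None(默认)会把\r、\r\n、\n都当做换行符，并统一转换成\n；写入时，newline=None会把\n转换成系统默认的换行符os.linesep。mode="U"是旧版本遗留的写法，已被废弃(3.11中移除)，根据官方文档，python3后默认就是universal newlines mode，所以一般可以不用管这个参数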
closefd:关闭文件对象是否关闭文件描述符，True就是关闭，False不关闭，默认关闭
opener:自定义的打开器，是一个可调用对象，接收(file, flags)2个参数，必须返回一个已打开的文件描述符，open函数会基于这个文件描述符创建文件对象

示例

# r: the file must already exist, otherwise open() raises FileNotFoundError
f = open("test")
# False: a file opened in r mode cannot be written to
print(f.writable())
print(f.read())
f.close()

# w: creates the file if it is missing, truncates it if it exists; not readable
f = open("test", mode="w")
print(f.readable())
print(f.write("3.1415926"))
f.close()  # close so the buffered text is flushed before the next open()

# a: creates the file if it is missing, otherwise appends after EOF; not readable
f = open("test", mode="a")
print(f.readable())
print(f.write("3.1415926"))
f.close()

# x: the file must NOT exist yet (FileExistsError otherwise); write-only
f = open("test1", mode="x", encoding="utf-8")
print(f.readable())
# Calling read() here would raise io.UnsupportedOperation, so write instead;
# these three characters are what the binary read below shows as bytes.
print(f.write("啊啊啊"))
f.close()

# b: byte mode; it cannot be used alone and must be combined with r, w, a or x.
# Bytes have no character encoding, so passing encoding= raises ValueError.
# test1 contains three '啊' characters.
f = open("test1", mode="rb")
print(f.readable())
print(f.read())
f.close()

# +: adds the missing read or write capability to another mode

# r+ adds writing to the read-only r mode
f = open("test", "r+")
f.close()
# w+ and a+ add reading to the write-only w and a modes
f = open("test", "w+")
f.close()
f = open("test", "a+")
f.close()

# Reading with a different encoding than the one used for writing may fail
f = open('test', mode='w', encoding="gbk")
f.write('兰博基尼')
f.close()

f1 = open('test', encoding='utf-8')
try:
    print(f1.read())
except UnicodeDecodeError as err:
    # The GBK bytes are not valid UTF-8; show the error instead of aborting
    print(err)
finally:
    # Close the file even when decoding fails
    f1.close()

文件指针

mode = r时，指针从第一个字符开始，mode = rb,指针从第一个字节开始
mode = w时，指针从第一个字符开始，mode = wb,指针从第一个字节开始
mode =a时，指针从EOF后的第一个字符开始,mode=ab，指针从EOF后的第一个字节开始

f = open("test")
print(f.tell())  # print the current pointer position (0 right after opening in r mode)
f.close()

read()

read(-1):默认是-1，从头读到尾
read(1):根据打开的模式,读取一个字符或一个字节

# Write four Chinese characters encoded as GBK (two bytes each)
f = open('test', mode='w', encoding="gbk")
f.write('兰博基尼')
print(f.tell())
f.close()

# Text mode: read(1) returns one character. The file was written as GBK, so it
# must be decoded as GBK too; relying on the platform default encoding can
# raise UnicodeDecodeError or return garbage.
f = open('test', mode='r', encoding="gbk")
print(f.read(1))
print(f.tell())  # in text mode tell() returns an opaque position number
f.close()

# Binary mode: read(1) returns exactly one byte and tell() counts bytes
f = open('test', mode='rb')
print(f.read(1))
print(f.tell())
f.close()

write()

write(str):根据打开的模式，写入字节或字符，并返回写入的字节或字符数
writelines(lines):写入字符串列表

filename = 'o:/test.txt'
f = open(filename, 'w+')
try:
    # writelines() does not add line separators; include '\n' where needed
    lines = ['abc', '123\n', 'magedu']
    # Equivalent to calling f.write(line) for every line in lines
    f.writelines(lines)
    f.seek(0)  # move the pointer back to the start before reading
    print(f.read())
finally:
    # Release the handle even if writing or reading fails
    f.close()

上下文管理

文件对象这种打开资源并一定要关闭的对象，为了保证其打开后一定关闭，为其提供了上下文支持

filename = 'test'
with open(filename) as f:
    print(1, f.closed)
    temp = 100
    try:
        # The file is opened in r mode, so write() raises
        # io.UnsupportedOperation, which is a subclass of OSError
        print(f.write('abcd'))
    except OSError:
        print("出错了")
# The file object is closed when the with block ends, exception or not
print(2, f.closed)
# with does not open a new scope, so temp is still visible here
print(temp)

with不像函数会开启作用域，在with中的temp变量可以在with外正常访问

文件的遍历

with open("test2", mode="w") as f:
    # The joined text is a single string, so write() is the right call;
    # writelines() would iterate over it one character at a time.
    f.write("\n".join(map(str, range(100, 105))))

with open("test2") as f:
    for line in f:  # a file object is iterable and yields one line at a time
        print(line, end="")

路径操作

from os import path

p = path.join("/etc/sysconfig", "network")
# p is a plain str
print(type(p), p)
# Whether the path exists
print(path.exists(p))
# Split into (dirname, basename)
print(path.split(p))
# Only meaningful on Windows. Every backslash must be escaped: the original
# "\t" in "\test1" was a tab character, not a path separator.
win_path = "E:\\test\\test1"
print(path.splitdrive(win_path))
# Directory part
print(path.dirname(p))
# File name part
print(path.basename(p))
# Absolute path of p
print(path.abspath(p))
# Absolute path of the current working directory (not of the current file)
print(path.abspath(""))
# True on Windows: the dirname of a drive root is the root itself
print("D:\\" == path.dirname("D:\\"))

# Print every ancestor directory of this file, stopping at the root,
# which is the only path equal to its own dirname
p1 = path.abspath(__file__)
print(p1)
while p1 != path.dirname(p1):
    p1 = path.dirname(p1)
    print(p1)

Path类

从3.4开始Python提供了pathlib模块，使用Path类操作目录更加方便

初始化

from pathlib import Path

# No arguments: the current directory "."
p = Path()
# Several segments, each possibly containing separators, are joined together
p1 = Path("a", "b/c", "d")
# A segment may itself be a Path object
p2 = Path("a", Path("b", "c/d"))
for shown in (p, p1, p2):
    print(shown)

拼接

操作符/是除法，但是可以拼接路径

path对象 / 字符串
字符串 / path对象
path对象 / path对象

from pathlib import Path

print(Path() / "a/b" / "c")
print("c" / Path() / "a/b")
# An absolute right-hand operand replaces everything to its left
print(Path() / Path("/ab/cd") / "e")
# This form is wrong: "/" is evaluated left to right, so "a" / "b" is
# str / str with no Path involved and raises TypeError. Catch it so the
# snippet reports the error instead of aborting.
try:
    print("a" / "b" / Path())
except TypeError as err:
    print(err)

分解

parts属性，会返回目录各部分的元组

from pathlib import Path
# parts: tuple with every component of the path
p = Path("a") / "b" / "c" / "d"
print(p.parts)

from pathlib import Path

p = Path('/data/mysql/install/mysql.tar.gz')
# parent: the directory that directly contains the path
print(p.parent)
# parents: iterable of all ancestors, nearest first
ancestors = list(p.parents)
print(ancestors)
for ancestor in ancestors:
    print(ancestor)

name：相当于basename
suffix：name的扩展名（最后一个后缀，例如.gz）
stem：name的名（不包含最后一个后缀）
name=stem+suffix
with_name()：替换name
with_suffix()：替换后缀
with_stem()：替换stem（name中不包含后缀的部分），3.9版本加入，低版本的python不能用

from pathlib import Path

p = Path('/data/mysql/install/mysql.tar.gz')
# name is the final path component
print(p.name)
# suffix is only the last extension
print(p.suffix)
# with_name() swaps the whole final component
renamed = p.with_name("redis")
print(renamed)
# with_suffix() appends a suffix when the name has none
print(renamed.with_suffix(".tar"))

全局方法

cwd():返回当前工作目录
home():返回当前用户的家目录。linux是/home/用户名（root用户是/root），windows是C:\Users\用户名
resolve():linux下，如果是软连接会解析到真正的文件
absolute()：获取文件的绝对路径

判断方法

exists() 目录或文件是否存在
is_dir() 是否是目录，目录存在返回True
is_file() 是否是普通文件，文件存在返回True
is_symlink() 是否是软链接
is_socket() 是否是socket文件
is_block_device() 是否是块设备
is_char_device() 是否是字符设备
is_absolute() 是否是绝对路径

其他操作

rmdir() 删除空目录。没有提供判断目录为空的方法
touch(mode=0o666, exist_ok=True) 创建一个文件
as_uri() 将路径返回成URI，例如'file:///etc/passwd'
mkdir(mode=0o777, parents=False, exist_ok=False) 创建目录。parents：是否创建父目录，True等同于mkdir -p；False时，父目录不存在，则抛出FileNotFoundError。exist_ok：在3.5版本加入，False时，路径存在，抛出FileExistsError；True时，FileExistsError被忽略
iterdir() 迭代当前目录，不递归

from pathlib import Path
p = Path('D:/a/b/c/d')
p.mkdir(parents=True, exist_ok=True)
(p / 'test').touch()
# All ancestor directories, nearest first
print(list(p.parents))
# Walk the topmost ancestor (the D: drive root on Windows). The last element is
# addressed with len() - 1 because parents only accepts negative indexes
# from Python 3.10 on.
for x in p.parents[len(p.parents) - 1].iterdir():
    if x.is_dir():
        print('dir =', x)
    elif x.is_file():
        print('file =', x)
    else:
        # Third branch of the if/elif chain. It was dedented to the for
        # statement, where it acted as a for-else that ran once after the loop.
        print('other =', x)

通配符

glob():通配给定的模式，返回生成器对象
rglob():通配给定的模式，递归目录，返回生成器对象

list(p.glob('test*'))  # entries directly under p whose name starts with "test"
list(p.glob('**/*.py'))  # recurse into all directories, same as rglob('*.py')
list(p.glob('**/*'))
g = p.rglob('*.py')  # generator, recursive
next(g, None)  # the default avoids StopIteration when nothing matches
list(p.rglob('*.???'))  # files whose extension has exactly 3 characters
# Names starting with a lowercase letter and having a 3-character extension;
# uses p like the other lines (p1 is not defined in this snippet)
list(p.rglob('[a-z]*.???'))

shutil模块

copyfileobj(fsrc, fdst, length=16*1024)

源码看起来很简单，fsrc和fdst是2个文件对象，fsrc可读，fdst可写

from shutil import *

# Both files are closed when the block ends, which also flushes fdst; without
# closing, the copied data may never reach dst_test.
with open("test") as fsrc, open("dst_test", "w") as fdst:
    copyfileobj(fsrc, fdst)

copyfile(src, dst, *, follow_symlinks=True)

从源码上看，src和dst是文件路径字符串。follow_symlinks=True：如果src是软链接，dst是复制软连接指向的文件，False则复制软链接
_samefile函数判断src和dst是否是同一个文件，是的话就会报错
然后获取src和dst的stat，相当于linux stat命令，如果是FIFO文件就会报错
follow_symlinks这个参数如果是True，src是链接文件，就会复制链接指向的源文件，为False，就会创建一个新的软链接dst，它指向src所指向的目标（而不是指向src本身）

copymode(src, dst, *, follow_symlinks=True)

代码很简单，如果src和dst都是链接文件，follow_symlinks=False，就会把src的权限复制给dst
看到chmod就知道获取src mode，然后复制给dst

copystat(src, dst, *, follow_symlinks=True):源码有点复杂，懒得看了，直接看官方文档解释

可以理解为在linux下执行的函数，复制stat值，比如mode(权限位)、atime、mtime、flags，注意ctime是无法被复制的
follow_symlinks=True，src为链接文件，复制指向的文件stat，为False，复制链接的stat

copy(src, dst, *, follow_symlinks=True)

dst可以是文件或目录，如果dst是目录，内部会拼接成dst/src的文件名
返回新创建的目标文件的路径

copy2(src, dst, *, follow_symlinks=True)

和copy函数类似，就是复制文件的元数据多点，用的copystat函数

copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,ignore_dangling_symlinks=False)

# Python 3.6 source
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,ignore_dangling_symlinks=False):
    """Recursively copy the directory tree at src into a new directory dst.

    Parameters:
        src: directory to copy from.
        dst: directory to create; os.makedirs(dst) is called without
            exist_ok, so dst must not already exist.
        symlinks: if true, symbolic links are recreated as links in the
            copy; if false, what the link points to is copied instead.
        ignore: optional callable invoked as ignore(src, names) for each
            directory visited; entries whose name is in the returned
            collection are skipped.
        copy_function: callable invoked as copy_function(srcname, dstname)
            for every non-directory entry.
        ignore_dangling_symlinks: when symlinks is false, silently skip
            links whose target does not exist.

    Returns dst. Per-entry failures are collected and raised together as a
    single Error (a name defined outside this snippet) after the whole
    tree has been processed.
    """
    # "First place" in the walkthrough below: list the entries of src and
    # ask the user-supplied ignore callable which of them to skip.
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        # "Second place" in the walkthrough: build the full source and
        # destination paths for this entry.
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                linkto = os.readlink(srcname)
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    os.symlink(linkto, dstname)
                    copystat(srcname, dstname, follow_symlinks=not symlinks)
                else:
                    # ignore dangling symlink if the flag is on
                    if not os.path.exists(linkto) and ignore_dangling_symlinks:
                        continue
                    # otherwise let the copy occurs. copy2 will raise an error
                    if os.path.isdir(srcname):
                        # NOTE: ignore_dangling_symlinks is not forwarded here,
                        # so the nested call uses its default of False.
                        copytree(srcname, dstname, symlinks, ignore,
                                 copy_function)
                    else:
                        copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                # Presumably the "third place" in the walkthrough: recurse
                # into a real sub-directory (ignore_dangling_symlinks is not
                # forwarded here either).
                copytree(srcname, dstname, symlinks, ignore, copy_function)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    # Copy the directory's own metadata once its contents are in place.
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows
        if getattr(why, 'winerror', None) is None:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst

从函数定义来看，可以看出这是个高阶函数，ignore和copy_function参数都是传入函数对象，可以自定义，copy_function默认使用copy2函数进行拷贝，ignore定义函数接收src,names2个参数，返回一个set集合，拷贝的时候会忽略这个集合中的文件和目录
symlinks=True，将会把链接文件拷贝成链接文件，symlinks=False，将会把链接指向的文件拷贝，ignore_dangling_symlinks=True，在symlinks=False时会跳过指向的目标不存在的悬空链接(dangling symlink)，而不是报错
该函数是递归拷贝目录

首先看第一处，names是src下的文件，相当于ls命令，然后调用了自定义的ignore方法，根据下面if name in ignored_names代码，自定义的ignore函数应该返回一个容器，当文件名在其中时就会continue
第二处后面再说
第三处也就是递归复制的代码

实战

选择一个已存在的目录作为当前工作目录，在其下创建a/b/c/d这样的子目录结构并在这些子目录的不同
层级生成50个普通文件，要求文件名由随机4个小写字母构成。
将a目录下所有内容复制到当前工作目录dst目录下去，要求复制的普通文件的文件名必须是x、y、z开
头。
举例，假设工作目录是/tmp，构建的目录结构是/tmp/a/b/c/d。在a、b、c、d目录中放入随机生成的文
件，这些文件的名称也是随机生成的。最终把a目录下所有的目录也就是b、c、d目录，和文件名是
x、y、z开头的文件复制到dst目录下。

import random
from functools import reduce
from pathlib import Path
from shutil import *

# The 26 lowercase ASCII letters used to build random file names
alphabet = [chr(x) for x in range(ord('a'), ord('z') + 1)]

# "e" is only a placeholder leaf, so that path.parents yields
# a/b/c/d, a/b/c, a/b, a and finally "."
path = Path("a/b/c/d") / "e"

path.parent.mkdir(parents=True, exist_ok=True)

# Every directory level of the tree, deepest first, without the trailing "."
directories = list(path.parents)[:-1]

# The task asks for 50 files in total spread over the levels, so each file
# goes into a randomly chosen directory. A plain loop is used because touch()
# is a side effect, and "".join builds the 4-letter name.
for _ in range(50):
    name = "".join(random.choices(alphabet, k=4))
    (random.choice(directories) / name).touch(exist_ok=True)

# ignore(src, names) returns the names to skip: regular files that do not start
# with x, y or z. Directories are never skipped, so copytree still recurses into
# them. directories[-1] is the top-level directory "a".
copytree(directories[-1], "./dst", ignore=lambda src, names: set(
    filter(
        lambda x: not x.startswith(('x', 'y', 'z')) and not (Path(src) / x).is_dir(),
        names)))

上面比较难的就是copytree函数中ignore函数，从前面的源码中来看，ignore函数需要接受2个参数，一个src和src下的所有文件组成的列表names，所以定义lambda src,names: set()的结构，因为需要复制以x，y，z开头的文件，所以使用filter的高阶函数，所以又变成lambda src,names: set(filter(,names))，这里会有一个坑，如果这src下有目录也会被当文件，所以除了要判断是否以xyz开头之外还需要判断是否为目录，目录会交给上图第三处递归处理。上图第二处srcname是把路径和文件名联合起来传入递归的copytree函数，(Path(src) / x).is_dir()所以可以用这个式子来判断是不是目录
这玩意还是得自己看了源码，思考后才能领会，很难去描述过程