- Item16:使用get而不是in和KeyError來處理缺失的字典鍵
# 比如有三明治店,需要添加新的面包:
# Vote counts for the breads the sandwich shop already tracks.
counters = {
    'pumpernickel': 2,
    'sourdough': 1,
}

# Approach 1: `in` check. The key is touched three times
# (membership test, read, assignment).
key = 'wheat'
if key in counters:
    count = counters[key]
else:
    count = 0
counters[key] = count + 1

# Approach 2: try/except. Saves the membership test.
try:
    count = counters[key]
except KeyError:
    count = 0
counters[key] = count + 1

# Approach 3: dict.get with a default gives the same result in one call.
count = counters.get(key, 0)
counters[key] = count + 1
再次體會從in到try-except的過程:
# Variant 1: make sure the key exists, then increment.
if key not in counters:
    counters[key] = 0
counters[key] += 1

# Variant 2: branch on membership and either increment or initialise.
if key in counters:
    counters[key] += 1
else:
    counters[key] = 1

# Variant 3: try the increment and initialise on KeyError.
try:
    counters[key] += 1
except KeyError:
    counters[key] = 1
如果是列表,
# Each bread maps to the list of people who voted for it.
votes = {
    'baguette': ['Bob', 'Alice'],
    'ciabatta': ['Coco', 'Deb'],
}
key = 'brioche'
who = 'Elmer'

# `in` approach
if key in votes:
    names = votes[key]
else:
    # Chained assignment binds the SAME new list to both votes[key] and
    # names, so appending to names below also updates the dictionary
    # without another lookup.
    votes[key] = names = []
names.append(who)
print(votes)
>>>
{'baguette': ['Bob', 'Alice'],
'ciabatta': ['Coco', 'Deb'],
'brioche': ['Elmer']}
# try/except approach
try:
    names = votes[key]
except KeyError:
    votes[key] = names = []
names.append(who)

# dict.get approach
names = votes.get(key)
if names is None:
    votes[key] = names = []
names.append(who)

# dict.get + walrus approach (see Item 10)
if (names := votes.get(key)) is None:
    votes[key] = names = []
names.append(who)
也可以用dict的setdefault方法達到目的。但是setdefault的命名不夠直觀:無key的時候,它把第二個參數設為該key的value并返回它;有key的時候,直接返回當前key對應的value(不會覆蓋)。
names = votes.setdefault(key, [])
names.append(who)
不僅如此,傳入的默認value不會被復制(既不是深拷貝,也不是淺拷貝),字典里存的就是同一個對象的引用,之后修改這個對象也會影響字典中的值:
# Demonstrates that dict.setdefault stores the very object passed in.
data = {}
key = 'foo'
value = []
# The key is missing, so `value` itself becomes data['foo'].
data.setdefault(key, value)
print('Before:', data)
# Mutating `value` afterwards is visible through the dictionary as well.
value.append('hello')
print('After: ', data)
>>>
Before: {'foo': []}
After: {'foo': ['hello']}
如果回到最開始的例子,用setdefault來實現。其中,setdefault對缺失的key,需要做兩次賦值操作,比較多余。
count = counters.setdefault(key, 0)
counters[key] = count + 1
替代的方式,可以使用下面的item17。
- Item17:用defaultdict而不是setdefault來處理內部狀態的缺失項
比如現在有一個字典,記錄去過的國家對應的城市。
# Countries visited, each mapped to the set of cities seen there.
visits = {
    'Mexico': {'Tulum', 'Puerto Vallarta'},
    'Japan': {'Hakone'},
}

visits.setdefault('France', set()).add('Arles')  # Short

if (japan := visits.get('Japan')) is None:       # Long
    visits['Japan'] = japan = set()
japan.add('Kyoto')

print(visits)
>>>
{'Mexico': {'Tulum', 'Puerto Vallarta'},
'Japan': {'Kyoto', 'Hakone'},
'France': {'Arles'}}
可以嘗試用類來封裝這個復雜的過程。但是dict.setdefault方法的命名依然讓人迷惑。
class Visits:
    """Track visited cities per country using a plain dict and setdefault."""

    def __init__(self):
        self.data = {}

    def add(self, country, city):
        """Record that `city` in `country` was visited."""
        # A new set() is built on every call, even when the country
        # already has an entry and the new set is thrown away.
        city_set = self.data.setdefault(country, set())
        city_set.add(city)
這時用collections的defaultdict來指定默認的value,可以使代碼更加清晰可讀。
from collections import defaultdict
class Visits:
    """Track visited cities per country using a defaultdict of sets."""

    def __init__(self):
        # Missing countries get an empty set on first access.
        self.data = defaultdict(set)

    def add(self, country, city):
        """Record that `city` in `country` was visited."""
        self.data[country].add(city)
visits = Visits()
visits.add('England', 'Bath')
visits.add('England', 'London')
print(visits.data)
>>>
defaultdict(<class 'set'>, {'England': {'London', 'Bath'}})
- Item18:知道如何用missing構建基于鍵的默認值
盡管get和defaultdict可以減少函數調用,有時候setdefault和defaultdict都無法起效。
比如,現在有一個文件及句柄的一個字典。如果對應句柄有的話,直接讀文件。沒有的話,嘗試打開文件,并且存句柄:
# Cache of already opened file handles, keyed by path.
pictures = {}
path = 'profile_1234.png'

# Walrus approach: open() is only called when the handle is not cached.
if (handle := pictures.get(path)) is None:
    try:
        handle = open(path, 'a+b')
    except OSError:
        print(f'Failed to open path {path}')
        raise
    else:
        # Only cache the handle once it was opened successfully.
        pictures[path] = handle

handle.seek(0)
image_data = handle.read()
# Alternative using setdefault. open() runs on every execution, even when
# the path is already cached, which is wasteful.
try:
    handle = pictures.setdefault(path, open(path, 'a+b'))
except OSError:
    print(f'Failed to open path {path}')
    raise
else:
    handle.seek(0)
    image_data = handle.read()
from collections import defaultdict
def open_picture(profile_path):
    """Open `profile_path` in binary append/read mode and return the handle.

    Raises:
        OSError: If the file cannot be opened. A message is printed
            before the exception is re-raised.
    """
    try:
        return open(profile_path, 'a+b')
    except OSError:
        print(f'Failed to open path {profile_path}')
        raise
pictures = defaultdict(open_picture)
handle = pictures[path]
handle.seek(0)
image_data = handle.read()
>>>
Traceback ...
TypeError: open_picture() missing 1 required positional
argument: 'profile_path'
# 問題就在defaultdict默認接收的函數,是無參的。幸運的是,可以繼承dict,然后實現__missing__方法達到效果。
class Pictures(dict):
    """Dict of open file handles that opens missing paths on first access."""

    def __missing__(self, key):
        """Open the file named by `key`, cache the handle and return it."""
        # Unlike a defaultdict factory, __missing__ receives the key, so
        # the default value can depend on it.
        value = open_picture(key)
        self[key] = value
        return value
pictures = Pictures()
handle = pictures[path]
handle.seek(0)
image_data = handle.read()
函數篇:
- Item19:當拆包多個返回值時,不要賦值給超過三個變量(返回一個class或者namedtuple實例)
def get_stats(numbers):
    """Return the (minimum, maximum) of `numbers` as a two-item tuple."""
    minimum = min(numbers)
    maximum = max(numbers)
    return minimum, maximum
lengths = [63, 73, 72, 60, 67, 66, 71, 61, 72, 70]
minimum, maximum = get_stats(lengths) # Two return values
print(f'Min: {minimum}, Max: {maximum}')
>>>
Min: 60, Max: 73
類似的賦值行為:
first, second = 1, 2
assert first == 1
assert second == 2
def my_function():
    """Return two values, which Python packs into a single tuple."""
    return 1, 2
first, second = my_function()
assert first == 1
assert second == 2
def get_avg_ratio(numbers):
    """Return each number divided by the average, sorted largest first."""
    average = sum(numbers) / len(numbers)
    scaled = [x / average for x in numbers]
    scaled.sort(reverse=True)
    return scaled
longest, *middle, shortest = get_avg_ratio(lengths)
print(f'Longest: {longest:>4.0%}')
print(f'Shortest: {shortest:>4.0%}')
>>>
Longest: 108%
Shortest: 89%
語句過長,且容易產生問題,比如變量命名顛倒等。(盡量減少返回的變量)
def get_stats(numbers):
    """Return (minimum, maximum, average, median, count) for `numbers`.

    Five positional results are easy to unpack in the wrong order; this
    is the pattern the item advises against.
    """
    minimum = min(numbers)
    maximum = max(numbers)

    count = len(numbers)
    average = sum(numbers) / count

    sorted_numbers = sorted(numbers)
    middle = count // 2
    if count % 2 == 0:
        # Even count: the median is the mean of the two middle values.
        lower = sorted_numbers[middle - 1]
        upper = sorted_numbers[middle]
        median = (lower + upper) / 2
    else:
        median = sorted_numbers[middle]

    return minimum, maximum, average, median, count
minimum, maximum, average, median, count = get_stats(lengths)
print(f'Min: {minimum}, Max: {maximum}')
print(f'Average: {average}, Median: {median}, Count {count}')
>>>
Min: 60, Max: 73
Average: 67.5, Median: 68.5, Count 10
- Item20:拋出異常比返回None要好
假如現在要一個數除以另一個數:
def careful_divide(a, b):
    """Return a / b, or None when b is zero.

    Callers must test the result with `is None`; a plain truthiness
    test also treats a legitimate result of 0 as a failure.
    """
    try:
        return a / b
    except ZeroDivisionError:
        return None
那么實際是根據是否為None來處理異常:
x, y = 1, 0
result = careful_divide(x, y)
if result is None:
print('Invalid inputs')
編程者此時可能會錯誤地以為是返回False來處理,那么此時當結果為0的時候,運行結果錯誤:
x, y = 0, 5
result = careful_divide(x, y)
if not result:
print('Invalid inputs') # This runs! But shouldn't
>>>
Invalid inputs
此時,可以拆成兩部分返回,一部分是是否正常,另一部分是返回值:
def careful_divide(a, b):
    """Return (True, a / b) on success, or (False, None) when b is zero."""
    try:
        return True, a / b
    except ZeroDivisionError:
        return False, None
那么這樣就可以用拆包的方式來進行:
success, result = careful_divide(x, y)
if not success:
print('Invalid inputs')
_, result = careful_divide(x, y)
if not result:
print('Invalid inputs')
但是用戶容易又被None和0的問題困惑。
此時不要返回None,而是拋出異常:
def careful_divide(a, b):
    """Return a / b.

    Raises:
        ValueError: When b is zero. The original ZeroDivisionError is
            attached as the cause.
    """
    try:
        return a / b
    except ZeroDivisionError as e:
        raise ValueError('Invalid inputs') from e
x, y = 5, 2
try:
result = careful_divide(x, y)
except ValueError:
print('Invalid inputs')
else:
print('Result is %.1f' % result)
>>>
Result is 2.5
最好加上返回的類別,然后在文檔中表明拋出什么異常。這樣,用戶就可以處理異常。整理如下:
def careful_divide(a: float, b: float) -> float:
    """Divides a by b.

    Raises:
        ValueError: When the inputs cannot be divided.
    """
    try:
        return a / b
    except ZeroDivisionError as e:
        # Chain the original error so the root cause stays in the traceback.
        raise ValueError('Invalid inputs') from e
- Item21:知道閉包是如何跟變量作用域交互的
當需要某個信息(2,3,5,7)的最優先級排序時,可以通過以下方式實現:
def sort_priority(values, group):
    """Sort `values` in place, placing members of `group` first."""
    def helper(x):
        # Tuples compare element by element: group membership decides
        # first, then the value itself.
        if x in group:
            return (0, x)
        return (1, x)
    values.sort(key=helper)
numbers = [8, 3, 1, 2, 5, 4, 7, 6]
group = {5, 3, 2, 7}
sort_priority(numbers, group)
print(numbers)
>>>
[2, 3, 5, 7, 1, 4, 6, 8]
def sort_priority2(numbers, group):
    """Sort `numbers` in place with `group` first; always returns False.

    Deliberately broken example: the assignment inside `helper` creates
    a new local variable, so the outer `found` never changes.
    """
    found = False
    def helper(x):
        if x in group:
            found = True  # Seems simple
            return (0, x)
        return (1, x)
    numbers.sort(key=helper)
    return found
found = sort_priority2(numbers, group)
print('Found:', found)
print(numbers)
>>>
Found: False
[2, 3, 5, 7, 1, 4, 6, 8]
主要利用了閉包和作用域。此處的found其實是helper里面的found,而不是sort_priority2的found。
def sort_priority2(numbers, group):
    """Same broken example, annotated with the scope of each `found`."""
    found = False         # Scope: 'sort_priority2'
    def helper(x):
        if x in group:
            found = True  # Scope: 'helper' -- Bad!
            return (0, x)
        return (1, x)
    numbers.sort(key=helper)
    return found
可以利用nonlocal關鍵字,注意:nonlocal關鍵字不會向上遍歷到模塊級的作用域。(global可以)
def sort_priority3(numbers, group):
    """Sort `numbers` in place with `group` first.

    Returns:
        True if at least one number was in `group`, otherwise False.
    """
    found = False  # Scope: 'sort_priority3'
    def helper(x):
        nonlocal found  # Added: assign to the enclosing function's variable
        if x in group:
            found = True
            return (0, x)
        return (1, x)
    numbers.sort(key=helper)
    return found
但是nonlocal可能會由于嵌套導致代碼變得復雜和難以讀懂,此時可以用類來包裹。
class Sorter:
    """Callable sort key that puts `group` members first and records hits."""

    def __init__(self, group):
        self.group = group
        self.found = False

    def __call__(self, x):
        """Return the sort key for `x`, setting `found` if x is in the group."""
        if x in self.group:
            self.found = True
            return (0, x)
        return (1, x)
sorter = Sorter(group)
numbers.sort(key=sorter)
assert sorter.found is True
- Item22:用變量位置參數減少視覺噪聲
假設我想打印一段message還有一段數值(有則打印,沒有就不打印)。
def log(message, values):
    """Print `message`, followed by the comma-joined `values` if any."""
    if not values:
        print(message)
    else:
        values_str = ', '.join(str(x) for x in values)
        print(f'{message}: {values_str}')
log('My numbers are', [1, 2])
log('Hi there', [])
>>>
My numbers are: 1, 2
Hi there
當沒有數值的時候,需要傳遞空列表,比較繁瑣。最好應該不傳遞數值。
此時可以使用*來處理:
def log(message, *values):  # The only difference
    """Print `message`, followed by any extra positional `values`."""
    if not values:
        print(message)
    else:
        values_str = ', '.join(str(x) for x in values)
        print(f'{message}: {values_str}')
log('My numbers are', 1, 2)
log('Hi there') # Much better
>>>
My numbers are: 1, 2
Hi there
還記得*表達式嗎(Item-13)?它會轉換成元組然后再傳遞給函數。
favorites = [7, 33, 99]
log('Favorite colors', *favorites)
>>>
Favorite colors: 7, 33, 99
所以對于*args這種情況,應該傳遞少的參數量。如果傳遞太大的generator會內存溢出。
def my_generator():
    """Yield the integers 0 through 9."""
    for i in range(10):
        yield i


def my_func(*args):
    """Print the tuple of positional arguments received."""
    print(args)


it = my_generator()
# Unpacking with * consumes the whole generator into a tuple first.
my_func(*it)
>>>
(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
使用*args還有另一個問題是:盡量應該對應上參數和位置,如果對不上就會出錯。(比較難以檢查出來的bug)
def log(sequence, message, *values):
    """Print 'sequence - message', followed by any extra `values`.

    Adding `sequence` in front silently shifts the arguments of callers
    written for the older (message, *values) signature.
    """
    if not values:
        print(f'{sequence} - {message}')
    else:
        values_str = ', '.join(str(x) for x in values)
        print(f'{sequence} - {message}: {values_str}')
log(1, 'Favorites', 7, 33) # New with *args OK
log(1, 'Hi there') # New message only OK
log('Favorite numbers', 7, 33) # Old usage breaks
>>>
1 - Favorites: 7, 33
1 - Hi there
Favorite numbers - 7: 33
- Item23:用Keyword參數來提供可選的操作
可以按順序傳遞函數的參數。
def remainder(number, divisor):
    """Return `number` modulo `divisor`."""
    return number % divisor
assert remainder(20, 7) == 6
也可以以(部分)亂序地方式來指定keyword達到相同的功能
remainder(20, 7)
remainder(20, divisor=7)
remainder(number=20, divisor=7)
remainder(divisor=7, number=20)
位置參數必須寫在keyword參數之前(并且每個參數只能指定一次),下面是把位置參數寫在keyword參數之后導致的error
remainder(number=20, 7)
>>>
Traceback ...
SyntaxError: positional argument follows keyword argument
使用**操作符可以以字典的形式傳遞keywords和對應的values
my_kwargs = {
'number': 20,
'divisor': 7,
}
assert remainder(**my_kwargs) == 6
當然是可以混用的:
my_kwargs = {
'divisor': 7,
}
assert remainder(number=20, **my_kwargs) == 6
my_kwargs = {
'number': 20,
}
other_kwargs = {
'divisor': 7,
}
assert remainder(**my_kwargs, **other_kwargs) == 6
keywords的靈活性,可以比較明顯地在參數賦值的時候,增加可讀性。
def print_parameters(**kwargs):
    """Print every keyword argument as a 'name = value' line."""
    for key, value in kwargs.items():
        print(f'{key} = {value}')
print_parameters(alpha=1.5, beta=9, gamma=4)
>>>
alpha = 1.5
beta = 9
gamma = 4
假如以水桶流入水作為例子:
def flow_rate(weight_diff, time_diff):
    """Return the weight change per unit of time."""
    return weight_diff / time_diff
weight_diff = 0.5
time_diff = 3
flow = flow_rate(weight_diff, time_diff)
print(f'{flow:.3} kg per second')
>>>
0.167 kg per second
此時需要知道某時間長度能有多重的水流,如下:
def flow_rate(weight_diff, time_diff, period):
    """Return the weight change per `period` units of time."""
    return (weight_diff / time_diff) * period
引入了period,最簡單的是一秒的輸入:
def flow_rate(weight_diff, time_diff, period=1):
    """Return the weight change per `period` units of time.

    `period` defaults to 1, so existing two-argument calls keep working.
    """
    return (weight_diff / time_diff) * period
此時,period變得可選了。
flow_per_second = flow_rate(weight_diff, time_diff)
flow_per_hour = flow_rate(weight_diff, time_diff,
period=3600)
如果想控制重量的單位,不是用kg作為單位的時候,可以在后面擴展新的參數名:
def flow_rate(weight_diff, time_diff, period=1, units_per_kg=1):
    """Return the flow per `period`, scaling the weight by `units_per_kg`.

    Both optional parameters default to 1, which reproduces the
    behaviour of the two-argument version.
    """
    return ((weight_diff * units_per_kg) / time_diff) * period
這樣做,是在不影響原有函數的操作下進行的擴展(因為默認的參數已經指定了相同的行為。)
pounds_per_hour = flow_rate(weight_diff, time_diff, period=3600, units_per_kg=2.2)
最好還是指定以參數名的方式來調用函數,這樣顯得比較可讀和清晰。
- Item24:用None和Docstrings來指定動態的默認參數
當我們想要默認打印當前時間的日志時,可能會以如下程序操作:
from time import sleep
from datetime import datetime
def log(message, when=datetime.now()):
    """Print `message` prefixed with the timestamp `when`.

    Deliberately broken example: the default is evaluated once, when the
    function is defined, so every call without `when` prints the same
    timestamp.
    """
    print(f'{when}: {message}')
log('Hi there!')
sleep(0.1)
log('Hello again!')
>>>
2019-07-06 14:06:15.120124: Hi there!
2019-07-06 14:06:15.120124: Hello again!
但是,并不像預期一樣,每次調用都調用一次now(),而是最開始加載模塊的時候只調用了一次。
而是應該,默認參數設置為None,然后加一段注釋,并且在程序中加以控制。
def log(message, when=None):
    """Log a message with a timestamp.

    Args:
        message: Message to print.
        when: datetime of when the message occurred.
            Defaults to the present time.
    """
    if when is None:
        # Evaluated on every call, unlike a default in the signature.
        when = datetime.now()
    print(f'{when}: {message}')
log('Hi there!')
sleep(0.1)
log('Hello again!')
>>>
2019-07-06 14:06:15.222419: Hi there!
2019-07-06 14:06:15.322555: Hello again!
同樣地,下面的例子是類似的:
import json
def decode(data, default={}):
    """Parse `data` as JSON, returning `default` when it is invalid.

    Deliberately broken example: the `{}` default is created once, so
    every failed call returns the same shared dictionary.
    """
    try:
        return json.loads(data)
    except ValueError:
        return default
foo = decode('bad data')
foo['stuff'] = 5
bar = decode('also bad')
bar['meep'] = 1
print('Foo:', foo)
print('Bar:', bar)
>>>
Foo: {'stuff': 5, 'meep': 1}
Bar: {'stuff': 5, 'meep': 1}
default每次都是指定的那個{},所以在返回之后的操作時,都是同一個實例。
可以套一個Optional來指定類型注解可能為None或者datetime。
from typing import Optional
def log_typed(message: str,
              when: Optional[datetime] = None) -> None:
    """Log a message with a timestamp.

    Args:
        message: Message to print.
        when: datetime of when the message occurred.
            Defaults to the present time.
    """
    if when is None:
        when = datetime.now()
    print(f'{when}: {message}')
- Item25:用Keyword-Only和Positional-Only來加強表述清晰。
如果現在有一個實現安全除法的操作:
def safe_division(number, divisor,
                  ignore_overflow,
                  ignore_zero_division):
    """Divide `number` by `divisor`, optionally suppressing two errors.

    Args:
        ignore_overflow: If true, return 0 instead of raising
            OverflowError.
        ignore_zero_division: If true, return float('inf') instead of
            raising ZeroDivisionError.
    """
    try:
        return number / divisor
    except OverflowError:
        if ignore_overflow:
            return 0
        else:
            raise
    except ZeroDivisionError:
        if ignore_zero_division:
            return float('inf')
        else:
            raise
每次調用需要指定參數對應的數值,顯得可讀性較差。
result = safe_division(1.0, 10**500, True, False)
print(result)
>>>
0
result = safe_division(1.0, 0, False, True)
print(result)
>>>
inf
使用默認的參數,默認不開啟選項,然后在使用到哪一個特性的時候,就開啟哪個特性。
def safe_division_b(number, divisor,
                    ignore_overflow=False,        # Changed
                    ignore_zero_division=False):  # Changed
    """Divide `number` by `divisor`; both ignore flags default to off.

    Named `safe_division_b` because that is the name the calls that
    follow this definition use.

    Args:
        ignore_overflow: If true, return 0 instead of raising
            OverflowError.
        ignore_zero_division: If true, return float('inf') instead of
            raising ZeroDivisionError.
    """
    try:
        return number / divisor
    except OverflowError:
        if ignore_overflow:
            return 0
        else:
            raise
    except ZeroDivisionError:
        if ignore_zero_division:
            return float('inf')
        else:
            raise
分別開啟忽略溢出、忽略除數為0的情況,可讀性有所提升。
result = safe_division_b(1.0, 10**500, ignore_overflow=True)
print(result)
result = safe_division_b(1.0, 0, ignore_zero_division=True)
print(result)
>>>
0
inf
但是,依然可以用這種順序的參數進行傳遞:
assert safe_division_b(1.0, 10**500, True, False) == 0
可以多加一個“*”號,來強制“*”后的參數使用keyword參數名進行調用。
def safe_division_c(number, divisor, *,  # Changed
                    ignore_overflow=False,
                    ignore_zero_division=False):
    """Divide `number` by `divisor`; the ignore flags are keyword-only.

    The bare `*` ends the positional parameters, so the two flags can
    only be passed by name.
    """
    try:
        return number / divisor
    except OverflowError:
        if ignore_overflow:
            return 0
        else:
            raise
    except ZeroDivisionError:
        if ignore_zero_division:
            return float('inf')
        else:
            raise
safe_division_c(1.0, 10**500, True, False)
>>>
Traceback ...
TypeError: safe_division_c() takes 2 positional arguments but
4 were given
可以看到,使用keyword來進行參數的命名依然是成功的。
result = safe_division_c(1.0, 0, ignore_zero_division=True)
assert result == float('inf')
try:
result = safe_division_c(1.0, 0)
except ZeroDivisionError:
pass # Expected
然而,亂序填入參數的問題還是沒有解決:
assert safe_division_c(number=2, divisor=5) == 0.4
assert safe_division_c(divisor=5, number=2) == 0.4
assert safe_division_c(2, divisor=5) == 0.4
同時,如果調用方依賴的是參數名而不是位置,那么一旦修改了函數的參數名,
調用就會出錯:
def safe_division_c(numerator, denominator, *,  # Changed
                    ignore_overflow=False,
                    ignore_zero_division=False):
    """Divide `numerator` by `denominator`; the ignore flags are keyword-only.

    Renaming the first two parameters breaks every caller that passed
    them by their old keyword names (`number`, `divisor`).
    """
    try:
        return numerator / denominator
    except OverflowError:
        if ignore_overflow:
            return 0
        else:
            raise
    except ZeroDivisionError:
        if ignore_zero_division:
            return float('inf')
        else:
            raise
safe_division_c(number=2, divisor=5)
>>>
Traceback ...
TypeError: safe_division_c() got an unexpected keyword
argument 'number'
python3.8引入了“/”號特性。可以只指定“/”前的參數為使用位置進行填寫的參數。
def safe_division_d(numerator, denominator, /, *,  # Changed
                    ignore_overflow=False,
                    ignore_zero_division=False):
    """Divide `numerator` by `denominator`.

    Parameters before `/` are positional-only (Python 3.8+); the flags
    after `*` are keyword-only.
    """
    try:
        return numerator / denominator
    except OverflowError:
        if ignore_overflow:
            return 0
        else:
            raise
    except ZeroDivisionError:
        if ignore_zero_division:
            return float('inf')
        else:
            raise
assert safe_division_d(2, 5) == 0.4 # 正常
safe_division_d(numerator=2, denominator=5) # 異常
>>>
Traceback ...
TypeError: safe_division_d() got some positional-only
arguments passed as keyword arguments: 'numerator, denominator'
"/"和"*"之間的參數,既可以直接按位置傳遞,也可以按keyword傳遞。
def safe_division_e(numerator, denominator, /,
                    ndigits=10, *,  # Changed
                    ignore_overflow=False,
                    ignore_zero_division=False):
    """Divide `numerator` by `denominator`, rounded to `ndigits` places.

    `ndigits` sits between `/` and `*`, so it may be passed either by
    position or by keyword.
    """
    try:
        fraction = numerator / denominator  # Changed
        return round(fraction, ndigits)     # Changed
    except OverflowError:
        if ignore_overflow:
            return 0
        else:
            raise
    except ZeroDivisionError:
        if ignore_zero_division:
            return float('inf')
        else:
            raise
result = safe_division_e(22, 7)
print(result)
result = safe_division_e(22, 7, 5)
print(result)
result = safe_division_e(22, 7, ndigits=2)
print(result)
>>>
3.1428571429
3.14286
3.14
- Item26:用functools.wraps來定義函數的Decorators
裝飾器很有用,比如打log,debug,register方法等都可以使用。
比如,現在想要打印調用函數的參數和結果:
def trace(func):
    """Decorator that prints the arguments and result of each call.

    The returned wrapper does not copy the metadata of `func`, so its
    name and docstring are those of `wrapper`.
    """
    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        print(f'{func.__name__}({args!r}, {kwargs!r}) '
              f'-> {result!r}')
        return result
    return wrapper
可以來trace斐波那契函數:
@trace
def fibonacci(n):
    """Return the n-th Fibonacci number"""
    if n in (0, 1):
        return n
    # The recursive calls go through the decorated name, so each one
    # is traced as well.
    return (fibonacci(n - 2) + fibonacci(n - 1))
被trace裝飾后,調用fibonacci(4)的結果如下:
fibonacci(4)
>>>
fibonacci((0,), {}) -> 0
fibonacci((1,), {}) -> 1
fibonacci((2,), {}) -> 1
fibonacci((1,), {}) -> 1
fibonacci((0,), {}) -> 0
fibonacci((1,), {}) -> 1
fibonacci((2,), {}) -> 1
fibonacci((3,), {}) -> 2
fibonacci((4,), {}) -> 3
但是裝飾之后,fibonacci這個名字指向的函數并不叫fibonacci:
print(fibonacci)
>>>
<function trace.<locals>.wrapper at 0x108955dc0>
trace函數返回在其主體中定義的wrapper,而不是原本的func。因此,使用help的時候,出來的不是fibonacci的注釋,而是wrapper的注釋:
help(fibonacci)
>>>
Help on function wrapper in module __main__:
wrapper(*args, **kwargs)
由于無法確定原始函數的位置,序列化也無法進行:
import pickle
pickle.dumps(fibonacci)
>>>
Traceback ...
AttributeError: Can't pickle local object 'trace.<locals>.wrapper'
解決方案是:在wrapper前面加上@wraps(func)裝飾器。
from functools import wraps
def trace(func):
    """Decorator that prints the arguments and result of each call.

    functools.wraps copies the metadata of `func` (name, docstring,
    module, ...) onto the wrapper, so help() and pickle see the
    original function.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        print(f'{func.__name__}({args!r}, {kwargs!r}) '
              f'-> {result!r}')
        return result
    return wrapper
@trace
def fibonacci(n):
    """Return the n-th Fibonacci number"""
    if n in (0, 1):
        return n
    return (fibonacci(n - 2) + fibonacci(n - 1))
這樣,這兩個功能都能正常運行:
help(fibonacci)
>>>
Help on function fibonacci in module __main__:
fibonacci(n)
Return the n-th Fibonacci number
print(pickle.dumps(fibonacci))
>>>
b'\x80\x04\x95\x1a\x00\x00\x00\x00\x00\x00\x00\x8c\x08__main_
_\x94\x8c\tfibonacci\x94\x93\x94.'
使用wraps可以保持一些標準屬性,如:(name, module, annotations)。可以確保功能正確性。