- Item5:寫Helper函數而不是復雜的表達式。(DRY原則: 不要重復自己(Don’t repeat yourself.),盡量封裝常用的方法。)
# 解碼URL的查詢字符串(query string)參數
from urllib.parse import parse_qs
my_values = parse_qs('red=5&blue=0&green=',
keep_blank_values=True)
print(repr(my_values))
>>>
{'red': ['5'], 'blue': ['0'], 'green': ['']}
# 用get可以獲得對應的 參數
print('Red: ', my_values.get('red'))
print('Green: ', my_values.get('green'))
print('Opacity: ', my_values.get('opacity'))
>>>
Red: ['5']
Green: ['']
Opacity: None
# For query string 'red=5&blue=0&green='
red = my_values.get('red', [''])[0] or 0
green = my_values.get('green', [''])[0] or 0
opacity = my_values.get('opacity', [''])[0] or 0
print(f'Red: {red!r}')
print(f'Green: {green!r}')
print(f'Opacity: {opacity!r}')
>>>
Red: '5'
Green: 0
Opacity: 0
# 利用字符串判空的trick,or上一個0,即默認為0,最后轉為整型。雖然整體的表達整潔,但是難以理解,需要拆分語句來閱讀。
red = int(my_values.get('red', [''])[0] or 0)
# 轉換成三元表達式(條件表達式)會稍微好一點,但是仍然不如封裝成函數清晰。
red_str = my_values.get('red', [''])
red = int(red_str[0]) if red_str[0] else 0
# 拆分成原始的if-else語句段,更加可讀
green_str = my_values.get('green', [''])
if green_str[0]:
green = int(green_str[0])
else:
green = 0
# 最后抽象成一個函數
def get_first_int(values, key, default=0):
    """Return the first value stored under *key*, converted to ``int``.

    Args:
        values: Mapping of key -> list of strings, e.g. the result of
            ``urllib.parse.parse_qs(..., keep_blank_values=True)``.
        key: Name of the parameter to look up.
        default: Value returned when the key is missing, its list is empty,
            or its first entry is an empty string.

    Returns:
        ``int(first_value)`` when a non-empty first value exists,
        otherwise *default*.

    Raises:
        ValueError: If the first value is non-empty but not a valid integer.
    """
    found = values.get(key, [''])
    # Guard against an empty list as well as a blank first entry, so that
    # ``found[0]`` can never raise IndexError.
    if found and found[0]:
        return int(found[0])
    return default
# 只要調用就可以,方便后續復用:
green = get_first_int(my_values, 'green')
- Item6:用多個變量賦值而不是下標的方式來拆分變量
# 用tuple把可變的字典變成不可變的元組。
snack_calories = {
'chips': 140,
'popcorn': 80,
'nuts': 190,
}
items = tuple(snack_calories.items())
print(items)
>>>
(('chips', 140), ('popcorn', 80), ('nuts', 190))
# 對元組可以進行下標訪問。
item = ('Peanut butter', 'Jelly')
first = item[0]
second = item[1]
print(first, 'and', second)
>>>
Peanut butter and Jelly
# 但是元組不允許下標賦值(不可變)
pair = ('Chocolate', 'Peanut butter')
pair[0] = 'Honey'
>>>
Traceback ...
TypeError: 'tuple' object does not support item assignment
# 可以用另一種方式來獲取元組中的元素
item = ('Peanut butter', 'Jelly')
first, second = item # Unpacking
print(first, 'and', second)
>>>
Peanut butter and Jelly
# 同理,可以這樣獲取元素,但是不建議:
favorite_snacks = {
'salty': ('pretzels', 100),
'sweet': ('cookies', 180),
'veggie': ('carrots', 20),
}
((type1, (name1, cals1)),
(type2, (name2, cals2)),
(type3, (name3, cals3))) = favorite_snacks.items()
print(f'Favorite {type1} is {name1} with {cals1} calories')
print(f'Favorite {type2} is {name2} with {cals2} calories')
print(f'Favorite {type3} is {name3} with {cals3} calories')
>>>
Favorite salty is pretzels with 100 calories
Favorite sweet is cookies with 180 calories
Favorite veggie is carrots with 20 calories
其中,最隱晦且有用的一點就是:利用此特性來完成數值交換。
# 傳統的數值交換
def bubble_sort(a):
    """Sort the list *a* in place, in ascending order, using bubble sort.

    This is the "traditional" variant: adjacent elements are swapped
    through a temporary variable. Returns ``None``.
    """
    for _ in range(len(a)):
        for i in range(1, len(a)):
            if a[i] < a[i-1]:
                # Classic three-step swap via a temporary.
                temp = a[i]
                a[i] = a[i-1]
                a[i-1] = temp
names = ['pretzels', 'carrots', 'arugula', 'bacon']
bubble_sort(names)
print(names)
>>>
['arugula', 'bacon', 'carrots', 'pretzels']
# 利用了unpacking特性的數值交換
def bubble_sort(a):
    """Sort the list *a* in place, in ascending order, using bubble sort.

    This variant swaps adjacent elements with tuple unpacking instead of
    a temporary variable. Returns ``None``.
    """
    for _ in range(len(a)):
        for i in range(1, len(a)):
            if a[i] < a[i-1]:
                # The right-hand side is evaluated first, then unpacked
                # into the two targets.
                a[i-1], a[i] = a[i], a[i-1]  # Swap
names = ['pretzels', 'carrots', 'arugula', 'bacon']
bubble_sort(names)
print(names)
>>>
['arugula', 'bacon', 'carrots', 'pretzels']
可以這么理解:取得了右邊的數值,然后包裝成一個隱藏的元組(a[i], a[i-1]),然后再進行unpack(拆包)之后,分別賦值給a[i-1]和a[i]。
unpacking的特性可以再參照下邊的例子:
# 不用unpacking
snacks = [('bacon', 350), ('donut', 240), ('muffin', 190)]
for i in range(len(snacks)):
item = snacks[i]
name = item[0]
calories = item[1]
print(f'#{i+1}: {name} has {calories} calories')
>>>
#1: bacon has 350 calories
#2: donut has 240 calories
#3: muffin has 190 calories
# 用了unpacking
for rank, (name, calories) in enumerate(snacks, 1):
print(f'#{rank}: {name} has {calories} calories')
>>>
#1: bacon has 350 calories
#2: donut has 240 calories
#3: muffin has 190 calories
當元組的長度較短,且含義明確時,直接拆包來獲得對應的變量,比直接使用下標去取得數據,邏輯意義上更加明確,代碼的可讀性上也會更好。
- Item7:用enumerate而不是range
# 使用range來指定迭代次數
from random import randint
random_bits = 0
for i in range(32):
if randint(0, 1):
random_bits |= 1 << i
print(bin(random_bits))
>>>
0b11101000100100000111000010000001
# 直接for-in來迭代
flavor_list = ['vanilla', 'chocolate', 'pecan', 'strawberry']
for flavor in flavor_list:
print(f'{flavor} is delicious')
>>>
vanilla is delicious
chocolate is delicious
pecan is delicious
strawberry is delicious
# 當需要下標的時候,可能就會用到range
for i in range(len(flavor_list)):
flavor = flavor_list[i]
print(f'{i + 1}: {flavor}')
>>>
1: vanilla
2: chocolate
3: pecan
4: strawberry
# enumerate本質是iterator(迭代器),每次next返回元組(由下標以及迭代對象的元素組成)。
it = enumerate(flavor_list)
print(next(it))
print(next(it))
>>>
(0, 'vanilla')
(1, 'chocolate')
# 自然可以使用for-in以及unpacking的組合方式來迭代enumerate。
for i, flavor in enumerate(flavor_list, 1): # 可以指定下標的編號從多少開始。
print(f'{i}: {flavor}')
>>>
1: vanilla
2: chocolate
3: pecan
4: strawberry
- Item8:用zip來同時處理迭代器。
# 可以用列表推導式來生成一個list
names = ['Cecilia', 'Lise', 'Marie']
counts = [len(n) for n in names]
print(counts)
>>>
[7, 4, 5]
# 如果要找到最長的名字,用這種普通的range迭代比較麻煩。
longest_name = None
max_count = 0
for i in range(len(names)):
count = counts[i]
if count > max_count:
longest_name = names[i]
max_count = count
print(longest_name)
>>>
Cecilia
# enumerate稍微好一點
for i, name in enumerate(names):
count = counts[i]
if count > max_count:
longest_name = name
max_count = count
# zip則是結合兩個列表簡明完成任務。(用一個懶惰生成器包裹了兩個或更多的迭代器,每次next是所有迭代器的下一個值組成的元組)
for name, count in zip(names, counts):
if count > max_count:
longest_name = name
max_count = count
但是,需要注意zip的迭代器們可能存在長度不一的問題:
# 比如用了之前的counts,但是對names添加了一個新名字,打印的時候,會按照最短長度來迭代。
names.append('Rosalind')
for name, count in zip(names, counts):
print(name)
>>>
Cecilia
Lise
Marie
# 如果不確定長度是否一致,則可以使用itertools的zip_longest函數來迭代。(由于最后一個名字對應沒有數值,則用None替代。)
import itertools
for name, count in itertools.zip_longest(names, counts):
print(f'{name}: {count}')
>>>
Cecilia: 7
Lise: 4
Marie: 5
Rosalind: None
- Item9:避免在for和while循環后面用else代碼塊
# The ``else`` belongs to the ``for``: it runs once the loop finishes
# without hitting ``break``.
for i in range(3):
    print('Loop', i)
else:
    print('Else block!')
>>>
Loop 0
Loop 1
Loop 2
Else block!
# else此處違反直覺:因為在try/except和if/else組合中,后者都是表示如果前者失敗的話,做什么操作。
# 而try/finally則是無論前者如何,最終后者會做什么操作。
# for/else此處則是相反的。
# The ``else`` is aligned with ``for`` (not with ``if``): because the loop
# exits through ``break``, the ``else`` block is skipped.
for i in range(3):
    print('Loop', i)
    if i == 1:
        break
else:
    print('Else block!')
>>>
Loop 0
Loop 1
# 如果是空列表,則直接執行else塊。
# Looping over an empty sequence never enters the body, so the ``else``
# block runs immediately.
for x in []:
    print('Never runs')
else:
    print('For Else block!')
>>>
For Else block!
# while/else也是一樣
# A ``while`` loop whose condition is initially false also falls straight
# through to its ``else`` block.
while False:
    print('Never runs')
else:
    print('While Else block!')
>>>
While Else block!
# 不通過break的時候,說明是互為質數。
a = 4
b = 9
# Try every candidate divisor; ``break`` on the first common one.
for i in range(2, min(a, b) + 1):
    print('Testing', i)
    if a % i == 0 and b % i == 0:
        print('Not coprime')
        break
else:
    # Reached only when no common divisor triggered ``break``.
    print('Coprime')
>>>
Testing 2
Testing 3
Testing 4
Coprime
#(上面的場景適用,但實際不推薦使用。寫一個helper函數來做計算更合適。)
def coprime(a, b):
    """Return ``True`` if *a* and *b* have no common divisor >= 2.

    Every candidate from 2 through ``min(a, b)`` is tried; the function
    returns ``False`` as soon as one divides both numbers. When
    ``min(a, b) < 2`` there are no candidates and the result is ``True``.
    """
    for i in range(2, min(a, b) + 1):
        if a % i == 0 and b % i == 0:
            return False
    return True
assert coprime(4, 9)
assert not coprime(3, 6)
# 或者通過一個變量來表示是否互質。
def coprime_alternate(a, b):
    """Return ``True`` if *a* and *b* have no common divisor >= 2.

    Same result as an early-return search, but tracked with a flag: the
    flag starts ``True`` and is cleared (followed by ``break``) on the
    first common divisor found in ``2..min(a, b)``.
    """
    is_coprime = True
    for i in range(2, min(a, b) + 1):
        if a % i == 0 and b % i == 0:
            is_coprime = False
            break
    return is_coprime
assert coprime_alternate(4, 9)
assert not coprime_alternate(3, 6)
通過helper函數可以提高可讀性(相比for/else語句)。
- Item10:避免重復賦值表達式時可讀性差的問題(walrus操作符python3.8之后的語法,因為“:=”像眼睛和長牙,所以就叫walrus。)
# 水果籃子里面有什么
fresh_fruit = {
'apple': 10,
'banana': 8,
'lemon': 5,
}
# 做水果汁
def make_lemonade(count):
    """Placeholder used by the examples; the implementation is elided."""
    ...
def out_of_stock():
    """Placeholder used by the examples; the implementation is elided."""
    ...
# 原實現-1
count = fresh_fruit.get('lemon', 0)
if count:
make_lemonade(count)
else:
out_of_stock()
# 實際count只在if這一塊使用到,放到if前,似乎有點放大了作用域。
# 使用walrus重寫了上面的片段,實現-2
if count := fresh_fruit.get('lemon', 0):
make_lemonade(count)
else:
out_of_stock()
def make_cider(count):
    """Placeholder used by the examples; the implementation is elided."""
    ...
count = fresh_fruit.get('apple', 0)
if count >= 4:
make_cider(count)
else:
out_of_stock()
# 同樣用walrus操作符重寫片段。
if (count := fresh_fruit.get('apple', 0)) >= 4:
make_cider(count)
else:
out_of_stock()
def slice_bananas(count):
    """Placeholder used by the examples; the implementation is elided."""
    ...
class OutOfBananas(Exception):
    """Exception the smoothie examples catch around ``make_smoothies``."""
    pass
def make_smoothies(count):
    """Placeholder used by the examples; the implementation is elided."""
    ...
pieces = 0
count = fresh_fruit.get('banana', 0)
if count >= 2:
pieces = slice_bananas(count)
try:
smoothies = make_smoothies(pieces)
except OutOfBananas:
out_of_stock()
# 閉包問題,為了邏輯通順,可以放在if/else里面來賦值變量。
count = fresh_fruit.get('banana', 0)
if count >= 2:
pieces = slice_bananas(count)
else:
pieces = 0
try:
smoothies = make_smoothies(pieces)
except OutOfBananas:
out_of_stock()
# 用walrus繼續來重寫
pieces = 0
if (count := fresh_fruit.get('banana', 0)) >= 2:
pieces = slice_bananas(count)
try:
smoothies = make_smoothies(pieces)
except OutOfBananas:
out_of_stock()
if (count := fresh_fruit.get('banana', 0)) >= 2:
pieces = slice_bananas(count)
else:
pieces = 0
try:
smoothies = make_smoothies(pieces)
except OutOfBananas:
out_of_stock()
# if/else實現switch的可讀性比較差
# Emulating a switch with nested if/else: each fallback choice is pushed
# one level deeper, which is what makes this form hard to read.
count = fresh_fruit.get('banana', 0)
if count >= 2:
    pieces = slice_bananas(count)
    to_enjoy = make_smoothies(pieces)
else:
    count = fresh_fruit.get('apple', 0)
    if count >= 4:
        to_enjoy = make_cider(count)
    else:
        count = fresh_fruit.get('lemon', 0)
        if count:
            to_enjoy = make_lemonade(count)
        else:
            to_enjoy = 'Nothing'
# 用walrus來配合實現就稍微好一點
if (count := fresh_fruit.get('banana', 0)) >= 2:
pieces = slice_bananas(count)
to_enjoy = make_smoothies(pieces)
elif (count := fresh_fruit.get('apple', 0)) >= 4:
to_enjoy = make_cider(count)
elif count := fresh_fruit.get('lemon', 0):
to_enjoy = make_lemonade(count)
else:
to_enjoy = 'Nothing'
# while循環
def pick_fruit():
    """Placeholder used by the examples; the implementation is elided."""
    ...
def make_juice(fruit, count):
    """Placeholder used by the examples; the implementation is elided."""
    ...
bottles = []
fresh_fruit = pick_fruit()
while fresh_fruit:
for fruit, count in fresh_fruit.items():
batch = make_juice(fruit, count)
bottles.extend(batch)
fresh_fruit = pick_fruit()
# 整體為loop-and-a-half的結構。
bottles = []
while True: # Loop
fresh_fruit = pick_fruit()
if not fresh_fruit: # And a half
break
for fruit, count in fresh_fruit.items():
batch = make_juice(fruit, count)
bottles.extend(batch)
# 可以通過walrus表達式來重建,提升可讀性。
bottles = []
while fresh_fruit := pick_fruit():
for fruit, count in fresh_fruit.items():
batch = make_juice(fruit, count)
bottles.extend(batch)
列表和字典
一個常見的方式是用list來處理序列相關的工作。
dict是list的一個自然補充。它存儲鍵值對,也被叫做關聯數組(associative array)或者哈希表,提供均攤常數時間復雜度的賦值和訪問。
- Item11:知道如何去切分序列
a = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
print('Middle two: ', a[3:5])
print('All but ends:', a[1:7])
>>>
Middle two: ['d', 'e']
All but ends: ['b', 'c', 'd', 'e', 'f', 'g']
# 為了提高可讀性,開始為0或者結束為len應該省略。
assert a[:5] == a[0:5]
assert a[5:] == a[5:len(a)]
a[:] # ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
a[:5] # ['a', 'b', 'c', 'd', 'e']
a[:-1] # ['a', 'b', 'c', 'd', 'e', 'f', 'g']
a[4:] # ['e', 'f', 'g', 'h']
a[-3:] # ['f', 'g', 'h']
a[2:5] # ['c', 'd', 'e']
a[2:-1] # ['c', 'd', 'e', 'f', 'g']
a[-3:-1] # ['f', 'g']
# 切片會避免一些問題。
first_twenty_items = a[:20]
last_twenty_items = a[-20:]
# 比如取到不存在的下標:
a[20]
>>>
Traceback ...
IndexError: list index out of range
# 可以通過切片來生成新的列表,不過這個列表是淺拷貝的新列表。
a = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
b = a[3:]
print('Before: ', b)
b[1] = 99
print('After: ', b)
print('No change:', a)
>>>
Before: ['d', 'e', 'f', 'g', 'h']
After: ['d', 99, 'f', 'g', 'h']
No change: ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
# 賦值長度可以不一樣,但是最終會收縮或者伸長。
print('Before ', a)
a[2:7] = [99, 22, 14]
print('After ', a)
>>>
Before ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
After ['a', 'b', 99, 22, 14, 'h']
print('Before ', a)
a[2:3] = [47, 11]
print('After ', a)
>>>
Before ['a', 'b', 99, 22, 14, 'h']
After ['a', 'b', 47, 11, 22, 14, 'h']
# 淺拷貝的問題,所以元素相同,但是對應的指針不同。
b = a[:]
assert b == a and b is not a
# 再次加深淺拷貝的印象,如果使用切片來賦值,最終是替換了指向的數值,而不是給一個新的列表:
b = a
print('Before a', a)
print('Before b', b)
a[:] = [101, 102, 103]
assert a is b # Still the same list object
print('After a ', a) # Now has different contents
print('After b ', b) # Same list, so same contents as a
>>>
Before a ['a', 'b', 47, 11, 22, 14, 'h']
Before b ['a', 'b', 47, 11, 22, 14, 'h']
After a [101, 102, 103]
After b [101, 102, 103]
- Item12:避免在一個表達式中切分和步幅(striding)操作
列表的切片語法:somelist[start:end:stride],建議用正的stride的同時,避免用start和end下標。
# 利用切片來奇數和偶數遍歷
x = ['red', 'orange', 'yellow', 'green', 'blue', 'purple']
odds = x[::2]
evens = x[1::2]
print(odds)
print(evens)
>>>
['red', 'yellow', 'blue']
['orange', 'green', 'purple']
# 利用切片striding為-1來反向遍歷(bytes和unicode的字符串都可以)
x = b'mongoose'
y = x[::-1]
print(y)
>>>
b'esoognom'
x = '壽司'
y = x[::-1]
print(y)
>>>
司壽
# 但是編碼成UTF-8的字節串反轉之后,無法再解碼回字符串:
w = '壽司'
x = w.encode('utf-8')
y = x[::-1]
z = y.decode('utf-8')
>>>
Traceback ...
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb8 in position 0: invalid start byte
-1很有用嘛?看看下面的例子:
x = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
x[::2] # ['a', 'c', 'e', 'g']
x[::-2] # ['h', 'f', 'd', 'b']
# 如果結合起來呢?
x[2::2] # ['c', 'e', 'g']
x[-2::-2] # ['g', 'e', 'c', 'a']
x[-2:2:-2] # ['g', 'e']
x[2:2:-2] # []
由于語法上的密度較大,可讀性降低。可以先striding再slicing。(額外創建了淺拷貝,如果想優化,可以考慮itertools的islice方法)
y = x[::2] # ['a', 'c', 'e', 'g']
z = y[1:-1] # ['c', 'e']
- Item13:使用Catch-All Unpacking(捕捉-全部的拆包)而不是切片。
# 拆箱的時候,應該注意變量的個數。
car_ages = [0, 9, 4, 8, 7, 20, 19, 1, 6, 15]
car_ages_descending = sorted(car_ages, reverse=True)
oldest, second_oldest = car_ages_descending
>>>
Traceback ...
ValueError: too many values to unpack (expected 2)
# 當然可以通過下標來訪問,但是比較麻煩。
oldest = car_ages_descending[0]
second_oldest = car_ages_descending[1]
others = car_ages_descending[2:]
print(oldest, second_oldest, others)
>>>
20 19 [15, 9, 8, 7, 6, 4, 1, 0]
# 可以用帶星表達式(starred expression)來接收拆包的結果。
oldest, second_oldest, *others = car_ages_descending
print(oldest, second_oldest, others)
>>>
20 19 [15, 9, 8, 7, 6, 4, 1, 0]
oldest, *others, youngest = car_ages_descending
print(oldest, youngest, others)
*others, second_youngest, youngest = car_ages_descending
print(youngest, second_youngest, others)
>>>
20 0 [19, 15, 9, 8, 7, 6, 4, 1]
0 1 [20, 19, 15, 9, 8, 7, 6, 4]
# 帶星表達式不能單獨作為賦值目標。
*others = car_ages_descending
>>>
Traceback ...
SyntaxError: starred assignment target must be in a list or tuple
# 同一層的拆包中不能出現多個帶星表達式。
first, *middle, *second_middle, last = [1, 2, 3, 4]
>>>
Traceback ...
SyntaxError: two starred expressions in assignment
# 可以通過下面的例子提供對*表達式的直覺。
car_inventory = {
'Downtown': ('Silver Shadow', 'Pinto', 'DMC'),
'Airport': ('Skyline', 'Viper', 'Gremlin', 'Nova'),
}
((loc1, (best1, *rest1)),
(loc2, (best2, *rest2))) = car_inventory.items()
print(f'Best at {loc1} is {best1}, {len(rest1)} others')
print(f'Best at {loc2} is {best2}, {len(rest2)} others')
>>>
Best at Downtown is Silver Shadow, 2 others
Best at Airport is Skyline, 3 others
如果被拆包的部分比變量短,最終得到的是空列表。
short_list = [1, 2]
first, second, *rest = short_list
print(first, second, rest)
>>>
1 2 []
任意迭代器可以拆包
it = iter(range(1, 3))
first, second = it
print(f'{first} and {second}')
>>>
1 and 2
最后再通過處理CSV表格的例子來加深拆包和*表達式的使用:
def generate_csv():
    """Yield CSV rows as tuples, starting with the header row.

    Only the header row is produced here; the data rows are elided in
    these notes (the trailing ``...``).
    """
    yield ('Date', 'Make', 'Model', 'Year', 'Price')
    ...
# 不用*表達式拆行
all_csv_rows = list(generate_csv())
header = all_csv_rows[0]
rows = all_csv_rows[1:]
print('CSV Header:', header)
print('Row count: ', len(rows))
>>>
CSV Header: ('Date', 'Make', 'Model', 'Year', 'Price')
Row count: 200
# 用*表達式拆行。
it = generate_csv()
header, *rows = it
print('CSV Header:', header)
print('Row count: ', len(rows))
>>>
CSV Header: ('Date', 'Make', 'Model', 'Year', 'Price')
Row count: 200
小心*號表達式最終爆內存(因為返回的是一個list)
- Item14:通過復雜的標準和key參數來排序
sort可以對列表進行排序。
numbers = [93, 86, 11, 68, 70]
numbers.sort()
print(numbers)
>>>
[11, 68, 70, 86, 93]
class Tool:
    """A tool with a name and a weight.

    No comparison methods are defined, so a list of ``Tool`` objects can
    only be sorted by passing an explicit ``key`` function.
    """

    def __init__(self, name, weight):
        self.name = name
        self.weight = weight

    def __repr__(self):
        # ``!r`` quotes the name so the repr reads like a constructor call.
        return f'Tool({self.name!r}, {self.weight})'
tools = [
Tool('level', 3.5),
Tool('hammer', 1.25),
Tool('screwdriver', 0.5),
Tool('chisel', 0.25),
]
# 沒有指定對比的key,排序失效
tools.sort()
>>>
Traceback ...
TypeError: '<' not supported between instances of 'Tool' and 'Tool'
# 指定了使用Tool的name來排序。
print('Unsorted:', repr(tools))
tools.sort(key=lambda x: x.name)
print('\nSorted: ', tools)
>>>
Unsorted: [Tool('level', 3.5),
Tool('hammer', 1.25),
Tool('screwdriver', 0.5),
Tool('chisel', 0.25)]
Sorted: [Tool('chisel', 0.25),
Tool('hammer', 1.25),
Tool('level', 3.5),
Tool('screwdriver', 0.5)]
# 當然也可以指定用重量(weight)來排序。
tools.sort(key=lambda x: x.weight)
print('By weight:', tools)
>>>
By weight: [Tool('chisel', 0.25),
Tool('screwdriver', 0.5),
Tool('hammer', 1.25),
Tool('level', 3.5)]
# 此處為了確保按照字母順序排序,所以轉成了小寫(lower())
places = ['home', 'work', 'New York', 'Paris']
places.sort()
print('Case sensitive: ', places)
places.sort(key=lambda x: x.lower())
print('Case insensitive:', places)
>>>
Case sensitive: ['New York', 'Paris', 'home', 'work']
Case insensitive: ['home', 'New York', 'Paris', 'work']
同時排序多個條件怎么操作?最簡單的方式是用元組(默認是自然排序,意味著實現了lt等sort需要的方法)。
saw = (5, 'circular saw')
jackhammer = (40, 'jackhammer')
assert not (jackhammer < saw) # Matches expectations
drill = (4, 'drill')
sander = (4, 'sander')
assert drill[0] == sander[0] # Same weight
assert drill[1] < sander[1] # Alphabetically less
assert drill < sander # Thus, drill comes first
power_tools = [
Tool('drill', 4),
Tool('circular saw', 5),
Tool('jackhammer', 40),
Tool('sander', 4),
]
power_tools.sort(key=lambda x: (x.weight, x.name))
print(power_tools)
>>>
[Tool('drill', 4),
Tool('sander', 4),
Tool('circular saw', 5),
Tool('jackhammer', 40)]
# 當然可以指定reverse來使所有key逆序。
power_tools.sort(key=lambda x: (x.weight, x.name),
                 reverse=True)  # Makes all criteria descending
print(power_tools)
>>>
[Tool('jackhammer', 40),
Tool('circular saw', 5),
Tool('sander', 4),
Tool('drill', 4)]
# 如果一部分需要逆序,一部分正序,怎么辦?(一元負號可以稍微解決這個問題,但是其不支持所有類型)
power_tools.sort(key=lambda x: (-x.weight, x.name))
print(power_tools)
>>>
[Tool('jackhammer', 40),
Tool('circular saw', 5),
Tool('drill', 4),
Tool('sander', 4)]
power_tools.sort(key=lambda x: (x.weight, -x.name),
reverse=True)
>>>
Traceback ...
TypeError: bad operand type for unary -: 'str'
迫不得已才需要用到多個sort的組合。
power_tools.sort(key=lambda x: x.name) # Name ascending
power_tools.sort(key=lambda x: x.weight, # Weight descending
reverse=True)
print(power_tools)
>>>
[Tool('jackhammer', 40),
Tool('circular saw', 5),
Tool('drill', 4),
Tool('sander', 4)]
先對name,再對weight進行排序,所以得到最終的結果。相反,也可以先對weight排序,再對name排序,看個人的需求。
- Item15:注意dict的插入順序
Python 3.5及之前的版本,迭代一個dict的時候,順序是任意的,也就是可能和原本插入的順序不一致。這個特性使得測試樣例比較難以復現,難以debug。(主要是由于哈希表的實現用了內置的hash函數和隨機種子)
# Python 3.5
baby_names = {
'cat': 'kitten',
'dog': 'puppy',
}
print(baby_names)
>>>
{'dog': 'puppy', 'cat': 'kitten'}
3.6之后正常:
baby_names = {
'cat': 'kitten',
'dog': 'puppy',
}
print(baby_names)
>>>
{'cat': 'kitten', 'dog': 'puppy'}
因此,3.6之前的依賴于dict的方法(keys, values, items, popitem),都會有這種驚喜存在:
# Python 3.5
print(list(baby_names.keys()))
print(list(baby_names.values()))
print(list(baby_names.items()))
print(baby_names.popitem()) # Randomly chooses an item
>>>
['dog', 'cat']
['puppy', 'kitten']
[('dog', 'puppy'), ('cat', 'kitten')]
('dog', 'puppy')
# 3.6之后
print(list(baby_names.keys()))
print(list(baby_names.values()))
print(list(baby_names.items()))
print(baby_names.popitem()) # Last item inserted
>>>
['cat', 'dog']
['kitten', 'puppy']
[('cat', 'kitten'), ('dog', 'puppy')]
('dog', 'puppy')
比如用**kwargs捕捉所有鍵值對,由于順序問題,難以debug。
# Python 3.5
def my_func(**kwargs):
    """Print every keyword argument as ``key = value``, one per line.

    Lines follow the iteration order of ``kwargs``. %-formatting is kept
    because this copy illustrates the Python 3.5 behaviour.
    """
    for key, value in kwargs.items():
        print('%s = %s' % (key, value))
my_func(goose='gosling', kangaroo='joey')
>>>
kangaroo = joey
goose = gosling
def my_func(**kwargs):
    """Print every keyword argument as ``key = value``, one per line.

    Lines follow the iteration order of ``kwargs``.
    """
    for key, value in kwargs.items():
        print(f'{key} = {value}')
my_func(goose='gosling', kangaroo='joey')
>>>
goose = gosling
kangaroo = joey
類也用dict作為實例字典。早期版本也是存在順序問題。
# Python 3.5
class MyClass:
    """Example class whose instance ``__dict__`` holds two attributes."""

    def __init__(self):
        # Assignment order determines the insertion order in ``__dict__``.
        self.alligator = 'hatchling'
        self.elephant = 'calf'
a = MyClass()
for key, value in a.__dict__.items():
print('%s = %s' % (key, value))
>>>
elephant = calf
alligator = hatchling
# 之后的版本
class MyClass:
    """Example class whose instance ``__dict__`` holds two attributes."""

    def __init__(self):
        # Assignment order determines the insertion order in ``__dict__``.
        self.alligator = 'hatchling'
        self.elephant = 'calf'
a = MyClass()
for key, value in a.__dict__.items():
print(f'{key} = {value}')
>>>
alligator = hatchling
elephant = calf
盡管從3.7開始,dict和collections的OrderedDict的表現相似,但是如果經常插入和popitem(比如實現LRU緩存),OrderedDict可能比dict更適合。
再看一個例子。比如現在有動物的投票數字典,然后需求是得到投票數最多的動物,作為贏家,那么可以實現如下:
votes = {
'otter': 1281,
'polar bear': 587,
'fox': 863,
}
def populate_ranks(votes, ranks):
    """Fill *ranks* with each name's 1-based rank by descending vote count.

    Args:
        votes: Mapping of name -> vote count.
        ranks: Mutable mapping updated in place; names are inserted from
            the highest vote count to the lowest.
    """
    names = list(votes.keys())
    # Order the names by their vote count, highest first.
    names.sort(key=votes.get, reverse=True)
    for i, name in enumerate(names, 1):
        ranks[name] = i
def get_winner(ranks):
    """Return the first key produced by iterating *ranks*.

    This is the winner only if *ranks* iterates from best rank to worst,
    which holds for a plain ``dict`` filled in rank order.

    Raises:
        StopIteration: If *ranks* is empty.
    """
    return next(iter(ranks))
ranks = {}
populate_ranks(votes, ranks)
print(ranks)
winner = get_winner(ranks)
print(winner)
>>>
{'otter': 1, 'fox': 2, 'polar bear': 3}
otter
但是,現在需求變了,現在要的不是投票數順序來遍歷,而是名字的順序來遍歷了。此時可以用collections.abc來定義一個新的字典類(引入了dict-like的類型,但是會引入一些奇怪的bugs):
from collections.abc import MutableMapping
class SortedDict(MutableMapping):
    """Dict-like mapping that iterates its keys in sorted order.

    Items live in the plain dict ``self.data``; only ``__iter__`` differs
    from normal dict behaviour, yielding keys sorted rather than in
    insertion order.
    """

    def __init__(self):
        self.data = {}

    def __getitem__(self, key):
        return self.data[key]

    def __setitem__(self, key, value):
        self.data[key] = value

    def __delitem__(self, key):
        del self.data[key]

    def __iter__(self):
        # Sort a snapshot of the keys so iteration ignores insertion order.
        keys = list(self.data.keys())
        keys.sort()
        for key in keys:
            yield key

    def __len__(self):
        return len(self.data)
sorted_ranks = SortedDict()
populate_ranks(votes, sorted_ranks)
print(sorted_ranks.data)
winner = get_winner(sorted_ranks)
print(winner)
>>>
{'otter': 1, 'fox': 2, 'polar bear': 3}
fox
但是,存在一個問題:get_winner的實現假設dict的迭代順序就是插入順序(即populate_ranks寫入排名的順序),而SortedDict按鍵名排序來迭代,打破了這個假設,所以得到了錯誤的贏家fox。可以通過:1)修改get_winner函數,使其不依賴迭代順序;或者2)在參數不是dict實例時直接拋出異常;或者3)用類型注解顯式限制參數的類型(需要用mypy的--strict模式做靜態檢查)來解決這個問題。
def get_winner(ranks):
    """Return the name whose rank is 1, regardless of iteration order.

    Returns ``None`` implicitly when no entry has rank 1.
    """
    for name, rank in ranks.items():
        if rank == 1:
            return name
winner = get_winner(sorted_ranks)
print(winner)
>>>
otter
def get_winner(ranks):
    """Return the first key of *ranks*, accepting only real ``dict`` objects.

    Raises:
        TypeError: If *ranks* is not a ``dict`` instance, since other
            mappings may not iterate in insertion order.
        StopIteration: If *ranks* is an empty dict.
    """
    if not isinstance(ranks, dict):
        raise TypeError('must provide a dict instance')
    return next(iter(ranks))
get_winner(sorted_ranks)
>>>
Traceback ...
TypeError: must provide a dict instance
from typing import Dict, MutableMapping
def populate_ranks(votes: Dict[str, int],
                   ranks: Dict[str, int]) -> None:
    """Fill *ranks* with each name's 1-based rank by descending vote count.

    Both parameters are annotated as ``Dict[str, int]`` so that a static
    type checker rejects dict-like objects that are not real dicts.
    """
    names = list(votes.keys())
    # Order the names by their vote count, highest first.
    names.sort(key=votes.get, reverse=True)
    for i, name in enumerate(names, 1):
        ranks[name] = i
def get_winner(ranks: Dict[str, int]) -> str:
    """Return the first key produced by iterating *ranks*.

    The ``Dict[str, int]`` annotation lets a static type checker flag
    callers that pass some other mapping type.

    Raises:
        StopIteration: If *ranks* is empty.
    """
    return next(iter(ranks))
class SortedDict(MutableMapping[str, int]):
    # Body elided in these notes; only the typed base class matters for
    # the static type-checking example.
    ...
votes = {
'otter': 1281,
'polar bear': 587,
'fox': 863,
}
sorted_ranks = SortedDict()
populate_ranks(votes, sorted_ranks)
print(sorted_ranks.data)
winner = get_winner(sorted_ranks)
print(winner)
$ python3 -m mypy --strict example.py
.../example.py:48: error: Argument 2 to "populate_ranks" has incompatible type "SortedDict"; expected "Dict[str, int]"
.../example.py:50: error: Argument 1 to "get_winner" has incompatible type "SortedDict"; expected "Dict[str, int]"