Effective Python 筆記摘錄2

  • Item5:寫Helper函數而不是復雜的表達式。(DRY原則: 不要重復自己(Don’t repeat yourself.),盡量封裝常用的方法。)
# 解析URL的查詢字符串(query string,即“?”后面的參數部分)
from urllib.parse import parse_qs
my_values = parse_qs('red=5&blue=0&green=',
                     keep_blank_values=True)
print(repr(my_values))
>>>
{'red': ['5'], 'blue': ['0'], 'green': ['']}

# 用get可以獲得對應的 參數
print('Red:     ', my_values.get('red'))
print('Green:   ', my_values.get('green'))
print('Opacity: ', my_values.get('opacity'))
>>>
Red:      ['5']
Green:    ['']
Opacity:  None

# For query string 'red=5&blue=0&green='
red = my_values.get('red', [''])[0] or 0
green = my_values.get('green', [''])[0] or 0
opacity = my_values.get('opacity', [''])[0] or 0
print(f'Red:     {red!r}')
print(f'Green:   {green!r}')
print(f'Opacity: {opacity!r}')
>>>
Red:     '5'
Green:   0
Opacity: 0

# 利用字符串判空的trick,or上一個0,即默認為0,最后轉為整型。雖然整體的表達整潔,但是難以理解,需要拆分語句來閱讀。
red = int(my_values.get('red', [''])[0] or 0)

# 改寫成三元表達式(條件表達式)會稍微好一點,但是仍然不如封裝成函數清晰。
red_str = my_values.get('red', [''])
red = int(red_str[0]) if red_str[0] else 0

# 拆分成原始的if-else語句段,更加可讀
# Expanded if/else form: take the first 'green' value and convert it to
# an int, falling back to 0 when the value is missing or blank.
green_str = my_values.get('green', [''])
if green_str[0]:
    green = int(green_str[0])
else:
    green = 0

# 最后抽象成一個函數
def get_first_int(values, key, default=0):
    """Return the first value stored under *key* as an int, or *default*.

    Args:
        values: Mapping of keys to lists of strings, e.g. the result of
            ``urllib.parse.parse_qs(..., keep_blank_values=True)``.
        key: The key to look up.
        default: Returned when the key is missing, its list is empty, or
            its first entry is an empty string.

    Returns:
        ``int(first_entry)`` when a non-empty first entry exists,
        otherwise *default*.

    Raises:
        ValueError: If the first entry is non-empty but not a valid
            integer literal.
    """
    found = values.get(key)
    # A missing key, an empty list and a blank first entry all mean
    # "no usable value"; checking the list first avoids an IndexError.
    if not found or not found[0]:
        return default
    return int(found[0])

# 只要調用就可以,方便后續復用:
green = get_first_int(my_values, 'green')

  • Item6:用多個變量賦值而不是下標的方式來拆分變量
# 用tuple把可變的字典變成不可變的元組。
snack_calories = {
    'chips': 140,
    'popcorn': 80,
    'nuts': 190,
}
items = tuple(snack_calories.items())
print(items)
>>>
(('chips', 140), ('popcorn', 80), ('nuts', 190))

# 對元組可以進行下標訪問。
item = ('Peanut butter', 'Jelly')
first = item[0]
second = item[1]
print(first, 'and', second)
>>>
Peanut butter and Jelly

# 但是元組不允許下標賦值(不可變)
pair = ('Chocolate', 'Peanut butter')
pair[0] = 'Honey'
>>>
Traceback ...
TypeError: 'tuple' object does not support item assignment

# 可以用另一種方式來獲取元組中的元素
item = ('Peanut butter', 'Jelly')
first, second = item # Unpacking
print(first, 'and', second)
>>>
Peanut butter and Jelly

# 同理,可以這樣獲取元素,但是不建議:
favorite_snacks = {
    'salty': ('pretzels', 100),
    'sweet': ('cookies', 180),
    'veggie': ('carrots', 20),
}
((type1, (name1, cals1)),
 (type2, (name2, cals2)),
 (type3, (name3, cals3))) = favorite_snacks.items()
print(f'Favorite {type1} is {name1} with {cals1} calories')
print(f'Favorite {type2} is {name2} with {cals2} calories')
print(f'Favorite {type3} is {name3} with {cals3} calories')
>>>
Favorite salty is pretzels with 100 calories
Favorite sweet is cookies with 180 calories
Favorite veggie is carrots with 20 calories

其中,最隱晦且有用的一點就是:利用此特性來完成數值交換。

# 傳統的數值交換
def bubble_sort(a):
    """Sort the list *a* in place (ascending) using bubble sort.

    Adjacent out-of-order elements are exchanged through a temporary
    variable. Returns None; the caller's list is modified.
    """
    size = len(a)
    for _ in range(size):
        for right in range(1, size):
            left = right - 1
            if a[right] < a[left]:
                # Classic three-step swap via a temporary holder.
                held = a[right]
                a[right] = a[left]
                a[left] = held
names = ['pretzels', 'carrots', 'arugula', 'bacon']
bubble_sort(names)
print(names)
>>>
['arugula', 'bacon', 'carrots', 'pretzels']

# 利用了unpacking特性的數值交換
def bubble_sort(a):
    """Sort the list *a* in place (ascending) using bubble sort.

    Adjacent out-of-order elements are exchanged with a single
    tuple-unpacking assignment. Returns None; the caller's list is
    modified.
    """
    size = len(a)
    for _ in range(size):
        for right in range(1, size):
            left = right - 1
            if a[right] < a[left]:
                # The right-hand side is evaluated into a temporary tuple
                # first, then unpacked into the two slots.
                a[left], a[right] = a[right], a[left]
names = ['pretzels', 'carrots', 'arugula', 'bacon']
bubble_sort(names)
print(names)
>>>
['arugula', 'bacon', 'carrots', 'pretzels']

可以這么理解:取得了右邊的數值,然后包裝成一個隱藏的元組(a[i], a[i-1]),然后再進行unpack(拆包)之后,分別賦值給a[i-1]和a[i]。

unpacking的特性可以再參照下邊的例子:

# 不用unpacking
snacks = [('bacon', 350), ('donut', 240), ('muffin', 190)]
for i in range(len(snacks)):
    item = snacks[i]
    name = item[0]
    calories = item[1]
    print(f'#{i+1}: {name} has {calories} calories')
>>>
#1: bacon has 350 calories
#2: donut has 240 calories
#3: muffin has 190 calories

# 用了unpacking
for rank, (name, calories) in enumerate(snacks, 1):
    print(f'#{rank}: {name} has {calories} calories')
>>>
#1: bacon has 350 calories
#2: donut has 240 calories
#3: muffin has 190 calories

當元組的長度較短,且含義明確時,直接拆包來獲得對應的變量,比直接使用下標去取得數據,邏輯意義上更加明確,代碼的可讀性上也會更好。


  • Item7:用enumerate而不是range
# 使用range來指定迭代次數
from random import randint
random_bits = 0
for i in range(32):
    if randint(0, 1):
        random_bits |= 1 << i
print(bin(random_bits))
>>>
0b11101000100100000111000010000001

# 直接for-in來迭代
flavor_list = ['vanilla', 'chocolate', 'pecan', 'strawberry']
for flavor in flavor_list:
    print(f'{flavor} is delicious')
>>>
vanilla is delicious
chocolate is delicious
pecan is delicious
strawberry is delicious

# 當需要下標的時候,可能就會用到range
for i in range(len(flavor_list)):
    flavor = flavor_list[i]
    print(f'{i + 1}: {flavor}')
>>>
1: vanilla
2: chocolate
3: pecan
4: strawberry

# enumerate本質是iterator(迭代器),每次next返回元組(由下標以及迭代對象的元素組成)。
it = enumerate(flavor_list)
print(next(it))
print(next(it))
>>>
(0, 'vanilla')
(1, 'chocolate')

# 自然可以使用for-in以及unpacking的組合方式來迭代enumerate。
for i, flavor in enumerate(flavor_list, 1): # 可以指定下標的編號從多少開始。
    print(f'{i}: {flavor}')
>>>
1: vanilla
2: chocolate
3: pecan
4: strawberry

  • Item8:用zip來同時處理迭代器。
# 可以用列表推導式來生成一個list
names = ['Cecilia', 'Lise', 'Marie']
counts = [len(n) for n in names]
print(counts)
>>>
[7, 4, 5]

# 如果要找到最長的名字,用這種普通的range迭代比較麻煩。
longest_name = None
max_count = 0
for i in range(len(names)):
    count = counts[i]
    if count > max_count:
       longest_name = names[i]
       max_count = count
print(longest_name)
>>>
Cecilia

# enumerate稍微好一點
for i, name in enumerate(names):
    count = counts[i]
    if count > max_count:
        longest_name = name
        max_count = count

# zip則是結合兩個列表簡明完成任務。(用一個懶惰生成器包裹了兩個或更多的迭代器,每次next是所有迭代器的下一個值組成的元組)
for name, count in zip(names, counts):
    if count > max_count:
        longest_name = name
        max_count = count

但是,需要注意zip的迭代器們可能存在長度不一的問題:

# 比如用了之前的counts,但是對names添加了一個新名字,打印的時候,會按照最短長度來迭代。
names.append('Rosalind')
for name, count in zip(names, counts):
    print(name)
>>>
Cecilia
Lise
Marie

# 如果不確定長度是否一致,則可以使用itertools的zip_longest函數來迭代。(由于最后一個名字對應沒有數值,則用None替代。)
import itertools
for name, count in itertools.zip_longest(names, counts):
    print(f'{name}: {count}')
>>>
Cecilia: 7
Lise: 4
Marie: 5
Rosalind: None

  • Item9:避免在for和while循環后面用else代碼塊
for i in range(3):
    print('Loop', i)
else:
    print('Else block!')
>>>
Loop 0
Loop 1
Loop 2
Else block!

# else此處違反直覺:因為在try/except和if/else組合中,后者都是表示如果前者失敗的話,做什么操作。
# 而try/finally則是無論前者如何,最終后者會做什么操作。
# for/else此處則是相反的。
for i in range(3):
    print('Loop', i)
    if i == 1:
        break
else:
    print('Else block!')
>>>
Loop 0
Loop 1

# 如果是空列表,則直接執行else塊。
for x in []:
    print('Never runs')
else:
    print('For Else block!')
>>>
For Else block!

# while/else也是一樣
while False:
    print('Never runs')
else:
    print('While Else block!')
>>>
While Else block!

# 循環沒有被break打斷(即沒有找到公因數)時,才會執行else塊,說明兩個數互質。
a = 4
b = 9
for i in range(2, min(a, b) + 1):
    print('Testing', i)
    if a % i == 0 and b % i == 0:
        print('Not coprime')
        break
else:
    print('Coprime')
>>>
Testing 2
Testing 3
Testing 4
Coprime

#(上面的場景適用,但實際不推薦使用。寫一個helper函數來做計算更合適。)
def coprime(a, b):
    """Return True when no integer in 2..min(a, b) divides both a and b.

    Returns False as soon as a common divisor is found.
    """
    upper = min(a, b)
    for divisor in range(2, upper + 1):
        divides_both = a % divisor == 0 and b % divisor == 0
        if divides_both:
            return False
    return True
assert coprime(4, 9)
assert not coprime(3, 6)

# 或者通過一個變量來表示是否互質。
def coprime_alternate(a, b):
    """Return True when no integer in 2..min(a, b) divides both a and b.

    Same contract as an early-return search, but the answer is tracked in
    a result variable and the loop is left with ``break``.
    """
    result = True
    for candidate in range(2, min(a, b) + 1):
        if a % candidate != 0 or b % candidate != 0:
            continue
        # Found a common divisor: record the answer and stop searching.
        result = False
        break
    return result
assert coprime_alternate(4, 9)
assert not coprime_alternate(3, 6)

通過helper函數可以提高可讀性(相比for/else語句)。


  • Item10:避免重復賦值表達式時可讀性差的問題(walrus操作符python3.8之后的語法,因為“:=”像眼睛和長牙,所以就叫walrus。)
# 水果籃子里面有什么
fresh_fruit = {
    'apple': 10,
    'banana': 8,
    'lemon': 5,
}
# 做水果汁
def make_lemonade(count):
    ...
def out_of_stock():
    ...
# 原實現-1
count = fresh_fruit.get('lemon', 0)
if count:
    make_lemonade(count)
else:
    out_of_stock()
# 實際count只在if這一塊使用到,放到if前,似乎有點放大了作用域。
# 使用walrus重寫了上面的片段,實現-2
if count := fresh_fruit.get('lemon', 0):
    make_lemonade(count)
else:
    out_of_stock()
def make_cider(count):
    ...
count = fresh_fruit.get('apple', 0)
if count >= 4:
    make_cider(count)
else:
    out_of_stock()

# 同樣用walrus操作符重寫片段。
if (count := fresh_fruit.get('apple', 0)) >= 4:
    make_cider(count)
else:
    out_of_stock()
def slice_bananas(count):
    ...
class OutOfBananas(Exception):
    pass
def make_smoothies(count):
    ...
pieces = 0
count = fresh_fruit.get('banana', 0)
if count >= 2:
    pieces = slice_bananas(count)
try:
    smoothies = make_smoothies(pieces)
except OutOfBananas:
    out_of_stock()

# 為了強調pieces這個變量的定義,也可以把它的賦值分別放進if/else的兩個分支里。
count = fresh_fruit.get('banana', 0)
if count >= 2:
    pieces = slice_bananas(count)
else:
    pieces = 0
try:
    smoothies = make_smoothies(pieces)
except OutOfBananas:
    out_of_stock()

# 用walrus繼續來重寫
pieces = 0
if (count := fresh_fruit.get('banana', 0)) >= 2:
    pieces = slice_bananas(count)
try:
    smoothies = make_smoothies(pieces)
except OutOfBananas:
    out_of_stock()

if (count := fresh_fruit.get('banana', 0)) >= 2:
    pieces = slice_bananas(count)
else:
    pieces = 0
try:
    smoothies = make_smoothies(pieces)
except OutOfBananas:
    out_of_stock()
# if/else實現switch的可讀性比較差
# Nested if/else emulating a switch: prefer bananas, then apples, then
# lemons, and fall back to 'Nothing' when none is available in quantity.
count = fresh_fruit.get('banana', 0)
if count >= 2:
    pieces = slice_bananas(count)
    to_enjoy = make_smoothies(pieces)
else:
    count = fresh_fruit.get('apple', 0)
    if count >= 4:
        to_enjoy = make_cider(count)
    else:
        count = fresh_fruit.get('lemon', 0)
        if count:
            to_enjoy = make_lemonade(count)
        else:
            to_enjoy = 'Nothing'

# 用walrus來配合實現就稍微好一點
if (count := fresh_fruit.get('banana', 0)) >= 2:
    pieces = slice_bananas(count)
    to_enjoy = make_smoothies(pieces)
elif (count := fresh_fruit.get('apple', 0)) >= 4:
    to_enjoy = make_cider(count)
elif count := fresh_fruit.get('lemon', 0):
    to_enjoy = make_lemonade(count)
else:
    to_enjoy = 'Nothing'
# while循環
def pick_fruit():
    ...
def make_juice(fruit, count):
    ...
bottles = []
fresh_fruit = pick_fruit()
while fresh_fruit:
    for fruit, count in fresh_fruit.items():
        batch = make_juice(fruit, count)
        bottles.extend(batch)
    fresh_fruit = pick_fruit()

# 整體為loop-and-a-half的結構。
bottles = []
while True:                    # Loop
    fresh_fruit = pick_fruit()
    if not fresh_fruit:        # And a half
        break
    for fruit, count in fresh_fruit.items():
        batch = make_juice(fruit, count)
        bottles.extend(batch)

# 可以通過walrus表達式來重建,提升可讀性。
bottles = []
while fresh_fruit := pick_fruit():
    for fruit, count in fresh_fruit.items():
        batch = make_juice(fruit, count)
        bottles.extend(batch)

列表和字典

一個常見的方式是用list來處理序列相關的工作。
dict是list的一個自然補充。字典(鍵值對的映射)也被叫做關聯數組或者哈希表,提供常數級的賦值和訪問的均攤時間復雜度。

  • Item11:知道如何去切分序列
a = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
print('Middle two:  ', a[3:5])
print('All but ends:', a[1:7])
>>>
Middle two:   ['d', 'e']
All but ends: ['b', 'c', 'd', 'e', 'f', 'g']

# 為了提高可讀性,開始為0或者結束為len應該省略。
assert a[:5] == a[0:5]
assert a[5:] == a[5:len(a)]

a[:]      # ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
a[:5]     # ['a', 'b', 'c', 'd', 'e']
a[:-1]    # ['a', 'b', 'c', 'd', 'e', 'f', 'g']
a[4:]     #                     ['e', 'f', 'g', 'h']
a[-3:]    #                          ['f', 'g', 'h']
a[2:5]    #           ['c', 'd', 'e']
a[2:-1]   #           ['c', 'd', 'e', 'f', 'g']
a[-3:-1]  #                          ['f', 'g']

# 切片會避免一些問題。
first_twenty_items = a[:20]
last_twenty_items = a[-20:]

# 比如取到不存在的下標:
a[20]
>>>
Traceback ...
IndexError: list index out of range
# 可以通過切片來生成新的列表,不過這個列表是淺拷貝的新列表。
a = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
b = a[3:]
print('Before:   ', b)
b[1] = 99
print('After:    ', b)
print('No change:', a)
>>>
Before:    ['d', 'e', 'f', 'g', 'h']
After:     ['d', 99, 'f', 'g', 'h']
No change: ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
# 賦值長度可以不一樣,但是最終會收縮或者伸長。
print('Before ', a)
a[2:7] = [99, 22, 14]
print('After  ', a)
>>>
Before  ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
After   ['a', 'b', 99, 22, 14, 'h']


print('Before ', a)
a[2:3] = [47, 11]
print('After  ', a)
>>>
Before  ['a', 'b', 99, 22, 14, 'h']
After   ['a', 'b', 47, 11, 22, 14, 'h']
# 淺拷貝的問題,所以元素相同,但是對應的指針不同。
b = a[:]
assert b == a and b is not a
# 再次加深淺拷貝的印象,如果使用切片來賦值,最終是替換了指向的數值,而不是給一個新的列表:
b = a
print('Before a', a)
print('Before b', b)
a[:] = [101, 102, 103]
assert a is b             # Still the same list object
print('After a ', a)      # Now has different contents
print('After b ', b)      # Same list, so same contents as a
>>>
Before a ['a', 'b', 47, 11, 22, 14, 'h']
Before b ['a', 'b', 47, 11, 22, 14, 'h']
After a  [101, 102, 103]
After b  [101, 102, 103]

  • Item12:避免在一個表達式中切分和步幅(striding)操作
    列表的切片語法:somelist[start:end:stride],建議用正的stride的同時,避免用start和end下標。
# 利用切片來奇數和偶數遍歷
x = ['red', 'orange', 'yellow', 'green', 'blue', 'purple']
odds = x[::2]
evens = x[1::2]
print(odds)
print(evens)
>>>
['red', 'yellow', 'blue']
['orange', 'green', 'purple']
# 利用切片striding為-1來反向遍歷(bytes和unicode的字符串都可以)
x = b'mongoose'
y = x[::-1]
print(y)
>>>
b'esoognom'

x = '壽司'
y = x[::-1]
print(y)
>>>
司壽

# 但是先編碼成UTF-8的bytes再反轉就不行(多字節字符的字節順序被打亂,無法解碼):
w = '壽司'
x = w.encode('utf-8')
y = x[::-1]
z = y.decode('utf-8')
>>>
Traceback ...
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb8 in position 0: invalid start byte

-1很有用嘛?看看下面的例子:

x = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
x[::2]   # ['a', 'c', 'e', 'g']
x[::-2]  # ['h', 'f', 'd', 'b']

# 如果結合起來呢?
x[2::2]     # ['c', 'e', 'g']
x[-2::-2]   # ['g', 'e', 'c', 'a']
x[-2:2:-2]  # ['g', 'e']
x[2:2:-2]   # []

由于語法上的密度較大,可讀性降低。可以先striding再slicing。(額外創建了淺拷貝,如果想優化,可以考慮itertools的islice方法)

y = x[::2]   # ['a', 'c', 'e', 'g']
z = y[1:-1]  # ['c', 'e']

  • Item13:使用Catch-All Unpacking(捕捉-全部的拆包)而不是切片。
# 拆包的時候,變量的個數必須與元素的個數一致。
car_ages = [0, 9, 4, 8, 7, 20, 19, 1, 6, 15]
car_ages_descending = sorted(car_ages, reverse=True)
oldest, second_oldest = car_ages_descending
>>>
Traceback ...
ValueError: too many values to unpack (expected 2)

# 當然可以通過下標來訪問,但是比較麻煩。
oldest = car_ages_descending[0]
second_oldest = car_ages_descending[1]
others = car_ages_descending[2:]
print(oldest, second_oldest, others)
>>>
20 19 [15, 9, 8, 7, 6, 4, 1, 0]

# 可以用帶星表達式(starred expression)來接收拆包的結果。
oldest, second_oldest, *others = car_ages_descending
print(oldest, second_oldest, others)
>>>
20 19 [15, 9, 8, 7, 6, 4, 1, 0]

oldest, *others, youngest = car_ages_descending
print(oldest, youngest, others)
*others, second_youngest, youngest = car_ages_descending
print(youngest, second_youngest, others)
>>>
20 0 [19, 15, 9, 8, 7, 6, 4, 1]
0 1 [20, 19, 15, 9, 8, 7, 6, 4]

# *表達式不能單獨作為賦值目標(必須出現在列表或元組形式的拆包里)。
*others = car_ages_descending
>>>
Traceback ...
SyntaxError: starred assignment target must be in a list or tuple
# 同一層的拆包里也不能出現多個*表達式。
first, *middle, *second_middle, last = [1, 2, 3, 4]
>>>
Traceback ...
SyntaxError: two starred expressions in assignment

# 可以通過下面的例子提供對*表達式的直覺。
car_inventory = {
    'Downtown': ('Silver Shadow', 'Pinto', 'DMC'),
    'Airport': ('Skyline', 'Viper', 'Gremlin', 'Nova'),
}
((loc1, (best1, *rest1)),
 (loc2, (best2, *rest2))) = car_inventory.items()
print(f'Best at {loc1} is {best1}, {len(rest1)} others')
print(f'Best at {loc2} is {best2}, {len(rest2)} others')
>>>
Best at Downtown is Silver Shadow, 2 others
Best at Airport is Skyline, 3 others

如果被拆包的部分比變量短,最終得到的是空列表。

short_list = [1, 2]
first, second, *rest = short_list
print(first, second, rest)
>>>
1 2 []

任意迭代器可以拆包

it = iter(range(1, 3))
first, second = it
print(f'{first} and {second}')
>>>
1 and 2

最后再通過處理CSV表格的例子來加深拆包和*表達式的使用:

def generate_csv():
    yield ('Date', 'Make', 'Model', 'Year', 'Price')
    ...

# 不用*表達式拆行
all_csv_rows = list(generate_csv())
header = all_csv_rows[0]
rows = all_csv_rows[1:]
print('CSV Header:', header)
print('Row count: ', len(rows))
>>>
CSV Header: ('Date', 'Make', 'Model', 'Year', 'Price')
Row count:  200

# 用*表達式拆行。
it = generate_csv()
header, *rows = it
print('CSV Header:', header)
print('Row count: ', len(rows))
>>>
CSV Header: ('Date', 'Make', 'Model', 'Year', 'Price')
Row count:  200

小心*號表達式最終爆內存(因為返回的是一個list)


  • Item14:通過復雜的標準和key參數來排序
    sort可以對列表進行排序。
numbers = [93, 86, 11, 68, 70]
numbers.sort()
print(numbers)
>>>
[11, 68, 70, 86, 93]
class Tool:
    """A named tool with a weight.

    Defines no comparison methods, so instances can only be sorted by
    passing an explicit ``key`` function.
    """

    def __init__(self, name, weight):
        self.name, self.weight = name, weight

    def __repr__(self):
        name, weight = self.name, self.weight
        return f'Tool({name!r}, {weight})'
tools = [
    Tool('level', 3.5),
    Tool('hammer', 1.25),
    Tool('screwdriver', 0.5),
    Tool('chisel', 0.25),
]

# 沒有指定對比的key,排序失效
tools.sort()
>>>
Traceback ...
TypeError: '<' not supported between instances of 'Tool' and
'Tool'

# 指定了使用Tool的name來排序。
print('Unsorted:', repr(tools))
tools.sort(key=lambda x: x.name)
print('\nSorted: ', tools)
>>>
Unsorted: [Tool('level',        3.5),
           Tool('hammer',       1.25),
           Tool('screwdriver',  0.5),
           Tool('chisel',       0.25)]
Sorted: [Tool('chisel',         0.25),
         Tool('hammer',         1.25),
         Tool('level',          3.5),
         Tool('screwdriver',    0.5)]

# 當然也可以指定用重量(weight)來排序。
tools.sort(key=lambda x: x.weight)
print('By weight:', tools)
>>>
By weight: [Tool('chisel',      0.25),
            Tool('screwdriver', 0.5),
            Tool('hammer',      1.25),
            Tool('level',       3.5)]
# 此處為了確保按照字母順序排序,所以轉成了小寫(lower())
places = ['home', 'work', 'New York', 'Paris']
places.sort()
print('Case sensitive: ', places)
places.sort(key=lambda x: x.lower())
print('Case insensitive:', places)
>>>
Case sensitive: ['New York', 'Paris', 'home', 'work']
Case insensitive: ['home', 'New York', 'Paris', 'work']

同時按多個條件排序怎么操作?最簡單的方式是用元組作為key(元組默認支持自然排序,也就是實現了__lt__等sort需要的比較方法,并且會從左到右逐個元素比較)。

saw = (5, 'circular saw')
jackhammer = (40, 'jackhammer')
assert not (jackhammer < saw) # Matches expectations

drill = (4, 'drill')
sander = (4, 'sander')
assert drill[0] == sander[0] # Same weight
assert drill[1] < sander[1]  # Alphabetically less
assert drill < sander        # Thus, drill comes first
power_tools = [
    Tool('drill', 4),
    Tool('circular saw', 5),
    Tool('jackhammer', 40),
    Tool('sander', 4),
]
power_tools.sort(key=lambda x: (x.weight, x.name))
print(power_tools)
>>>
[Tool('drill',        4),
 Tool('sander',       4),
 Tool('circular saw', 5),
 Tool('jackhammer',   40)]

# 當然可以指定reverse來使所有key逆序。
# reverse=True applies to the whole key tuple, so every criterion
# (weight, then name) is sorted in descending order.
power_tools.sort(key=lambda x: (x.weight, x.name),
                 reverse=True)  # Makes all criteria descending
print(power_tools)
>>>
[Tool('jackhammer',   40),
 Tool('circular saw', 5),
 Tool('sander',       4),
 Tool('drill',        4)]

# 如果一部分需要逆序,一部分正序,怎么辦?(一元負號可以稍微解決這個問題,但是其不支持所有類型)
power_tools.sort(key=lambda x: (-x.weight, x.name))
print(power_tools)
>>>
[Tool('jackhammer',   40),
 Tool('circular saw', 5),
 Tool('drill',        4),
 Tool('sander',       4)]

power_tools.sort(key=lambda x: (x.weight, -x.name),
                 reverse=True)
>>>
Traceback ...
TypeError: bad operand type for unary -: 'str'

迫不得已才需要用到多個sort的組合。

power_tools.sort(key=lambda x: x.name) # Name ascending
power_tools.sort(key=lambda x: x.weight, # Weight descending
                 reverse=True)
print(power_tools)

>>>
[Tool('jackhammer',   40),
 Tool('circular saw', 5),
 Tool('drill',        4),
 Tool('sander',       4)]

因為sort是穩定排序(相等的元素保持原有的相對順序),所以要先按優先級最低的name排序,再按優先級最高的weight排序,才能得到最終的結果。如果把兩次排序的順序反過來,得到的就是以name為主、weight為輔的結果,所以順序要根據需求來定。


  • Item15:注意dict的插入順序

Python 3.5及更早的版本中,迭代一個dict的時候,順序是任意的,也就是和原本插入的順序不一致。這個特性使得測試樣例比較難以復現,難以debug。(主要是由于哈希表的實現用了內置的hash函數和解釋器啟動時設定的隨機種子)

# Python 3.5
baby_names = {
    'cat': 'kitten',
    'dog': 'puppy',
}
print(baby_names)
>>>
{'dog': 'puppy', 'cat': 'kitten'}

從3.6開始,dict會保留插入順序(3.7起正式寫入語言規範):

baby_names = {
    'cat': 'kitten',
    'dog': 'puppy',
}
print(baby_names)
>>>
{'cat': 'kitten', 'dog': 'puppy'}

因此,3.6之前的依賴于dict的方法(keys, values, items, popitem),都會有這種驚喜存在:

# Python 3.5
print(list(baby_names.keys()))
print(list(baby_names.values()))
print(list(baby_names.items()))
print(baby_names.popitem())  # Randomly chooses an item

>>>
['dog', 'cat']
['puppy', 'kitten']
[('dog', 'puppy'), ('cat', 'kitten')]
('dog', 'puppy')
# 3.6之后
print(list(baby_names.keys()))
print(list(baby_names.values()))
print(list(baby_names.items()))
print(baby_names.popitem()) # Last item inserted
>>>
['cat', 'dog']
['kitten', 'puppy']
[('cat', 'kitten'), ('dog', 'puppy')]
('dog', 'puppy')

比如用**kwargs捕捉所有鍵值對,由于順序問題,難以debug。

# Python 3.5
def my_func(**kwargs):
    for key, value in kwargs.items():
        print('%s = %s' % (key, value))
my_func(goose='gosling', kangaroo='joey')
>>>
kangaroo = joey
goose = gosling

def my_func(**kwargs):
    for key, value in kwargs.items():
        print(f'{key} = {value}')
my_func(goose='gosling', kangaroo='joey')
>>>
goose = gosling
kangaroo = joey

類也用dict作為實例字典。早期版本也是存在順序問題。

# Python 3.5
class MyClass:
    def __init__(self):
        self.alligator = 'hatchling'
        self.elephant = 'calf'
a = MyClass()
for key, value in a.__dict__.items():
    print('%s = %s' % (key, value))
>>>
elephant = calf
alligator = hatchling

# 之后的版本
class MyClass:
    def __init__(self):
        self.alligator = 'hatchling'
        self.elephant = 'calf'
a = MyClass()
for key, value in a.__dict__.items():
    print(f'{key} = {value}')
>>>
alligator = hatchling
elephant = calf

盡管從3.7開始,dict和collections的OrderedDict的表現相似,但是如果經常插入和popitem(比如實現LRU緩存),OrderedDict可能比dict更適合。

再看一個例子。比如現在有動物的投票數字典,然后需求是得到投票數最多的動物,作為贏家,那么可以實現如下:

votes = {
    'otter': 1281,
    'polar bear': 587,
    'fox': 863,
}
def populate_ranks(votes, ranks):
    """Fill *ranks* with each name's 1-based rank by vote count.

    Names are inserted into *ranks* from most votes to fewest, so the
    insertion order of *ranks* is the ranking order. Returns None.
    """
    # Order the names by their vote count, highest first.
    ordered = sorted(votes, key=votes.get, reverse=True)
    for position, name in enumerate(ordered, start=1):
        ranks[name] = position
def get_winner(ranks):
    """Return the first key produced when iterating over *ranks*.

    Raises StopIteration when *ranks* is empty.
    """
    keys = iter(ranks)
    first_key = next(keys)
    return first_key

ranks = {}
populate_ranks(votes, ranks)
print(ranks)
winner = get_winner(ranks)
print(winner)
>>>
{'otter': 1, 'fox': 2, 'polar bear': 3}
otter

但是,現在需求變了,現在要的不是投票數順序來遍歷,而是名字的順序來遍歷了。此時可以用collections.abc來定義一個新的字典類(引入了dict-like的類型,但是會引入一些奇怪的bugs):

from collections.abc import MutableMapping
class SortedDict(MutableMapping):
    """Dict-like container whose iteration yields keys in sorted order.

    Items are stored in the plain dict ``self.data``; only iteration over
    the SortedDict itself is sorted, ``self.data`` keeps its own order.
    """

    def __init__(self):
        self.data = {}

    def __len__(self):
        return len(self.data)

    def __iter__(self):
        # Sort a snapshot of the keys, then hand them out one at a time.
        yield from sorted(self.data)

    def __getitem__(self, key):
        return self.data[key]

    def __setitem__(self, key, value):
        self.data[key] = value

    def __delitem__(self, key):
        del self.data[key]

sorted_ranks = SortedDict()
populate_ranks(votes, sorted_ranks)
print(sorted_ranks.data)
winner = get_winner(sorted_ranks)
print(winner)
>>>
{'otter': 1, 'fox': 2, 'polar bear': 3}
fox

但是,這里存在一個問題:get_winner默認假設傳入字典的迭代順序與populate_ranks的插入順序一致(即排名第一的鍵最先被迭代到),而SortedDict按鍵的字母順序迭代,打破了這個假設。可以通過:1)修改get_winner函數,使其不依賴迭代順序,或者2)在參數不是dict實例時直接拋出異常,或者3)用類型標注顯式限制參數的類型(需要用mypy --strict做靜態檢查)來解決這個問題。

def get_winner(ranks):
    for name, rank in ranks.items():
        if rank == 1:
            return name
winner = get_winner(sorted_ranks)
print(winner)
>>>
otter

def get_winner(ranks):
    if not isinstance(ranks, dict):
        raise TypeError('must provide a dict instance')
    return next(iter(ranks))
get_winner(sorted_ranks)
>>>
Traceback ...
TypeError: must provide a dict instance

from typing import Dict, MutableMapping
def populate_ranks(votes: Dict[str, int],
                   ranks: Dict[str, int]) -> None:
    names = list(votes.keys())
    names.sort(key=votes.get, reverse=True)
    for i, name in enumerate(names, 1):
        ranks[name] = i
def get_winner(ranks: Dict[str, int]) -> str:
    return next(iter(ranks))
class SortedDict(MutableMapping[str, int]):
    ...
votes = {
    'otter': 1281,
    'polar bear': 587,
    'fox': 863,
}
sorted_ranks = SortedDict()
populate_ranks(votes, sorted_ranks)
print(sorted_ranks.data)
winner = get_winner(sorted_ranks)
print(winner)

$ python3 -m mypy --strict example.py
.../example.py:48: error: Argument 2 to "populate_ranks" has incompatible type "SortedDict"; expected "Dict[str, int]"
.../example.py:50: error: Argument 1 to "get_winner" has incompatible type "SortedDict"; expected "Dict[str, int]"
©著作權歸作者所有,轉載或內容合作請聯系作者
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

推薦閱讀更多精彩內容