from threading import Thread


def func():
    """Print ``Child :1`` through ``Child :999``, one entry per line.

    This is the callable handed to ``Thread(target=...)`` below.
    """
    for i in range(1, 1000):
        print(f"Child :{i}")


if __name__ == '__main__':
    # t is the thread object; the ``target`` argument of Thread sets the
    # work the thread will perform.
    t = Thread(target=func)
    # start() only marks the thread as started; when it actually runs is
    # decided by the CPU / scheduler.
    t.start()
    # The main thread does its own counting while the child thread runs,
    # so the "Base" and "Child" lines may interleave.
    for i in range(1, 1000):
        print(f"Base :{i}")
类
1 2 3 4 5 6 7 8 9 10 11 12 13
from threading import Thread
class Mythread(Thread):  # inherits from Thread
    """Thread subclass whose work is printing ``Child :1`` .. ``Child :999``."""

    def run(self):
        """Thread body: print ``Child :1`` through ``Child :999``.

        ``run`` is the fixed hook name; ``start()`` invokes it automatically
        in the new thread.
        """
        for i in range(1, 1000):
            print(f"Child :{i}")
if __name__ == '__main__':
    t = Mythread()
    # Calling t.run() directly would execute the body in the current
    # thread (plain single-threaded execution); start() is what launches
    # the new thread.
    t.start()  # start the thread
    for i in range(1, 1000):
        print(f"Base :{i}")
遇到参数:
方法
1 2 3 4 5 6 7 8 9
from threading import Thread


def func(dj):
    """Print ``dj`` followed by each integer from 1 to 999, one pair per line.

    Args:
        dj: Label printed in front of every number; passed in through the
            ``args`` tuple of ``Thread``.
    """
    for i in range(1, 1000):
        print(dj, i)


if __name__ == '__main__':
    # ``args`` must be supplied as a tuple, hence the trailing comma.
    t1 = Thread(target=func, args=("Jay_Z",))
    t1.start()
    t2 = Thread(target=func, args=("kan_ye",))
    t2.start()
类(继承 Thread 并在 `__init__` 中接收参数;注意:下方示例代码与上面“方法”一节的代码相同)
1 2 3 4 5 6 7 8 9
from threading import Thread


def func(dj):
    """Print ``dj`` and a counter running from 1 to 999, one pair per line.

    Args:
        dj: Label printed before each number; delivered via the ``args``
            tuple given to ``Thread``.
    """
    for i in range(1, 1000):
        print(dj, i)


if __name__ == '__main__':
    # The ``args`` parameter has to be a tuple, so a single argument
    # needs the trailing comma.
    t1 = Thread(target=func, args=("Jay_Z",))
    t1.start()
    t2 = Thread(target=func, args=("kan_ye",))
    t2.start()
多进程
多进程的 API 与多线程基本一致,只是改用 multiprocessing 库中的 Process 类:
1
from multiprocessing import Process
(两者 API 相似只是 Python 为方便使用者所做的设计,线程与进程的底层原理完全不同)
线程池
线程池:一次性开辟若干线程,用户直接向线程池提交任务,任务的调度由线程池负责完成
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
from concurrent.futures import ThreadPoolExecutor

# Analogy: the 50 threads in the pool are 50 people, and the
# ``for ... range(100)`` loop hands out 100 tasks. Each task is counting
# through 1000 numbers; as soon as one person (thread) finishes a task,
# that person can move on to the next one.


def fn(name):
    """Print ``name`` followed by each integer from 0 to 999, one per line.

    Args:
        name: Label printed in front of every number.
    """
    for i in range(1000):
        print(name, i)
if __name__ == '__main__':
    # Create the thread pool (50 worker threads in this example).
    with ThreadPoolExecutor(50) as t:
        for i in range(100):
            t.submit(fn, name=f"线程{i}")
    # Leaving the ``with`` block waits for every submitted task to finish,
    # so execution only continues here once the pool is done.
    print("over")
# 1. How to extract the data from a single page
# 2. Use a thread pool to scrape many pages at the same time
import csv
from concurrent.futures import ThreadPoolExecutor

import requests
from lxml import etree

# Output file and CSV writer shared by download_one_page().
# newline="" is what the csv module documents for files given to csv.writer.
f = open("data.csv", mode="w", encoding="utf-8", newline="")
csvwriter = csv.writer(f)
def download_one_page(url, timeout=10):
    """Download one listing page and write its table rows to the shared CSV.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the server before the request is
            abandoned; without it a stalled connection would block forever.

    Raises:
        IndexError: If the page does not contain a table at the expected
            XPath.
        requests.RequestException: If the HTTP request fails or times out.
    """
    # Fetch the page source; the context manager closes the response and
    # releases its connection.
    with requests.get(url, timeout=timeout) as resp:
        html = etree.HTML(resp.text)
    table = html.xpath("/html/body/div[2]/div[4]/div[1]/table")[0]
    trs = table.xpath("./tr[position()>1]")  # every <tr> after the first
    for tr in trs:
        txt = tr.xpath("./td/text()")
        # Light clean-up: remove backslashes and forward slashes. The row is
        # built as a complete list before it is handed to the module-level
        # writer, which other callers may be using at the same time.
        row = [item.replace("\\", "").replace("/", "") for item in txt]
        # Store the row in the output file.
        csvwriter.writerow(row)
    print(url, "提取完毕!")
if __name__ == '__main__':
    try:
        # Create the thread pool.
        with ThreadPoolExecutor(50) as t:
            for i in range(1, 200):  # 199 * 20 = 3980
                # Submit the download task to the thread pool.
                t.submit(download_one_page, f"http://www.xinfadi.com.cn/marketanalysis/0/list/{i}.shtml")
    finally:
        # The ``with`` block has waited for all tasks by now, so the CSV
        # file can be flushed and closed.
        f.close()
create_task 问题:把协程对象直接传给 asyncio.wait() 会触发警告(DeprecationWarning: The explicit passing of coroutine objects to asyncio.wait() is deprecated since Python 3.8, and scheduled for removal in Python 3.11);解决办法是先用 asyncio.create_task() 把协程包装成 Task,再传给 asyncio.wait()。
# NOTE(review): resp3 and requests come from code that is not shown here;
# resp3 is presumably the response holding the playlist text - confirm.
s = resp3.content.decode("utf-8")
n = 0
# Walk the text line by line (iterating the string itself would yield
# single characters).
for line in s.splitlines():
    line = line.strip()
    # Skip blank lines and lines starting with "#".
    if not line or line.startswith("#"):
        continue
    print(line)
    # The file is opened in binary mode, so write the raw bytes
    # (resp4.content), not the decoded text.
    with requests.get(line) as resp4, open(f"{n}.ts", mode="wb") as f:
        f.write(resp4.content)
    n += 1