PythonでDouyin（中国版TikTok）の検索結果を取得し、ウォーターマークのない動画を一括でダウンロードする

playwright と requests のインストール（playwright はブラウザ本体のインストールも必要です）:

pip install playwright requests
playwright install chromium

完全なコード：


import re
import os
import asyncio
import requests
from playwright.async_api import async_playwright

def save_douyin_videos(keyword, file_name, video_url):
    """Download one video into a folder named after the search keyword.

    The target path is ``<keyword>/<sanitized file_name>.mp4``. The folder is
    created if it does not exist yet.

    Args:
        keyword: Search keyword; used verbatim as the output folder name.
        file_name: Video title; sanitized with ``filter_filename`` before use.
        video_url: Direct URL of the video file to download.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    folder_name = keyword  # one folder per keyword
    os.makedirs(folder_name, exist_ok=True)

    # "...展开" is stripped from the title; presumably it is the "expand"
    # suffix shown on truncated titles -- verify against the page markup.
    base_name = filter_filename(file_name).replace("...展开", "")
    file_path = os.path.join(folder_name, base_name + ".mp4")

    # Request first and only open the file once the response is known to be
    # good, so a failed request does not leave an empty or bogus .mp4 behind.
    # The timeout keeps a stalled connection from hanging the whole run.
    with requests.get(video_url, stream=True, timeout=30) as response:
        response.raise_for_status()
        with open(file_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
def filter_filename(filename):
    """Return *filename* with path-unsafe characters and spaces removed.

    Every backslash, slash, colon, asterisk, question mark, double quote,
    angle bracket, pipe and space character is dropped; everything else is
    kept in its original order.
    """
    unsafe_chars = re.compile(r'[\\/:*?"<>| ]+')
    return unsafe_chars.sub('', filename)

async def search_douyin_videos(keyword, scroll_count):
    """Search Douyin for *keyword* and download the videos in the results.

    Opens a visible Chromium window, submits ``keyword + "视频"`` in the search
    box, then scrolls the result page ``scroll_count`` times while collecting
    the inner HTML of every ``.xgplayer`` element. Each distinct player is
    paired with the title element at the same position, and every
    ``<video src=...>`` found in it is downloaded via ``save_douyin_videos``.

    Args:
        keyword: Search term; also passed on as the download folder name.
        scroll_count: Number of collect-then-scroll passes over the results.

    Note:
        The CSS class selectors are hard-coded; they look auto-generated, so
        presumably they stop matching when the site is rebuilt -- verify
        against the live page if nothing is found.
    """
    # Local import: attribute values inside inner_html() are entity-escaped
    # (e.g. "&" becomes "&amp;"), so they must be unescaped before use as URLs.
    from html import unescape

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        try:
            page = await browser.new_page()
            await page.set_viewport_size({"width": 1280, "height": 800})
            await page.goto('https://www.douyin.com/search/搜索')
            # Fixed wait for the page to settle (same 6 seconds in total as
            # the two consecutive 3-second sleeps it replaces).
            await asyncio.sleep(6)
            # Type the search phrase into the first <input> on the page.
            await page.fill('input', keyword + "视频")
            # Click the search button.
            await page.click('.rB8dMXOc')
            await asyncio.sleep(3)

            # Each pass sees every player currently in the DOM, including ones
            # already collected on earlier passes, so keep only the first
            # occurrence of each distinct player HTML (order preserved).
            seen_html = set()
            arr = []
            for _ in range(scroll_count):
                xgplayers = await page.query_selector_all('.xgplayer')
                for player in xgplayers:
                    player_html = await player.inner_html()
                    if player_html not in seen_html:
                        seen_html.add(player_html)
                        arr.append(player_html)

                await page.evaluate('window.scrollBy(0, window.innerHeight)')
                await asyncio.sleep(2)

            titles = await page.query_selector_all('.KxCuain0')
            print(len(arr))
            print(len(titles))
            # Players and titles are matched purely by position. zip() stops at
            # the shorter list, so a surplus of players cannot raise IndexError.
            for player_html, title_element in zip(arr, titles):
                get_title = await title_element.evaluate('(element) => element.textContent')
                video_src_list = re.findall(r'<video.*?src=["\'](.*?)["\']', player_html)
                for raw_src in video_src_list:
                    video_src = unescape(raw_src)
                    # Protocol-relative sources ("//host/path") need a scheme;
                    # anything else is used as-is instead of being prefixed.
                    if video_src.startswith("//"):
                        video_url = "https:" + video_src
                    else:
                        video_url = video_src
                    print(f"タイトル: {get_title}")
                    print(f"ビデオソース: {video_url}")
                    # NOTE: blocking download inside the coroutine; nothing
                    # else runs concurrently here, so downloads are sequential.
                    save_douyin_videos(keyword, get_title, video_url)
                    print("-----------")
        finally:
            # Close the browser even when a step above fails.
            await browser.close()

# Run the search and download the videos it finds.
asyncio.run(search_douyin_videos('浅跳一下', 20))  # arguments: search keyword, number of scroll passes (not a download count)

出力：