banner
andrewji8

Being towards death

Heed not to the tree-rustling and leaf-lashing rain, Why not stroll along, whistle and sing under its rein. Lighter and better suited than horses are straw sandals and a bamboo staff, Who's afraid? A palm-leaf plaited cape provides enough to misty weather in life sustain. A thorny spring breeze sobers up the spirit, I feel a slight chill, The setting sun over the mountain offers greetings still. Looking back over the bleak passage survived, The return in time Shall not be affected by windswept rain or shine.
telegram
twitter
github

Python retrieves Douyin search results and batch downloads watermark-free videos.

Install playwright:

pip install playwright

Complete code:


import re
import os
import asyncio
import requests
from playwright.async_api import async_playwright

def save_douyin_videos(keyword, file_name, video_url):
    folder_name = keyword  # Use the keyword to name the folder

    if not os.path.exists(folder_name):
        os.makedirs(folder_name)

    file_path = os.path.join(folder_name, filter_filename(file_name).replace("...展开","") + ".mp4")
    with open(file_path, 'wb') as f:
        response = requests.get(video_url, stream=True)
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
def filter_filename(filename):
    filtered_filename = re.sub(r'[\\/:*?"<>| ]+', '', filename)
    return filtered_filename

async def search_douyin_videos(keyword, scroll_count):
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        page = await browser.new_page()
        await page.set_viewport_size({"width": 1280, "height": 800})
        await page.goto('https://www.douyin.com/search/搜索')
        await asyncio.sleep(3)
        await asyncio.sleep(3)
        # Enter "python" in the input box
        await page.fill('input', keyword+"视频")
        # Click the search button
        await page.click('.rB8dMXOc')
        await asyncio.sleep(3)
        
        links = await page.query_selector_all('.BL9IYM4m')
        arr = []
        for _ in range(scroll_count):
            xgplayers = await page.query_selector_all('.xgplayer')
            for player in xgplayers:
                video_src = await player.inner_html()
                arr.append(video_src)


            await page.evaluate('window.scrollBy(0, window.innerHeight)')
            await asyncio.sleep(2)
        
        titles = await page.query_selector_all('.KxCuain0')
        print(len(arr))
        print(len(titles))
        for i in range(len(arr)):
            get_title = await titles[i].evaluate('(element) => element.textContent')
            video_src_list = re.findall(r'<video.*?src=["\'](.*?)["\']', arr[i])
            for video_src in video_src_list:
                print(f"Title: {get_title}")
                print(f"Video Source: https:{video_src}")
                save_douyin_videos(keyword, get_title, "https:" + video_src)
                print("-----------")

        await browser.close()

# Call the function to perform the search
asyncio.run(search_douyin_videos('浅跳一下', 20))#Keyword and number of videos to download

Output:

image

Loading...
Ownership of this post data is guaranteed by blockchain and smart contracts to the creator alone.