# File: devops/docker/stash/scripts/scrapers/JavBus_en/JavBus_en.yml
# Last modified: 2025-11-14 16:53:08 +08:00
# Listing metadata: 111 lines, 3.6 KiB, YAML

# Name of this scraper definition.
name: Javbus_en
# Look a scene up from its file name: the name is rewritten by the
# queryURLReplace rules below and substituted for {filename} in queryURL;
# the resulting page is parsed with the `sceneScraper` definition.
sceneByFragment:
  action: scrapeXPath
  queryURL: https://www.javbus.com/en/{filename}
  queryURLReplace:
    filename:
      # Remove a "-JG<digit>" marker from the name.
      - regex: '-JG\d'
        with: ""
      # Keep only capture group 2: a run of letters/hyphens followed by
      # digits. Any leading text ending in a non-alphanumeric character is
      # dropped, and (.+) requires at least one trailing character after the
      # digits, which is dropped as well.
      - regex: '(.*[^a-zA-Z0-9])*([a-zA-Z-]+\d+)(.+)'
        with: $2
  scraper: sceneScraper
# Scrape a scene from a page whose URL matches one of the entries below,
# using the `sceneScraper` definition.
sceneByURL:
  - action: scrapeXPath
    url:
      - https://www.javbus.com/en
      - https://www.seejav.bid
      - https://www.cdnbus.lol
      - https://www.dmmbus.lol
      - https://www.seedmm.cfd
    scraper: sceneScraper
# Search scenes by free text: the query replaces {} in queryURL and the
# result page is parsed with the `sceneSearch` definition.
sceneByName:
  action: scrapeXPath
  queryURL: https://www.javbus.com/en/search/{}&type=&parent=ce
  scraper: sceneSearch
# Fetch the page at {url} and parse it with the `sceneScraper` definition.
# NOTE(review): presumably {url} is the URL of a result chosen from
# sceneByName — confirm against the Stash scraper documentation.
sceneByQueryFragment:
  action: scrapeXPath
  queryURL: "{url}"
  scraper: sceneScraper
# Scrape a performer from a page whose URL matches one of the entries below,
# using the `performerScraper` definition.
performerByURL:
  - action: scrapeXPath
    url:
      - https://www.javbus.com/en
      - https://www.seejav.bid
      - https://www.cdnbus.lol
      - https://www.dmmbus.lol
      - https://www.seedmm.cfd
    scraper: performerScraper
# Search performers by name: the query replaces {} in queryURL and the
# result page is parsed with the `performerSearch` definition.
performerByName:
  action: scrapeXPath
  queryURL: https://www.javbus.com/en/searchstar/{}&type=&parent=ce
  scraper: performerSearch
# XPath definitions referenced by the `scraper:` key of each action.
xPathScrapers:
  # Entries of the performer search result page.
  performerSearch:
    performer:
      Name: //span[@class="mleft"]
      URLs: //*[@id="waterfall"]/div/a/@href

  # Fields of a single performer page.
  performerScraper:
    performer:
      Name: //*[@id="waterfall"]/div[1]/div/div[2]/span
      Birthdate:
        selector: //*[@id="waterfall"]/div[1]/div/div[2]/p[contains(text(), 'D.O.B')]
        postProcess:
          - replace:
              # Drop everything up to and including the first space,
              # i.e. the label in front of the value.
              - regex: '^(.*? ){1}'
                with: ""
      Height:
        selector: //*[@id="waterfall"]/div[1]/div/div[2]/p[contains(text(), 'Height')]
        postProcess:
          - replace:
              # Drop everything up to and including the first space.
              - regex: '^(.*? ){1}'
                with: ""
      # Disabled. As written, the expression below concatenates four paths
      # without a `|` union operator and matches on Chinese-language labels.
      # NOTE(review): it would need rewriting before being re-enabled.
      # Measurements: //*[@id="waterfall"]/div[1]/div/div[2]/p[contains(text(), '胸圍')]//*[@id="waterfall"]/div[1]/div/div[2]/p[contains(text(), '腰圍')]//*[@id="waterfall"]/div[1]/div/div[2]/p[contains(text(), '臀圍')]//*[@id="waterfall"]/div[1]/div/div[2]/p[contains(text(), '罩杯')]
      Image:
        selector: //*[@id="waterfall"]/div[1]/div/div[1]/img/@src
        postProcess:
          - replace:
              # Prepend the site prefix to the scraped `src` value.
              # NOTE(review): this prefix ends in "/en" while the scene Image
              # prefix ends in "/" — confirm both yield valid image URLs.
              - regex: '^'
                with: https://www.javbus.com/en

  # Entries of the scene search result page.
  sceneSearch:
    scene:
      Title: //div[@class="photo-info"]/span
      URL: //*[@id="waterfall"]/div/a/@href

  # Fields of a single scene page.
  sceneScraper:
    scene:
      Title:
        # Text of the second <span> in the info row labelled 'ID'.
        selector: //div[@class="col-md-3 info"]//span[contains(text(), 'ID')]/../span[2]/text()
      URL:
        # NOTE(review): reads the hreflang="zh" alternate link although the
        # query URLs use the /en path — confirm this is intended.
        selector: /html/head/link[@hreflang="zh"]/@href
      Date:
        selector: //div[@class="col-md-3 info"]//span[contains(normalize-space(text()), 'Release Date')]/../text()
        # Previous variant without normalize-space():
        # selector: //div[@class="col-md-3 info"]//span[contains(text(), 'Release Date')]/../text()
      Details:
        selector: //div[@class="container"]/h3/text()
        postProcess:
          - replace:
              # Drop everything up to and including the first space of the
              # heading text.
              - regex: '^(.*? ){1}'
                with: ""
      Tags:
        Name: //div[@class="col-md-3 info"]//span[@class="genre"]/label/a/text()
      Performers:
        Name: //div[@class="star-name"]/a
      # NOTE(review): every other scene field reads from
      # div[@class="col-md-3 info"]; this selector targets
      # div[@id='video_director'] — confirm that element exists on the page.
      Director: //div[@id='video_director']/table/tbody/tr/td[@class="text"]/span/a/text()
      Image:
        selector: //div[@class="row movie"]/div[@class="col-md-9 screencap"]/a[@class="bigImage"]/img/@src
        postProcess:
          - replace:
              # Prepend the site prefix to the scraped `src` value.
              # NOTE(review): if `src` already starts with "/", the result
              # contains "//" after the host — confirm the site accepts it.
              - regex: '^'
                with: https://www.javbus.com/
      Studio:
        Name: //div[@class="col-md-3 info"]//span[contains(text(), 'Label')]/../a/text()
# Request settings: HTTP headers attached to the scraper's requests.
driver:
  headers:
    - Key: User-Agent
      Value: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36
    # NOTE(review): zh-cn is listed before en-US although the query URLs use
    # the /en path — confirm this is the intended language preference.
    - Key: Accept-Language
      Value: zh-cn,en-US
# Last Updated September 17, 2025