本工具使用 Python 和 Java 两种语言分别实现。
另外,作品 ID(即代码中请求地址里的 1864bz)需要自己获取并修改。
Python版
# -*- coding: utf-8 -*-
import requests
import os
from bs4 import BeautifulSoup
def _extract_between(text, prefix, terminator):
    """Return the text between ``prefix`` and the next ``terminator``.

    Returns None when either ``prefix`` or the following ``terminator``
    cannot be found in ``text``.
    """
    start = text.find(prefix)
    if start == -1:
        return None
    start += len(prefix)
    end = text.find(terminator, start)
    if end == -1:
        return None
    return text[start:end]


def GET_AJAX(URL):
    """Fetch a reader page and pull out the values needed for the image request.

    The page embeds ``MANGABZ_CID``, ``MANGABZ_MID``, ``MANGABZ_VIEWSIGN_DT``
    and ``MANGABZ_VIEWSIGN`` as script variables; they are located with plain
    string searches.

    Args:
        URL: Address of the reader page to fetch.

    Returns:
        A dict with the string values ``CID``, ``MID``, ``DT`` and ``SIGN``
        when the page was fetched and every marker was found; the int ``404``
        when the server answered 404; the int ``0`` for any other status code
        or when a marker is missing from the page.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4381.8 Safari/537.36'}
    # ``proxies`` is the module-level proxy mapping built by the script part.
    doc = requests.get(URL, headers=headers, proxies=proxies)
    if doc.status_code == 404:
        return 404
    if doc.status_code != 200:
        return 0

    html = doc.text
    # The two numeric ids end at ';', the two quoted values end at '"'.
    CID = _extract_between(html, "MANGABZ_CID=", ";")
    MID = _extract_between(html, "MANGABZ_MID=", ";")
    DT = _extract_between(html, "MANGABZ_VIEWSIGN_DT=\"", "\"")
    # The closing quote is searched from the end of the SIGN marker itself,
    # not from an offset derived from the (longer) DT marker.
    SIGN = _extract_between(html, "MANGABZ_VIEWSIGN=\"", "\"")
    if CID is None or MID is None or DT is None or SIGN is None:
        # A page without the expected variables cannot be used; report it
        # through the existing generic error code instead of returning junk.
        return 0

    print("CID:" + str(CID) + " MID: " + str(MID) + " DT:" + DT + " SIGN: " + SIGN)
    return {"CID": str(CID), "MID": str(MID), "DT": DT, "SIGN": SIGN}
def CHAPTER_DOWNLOAD(chapter_code, title, START_PAGE):
    """Download the pages of one chapter, one page at a time.

    Args:
        chapter_code: Chapter code as it appears in the ``/m<code>`` URL.
        title: Chapter title; passed on as the folder name for the images.
        START_PAGE: Page to start from. It is only honoured for the chapter
            whose code equals the module-level ``START_CHAPTER_CODE``; every
            other chapter starts at page 1.
    """
    first_page = int(START_PAGE) if START_CHAPTER_CODE == chapter_code else 1
    chapter_base = "http://www.mangabz.com/m" + chapter_code

    # Pages are requested in order until GET_AJAX reports 404 or an error;
    # 1000 is only an upper bound on the page number.
    for page_no in range(first_page, 1000):
        page_label = str(page_no)
        meta = GET_AJAX(chapter_base + "-p" + page_label)
        if meta == 404:
            print("本章已结束")
            return
        if meta == 0:
            print("出现错误,将跳过")
            return
        print("正在获取->" + title + " 第" + page_label + "页")
        GET_URL(chapter_code, chapter_base + "/chapterimage.ashx", meta, page_label, title)
def GET_URL(CHAPTER, URL, DATA, PAGE, title):
    """Resolve the image address of one page and hand it to ``Download``.

    Args:
        CHAPTER: Chapter code, used to build the ``Referer`` header.
        URL: Address of the chapter's ``chapterimage.ashx`` endpoint.
        DATA: Dict with the keys ``CID``, ``MID``, ``DT`` and ``SIGN``.
        PAGE: Page number as a string.
        title: Chapter title; passed on as the folder name for the image.

    Returns:
        None. When the response does not contain a usable file-name token for
        the page, a message is printed and nothing is downloaded.
    """
    CID = DATA.get("CID")
    MID = DATA.get("MID")
    DT = DATA.get("DT")
    SIGN = DATA.get("SIGN")
    # Query parameters sent to the endpoint.
    params = {'cid': CID, 'page': PAGE, 'key': "", '_cid': CID, '_mid': MID, '_dt': DT, '_sign': SIGN}
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4381.8 Safari/537.36',
        'Referer': 'http://www.mangabz.com/m' + CHAPTER}
    doc = requests.get(URL, params=params, headers=headers, proxies=proxies)
    html = doc.text

    # The image file name is the token starting with "<page>_" and running up
    # to the next '|' in the response text.
    page_number = html.find(PAGE + "_")
    page_end = html.find("|", page_number + 1) if page_number != -1 else -1
    if page_number == -1 or page_end == -1:
        # Without both delimiters the slice would be garbage; skip this page
        # rather than requesting a meaningless URL.
        print("第" + PAGE + "页获取失败,将跳过")
        return

    PAGE_URL = html[page_number:page_end]
    END_URL = "http://image.mangabz.com/2/" + MID + "/" + CID + "/" + PAGE_URL + ".jpg"
    print("图片地址:" + END_URL)
    Download(END_URL, title)
def Download(URL, title):
    """Download one image into the chapter's folder.

    The file is stored as ``<DIR>/<title>/<last path segment of URL>``, where
    ``DIR`` is the module-level save directory. The folder is created when it
    does not exist yet.

    Args:
        URL: Address of the image.
        title: Chapter title, used as the folder name.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4381.8 Safari/537.36'}
    r = requests.get(URL, headers=headers, stream=True, proxies=proxies)
    try:
        img_name = URL[URL.rfind("/") + 1:]
        if r.status_code != 200:
            # Report the failure instead of skipping the page silently.
            print("下载失败:" + img_name + " 状态码:" + str(r.status_code))
            return

        print("正在下载:" + img_name)
        # os.path.join keeps the path valid even when DIR was entered
        # without a trailing separator.
        target_dir = os.path.join(DIR, title)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
            print("已创建" + target_dir)
        # The with-block guarantees the file handle is closed after writing.
        with open(os.path.join(target_dir, img_name), 'wb') as img_file:
            img_file.write(r.content)
        print("下载完成")
    finally:
        # The request was made with stream=True, so the connection has to be
        # released explicitly.
        r.close()
# Defaults used whenever the user just presses Enter at a prompt.
DEF_DIR = "D:/漫画/"
def_ip_add = "127.0.0.1"
def_ip_proxy = "10826"
# Code of the first chapter that gets downloaded; filled in by the chapter
# loop below and read by CHAPTER_DOWNLOAD.
START_CHAPTER_CODE = "0"
START_PAGE = 1

# --- interactive configuration ---------------------------------------------
# An empty answer is falsy, so ``or`` falls back to the default.
ip_add = input("请输入代理IP(默认127.0.0.1):") or def_ip_add
ip_proxy = input("请输入代理端口(默认10826):") or def_ip_proxy
proxies = {
    'http': 'http://' + ip_add + ':' + ip_proxy,
}

# The user enters a 1-based position in the chapter list; it is stored as a
# 0-based index into that list.
chapter_answer = input("请输入从倒数第几章开始(默认从最新的章节开始):")
START_CHAPTER = int(chapter_answer) - 1 if chapter_answer != "" else 0

# Kept as entered (a string) when given; CHAPTER_DOWNLOAD converts it.
START_PAGE = input("请输入从第几页开始(默认从第一页开始):") or 1
DIR = input("请输入保存路径(默认" + DEF_DIR + "):") or DEF_DIR

print("操作成功!正在准备开始")
print("代理:" + ip_add + ":" + ip_proxy + " 开始章节:" + str(START_CHAPTER + 1) + " 开始页数:" + str(START_PAGE) + " 保存路径:" + DIR)

# --- fetch the chapter list --------------------------------------------------
listing = requests.get("http://www.mangabz.com/1864bz/", proxies=proxies)
soup = BeautifulSoup(listing.text, features="html.parser")
chapter_links = soup.find_all('a', class_='detail-list-form-item')

for i in range(START_CHAPTER, len(chapter_links)):
    anchor = str(chapter_links[i])

    # Chapter code: the text between 'href="/m' and the following '/'.
    code_marker = "href=\"/m"
    code_from = anchor.find(code_marker) + len(code_marker)
    chapter_code = anchor[code_from:anchor.find("/", code_from)]
    if i == START_CHAPTER:
        START_CHAPTER_CODE = chapter_code

    # Title: the text between the first '">' and the following '<'.
    title_marker = "\">"
    title_from = anchor.find(title_marker) + len(title_marker)
    title = anchor[title_from:anchor.find("<", title_from)].rstrip()

    print("正在获取->章节代码:" + chapter_code + " 标题:" + title)
    CHAPTER_DOWNLOAD(chapter_code, title, START_PAGE)
JAVA版
import com.alibaba.fastjson.JSONObject;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.io.*;
import java.net.*;
import java.util.Scanner;
/**
 * Command-line downloader: reads the chapter list of one work, then fetches
 * every page image of every chapter through an HTTP proxy and stores the
 * images under {@code DIR/<chapter title>/}.
 */
public class Main {
    /** HTTP proxy used for every request; set in {@link #main}. */
    public static Proxy proxy;
    /** Directory the images are saved to; created on demand. */
    static String DIR = "D:/漫画/";
    /** Zero-based index in the chapter list to start from. */
    public static int START_CHAPTER = 0;
    /** Page to start from in the first downloaded chapter. */
    public static int START_PAGE = 1;
    /** Chapter code of the first downloaded chapter. */
    public static int START_CHAPTER_CODE = 0;

    /**
     * Asks for proxy, start chapter, start page and save path on the console
     * (an empty answer keeps the default), then downloads chapter by chapter.
     */
    public static void main(String[] args) throws InterruptedException {
        int inter = 10826;
        String ip_add = "127.0.0.1";
        Scanner input = new Scanner(System.in);

        System.out.print("请输入代理地址(默认" + ip_add + "):");
        String str = input.nextLine();
        if (!str.isEmpty()) {
            ip_add = str;
        }
        System.out.print("请输入端口(默认" + inter + "):");
        String str1 = input.nextLine();
        if (!str1.isEmpty()) {
            inter = Integer.parseInt(str1);
        }
        System.out.print("请输入从倒数第几章开始(默认从最新的章节开始):");
        String str2 = input.nextLine();
        if (!str2.isEmpty()) {
            // The user enters a 1-based position; the list index is 0-based.
            START_CHAPTER = Integer.parseInt(str2) - 1;
        }
        System.out.print("请输入从第几页开始(默认从第一页开始):");
        String str3 = input.nextLine();
        if (!str3.isEmpty()) {
            START_PAGE = Integer.parseInt(str3);
        }
        System.out.print("请输入保存路径(默认" + DIR + "):");
        String str4 = input.nextLine();
        if (!str4.isEmpty()) {
            DIR = str4;
        }

        System.out.println("操作成功!正在准备开始");
        int s11 = START_CHAPTER + 1;
        System.out.println("代理:" + ip_add + ":" + inter + " 开始章节:" + s11 + " 开始页数:" + START_PAGE + " 保存路径:" + DIR);
        proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(ip_add, inter));

        Document doc;
        try {
            doc = Jsoup.connect("http://www.mangabz.com/1864bz/").proxy(proxy).get();
            Elements html = doc.select("div.detail-list-form-con > a");
            for (int i = START_CHAPTER; i < html.size(); i++) {
                String html1 = String.valueOf(html.get(i));

                // Chapter code: the text between 'href="/m' and the next '/'.
                String chapter_string = "href=\"/m";
                int chapter_number = html1.indexOf(chapter_string, 0);
                String chapter = html1.substring(chapter_number + chapter_string.length(), html1.indexOf("/", chapter_number + chapter_string.length()));

                // Title: the text between 'target="_blank">' and the next " <".
                String title_string = "target=\"_blank\">";
                int title_number = html1.indexOf(title_string, 0);
                String title = html1.substring(title_number + title_string.length(), html1.indexOf(" <", title_number + title_string.length()));

                System.out.println("正在获取->章节代码:" + chapter + " 标题:" + title);
                if (i == START_CHAPTER) {
                    START_CHAPTER_CODE = Integer.parseInt(chapter);
                }
                CHAPTER_DOWNLOAD(chapter, title);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Downloads the pages of one chapter in order.
     *
     * Only the chapter whose code equals {@code START_CHAPTER_CODE} starts at
     * {@code START_PAGE}; every other chapter starts at page 1. A page whose
     * image address cannot be resolved is retried after 10 seconds; after 10
     * consecutive failures the chapter is abandoned.
     *
     * @param chapter chapter code as it appears in the {@code /m<code>} URL
     * @param title   chapter title, used as the folder name
     */
    public static void CHAPTER_DOWNLOAD(String chapter, String title) throws InterruptedException {
        int tag = 0; // consecutive failures for the current page
        int chapter_code = Integer.parseInt(chapter);
        if (START_CHAPTER_CODE != chapter_code) {
            START_PAGE = 1;
        }
        for (int i = START_PAGE; i < 1000; i++) {
            int PAGE = i;
            String URL = "http://www.mangabz.com/m" + chapter + "-p" + PAGE; // reader page for this page number
            String IMG_URL;
            JSONObject data;
            System.out.println("正在获取->" + title + " 第" + PAGE + "页");
            data = GET_AJAX(URL); // values needed for the image request
            if (data != null) {
                String CID = String.valueOf(data.getString("CID"));
                String MID = String.valueOf(data.getString("MID"));
                String DT = String.valueOf(data.getString("DT"));
                String SIGN = String.valueOf(data.getString("SIGN"));
                String GET_JPG_URL = "http://www.mangabz.com/m" + chapter + "/chapterimage.ashx?cid=" + CID + "&page=" + PAGE + "&key=&_cid=" + CID + "&_mid=" + MID + "&_dt=" + DT + "&_sign=" + SIGN;
                IMG_URL = GET_URL(GET_JPG_URL, MID, CID, PAGE); // resolve the image address
                if (tag < 10) {
                    // GET_URL returns null on a request error and "0" when the
                    // page token is missing; both mean "try this page again".
                    if (IMG_URL == null || IMG_URL.equals("0")) {
                        System.out.println("第" + i + "页获取失败,10秒后将重试");
                        Thread.sleep(10000);
                        i = i - 1; // repeat the same page on the next iteration
                        tag++;
                    } else {
                        tag = 0;
                        Download(IMG_URL, title);
                    }
                } else {
                    System.out.println("超时次数过多,已自动结束");
                    break;
                }
            } else {
                System.out.println("本章结束");
                break;
            }
        }
    }

    /**
     * Fetches a reader page and extracts the values needed for the image
     * request from the script variables embedded in it.
     *
     * @param Url address of the reader page
     * @return an object with the keys CID, MID, DT and SIGN, or {@code null}
     *         when the status code is not 200 or the request fails
     */
    public static JSONObject GET_AJAX(String Url) {
        JSONObject data = new JSONObject();
        Document doc;
        int CID = 0;
        int MID = 0;
        String DT = "";
        String SIGN = "";
        try {
            Connection.Response response = Jsoup.connect(Url)
                    .userAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1042.0 Safari/535.21") // browser-like user agent
                    .timeout(10000)
                    .proxy(proxy)
                    .ignoreHttpErrors(true)
                    .execute();
            int statusCode = response.statusCode();
            if (statusCode == 200) {
                doc = response.parse();
                String html = String.valueOf(doc);
                // CID
                String cid_string = "MANGABZ_CID=";
                int cid_number = html.indexOf(cid_string, 0);
                CID = Integer.parseInt(html.substring(cid_number + cid_string.length(), html.indexOf(";", cid_number + 1)));
                // MID
                String mid_string = "MANGABZ_MID=";
                int mid_number = html.indexOf(mid_string, 0);
                MID = Integer.parseInt(html.substring(mid_number + mid_string.length(), html.indexOf(";", mid_number + 1)));
                // DT
                String dt_string = "MANGABZ_VIEWSIGN_DT=\"";
                int dt_number = html.indexOf(dt_string, 0);
                DT = html.substring(dt_number + dt_string.length(), html.indexOf("\";", dt_number + dt_string.length()));
                // SIGN
                String sign_string = "MANGABZ_VIEWSIGN=\"";
                int sign_number = html.indexOf(sign_string, 0);
                SIGN = html.substring(sign_number + sign_string.length(), html.indexOf("\"", sign_number + sign_string.length()));
                System.out.println("CID=" + CID + " MID=" + MID + " DT=" + DT + " SIGN=" + SIGN);
                data.put("CID", CID);
                data.put("MID", MID);
                data.put("DT", DT);
                data.put("SIGN", SIGN);
                return data;
            } else {
                System.out.println("recevied error code : " + statusCode);
                return null;
            }
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Requests the image endpoint and builds the final image address.
     *
     * @param Url  full endpoint address including the query string
     * @param MID  MID value taken from the reader page
     * @param CID  CID value taken from the reader page
     * @param PAGE page number
     * @return the image address; {@code "0"} when the response contains no
     *         usable token for the page; {@code null} when the status code is
     *         not 200 or the request fails
     */
    public static String GET_URL(String Url, String MID, String CID, int PAGE) {
        Document doc;
        String END_URL = null;
        try {
            Connection.Response response = Jsoup.connect(Url)
                    .userAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4381.8 Safari/537.36") // browser-like user agent
                    .referrer("http://www.mangabz.com/m" + CID)
                    .timeout(10000)
                    .proxy(proxy)
                    .ignoreHttpErrors(false)
                    .execute();
            int statusCode = response.statusCode();
            if (statusCode == 200) {
                doc = response.parse();
                String html = String.valueOf(doc);
                // The file name is the token starting with "<page>_" and
                // running up to the next '|'.
                String page_string = PAGE + "_";
                int page_number = html.indexOf(page_string, 0);
                int page_end = page_number == -1 ? -1 : html.indexOf("|", page_number + 1);
                if (page_number != -1 && page_end != -1) {
                    String PAGE_URL = html.substring(page_number, page_end);
                    END_URL = "http://image.mangabz.com/2/" + MID + "/" + CID + "/" + PAGE_URL + ".jpg";
                    System.out.println("图片地址:" + END_URL);
                    return END_URL;
                } else {
                    // Either delimiter missing: report "not found" instead of
                    // letting substring throw.
                    return "0";
                }
            } else {
                System.out.println("recevied error code : " + statusCode);
                return null;
            }
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Downloads one image to {@code DIR/<title>/<file name>}, creating the
     * folder when it does not exist. I/O errors are printed, not rethrown.
     *
     * @param Url   address of the image
     * @param title chapter title, used as the folder name
     */
    public static void Download(String Url, String title) {
        File sf = new File(DIR + title);
        if (!sf.exists()) {
            sf.mkdirs();
        }
        // File name: everything after the last '/' of the address.
        String imageName = Url.substring(Url.lastIndexOf("/") + 1);
        try {
            URL url = new URL(Url);
            URLConnection uri = url.openConnection(proxy);
            uri.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4381.8 Safari/537.36");
            System.out.println("正在下载:" + imageName);
            // try-with-resources closes both streams even when the copy fails.
            try (InputStream is = uri.getInputStream();
                 OutputStream os = new FileOutputStream(new File(DIR + title, imageName))) {
                // Copy in chunks rather than one byte per read/write call.
                byte[] buf = new byte[3072];
                int n;
                while ((n = is.read(buf)) != -1) {
                    os.write(buf, 0, n);
                }
            }
            System.out.println("下载完成");
        } catch (MalformedURLException e1) {
            e1.printStackTrace();
        } catch (IOException e2) {
            e2.printStackTrace();
        }
    }
}
此处评论已关闭