[原创]mangabz漫画网爬取

博主： Atigger
发布时间：2022 年 10 月 18 日
511 次浏览
暂无评论
16649字数
分类：编程

本文分别使用 Python 和 Java 两种语言实现。

另外，作品 ID（即代码中章节列表地址 `http://www.mangabz.com/1864bz/` 里的 `1864bz`）需要自行获取并修改。

Python版

# -*- coding: utf-8 -*-
import requests
import os
from bs4 import BeautifulSoup
 
 
def GET_AJAX(URL):
    """Fetch a reader page and extract the request-signing values from it.

    The values are cut out of the raw page text by substring search for the
    markers ``MANGABZ_CID=``, ``MANGABZ_MID=``, ``MANGABZ_VIEWSIGN_DT="`` and
    ``MANGABZ_VIEWSIGN="``.

    Args:
        URL: Address of the reader page to download.

    Returns:
        A dict with the string values ``CID``, ``MID``, ``DT`` and ``SIGN``
        when the page answers 200 and all four markers are present; the
        integer ``404`` when the server answers 404; the integer ``0`` for
        any other status code or when a marker is missing.
    """

    def _between(text, prefix, terminator):
        # Return the text between `prefix` and the next `terminator`,
        # or None when either of them cannot be found.
        start = text.find(prefix)
        if start == -1:
            return None
        start += len(prefix)
        end = text.find(terminator, start)
        if end == -1:
            return None
        return text[start:end]

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4381.8 Safari/537.36'}
    doc = requests.get(URL, headers=headers, proxies=proxies)
    if doc.status_code == 404:
        return 404
    if doc.status_code != 200:
        return 0

    html = doc.text
    CID = _between(html, "MANGABZ_CID=", ";")
    MID = _between(html, "MANGABZ_MID=", ";")
    DT = _between(html, "MANGABZ_VIEWSIGN_DT=\"", "\"")
    # The closing quote is searched from the end of the SIGN marker itself;
    # the previous code used the (longer) DT marker length here and started
    # the search three characters inside the value.
    SIGN = _between(html, "MANGABZ_VIEWSIGN=\"", "\"")
    if None in (CID, MID, DT, SIGN):
        # A marker is missing: report a generic error instead of returning
        # values sliced from wrong offsets.
        return 0

    print("CID:" + str(CID) + " MID: " + str(MID) + " DT:" + DT + " SIGN: " + SIGN)
    return {"CID": str(CID), "MID": str(MID), "DT": DT, "SIGN": SIGN}
 
 
def CHAPTER_DOWNLOAD(chapter_code, title, START_PAGE):
    """Download the pages of one chapter, page by page.

    The requested start page is only honoured for the chapter whose code
    equals the module-level ``START_CHAPTER_CODE``; any other chapter starts
    at page 1. Page numbers below 1000 are tried in order and the loop stops
    at the first page for which ``GET_AJAX`` reports ``404`` or ``0``.

    Args:
        chapter_code: Chapter code as it appears in the reader URL.
        title: Chapter title, used in log output and passed on to ``GET_URL``.
        START_PAGE: First page to fetch (int or numeric string).
    """
    page_no = int(START_PAGE) if START_CHAPTER_CODE == chapter_code else 1
    chapter_base = "http://www.mangabz.com/m" + chapter_code

    while page_no < 1000:
        page_info = GET_AJAX(chapter_base + "-p" + str(page_no))
        if page_info == 404:
            print("本章已结束")
            return
        if page_info == 0:
            print("出现错误，将跳过")
            return

        print("正在获取->" + title + " 第" + str(page_no) + "页")
        GET_URL(chapter_code, chapter_base + "/chapterimage.ashx",
                page_info, str(page_no), title)
        page_no += 1
 
 
def GET_URL(CHAPTER, URL, DATA, PAGE, title):
    """Resolve the image address of one page and hand it to ``Download``.

    The endpoint response is searched for the first occurrence of
    ``"<PAGE>_"``; the text from there up to the next ``|`` is used as the
    image file name (without extension).

    Args:
        CHAPTER: Chapter code, used to build the Referer header.
        URL: Address of the chapter's ``chapterimage.ashx`` endpoint.
        DATA: Dict returned by ``GET_AJAX`` (keys CID, MID, DT, SIGN).
        PAGE: Page number as a string.
        title: Chapter title, forwarded to ``Download``.

    Returns:
        None. When the endpoint does not answer 200 or the page token cannot
        be located, a message is printed and nothing is downloaded.
    """
    CID = DATA.get("CID")
    MID = DATA.get("MID")
    DT = DATA.get("DT")
    SIGN = DATA.get("SIGN")
    # Query parameters sent to the endpoint.
    params = {'cid': CID, 'page': PAGE, 'key': "", '_cid': CID, '_mid': MID, '_dt': DT, '_sign': SIGN}
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4381.8 Safari/537.36',
        'Referer': 'http://www.mangabz.com/m' + CHAPTER}
    doc = requests.get(URL, params=params, headers=headers, proxies=proxies)
    if doc.status_code != 200:
        print("第" + PAGE + "页获取失败，状态码：" + str(doc.status_code))
        return

    html = doc.text
    token_start = html.find(PAGE + "_")
    token_end = html.find("|", token_start + 1) if token_start != -1 else -1
    if token_start == -1 or token_end == -1:
        # Without this guard a -1 index would be used for slicing and a
        # meaningless image URL would be requested.
        print("第" + PAGE + "页获取失败，未找到图片地址")
        return

    PAGE_URL = html[token_start:token_end]
    END_URL = "http://image.mangabz.com/2/" + MID + "/" + CID + "/" + PAGE_URL + ".jpg"
    print("图片地址：" + END_URL)
    Download(END_URL, title)
 
 
def Download(URL, title):
    """Download one image into the folder ``DIR``/``title``.

    The file name is the part of ``URL`` after its last ``/``. The target
    folder is created when it does not exist yet.

    Args:
        URL: Address of the image.
        title: Chapter title, used as the folder name below ``DIR``.

    Returns:
        None. A non-200 answer is reported on stdout and nothing is written.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4381.8 Safari/537.36'}
    r = requests.get(URL, headers=headers, stream=True, proxies=proxies)
    try:
        img_name = URL[URL.rfind("/") + 1:]
        if r.status_code != 200:
            print("下载失败:" + img_name + " 状态码：" + str(r.status_code))
            return

        print("正在下载:" + img_name)
        # os.path.join keeps the path valid when DIR has no trailing slash.
        target_dir = os.path.join(DIR, title)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir, exist_ok=True)
            print("已创建" + target_dir)
        # The context manager closes the file even if writing fails.
        with open(os.path.join(target_dir, img_name), 'wb') as image_file:
            image_file.write(r.content)
        print("下载完成")
    finally:
        # stream=True keeps the connection open until it is released.
        r.close()
 
 
# Defaults offered at the prompts below.
DEF_DIR = "D:/漫画/"
def_ip_add = "127.0.0.1"
def_ip_proxy = "10826"

# Module-level settings read by the download functions above. They start at
# the defaults so the functions are usable when this file is imported; the
# interactive section below overwrites them.
START_CHAPTER_CODE = "0"
START_PAGE = 1
DIR = DEF_DIR
proxies = {
    'http': 'http://' + def_ip_add + ':' + def_ip_proxy,
}

if __name__ == "__main__":
    # Ask for the settings; an empty answer keeps the default.
    ip_add = input("请输入代理IP(默认127.0.0.1)：") or def_ip_add
    ip_proxy = input("请输入代理端口(默认10826)：") or def_ip_proxy
    proxies = {
        'http': 'http://' + ip_add + ':' + ip_proxy,
    }
    chapter_answer = input("请输入从倒数第几章开始(默认从最新的章节开始)：")
    # The answer is 1-based; clamp so that 0 or a negative number cannot
    # turn into a negative list index.
    START_CHAPTER = max(int(chapter_answer) - 1, 0) if chapter_answer else 0
    page_answer = input("请输入从第几页开始(默认从第一页开始)：")
    # Convert once here so START_PAGE is always an int.
    START_PAGE = max(int(page_answer), 1) if page_answer else 1
    DIR = input("请输入保存路径(默认" + DEF_DIR + ")：") or DEF_DIR
    print("操作成功！正在准备开始")
    print("代理:" + ip_add + "：" + ip_proxy + " 开始章节：" + str(START_CHAPTER + 1) + " 开始页数：" + str(START_PAGE) + " 保存路径：" + DIR)

    # Fetch the chapter list of the work.
    doc = requests.get("http://www.mangabz.com/1864bz/", proxies=proxies)
    bf = BeautifulSoup(doc.text, features="html.parser")
    texts = bf.find_all('a', class_='detail-list-form-item')
    for i in range(START_CHAPTER, len(texts)):
        html = str(texts[i])
        # Chapter code: the text between 'href="/m' and the next '/'.
        chapter_string = "href=\"/m"
        chapter_start = html.find(chapter_string) + len(chapter_string)
        chapter_code = html[chapter_start:html.find("/", chapter_start)]
        if i == START_CHAPTER:
            # Remember the first chapter; CHAPTER_DOWNLOAD applies the
            # requested start page only to this one.
            START_CHAPTER_CODE = chapter_code
        # Title: the text between the first '">' and the next '<'.
        title_string = "\">"
        title_start = html.find(title_string) + len(title_string)
        title = html[title_start:html.find("<", title_start)].rstrip()
        print("正在获取->章节代码：" + chapter_code + " 标题：" + title)
        CHAPTER_DOWNLOAD(chapter_code, title, START_PAGE)

JAVA版

import com.alibaba.fastjson.JSONObject;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.io.*;
import java.net.*;
import java.util.Scanner;

/**
 * Console downloader for the chapters of one work on mangabz.com.
 *
 * <p>Asks for proxy, start chapter, start page and target folder, reads the
 * chapter list, and downloads every page image of every chapter from the
 * chosen start position onwards.
 */
public class Main {
    public static Proxy proxy; // HTTP proxy used for every request
    static String DIR = "D:/漫画/"; // Download root folder; created on demand
    public static int START_CHAPTER = 0; // Index in the chapter list to start from
    public static int START_PAGE = 1; // Page to start from in the first chapter
    public static int START_CHAPTER_CODE = 0; // Chapter code of the first chapter processed

    /**
     * Reads the settings from the console (an empty line keeps the default),
     * fetches the chapter list and downloads each chapter in list order.
     *
     * @param args unused
     * @throws InterruptedException if the retry wait in CHAPTER_DOWNLOAD is interrupted
     */
    public static void main(String[] args) throws InterruptedException {
        int inter = 10826;
        String ip_add = "127.0.0.1";
        Scanner input = new Scanner(System.in);
        System.out.print("请输入代理地址(默认" + ip_add + ")：");
        String str = input.nextLine();
        if (!str.isEmpty()) {
            ip_add = str;
        }
        System.out.print("请输入端口(默认" + inter + ")：");
        String str1 = input.nextLine();
        if (!str1.isEmpty()) {
            inter = Integer.parseInt(str1);
        }
        System.out.print("请输入从倒数第几章开始(默认从最新的章节开始)：");
        String str2 = input.nextLine();
        if (!str2.isEmpty()) {
            // The answer is 1-based, the list index is 0-based.
            START_CHAPTER = Integer.parseInt(str2) - 1;
        }
        System.out.print("请输入从第几页开始(默认从第一页开始)：");
        String str3 = input.nextLine();
        if (!str3.isEmpty()) {
            START_PAGE = Integer.parseInt(str3);
        }
        System.out.print("请输入保存路径(默认" + DIR + ")：");
        String str4 = input.nextLine();
        if (!str4.isEmpty()) {
            DIR = str4;
        }
        // Echo the effective settings.
        System.out.println("操作成功！正在准备开始");
        int s11 = START_CHAPTER + 1;
        System.out.println("代理:" + ip_add + "：" + inter + " 开始章节：" + s11 + " 开始页数：" + START_PAGE + " 保存路径：" + DIR);
        proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(ip_add, inter));

        try {
            Document doc = Jsoup.connect("http://www.mangabz.com/1864bz/").proxy(proxy).get();
            Elements html = doc.select("div.detail-list-form-con > a");
            for (int i = START_CHAPTER; i < html.size(); i++) {
                String html1 = String.valueOf(html.get(i));
                // Chapter code: the text between 'href="/m' and the next '/'.
                String chapter_string = "href=\"/m";
                int chapter_start = html1.indexOf(chapter_string, 0) + chapter_string.length();
                String chapter = html1.substring(chapter_start, html1.indexOf("/", chapter_start));
                // Title: the text between 'target="_blank">' and the next ' <'.
                String title_string = "target=\"_blank\">";
                int title_start = html1.indexOf(title_string, 0) + title_string.length();
                String title = html1.substring(title_start, html1.indexOf(" <", title_start));
                System.out.println("正在获取->章节代码：" + chapter + " 标题：" + title);
                if (i == START_CHAPTER) {
                    // Remember the first chapter; CHAPTER_DOWNLOAD applies
                    // START_PAGE only to this one.
                    START_CHAPTER_CODE = Integer.parseInt(chapter);
                }
                CHAPTER_DOWNLOAD(chapter, title);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Downloads the pages of one chapter, page by page.
     *
     * <p>A page whose image address cannot be resolved is retried after ten
     * seconds; after ten consecutive failures the chapter is abandoned. The
     * loop also ends when GET_AJAX returns null for a page.
     *
     * @param chapter chapter code as it appears in the reader URL
     * @param title   chapter title, used for log output and as folder name
     * @throws InterruptedException if the retry wait is interrupted
     */
    public static void CHAPTER_DOWNLOAD(String chapter, String title) throws InterruptedException {
        int tag = 0; // number of consecutive failed attempts
        int chapter_code = Integer.parseInt(chapter);
        if (START_CHAPTER_CODE != chapter_code) {
            START_PAGE = 1;
        }
        for (int i = START_PAGE; i < 1000; i++) {
            int PAGE = i;
            String pageUrl = "http://www.mangabz.com/m" + chapter + "-p" + PAGE; // reader page for this page number
            System.out.println("正在获取->" + title + " 第" + PAGE + "页");
            JSONObject data = GET_AJAX(pageUrl);
            if (data == null) {
                System.out.println("本章结束");
                break;
            }
            String CID = data.getString("CID");
            String MID = data.getString("MID");
            String DT = data.getString("DT");
            String SIGN = data.getString("SIGN");
            String GET_JPG_URL = "http://www.mangabz.com/m" + chapter + "/chapterimage.ashx?cid=" + CID + "&page=" + PAGE + "&key=&_cid=" + CID + "&_mid=" + MID + "&_dt=" + DT + "&_sign=" + SIGN;
            String IMG_URL = GET_URL(GET_JPG_URL, MID, CID, PAGE);
            if (tag >= 10) {
                System.out.println("超时次数过多，已自动结束");
                break;
            }
            // GET_URL returns null on an HTTP or I/O error and "0" when the
            // page token is missing; both are handled as a retryable failure
            // (a null used to cause a NullPointerException here).
            if (IMG_URL == null || IMG_URL.equals("0")) {
                System.out.println("第" + i + "页获取失败，10秒后将重试");
                Thread.sleep(10000);
                i = i - 1; // stay on the same page for the next iteration
                tag++;
            } else {
                tag = 0;
                Download(IMG_URL, title);
            }
        }
    }

    /**
     * Fetches a reader page and extracts the values needed to sign the
     * image request.
     *
     * @param Url address of the reader page
     * @return a JSON object with the keys CID, MID, DT and SIGN, or null when
     *         the status code is not 200, the request fails, or one of the
     *         values cannot be extracted from the page
     */
    public static JSONObject GET_AJAX(String Url) {
        JSONObject data = new JSONObject();
        try {
            Connection.Response response = Jsoup.connect(Url)
                    .userAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1042.0 Safari/535.21") // browser-like user agent
                    .timeout(10000)
                    .proxy(proxy)
                    .ignoreHttpErrors(true)
                    .execute();
            int statusCode = response.statusCode();
            if (statusCode != 200) {
                System.out.println("recevied error code : " + statusCode);
                return null;
            }
            String html = String.valueOf(response.parse());
            // CID
            String cid_string = "MANGABZ_CID=";
            int cid_number = html.indexOf(cid_string, 0);
            int CID = Integer.parseInt(html.substring(cid_number + cid_string.length(), html.indexOf(";", cid_number + 1)));
            // MID
            String mid_string = "MANGABZ_MID=";
            int mid_number = html.indexOf(mid_string, 0);
            int MID = Integer.parseInt(html.substring(mid_number + mid_string.length(), html.indexOf(";", mid_number + 1)));
            // DT
            String dt_string = "MANGABZ_VIEWSIGN_DT=\"";
            int dt_number = html.indexOf(dt_string, 0);
            String DT = html.substring(dt_number + dt_string.length(), html.indexOf("\";", dt_number + dt_string.length()));
            // SIGN
            String sign_string = "MANGABZ_VIEWSIGN=\"";
            int sign_number = html.indexOf(sign_string, 0);
            String SIGN = html.substring(sign_number + sign_string.length(), html.indexOf("\"", sign_number + sign_string.length()));
            System.out.println("CID=" + CID + " MID=" + MID + " DT=" + DT + " SIGN=" + SIGN);
            data.put("CID", CID);
            data.put("MID", MID);
            data.put("DT", DT);
            data.put("SIGN", SIGN);
            return data;
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } catch (NumberFormatException | StringIndexOutOfBoundsException e) {
            // A marker is missing or malformed; report it like any other
            // failed fetch instead of terminating the program.
            System.out.println("页面中未找到所需参数：" + Url);
            return null;
        }
    }

    /**
     * Requests the image endpoint and builds the image address for one page.
     *
     * @param Url  full endpoint URL including the query string
     * @param MID  work id used in the image path
     * @param CID  chapter id used in the image path and the referrer
     * @param PAGE page number
     * @return the image URL; "0" when the response does not contain the page
     *         token; null when the request fails or does not answer 200
     */
    public static String GET_URL(String Url, String MID, String CID, int PAGE) {
        try {
            Connection.Response response = Jsoup.connect(Url)
                    .userAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4381.8 Safari/537.36") // browser-like user agent
                    .referrer("http://www.mangabz.com/m" + CID)
                    .timeout(10000)
                    .proxy(proxy)
                    .ignoreHttpErrors(false)
                    .execute();
            int statusCode = response.statusCode();
            if (statusCode != 200) {
                System.out.println("recevied error code : " + statusCode);
                return null;
            }
            String html = String.valueOf(response.parse());
            String page_string = PAGE + "_";
            int page_number = html.indexOf(page_string, 0);
            if (page_number == -1) {
                return "0";
            }
            // The file name runs from the page token up to the next '|'.
            String PAGE_URL = html.substring(page_number, html.indexOf("|", page_number + 1));
            String END_URL = "http://image.mangabz.com/2/" + MID + "/" + CID + "/" + PAGE_URL + ".jpg";
            System.out.println("图片地址：" + END_URL);
            return END_URL;
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Downloads one image into the folder DIR/title, creating the folder
     * when necessary. I/O errors are printed and otherwise ignored.
     *
     * @param Url   address of the image
     * @param title chapter title, used as the folder name below DIR
     */
    public static void Download(String Url, String title) {
        File sf = new File(DIR, title);
        if (!sf.exists()) {
            sf.mkdirs();
        }
        // File name: everything after the last '/'.
        String imageName = Url.substring(Url.lastIndexOf("/") + 1);
        try {
            URLConnection uri = new URL(Url).openConnection(proxy);
            uri.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4381.8 Safari/537.36");
            // Same 10 s limit the Jsoup requests use, so a stalled download
            // cannot block forever.
            uri.setConnectTimeout(10000);
            uri.setReadTimeout(10000);
            System.out.println("正在下载：" + imageName);
            // try-with-resources closes both streams even when the copy fails.
            try (InputStream is = uri.getInputStream();
                 OutputStream os = new FileOutputStream(new File(sf, imageName))) {
                byte[] buf = new byte[3072];
                int len;
                // Copy in chunks instead of one byte per call.
                while ((len = is.read(buf)) != -1) {
                    os.write(buf, 0, len);
                }
            }
            System.out.println("下载完成");
        } catch (IOException e) {
            // MalformedURLException is an IOException and is covered here too.
            e.printStackTrace();
        }
    }
}

最后修改：2023 年 11 月 07 日

如果觉得我的文章对你有用，请随意赞赏

此处评论已关闭

[原创]mangabz漫画网爬取
评论数： 0
移动端适配
评论数： 0
Vue3的侦听器
评论数： 0
Vue3的toRaw()与markRaw()
评论数： 0
Vue3的Teleport组件
评论数： 0

Vue3的customRef()
评论数： 0
Python万圣节蝙蝠
评论数： 0
js小汇总
评论数： 0
Vue3的shallowReactive()和shallowRef()
评论数： 0
web基础与HTTP协议
评论数： 0

[原创]mangabz漫画网爬取

Atigger • 2022 年 10 月 18 日

<p><br>使用python和java两种语言开发</p><p>另外作品id需要自己去获取并修改</p><h1>Python版</h1><pre><code class="lang-python"># -*- coding: utf-8 -*-
import requests
import os
from bs4 import BeautifulSoup
 
 
def GET_AJAX(URL):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4381.8 Safari/537.36'}
    doc = requests.get(URL, headers=headers, proxies=proxies)
    if doc.status_code == 200:
        #print(doc.text)
        html = doc.text
        # 获取CID
        cid_string = &quot;MANGABZ_CID=&quot;
        cid_num = len(cid_string)
        cid_int = int(html.find(cid_string))
        cid_end = int(html.find(&quot;;&quot;, cid_int + 1))
        CID = html[cid_int + cid_num:cid_end]
        # 获取MID
        mid_string = &quot;MANGABZ_MID=&quot;
        mid_num = len(mid_string)
        mid_int = int(html.find(mid_string))
        mid_end = int(html.find(&quot;;&quot;, mid_int + 1))
        MID = html[mid_int + mid_num:mid_end]
        # 获取DT
        dt_string = &quot;MANGABZ_VIEWSIGN_DT=\&quot;&quot;
        dt_num = len(dt_string)
        dt_int = int(html.find(dt_string))
        dt_end = int(html.find(&quot;\&quot;&quot;, dt_int + dt_num))
        DT = html[dt_int + dt_num:dt_end]
        # 获取SIGN
        sign_string = &quot;MANGABZ_VIEWSIGN=\&quot;&quot;
        sign_num = len(sign_string)
        sign_int = int(html.find(sign_string))
        sign_end = int(html.find(&quot;\&quot;&quot;, sign_int + dt_num))
        SIGN = html[sign_int + sign_num:sign_end]
        print(&quot;CID:&quot; + str(CID) + &quot; MID: &quot; + str(MID) + &quot; DT:&quot; + DT + &quot; SIGN: &quot; + SIGN)
        return {&quot;CID&quot;: str(CID), &quot;MID&quot;: str(MID), &quot;DT&quot;: DT, &quot;SIGN&quot;: SIGN}
    elif doc.status_code == 404:
        return 404
    else:
        return 0
 
 
def CHAPTER_DOWNLOAD(chapter_code, title,START_PAGE):
    if START_CHAPTER_CODE != chapter_code:
        START_PAGE = 1
    for i in range(int(START_PAGE), 1000):
        PAGE = i
        URL = &quot;http://www.mangabz.com/m&quot; + chapter_code + &quot;-p&quot; + str(PAGE)
        data = GET_AJAX(URL)
        if data == 404:
            print(&quot;本章已结束&quot;)
            break
        elif data == 0:
            print(&quot;出现错误，将跳过&quot;)
            break
        else:
            print(&quot;正在获取-&gt;&quot; + title + &quot; 第&quot; + str(PAGE) + &quot;页&quot;)
            GET_JPG_URL = &quot;http://www.mangabz.com/m&quot; + chapter_code + &quot;/chapterimage.ashx&quot;
            GET_URL(chapter_code, GET_JPG_URL, data, str(PAGE), title)
 
 
def GET_URL(CHAPTER, URL, DATA, PAGE, title):
    CID = DATA.get(&quot;CID&quot;)
    MID = DATA.get(&quot;MID&quot;)
    DT = DATA.get(&quot;DT&quot;)
    SIGN = DATA.get(&quot;SIGN&quot;)
    params = {'cid': CID, 'page': PAGE, 'key': &quot;&quot;, '_cid': CID, '_mid': MID, '_dt': DT, '_sign': SIGN}  # 发送数据
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4381.8 Safari/537.36',
        'Referer': 'http://www.mangabz.com/m' + CHAPTER}
    doc = requests.get(URL, params=params, headers=headers, proxies=proxies)
    html = doc.text
    page_string = PAGE + &quot;_&quot;;
    page_number = html.find(page_string)
    PAGE_URL = html[page_number: html.find(&quot;|&quot;, page_number + 1)];
    END_URL = &quot;http://image.mangabz.com/2/&quot; + MID + &quot;/&quot; + CID + &quot;/&quot; + PAGE_URL + &quot;.jpg&quot;;
    print(&quot;图片地址：&quot; + END_URL);
    Download(END_URL, title)
 
 
def Download(URL, title):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4381.8 Safari/537.36'}
    r = requests.get(URL, headers=headers, stream=True, proxies=proxies)
    img_name_start = URL.rfind(&quot;/&quot;)
    img_name_end = len(URL)
    img_name = URL[img_name_start + 1:img_name_end]
    if r.status_code == 200:
        print(&quot;正在下载:&quot; + img_name)
        isExists = os.path.exists(DIR + title)
        if not isExists:
            os.makedirs(DIR + title)
            print(&quot;已创建&quot; + DIR + title)
        open(DIR + title + '/' + img_name, 'wb').write(r.content)  # 将内容写入图片
        print(&quot;下载完成&quot;)
    del r
 
 
DEF_DIR = &quot;D:/漫画/&quot;
def_ip_add = &quot;127.0.0.1&quot;
def_ip_proxy = &quot;10826&quot;
START_CHAPTER_CODE = &quot;0&quot;
# DIR = DEF_DIR
# CHAPTER = &quot;91436&quot;
# proxies = {
#     'http': 'http://127.0.0.1:10826',
# }
START_PAGE = 1
if (0 == 0):
    ip_add = input(&quot;请输入代理IP(默认127.0.0.1)：&quot;)
    if ip_add == &quot;&quot;:
        ip_add = def_ip_add
    ip_proxy = input(&quot;请输入代理端口(默认10826)：&quot;)
    if ip_proxy == &quot;&quot;:
        ip_proxy = def_ip_proxy
    proxies = {
        'http': 'http://' + ip_add + ':' + ip_proxy,
    }
    START_CHAPTER = input(&quot;请输入从倒数第几章开始(默认从最新的章节开始)：&quot;)
    if START_CHAPTER != &quot;&quot;:
        START_CHAPTER = int(START_CHAPTER) - 1
    else:
        START_CHAPTER = 0
    START_PAGE = input(&quot;请输入从第几页开始(默认从第一页开始)：&quot;)
    if START_PAGE == &quot;&quot;:
        START_PAGE = 1
    DIR = input(&quot;请输入保存路径(默认&quot; + DEF_DIR + &quot;)：&quot;)
    if DIR == &quot;&quot;:
        DIR = DEF_DIR
    print(&quot;操作成功！正在准备开始&quot;);
    s11 = START_CHAPTER + 1;
    print(&quot;代理:&quot; + ip_add + &quot;：&quot; + ip_proxy + &quot; 开始章节：&quot; + str(s11) + &quot; 开始页数：&quot; + str(START_PAGE) + &quot; 保存路径：&quot; + DIR)
    # 开始获取章节列表
    doc = requests.get(&quot;http://www.mangabz.com/1864bz/&quot;, proxies=proxies)
    html = doc.text
    bf = BeautifulSoup(html, features=&quot;html.parser&quot;)
    texts = bf.find_all('a', class_='detail-list-form-item')
    texts_num = len(texts)
    for i in range(START_CHAPTER, texts_num):
        html = texts[i]
        html = str(html)
        # 获取章节代码
        # print(html)
        chapter_string = &quot;href=\&quot;/m&quot;
        chapter_num = len(chapter_string)
        chapter_int = int(html.find(chapter_string))
        chapter_end = int(html.find(&quot;/&quot;, chapter_int + chapter_num))
        chapter_code = html[chapter_int + chapter_num:chapter_end]
        if i == START_CHAPTER:
            START_CHAPTER_CODE = chapter_code;
        # 获取标题
        title_string = &quot;\&quot;&gt;&quot;
        title_num = len(title_string)
        title_int = int(html.find(title_string))
        title_end = int(html.find(&quot;&lt;&quot;, title_int + title_num))
        title = html[title_int + title_num:title_end]
        title = title.rstrip()
        print(&quot;正在获取-&gt;章节代码：&quot; + chapter_code + &quot; 标题：&quot; + title);
        CHAPTER_DOWNLOAD(chapter_code, title,START_PAGE)</code></pre><h1>JAVA版</h1><pre><code class="lang-java">import com.alibaba.fastjson.JSONObject;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.io.*;
import java.net.*;
import java.util.Scanner;

public class Main {
    public static Proxy proxy;//设置代理
    static String DIR = &quot;D:/漫画/&quot;;//文件保存路径，没有会自动创建
    public static int START_CHAPTER = 0; //起始章节
    public static int START_PAGE = 1; //起始页码
    public static int START_CHAPTER_CODE = 0; //起始页码

//static int CHAPTER = 91436;//章节代码

public static void main(String[] args) throws InterruptedException {
        //创建Scanner对象，接受从控制台输入
        int inter = 10826;
        String ip_add = &quot;127.0.0.1&quot;;
        Scanner input = new Scanner(System.in);
        System.out.print(&quot;请输入代理地址(默认&quot; + ip_add + &quot;)：&quot;);
        String str = input.nextLine();
        if (str.equals(&quot;&quot;)) {
        } else {
            ip_add = str;
        }
        System.out.print(&quot;请输入端口(默认&quot; + inter + &quot;)：&quot;);
        String str1 = input.nextLine();
        if (str1.equals(&quot;&quot;)) {
        } else {
            inter = Integer.parseInt(str1);
        }
        System.out.print(&quot;请输入从倒数第几章开始(默认从最新的章节开始)：&quot;);
        String str2 = input.nextLine();
        if (str2.equals(&quot;&quot;)) {
        } else {
            START_CHAPTER = Integer.parseInt(str2) - 1;
        }
        System.out.print(&quot;请输入从第几页开始(默认从第一页开始)：&quot;);
        String str3 = input.nextLine();
        if (str3.equals(&quot;&quot;)) {
        } else {
            START_PAGE = Integer.parseInt(str3);
        }
        System.out.print(&quot;请输入保存路径(默认&quot; + DIR + &quot;)：&quot;);
        String str4 = input.nextLine();
        if (str4.equals(&quot;&quot;)) {
        } else {
            DIR = str4;
        }
        //输出结果
        System.out.println(&quot;操作成功！正在准备开始&quot;);
        int s11 = START_CHAPTER + 1;
        System.out.println(&quot;代理:&quot; + ip_add + &quot;：&quot; + inter + &quot; 开始章节：&quot; + s11 + &quot; 开始页数：&quot; + START_PAGE + &quot; 保存路径：&quot; + DIR);
        proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(ip_add, inter));

Document doc;
        try {
            doc = Jsoup.connect(&quot;http://www.mangabz.com/1864bz/&quot;).proxy(proxy).get();
            Elements html = doc.select(&quot;div.detail-list-form-con &gt; a&quot;);
            for (int i = START_CHAPTER; i &lt; html.size(); i++) {
                String html1 = String.valueOf(html.get(i));
                String chapter_string = &quot;href=\&quot;/m&quot;;
                int chapter_number = html1.indexOf(chapter_string, 0);
                String chapter = html1.substring(chapter_number + chapter_string.length(), html1.indexOf(&quot;/&quot;, chapter_number + chapter_string.length()));
                String title_string = &quot;target=\&quot;_blank\&quot;&gt;&quot;;
                int title_number = html1.indexOf(title_string, 0);
                String title = html1.substring(title_number + title_string.length(), html1.indexOf(&quot; &lt;&quot;, title_number + title_string.length()));
                System.out.println(&quot;正在获取-&gt;章节代码：&quot; + chapter + &quot; 标题：&quot; + title);
                if (i == START_CHAPTER) {
                    START_CHAPTER_CODE = Integer.parseInt(chapter);
                }
                CHAPTER_DOWNLOAD(chapter, title);
            }

} catch (IOException e) {
            e.printStackTrace();
        }

}

//章节内操作
    public static void CHAPTER_DOWNLOAD(String chapter, String title) throws InterruptedException {
        int tag = 0;
        int chapter_code = Integer.parseInt(chapter);
        if (START_CHAPTER_CODE != chapter_code) {
            START_PAGE = 1;
        }
        for (int i = START_PAGE; i &lt; 1000; i++) {
            int PAGE = i;
            String URL;
            URL = &quot;http://www.mangabz.com/m&quot; + chapter + &quot;-p&quot; + PAGE; //漫画的第几页
            String IMG_URL = &quot;&quot;;
            JSONObject data;
            System.out.println(&quot;正在获取-&gt;&quot; + title + &quot; 第&quot; + PAGE + &quot;页&quot;);
            data = GET_AJAX(URL);//获取所需数据
            if (data != null) {
                String CID = String.valueOf(data.getString(&quot;CID&quot;));
                String MID = String.valueOf(data.getString(&quot;MID&quot;));
                String DT = String.valueOf(data.getString(&quot;DT&quot;));
                String SIGN = String.valueOf(data.getString(&quot;SIGN&quot;));
                String GET_JPG_URL = &quot;http://www.mangabz.com/m&quot; + chapter + &quot;/chapterimage.ashx?cid=&quot; + CID + &quot;&amp;page=&quot; + PAGE + &quot;&amp;key=&amp;_cid=&quot; + CID + &quot;&amp;_mid=&quot; + MID + &quot;&amp;_dt=&quot; + DT + &quot;&amp;_sign=&quot; + SIGN; //发送数据的URL
                IMG_URL = GET_URL(GET_JPG_URL, MID, CID, PAGE);//获取图片URL
                if (tag &lt; 10) {
                    if (IMG_URL.equals(&quot;0&quot;)) {
                        System.out.println(&quot;第&quot; + i + &quot;页获取失败，10秒后将重试&quot;);
                        Thread.sleep(10000);
                        i = i - 1;
                        tag++;
                    } else {
                        tag = 0;
                        Download(IMG_URL, title);//下载图片
                    }
                } else {
                    System.out.println(&quot;超时次数过多，已自动结束&quot;);
                    break;
                }
            } else {
                System.out.println(&quot;本章结束&quot;);
                break;
            }
        }
    }

//获取后续需要传的参数
    public static JSONObject GET_AJAX(String Url) {
        JSONObject data = new JSONObject();
        Document doc;
        int CID = 0;
        int MID = 0;
        String DT = &quot;&quot;;
        String SIGN = &quot;&quot;;
        try {
            Connection.Response response = Jsoup.connect(Url)
                    .userAgent(&quot;Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1042.0 Safari/535.21&quot;)//模拟浏览器
                    .timeout(10000)
                    .proxy(proxy)
                    .ignoreHttpErrors(true)
                    .execute();
            int statusCode = response.statusCode();
            if (statusCode == 200) {
                doc = response.parse();
                String html = String.valueOf(doc);
                //获取CID
                String cid_string = &quot;MANGABZ_CID=&quot;;
                int cid_number = html.indexOf(cid_string, 0);
                CID = Integer.parseInt(html.substring(cid_number + cid_string.length(), html.indexOf(&quot;;&quot;, cid_number + 1)));
                //获取MID
                String mid_string = &quot;MANGABZ_MID=&quot;;
                int mid_number = html.indexOf(mid_string, 0);
                MID = Integer.parseInt(html.substring(mid_number + mid_string.length(), html.indexOf(&quot;;&quot;, mid_number + 1)));
                //获取DT
                String dt_string = &quot;MANGABZ_VIEWSIGN_DT=\&quot;&quot;;
                int dt_number = html.indexOf(dt_string, 0);
                DT = html.substring(dt_number + dt_string.length(), html.indexOf(&quot;\&quot;;&quot;, dt_number + dt_string.length()));
                //获取SIGN
                String sign_string = &quot;MANGABZ_VIEWSIGN=\&quot;&quot;;
                int sign_number = html.indexOf(sign_string, 0);
                SIGN = html.substring(sign_number + sign_string.length(), html.indexOf(&quot;\&quot;&quot;, sign_number + sign_string.length()));
                System.out.println(&quot;CID=&quot; + CID + &quot; MID=&quot; + MID + &quot; DT=&quot; + DT + &quot; SIGN=&quot; + SIGN);
                data.put(&quot;CID&quot;, CID);
                data.put(&quot;MID&quot;, MID);
                data.put(&quot;DT&quot;, DT);
                data.put(&quot;SIGN&quot;, SIGN);
                //System.out.println(data);
                return data;
            } else {
                System.out.println(&quot;recevied error code : &quot; + statusCode);
                return null;
            }
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

public static String GET_URL(String Url, String MID, String CID, int PAGE) {
        Document doc;
        String END_URL = null;
        try {
            Connection.Response response = Jsoup.connect(Url)
                    .userAgent(&quot;Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4381.8 Safari/537.36&quot;)//模拟浏览器
                    .referrer(&quot;http://www.mangabz.com/m&quot; + CID)
                    .timeout(10000)
                    .proxy(proxy)
                    .ignoreHttpErrors(false)
                    .execute();
            int statusCode = response.statusCode();
            if (statusCode == 200) {
                doc = response.parse();
                String html = String.valueOf(doc);
                //System.out.println(html);
                String page_string = PAGE + &quot;_&quot;;
                int page_number = html.indexOf(page_string, 0);
                if (page_number != -1) {
                    String PAGE_URL = html.substring(page_number, html.indexOf(&quot;|&quot;, page_number + 1));//正则
                    END_URL = &quot;http://image.mangabz.com/2/&quot; + MID + &quot;/&quot; + CID + &quot;/&quot; + PAGE_URL + &quot;.jpg&quot;;
                    System.out.println(&quot;图片地址：&quot; + END_URL);
                    return END_URL;
                } else {
                    return &quot;0&quot;;
                }

} else {
                System.out.println(&quot;recevied error code : &quot; + statusCode);
                return null;
            }

} catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

//下载图片
    public static void Download(String Url, String title) {
        try {
            File sf = new File(DIR + title);
            if (!sf.exists()) {
                sf.mkdirs();
            }
            String src = Url;// 获取img中的src路径
            // 获取后缀名
            String imageName = src.substring(src.lastIndexOf(&quot;/&quot;) + 1, src.length());
            // 连接url
            URL url;
            try {
                url = new URL(src);
                URLConnection uri = url.openConnection(proxy);
                uri.addRequestProperty(&quot;User-Agent&quot;, &quot;Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4381.8 Safari/537.36&quot;);
                // 获取数据流
                System.out.println(&quot;正在下载：&quot; + imageName);
                InputStream is = uri.getInputStream();
                // 写入数据流
                OutputStream os = new FileOutputStream(new File(DIR + title, imageName)); //保存路径和文件名
                byte[] buf = new byte[3072];
                int i1 = 0;
                while ((i1 = is.read()) != -1) {
                    os.write(i1);
                }
                System.out.println(&quot;下载完成&quot;);
                os.close();
            } catch (MalformedURLException e1) {
                e1.printStackTrace();
            }
        } catch (IOException e2) {
            e2.printStackTrace();
        }
    }
}
</code></pre>