diff --git a/Book b/Book
new file mode 160000
index 0000000..5f0130b
--- /dev/null
+++ b/Book
@@ -0,0 +1 @@
+Subproject commit 5f0130b02d8d9a26532acd6c8dd79b43a957740a
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..f662418
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,42 @@
+
+
+ 4.0.0
+
+ org.example
+ FindBook
+ 1.0-SNAPSHOT
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+
+ 8
+ 8
+
+
+
+
+
+
+ us.codecraft
+ webmagic-core
+ 0.7.3
+
+
+
+ us.codecraft
+ webmagic-extension
+ 0.7.3
+
+
+
+ com.squareup.okhttp3
+ okhttp
+ 4.9.1
+
+
+
+
\ No newline at end of file
diff --git a/src/main/java/Main.java b/src/main/java/Main.java
new file mode 100644
index 0000000..28a556e
--- /dev/null
+++ b/src/main/java/Main.java
@@ -0,0 +1,9 @@
+import Service.JsoupUtils;
+
+import java.io.IOException;
+
+public class Main { // Program entry point.
+ public static void main(String[] args) throws IOException { // args is not read
+ new JsoupUtils().getBook(1,3,"https://book.qidian.com/info/1029575290/#Catalog"); // (start, end, url) overload with a hard-coded catalog URL
+ }
+}
diff --git a/src/main/java/Service/HTMLParseUtils.java b/src/main/java/Service/HTMLParseUtils.java
new file mode 100644
index 0000000..2914ea3
--- /dev/null
+++ b/src/main/java/Service/HTMLParseUtils.java
@@ -0,0 +1,165 @@
+package Service;
+
+import us.codecraft.webmagic.Page;
+import us.codecraft.webmagic.Site;
+import us.codecraft.webmagic.processor.PageProcessor;
+import us.codecraft.webmagic.selector.Html;
+import us.codecraft.webmagic.selector.Selectable;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * WebMagic page processor that crawls qidian.com and stores chapter text in local files.
+ *
+ * Three URL levels are handled by process(Page):
+ *   1. a listing page (e.g. https://www.qidian.com/xuanhuan): collects book links;
+ *   2. a book catalog (e.g. https://book.qidian.com/info/1019251979#Catalog): collects chapter links;
+ *   3. a chapter page (e.g. https://read.qidian.com/chapter/SaT8jsiJD54smgY_yC2imA2/oQbX6YtwB_NOBDFlr9quQA2):
+ *      extracts the title and paragraphs and writes them to disk.
+ *
+ * Chapter files are written with the platform default charset, because the PrintWriter
+ * is created without an explicit one.
+ */
+public class HTMLParseUtils implements PageProcessor {
+
+    /** Root directory under which one sub-directory per book is created. */
+    private static final String BASE_DIR = "D:/book.qidian/";
+
+    /** Matches chapter pages; ".{23}" stands in for the two path segments, which differ per chapter. */
+    private static final String CHAPTER_URL_REGEX = "https://read.qidian.com/chapter/.{23}/.{23}";
+
+    /** Matches the catalog page of a book. */
+    private static final String CATALOG_URL_REGEX = "https://book.qidian.com/info/\\d{10}#Catalog";
+
+    /** Absolute XPath of the book heading on a chapter page. */
+    private static final String BOOK_NAME_XPATH =
+            "/html/body/div[2]/div[3]/div[2]/div[1]/div[1]/div[1]/div[1]/h1/text()";
+
+    /** Crawl settings: UTF-8 charset, with timeout and sleep time both set to 1000. */
+    private Site site = Site.me()
+            .setCharset("utf-8")
+            .setTimeOut(1000)
+            .setSleepTime(1000);
+
+    /**
+     * Book name read from the most recent chapter page that had a book heading.
+     * It is reused for chapter pages that have no such heading.
+     */
+    String bookName1 = "";
+
+    /** Returns the crawl settings configured above. */
+    @Override
+    public Site getSite() {
+        return site;
+    }
+
+    /**
+     * Dispatches on the page URL: saves chapter pages, queues the chapter links found on
+     * a catalog page, and queues catalog links found on any other page.
+     *
+     * @param page the downloaded page to process
+     */
+    @Override
+    public void process(Page page) {
+        Selectable table = page.getUrl();
+
+        if (table.regex(CHAPTER_URL_REGEX).match()) {
+            saveChapter(page.getHtml());
+        } else if (table.regex(CATALOG_URL_REGEX).match()) {
+            queueChapters(page);
+        } else {
+            queueCatalogs(page);
+        }
+    }
+
+    /**
+     * Extracts the chapter title and paragraphs from a chapter page and saves them.
+     * A page that has the book heading is treated as the first chapter (per the original
+     * author's note) and updates the remembered book name; other pages use another layout.
+     *
+     * @param html parsed content of the chapter page
+     */
+    private void saveChapter(Html html) {
+        String title;
+        List<String> content;
+
+        // Evaluate the heading XPath once and reuse the result as the book name.
+        String heading = html.xpath(BOOK_NAME_XPATH).toString();
+        if (heading != null) {
+            // First chapter: remember the book name, then read title and paragraphs.
+            bookName1 = heading;
+            title = html.xpath("[@class='main-text-wrap']/div[1]/h3/span/text()").toString();
+            content = html.xpath("[@class='main-text-wrap']/div[2]/p/text()").all();
+        } else {
+            // Any other chapter: title and paragraphs sit under the j_chapterBox element.
+            title = html.xpath("[@id='j_chapterBox']/div[1]/div[1]/div[1]/h3/span/text()").toString();
+            content = html.xpath("[@id='j_chapterBox']/div[1]/div[1]/div[2]/p/text()").all();
+        }
+
+        // Save the chapter to local disk.
+        downBook(bookName1, title, content);
+    }
+
+    /**
+     * Queues every chapter link found in the catalog of a book.
+     *
+     * @param page the catalog page
+     */
+    private void queueChapters(Page page) {
+        // Address of each chapter, taken from the first div under the volume-wrap element.
+        List<String> url = page.getHtml().xpath("[@class='volume-wrap']/div[1]/ul/li/a/@href").all();
+        // Add them to the crawl queue.
+        page.addTargetRequests(url);
+    }
+
+    /**
+     * Queues the catalog page of every book linked from a listing page. The catalog URL
+     * is the book link with "#Catalog" appended.
+     *
+     * @param page the listing page
+     */
+    private void queueCatalogs(Page page) {
+        List<String> url = page.getHtml().xpath("[@id='new-book-list']/div/ul/li/div[2]/h4/a/@href").all();
+        List<String> url2 = new ArrayList<>();
+        for (String string : url) {
+            url2.add(string + "#Catalog");
+        }
+        // Add them to the crawl queue.
+        page.addTargetRequests(url2);
+    }
+
+    /**
+     * Writes one chapter to D:/book.qidian/{bookName2}/{title}.txt, one paragraph per line,
+     * creating the book directory first when it does not exist.
+     *
+     * @param bookName2 name of the book, used as the directory name
+     * @param title     chapter title, used as the file name
+     * @param content   paragraphs of the chapter, written in list order
+     */
+    private void downBook(String bookName2, String title, List<String> content) {
+        // Make sure the target directory exists.
+        File file = new File(BASE_DIR + bookName2);
+        if (!file.exists()) {
+            file.mkdirs();
+        }
+
+        // try-with-resources closes the writer on every path. The previous finally block
+        // called close() on a null writer whenever the file could not be opened.
+        try (PrintWriter pw = new PrintWriter(
+                new FileOutputStream(BASE_DIR + bookName2 + "/" + title + ".txt"), true)) {
+            for (String string : content) {
+                pw.println(string);
+            }
+            // Report progress on the console.
+            System.out.println(title + " " + "爬取完毕");
+        } catch (FileNotFoundException e) {
+            // The file could not be created or opened; report it and skip this chapter.
+            e.printStackTrace();
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/Service/JsoupUtils.java b/src/main/java/Service/JsoupUtils.java
new file mode 100644
index 0000000..f9f5742
--- /dev/null
+++ b/src/main/java/Service/JsoupUtils.java
@@ -0,0 +1,151 @@
+package Service;
+
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.select.Elements;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class JsoupUtils {
+ public void getBook(String url, int section) throws IOException {
+ if (Jsoup.connect(url).execute().statusCode() == 200) {
+ Document doc = Jsoup.connect(url).get();
+ String bookName = doc.title();
+ Elements links = doc.select("#j-catalogWrap > div.volume-wrap");
+ Elements contentUrl = links.select("a");
+ List urls = contentUrl.stream().map(element -> element.attr("abs:href")).collect(Collectors.toList());
+ List titles = new ArrayList<>();
+ List words_number = new ArrayList<>();
+ for (String a : urls) {
+ if (a.contains("vipreader")) {
+ System.out.println("剩下的是vip章节了");
+ break;
+ } else if (a.contains("chapter")) {
+ Document mo = Jsoup.connect(a).get();
+ String id = mo.getElementsByClass("text-wrap").attr("id");
+ System.out.println(id);
+ Elements title = mo.select("#" + id + " > div > div.text-head > h3 > span.content-wrap");
+ Elements content = mo.select("#" + id + " > div > div.read-content.j_readContent");
+ String tit = title.text();
+ String con = content.text();
+ if (!tit.equals("")) {
+ titles.add(tit);
+ words_number.add(con.length());
+ } else {
+ continue;
+ }
+ downBook(bookName, tit, con, section);
+
+ } else {
+ section++;
+ }
+
+ }
+ if (!titles.isEmpty()) {
+ conclusion(bookName,titles,words_number);
+ }
+ } else {
+ return;
+ }
+
+ }
+
+ //获得从x卷到y卷
+ public void getBook(int start, int end, String url) throws IOException {
+ Document doc = Jsoup.connect(url).get();
+ String bookName = doc.title();
+ Elements links = doc.select("#j-catalogWrap > div.volume-wrap");
+ Elements contentUrl = links.select("a");
+ List urls = contentUrl.stream().map(element -> element.attr("abs:href")).collect(Collectors.toList());
+ List titles = new ArrayList<>();
+ List words_number = new ArrayList<>();
+ for (String a : urls) {
+ if (a.contains("chapter")) {
+ Document mo = Jsoup.connect(a).get();
+ String id = mo.getElementsByClass("text-wrap").attr("id");
+ System.out.println(id);
+ Elements title = mo.select("#" + id + " > div > div.text-head > h3 > span.content-wrap");
+ Elements content = mo.select("#" + id + " > div > div.read-content.j_readContent");
+ String tit = title.text();
+ String con = content.text();
+ if (!tit.equals("")) {
+ titles.add(tit);
+ words_number.add(con.length());
+ } else {
+ continue;
+ }
+
+ downBook(bookName, tit, con, start - 1);
+
+ } else {
+ start++;
+ if (start == end) {
+ conclusion(bookName,titles,words_number);
+ return;
+ }
+ }
+
+ }
+ }
+
+ private static void downBook(String bookName2, String title, String content, int section) {
+
+ File file = new File("D:/book.qidian/" + "#" + bookName2);
+
+ if (!file.exists()) {
+
+ //如果不存在目录,则创建目录
+ file.mkdirs();
+ }
+
+ PrintWriter pw = null;
+ try {
+
+ FileOutputStream fos = new FileOutputStream("D:/book.qidian/" + "#" + bookName2 + "/" + "第" + section + "卷" + title + ".txt");
+
+ pw = new PrintWriter(fos, true);
+
+ pw.println(content);
+
+ System.out.println(title + " " + "爬取完毕");
+
+ } catch (FileNotFoundException e) {
+ e.printStackTrace();
+ return;
+
+ } finally {//关流
+ pw.close();
+ }
+ }
+
+ private static void conclusion(String bookName2, List titles, List words_number) {
+
+ File file = new File("D:/book.qidian/" + "#" + bookName2);
+
+ PrintWriter pw = null;
+ try {
+
+ FileOutputStream fos = new FileOutputStream("D:/book.qidian/" + "#" + bookName2 + "/" + "conclusion" + ".txt");
+
+ pw = new PrintWriter(fos, true);
+
+ for (int i = 0; i < titles.size(); i++) {
+ pw.println(titles.get(i));
+ pw.println(words_number.get(i));
+ }
+
+
+ } catch (FileNotFoundException e) {
+ e.printStackTrace();
+ return;
+
+ } finally {//关流
+ pw.close();
+ }
+ }
+}
+
diff --git a/target/classes/Main.class b/target/classes/Main.class
new file mode 100644
index 0000000..2aa1402
Binary files /dev/null and b/target/classes/Main.class differ
diff --git a/target/classes/Service/HTMLParseUtils.class b/target/classes/Service/HTMLParseUtils.class
new file mode 100644
index 0000000..ccbc126
Binary files /dev/null and b/target/classes/Service/HTMLParseUtils.class differ
diff --git a/target/classes/Service/JsoupUtils.class b/target/classes/Service/JsoupUtils.class
new file mode 100644
index 0000000..7047a27
Binary files /dev/null and b/target/classes/Service/JsoupUtils.class differ