-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathworm-dl.pl
51 lines (45 loc) · 1.37 KB
/
worm-dl.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env perl
use strict;
use warnings;
use WWW::Mechanize;
use HTML::TreeBuilder::XPath;
use autodie;
# Download a web serial chapter by chapter, following each page's
# "Next Chapter" link, and collect the chapter titles and paragraphs into a
# single UTF-8 HTML file named "$bookname.html".
#
# Usage: worm-dl.pl [START_CHAPTER]
#   With no argument the output file is created (or truncated).
#   With exactly one argument the output file is opened for appending.
my $mech = WWW::Mechanize->new();

# NOTE(review): $start_chapter is stored but never consulted below -- the
# crawl always begins at $bookurl. Presumably a "resume" feature was planned;
# confirm the intended chapter numbering before wiring it up.
my $start_chapter = 0;
my $bookname      = 'Worm';
my $bookurl       = 'http://parahumans.wordpress.com/category/stories-arcs-1-10/arc-1-gestation/1-01/';

my $outpath = "$bookname.html";
my $append  = @ARGV == 1;
$start_chapter = $ARGV[0] if $append;

# When appending to a file that already has content, a second DOCTYPE/<title>
# in the middle of the document would be malformed, so only write the header
# for a fresh (or empty) file. This must be decided before the file is opened.
my $needs_header = !( $append && ( -s $outpath ) );

# autodie is in effect, so a failing open/close throws instead of returning.
my $mode = $append ? '>>:encoding(UTF-8)' : '>:encoding(UTF-8)';
open my $out, $mode, $outpath;

if ($needs_header) {
    print {$out} "<!DOCTYPE html>\n",
        '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />',
        "\n<title>$bookname</title>\n";
}

while ($bookurl) {
    $mech->get($bookurl);

    my $tree = HTML::TreeBuilder::XPath->new;
    $tree->parse_content( $mech->content() );

    # findnodes_as_string returns all matching nodes serialized into ONE
    # string (empty when nothing matched), so success has to be judged by the
    # string contents, not by counting loop iterations.
    my $title = $tree->findnodes_as_string('//h1[@class="entry-title"]');
    my $body  = $tree->findnodes_as_string('//div[@class="entry-content"]/p');

    # Release the parse tree explicitly; one is built per chapter and they
    # would otherwise accumulate for the whole run.
    $tree->delete;

    my @missing;
    push @missing, 'title'   unless defined $title && length $title;
    push @missing, 'content' unless defined $body  && length $body;
    if (@missing) {
        # Abort with a non-zero exit status so callers can detect the failure.
        die 'failed: no chapter ' . join( ' or ', @missing )
            . " found at $bookurl\n";
    }

    # Progress output on STDOUT: the serialized title element.
    print $title;
    print {$out} "$title\n";

    # Strip hyperlinks (including their text) from the chapter body.
    # \b keeps tags such as <abbr> from being mistaken for <a>, and /s lets a
    # link span several lines.
    $body =~ s{<a\b[^>]*>.*?</a>}{}gs;
    print {$out} "$body\n";

    print "finding next chapter...\n";

    # find_link returns undef when the page has no such link, which ends the
    # loop after the last chapter.
    my $link = $mech->find_link( text => 'Next Chapter' );
    $bookurl = $link ? $link->url() : undef;
}

close $out;