summaryrefslogtreecommitdiffstats
path: root/lcrawl
blob: 0e6e113517b128aeab7061047a486aa7be807d6f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/bin/sh

# Command-line arguments, in positional order (see the usage message below):
#   BASIS      - URL of the first page handed to crawl
#   FILEREGEX  - Perl expression evaluated per page line to get the file URL
#   EXTREGEX   - Perl expression evaluated per page line to get the extension
#   NEXTREGEX  - Perl expression evaluated per page line to get the next page
BASIS=${1}
FILEREGEX=${2}
EXTREGEX=${3}
NEXTREGEX=${4}

# crawl URL NUM
#
# Walks a chain of pages starting at URL. For every page it:
#   1. downloads the page into a temporary file;
#   2. evaluates the Perl expressions held in FILEREGEX, EXTREGEX and
#      NEXTREGEX against each line of the page; the first line for which an
#      expression is true is printed (after whatever the expression did to
#      it) and becomes the file URL, the extension and the next page URL;
#   3. downloads the file URL to "<NUM padded to 3 digits>.<extension>";
#   4. continues with the next page URL and NUM + 1 if NEXTREGEX matched.
#
# Globals:   FILEREGEX, EXTREGEX, NEXTREGEX (read);
#            CURRENT, NUM, LOCAL, FILE, EXT, NEXT, HAS_NEXT (written)
# Arguments: $1 - URL of the first page
#            $2 - number used for the first downloaded file
# Returns:   0 when the chain ends because NEXTREGEX no longer matches,
#            1 if the temporary file cannot be created or a page cannot be
#            fetched.
#
# NOTE(security): the three *REGEX values are spliced verbatim into Perl
# source and therefore run as code; only pass expressions you trust.
crawl() {
    CURRENT="$1"
    NUM="$2"

    # Iterate instead of recursing so that long chains do not grow the
    # shell's call stack by one level per page.
    while :; do
        LOCAL="$(mktemp)" || return 1

        # "--" stops option parsing: URLs taken from remote pages must never
        # be interpreted as wget options.
        if ! wget -O "$LOCAL" -- "$CURRENT"; then
            echo "crawl: cannot fetch page: $CURRENT" >&2
            rm -f "$LOCAL"
            return 1
        fi

        FILE=$(perl -e 'while (<>) { $_ =~ '"$FILEREGEX"' && print && exit 0}' "$LOCAL")
        EXT=$(perl -e 'while (<>) { $_ =~ '"$EXTREGEX"' && print && exit 0}' "$LOCAL")

        # This Perl program ends with "exit 1", so its exit status tells
        # whether a next page was found at all.
        NEXT=$(perl -e 'while (<>) { $_ =~ '"$NEXTREGEX"' && print && exit 0}; exit 1' "$LOCAL")
        HAS_NEXT=$?

        # The page is fully parsed; drop the temporary copy before the
        # (possibly long) file download.
        rm -f "$LOCAL"

        if [ -n "$FILE" ]; then
            wget -O "$(printf '%03i' "$NUM").$EXT" -- "$FILE"
        else
            # Nothing matched FILEREGEX: do not call wget with an empty URL,
            # which would only leave a bogus output file behind.
            echo "crawl: no file found on page: $CURRENT" >&2
        fi

        if [ "$HAS_NEXT" -ne 0 ]; then
            return 0
        fi

        CURRENT="$NEXT"
        NUM=$((NUM + 1))
    done
}


# Exactly four arguments are required; anything else is a usage error, which
# is reported on stderr so it cannot be mistaken for regular output.
if [ "$#" -ne 4 ]; then
    printf 'Usage: %s URL FILE-REGEX EXT-REGEX NEXT-REGEX\n' "$0" >&2
    exit 1
fi


# Start at the given URL; downloaded files are numbered from 0.
crawl "$BASIS" 0