diff options
author | Matthias Schiffer <mschiffer@universe-factory.net> | 2010-09-22 21:03:29 +0200 |
---|---|---|
committer | Matthias Schiffer <mschiffer@universe-factory.net> | 2010-09-22 21:03:29 +0200 |
commit | 132a78114c437a3157bc241e7e938528195dfe22 (patch) | |
tree | 79bacc7f6d8ba7f38691b8335d10c4ed4750ff67 /lcrawl | |
parent | ccf1c051d96bc84349da873c7bf70a0de9338953 (diff) | |
download | utils-132a78114c437a3157bc241e7e938528195dfe22.tar utils-132a78114c437a3157bc241e7e938528195dfe22.zip |
Add lcrawl
Diffstat (limited to 'lcrawl')
-rwxr-xr-x | lcrawl | 30 |
1 files changed, 30 insertions, 0 deletions
#!/bin/sh
#
# lcrawl - walk a chain of linked pages and download one file per page.
#
# Usage: lcrawl BASIS FILEREGEX EXTREGEX NEXTREGEX
#
#   BASIS      URL of the first page to fetch.
#   FILEREGEX  Perl operator bound to each line of the page with `=~`; the
#              first line for which it is true is printed (after any
#              modification the operator made to the line) and used as the
#              URL of the file to download.
#   EXTREGEX   Same mechanism; the result becomes the output file extension.
#   NEXTREGEX  Same mechanism; the result is the URL of the next page.
#              When no line matches, crawling stops.
#
# Downloaded files are written to the current directory as 000.EXT,
# 001.EXT, ... in the order the pages are visited.
#
# NOTE(review): the three regex arguments are spliced verbatim into Perl
# source and executed as code, so they must come from a trusted caller.
# Presumably they are substitutions such as 's/.*href="([^"]*)".*/$1/' -
# confirm against actual usage.

if [ "$#" -lt 4 ]; then
  printf 'Usage: %s BASIS FILEREGEX EXTREGEX NEXTREGEX\n' "${0##*/}" >&2
  exit 2
fi

BASIS="$1"
FILEREGEX="$2"
EXTREGEX="$3"
NEXTREGEX="$4"

# Print the first line of file $2 for which the Perl operator $1 is true.
# Exits 0 when a line matched, 1 when none did.
first_match() {
  perl -e 'while (<>) { $_ =~ '"$1"' && print && exit 0 }; exit 1' "$2"
}

# crawl URL NUM
#
# Fetch URL, extract the file URL, extension and next-page URL from it using
# FILEREGEX, EXTREGEX and NEXTREGEX, download the file as NUM (zero-padded to
# three digits) plus the extension, then repeat with the next page and
# NUM + 1 for as long as NEXTREGEX matches.
#
# Returns 0 when the chain ends normally, 1 when a temporary file cannot be
# created or a page cannot be fetched. A failed file download is reported
# on stderr but does not stop the crawl.
crawl() {
  # POSIX sh has no `local`; the crawl_ prefix keeps these out of the way
  # of the script's globals.
  crawl_url="$1"
  crawl_num="$2"

  # Iterate instead of recursing so long chains do not deepen the call stack.
  while :; do
    crawl_page="$(mktemp)" || {
      printf 'lcrawl: cannot create temporary file\n' >&2
      return 1
    }

    if ! wget -O"$crawl_page" -- "$crawl_url"; then
      printf 'lcrawl: cannot fetch page: %s\n' "$crawl_url" >&2
      rm -f -- "$crawl_page"
      return 1
    fi

    crawl_file="$(first_match "$FILEREGEX" "$crawl_page")"
    crawl_ext="$(first_match "$EXTREGEX" "$crawl_page")"
    crawl_next="$(first_match "$NEXTREGEX" "$crawl_page")"
    # Status of the command substitution: 0 only if NEXTREGEX matched.
    crawl_has_next=$?

    # The page is no longer needed once everything has been extracted.
    rm -f -- "$crawl_page"

    if [ -n "$crawl_file" ]; then
      crawl_out="$(printf '%03d' "$crawl_num").$crawl_ext"
      wget -O"$crawl_out" -- "$crawl_file" ||
        printf 'lcrawl: cannot download: %s\n' "$crawl_file" >&2
    else
      # Skip the download: wget -O would still create an empty output file.
      printf 'lcrawl: no file link found on: %s\n' "$crawl_url" >&2
    fi

    [ "$crawl_has_next" -eq 0 ] || return 0

    crawl_url="$crawl_next"
    crawl_num=$((crawl_num + 1))
  done
}

crawl "$BASIS" 0
\ No newline at end of file |