summary | refs | log | tree | commit | diff | stats
path: root/lcrawl
diff options
context:
space:
mode:
author: Matthias Schiffer <mschiffer@universe-factory.net> 2010-09-22 21:03:29 +0200
committer: Matthias Schiffer <mschiffer@universe-factory.net> 2010-09-22 21:03:29 +0200
commit: 132a78114c437a3157bc241e7e938528195dfe22 (patch)
tree: 79bacc7f6d8ba7f38691b8335d10c4ed4750ff67 /lcrawl
parent: ccf1c051d96bc84349da873c7bf70a0de9338953 (diff)
download: utils-132a78114c437a3157bc241e7e938528195dfe22.tar
download: utils-132a78114c437a3157bc241e7e938528195dfe22.zip
Add lcrawl
Diffstat (limited to 'lcrawl')
-rwxr-xr-x lcrawl 30
1 files changed, 30 insertions, 0 deletions
diff --git a/lcrawl b/lcrawl
new file mode 100755
index 0000000..2c087c5
--- /dev/null
+++ b/lcrawl
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+BASIS="$1"
+FILEREGEX="$2"
+EXTREGEX="$3"
+NEXTREGEX="$4"
+
+# crawl URL NUM: fetch page URL, save the file it names as <NUM zero-padded to 3 digits>.<EXT>, then recurse on the next link.
+# Reads globals FILEREGEX, EXTREGEX, NEXTREGEX. Written in POSIX form (no "function" keyword) to match the #!/bin/sh shebang.
+crawl() {
+ CURRENT="$1"
+ NUM="$2"
+ LOCAL="$(mktemp)" || return 1
+ # Without the page there is nothing to extract: clean up the temp file and report failure.
+ wget -O"$LOCAL" "$CURRENT" || { rm -f "$LOCAL"; return 1; }
+
+ FILE=$(perl -e 'while (<>) { $_ =~ '"$FILEREGEX"' && print && exit 0}' "$LOCAL")
+ EXT=$(perl -e 'while (<>) { $_ =~ '"$EXTREGEX"' && print && exit 0}' "$LOCAL")
+ # The trailing "exit 1" makes perl's status say whether any line matched, i.e. whether a next page exists.
+ NEXT=$(perl -e 'while (<>) { $_ =~ '"$NEXTREGEX"' && print && exit 0}; exit 1' "$LOCAL")
+ HAS_NEXT=$?
+
+ wget -O"$(printf "%03i" "$NUM").$EXT" "$FILE"
+ rm "$LOCAL"
+ if [ "$HAS_NEXT" -eq 0 ]; then
+ crawl "$NEXT" "$((NUM + 1))"
+ fi
+}
+
+crawl "$BASIS" 0
\ No newline at end of file