summary refs log tree commit diff stats
path: root/lcrawl
diff options
context:
space:
mode:
Diffstat (limited to 'lcrawl')
-rwxr-xr-x lcrawl 30
1 files changed, 30 insertions, 0 deletions
diff --git a/lcrawl b/lcrawl
new file mode 100755
index 0000000..2c087c5
--- /dev/null
+++ b/lcrawl
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+BASIS="$1"  # URL of the first page; handed to crawl as its starting point
+FILEREGEX="$2"  # Perl match/substitution spliced in after "$_ =~"; the first page line it matches is printed and used as the URL of the file to download
+EXTREGEX="$3"  # applied the same way; the first matching line becomes the extension of the saved file
+NEXTREGEX="$4"  # applied the same way; the first matching line is the next page URL, and no match ends the crawl
+
+# crawl URL NUM
+#
+# Walk a chain of pages starting at URL. For every page:
+#   1. download the page to a temporary file;
+#   2. run $FILEREGEX, $EXTREGEX and $NEXTREGEX over it with perl (each one is
+#      spliced in after "$_ =~" and the first line it matches is printed);
+#   3. download the matched file URL as <NUM zero-padded to 3 digits>.<EXT>;
+#   4. if $NEXTREGEX matched, repeat with that URL and NUM + 1.
+#
+# Globals read:    FILEREGEX, EXTREGEX, NEXTREGEX
+# Globals written: CURRENT, NUM, LOCAL, FILE, EXT, NEXT, HAS_NEXT
+#                  (POSIX sh has no "local", so these stay global)
+# Returns: 0 when the chain ends normally, 1 if the temporary file cannot be
+#          created or a page cannot be fetched.
+#
+# NOTE: the three regex arguments are inserted verbatim into Perl source and
+# therefore run as code; only pass trusted values.
+#
+# Defined without the "function" keyword because the script runs under
+# #!/bin/sh, where that keyword is not guaranteed to exist.
+crawl() {
+  CURRENT="$1"
+  NUM="$2"
+
+  # Loop instead of recursing: the state is global anyway, and a long chain
+  # of pages no longer grows the shell's call stack.
+  while :; do
+    LOCAL="$(mktemp)" || return 1
+
+    if ! wget -O "$LOCAL" "$CURRENT"; then
+      rm -f -- "$LOCAL"
+      return 1
+    fi
+
+    FILE=$(perl -e 'while (<>) { $_ =~ '"$FILEREGEX"' && print && exit 0}' "$LOCAL")
+    EXT=$(perl -e 'while (<>) { $_ =~ '"$EXTREGEX"' && print && exit 0}' "$LOCAL")
+
+    # perl exits 1 when no line matched, which is how "no next page" is signalled.
+    NEXT=$(perl -e 'while (<>) { $_ =~ '"$NEXTREGEX"' && print && exit 0}; exit 1' "$LOCAL")
+    HAS_NEXT=$?
+
+    # The page has been fully parsed, so drop the temp file right away.
+    rm -f -- "$LOCAL"
+
+    if [ -n "$FILE" ]; then
+      # Best effort: a failed file download does not stop the crawl.
+      wget -O "$(printf '%03d' "$NUM").$EXT" "$FILE"
+    else
+      printf '%s\n' "lcrawl: no file URL matched on $CURRENT" >&2
+    fi
+
+    if [ "$HAS_NEXT" -ne 0 ] || [ -z "$NEXT" ]; then
+      return 0
+    fi
+
+    CURRENT="$NEXT"
+    NUM=$((NUM + 1))
+  done
+}
+
+crawl "$BASIS" 0
\ No newline at end of file