#!/bin/sh
#
# lcrawl - follow a chain of linked pages and download one file per page.
#
# Origin: commit 132a78114c437a3157bc241e7e938528195dfe22 ("Add lcrawl"),
#         Matthias Schiffer, Wed, 22 Sep 2010 21:03:29 +0200.
#
# Usage: lcrawl BASIS FILEREGEX [EXTREGEX [NEXTREGEX]]
#
#   BASIS      URL of the first page to fetch.
#   FILEREGEX  Perl expression evaluated as `$_ =~ EXPR` against each line of
#              the fetched page. The first line for which it is true is
#              printed (after any s/// modification) and used as the URL of
#              the file to download.
#   EXTREGEX   Same mechanism; the result becomes the extension of the saved
#              file. Empty/omitted: the file is saved as "NNN." (no extension).
#   NEXTREGEX  Same mechanism; the result is the URL of the next page. The
#              crawl stops at the first page where it does not match.
#              Empty/omitted: only the first page is processed.
#
# Downloads are written to the current directory as 000.EXT, 001.EXT, ...
# (one number per visited page).
#
# SECURITY: the *REGEX arguments are spliced verbatim into a Perl program and
# therefore executed as Perl code. Only pass expressions you trust.

#######################################
# Print the first line of a file that satisfies a Perl expression.
# Arguments: $1 - Perl expression (applied as `$_ =~ EXPR`)
#            $2 - path of the file to scan
# Outputs:   the matching (possibly modified) line on stdout
# Returns:   0 if a line matched; non-zero if none matched, the expression
#            is empty, or Perl rejected the expression
#######################################
extract() {
  # An empty expression would yield a Perl syntax error; treat as "no match".
  [ -n "$1" ] || return 1
  perl -e 'while (<>) { $_ =~ '"$1"' && print && exit 0 }; exit 1' "$2"
}

#######################################
# Walk the page chain starting at a URL, downloading one file per page.
# Globals:   FILEREGEX, EXTREGEX, NEXTREGEX (read)
# Arguments: $1 - URL of the first page
#            $2 - number used for the first saved file
# Returns:   0 if every page and file was fetched, 1 otherwise
#######################################
crawl() {
  crawl_url=$1
  crawl_num=$2
  crawl_status=0

  crawl_page=$(mktemp) || return 1
  # Remove the scratch file on every exit path, including interruption.
  trap 'rm -f -- "$crawl_page"' EXIT
  trap 'exit 1' HUP INT TERM

  # Iterate instead of recursing so long chains do not grow the shell stack.
  while :; do
    # `--` keeps a scraped URL that starts with "-" from being parsed as an
    # option by wget.
    if ! wget -O "$crawl_page" -- "$crawl_url"; then
      printf 'lcrawl: cannot fetch page: %s\n' "$crawl_url" >&2
      crawl_status=1
      break
    fi

    if crawl_file=$(extract "$FILEREGEX" "$crawl_page"); then
      # A missing extension is not fatal; the name then ends in a bare dot.
      crawl_ext=$(extract "$EXTREGEX" "$crawl_page")
      crawl_out="$(printf '%03d' "$crawl_num").$crawl_ext"
      if ! wget -O "$crawl_out" -- "$crawl_file"; then
        printf 'lcrawl: cannot fetch file: %s\n' "$crawl_file" >&2
        crawl_status=1
      fi
    else
      printf 'lcrawl: no file found on page: %s\n' "$crawl_url" >&2
      crawl_status=1
    fi

    # No next link means the end of the chain.
    crawl_url=$(extract "$NEXTREGEX" "$crawl_page") || break
    crawl_num=$((crawl_num + 1))
  done

  rm -f -- "$crawl_page"
  trap - EXIT HUP INT TERM
  return "$crawl_status"
}

#######################################
# Entry point: validate arguments, publish them as globals, start the crawl.
# Globals:   BASIS, FILEREGEX, EXTREGEX, NEXTREGEX (written)
# Arguments: see the usage text at the top of the file
# Returns:   2 on usage error, otherwise the status of crawl
#######################################
main() {
  if [ "$#" -lt 2 ]; then
    printf 'Usage: %s BASIS FILEREGEX [EXTREGEX [NEXTREGEX]]\n' "${0##*/}" >&2
    return 2
  fi

  BASIS=$1
  FILEREGEX=$2
  EXTREGEX=${3-}
  NEXTREGEX=${4-}

  crawl "$BASIS" 0
}

main "$@"