From 765ed07354c655210ee25586988bad98353ebeef Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sun, 8 Jan 2017 21:28:18 +0100 Subject: Initial commit --- fetch-enclosures | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100755 fetch-enclosures (limited to 'fetch-enclosures') diff --git a/fetch-enclosures b/fetch-enclosures new file mode 100755 index 0000000..42d5084 --- /dev/null +++ b/fetch-enclosures @@ -0,0 +1,57 @@ +#!/bin/bash + +set -e +shopt -s nullglob + +if [ $# -gt 1 ]; then + cat >&2 <<'ENDUSAGE' +Usage: fetch-enclosures [ OUTDIR ] + +OUTDIR defaults the current directory. A directory called 'enclosures' will be +created inside the output directory; the output of fetch-pages is expected in +the 'pages' directory inside OUTDIR. +ENDUSAGE + exit 1 +fi + + +OUTDIR="$1" +[ "$OUTDIR" ] || OUTDIR=. + +mkdir -p "${OUTDIR}/enclosures" + + +# Cuts off resize suffixes like _800 +unresize () { + sed -r 's#/([^_/]+)_([^_/]+)_[^_/]+\.#/\1_\2.#' +} + +# http://asset-#.soupcdn.com/asset/XXXXX/YYYY_ZZZZ.EXT will be saved as XXXXX_YYYY_ZZZZ.EXT +filename () { + echo "$1" | sed -r 's#^.*/([^/]+)/([^/]+)$#\1_\2#' +} + +# Using grep for this is not nice, but the Soup HTML is too broken for xsltproc... +extract-images () { + grep -A 1 '
&2 + continue + fi + + echo "Downloading ${url} to ${file}..." >&2 + curl -f -L -o "$file" "${url}" || true + done +done -- cgit v1.2.3