changeset 148:378ade047762

(none)
author edhoprima
date Sat, 25 Apr 2009 15:25:06 +0000
parents 88e8acf9b563
children 7200b4339aea
files moefetch.sh
diffstat 1 files changed, 218 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moefetch.sh	Sat Apr 25 15:25:06 2009 +0000
@@ -0,0 +1,218 @@
+#!/bin/sh
+
+# Copyright (c) 2009, edogawaconan <me@myconan.net>
+# 
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+# 
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+# Version 0.1-alpha1. Lots of bugs here. Use with care
+# USE WITH CARE
+
+# what it does: fetch every picture that has the specified tags.
+
+# requirements: wget, xsltproc (from libxslt), md5sum (or md5)
+
+# ---- user configuration ----
+# Extra directories appended to PATH when looking up:
+# cut, sed, wc, md5(sum), wget, xsltproc
+extrapath=""
+
+# Command used to compute MD5 sums. Its output must look like:
+#   <32-digit md5><space(s)><filename>
+# GNU: "md5sum"; BSD: "md5 -r"
+md5='md5 -r'
+
+# Server to fetch from. Only Danbooru-style sites are expected to work;
+# pointing this anywhere else is at your own risk.
+site='moe.imouto.org'
+
+# Base directory; it must be writable. The script refuses to run unless the
+# invoking user owns this directory.
+# Layout: $basedir/<tags> for downloads, $basedir/temp for working files.
+basedir='/home/ifail/test'
+
+# not user modifiable from here
+
+GENERATE()
+{
+	echo
+	echo "Fetching xml file"
+	wget "http://$site/post/index.xml?tags=$tags&offset=0&limit=100000" -O "$outdir-xml" -e continue=off
+	echo "Processing XML file..."
+	# xslt evilry
+	xsltproc - "$outdir-xml" <<EOF | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([jp][pn]g\)/\1\2.\3/g' | grep ^http > "$outdir-list"
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+<xsl:output method="xml" indent="yes"/>
+<xsl:template match="post">
+<xsl:value-of select="@file_url" />
+</xsl:template>
+</xsl:stylesheet>
+EOF
+	echo "`echo \`wc -l < \"$outdir-list\" \`` file(s) available on server"
+	#output file: $outdir-list
+}
+
+CHECK()
+{
+	if [ "$ISNEW" -ne 1 ];then
+		echo "Checking for errors..."
+		# THE FILES
+		echo > "$outdir-error"
+		cd "../$outdir"
+		for file in `ls`
+		do
+			if [ `$md5 "$file" | cut -d ' ' -f1 -` != `echo "$file" | cut -d '.' -f1` ]
+			then
+				echo "$file" >> "../temp/$outdir-error"
+				echo "Error: $file"
+			fi
+			printf "."
+		done
+		echo
+		cd ../temp
+		totalerr=`wc -l < $tags-error`
+		echo "$totalerr file(s) error"
+
+		echo "Generating list of new files..."
+		# THE FILES
+		#ls "../$outdir" | grep -vf "$outdir-error" > "$outdir-ok"
+		#cat "$outdir-list" | grep -vf "$outdir-ok" > "$outdir-newlist"
+		ls "../$outdir" | comm -1 -2 "$outdir-error" - > "$outdir-ok"
+		comm -1 -2 "$outdir-list" "$outdir-error" > "$outdir-newlist"
+		echo "`echo \`wc -l < \"$outdir-newlist\"\`` file(s) to be downloaded"
+		cd "../$outdir"
+
+		echo "Removing error files"
+		if [ $totalerr -gt 0 ]
+		then
+			cat "../temp/$outdir-error" | xargs rm
+		fi
+		echo "$totalerr file(s) removed"
+		echo "`echo \`ls | wc -l\`` file(s) available locally"
+		cd ..
+	else
+		echo "Empty local repository"
+		cat "$outdir-list" > "$outdir-newlist"
+	fi
+}
+
+FETCH()
+{
+	if [ `wc -l < "$outdir-newlist"` -eq 0 ]
+	then
+		echo "No new file"
+	else
+		echo "Starting wget"
+		cd "../$outdir"
+		wget -bi "../temp/$outdir-newlist" -o "../temp/$outdir.log"
+	fi
+}
+
+
+# path initialization
+# Append $extrapath only when it is non-empty: a trailing ":" would create
+# an empty PATH entry, which the shell treats as the current directory.
+if [ -n "$extrapath" ]; then
+	PATH="${PATH}:${extrapath}"
+fi
+export PATH
+
+# verify all programs required do indeed exist
+# basic tools (every external command the script calls)
+commands="cut sed wc wget xsltproc xargs rm mkdir chmod comm grep ls touch cat"
+cmderr=""
+for cmd in $commands
+do
+	command -v "$cmd" >/dev/null 2>&1 || cmderr="$cmderr $cmd"
+done
+if [ -n "$cmderr" ]; then
+	# drop the separator that precedes the first missing name
+	echo "${cmderr# } doesn't exist in $PATH"
+	exit 1
+fi
+#md5
+# $md5 may carry options ("md5 -r"); only its first word is the program name
+md5base=${md5%% *}
+if ! command -v "$md5base" >/dev/null 2>&1; then
+	echo "$md5base doesn't exist in $PATH"
+	exit 1
+fi
+# Self-test: the md5 of "test\n" is the constant below. The input is piped in
+# and no file operand is given, so the tool reads standard input. $md5 is
+# unquoted on purpose so that its options are split into separate words.
+if [ "$(printf 'test\n' | $md5 | cut -d ' ' -f 1)" != "d8e8fca2dc0f896fd7cb4cb0031ba249" ]; then
+	echo "$md5 doesn't produce wanted output"
+	exit 1
+fi
+
+
+# all green (part 1)! let's go (until we check the tag)
+
+# initialization
+# A command word plus at least one tag are required. Both failure paths print
+# the same usage line, and it lists exactly the commands accepted below.
+usage="Usage: moefetch status|get|update <tags>"
+if [ $# -lt 2 ]; then
+	echo "$usage"
+	exit 1
+fi
+
+case "$1" in
+	status|get|update)
+		echo "Starting..."
+	;;
+	*)
+		echo "$usage"
+		exit 1
+	;;
+esac
+
+# we did it indeed
+# get started
+
+# Everything after the command word is the tag list. "$*" joins the
+# arguments with single spaces; cut then drops the first word.
+tags=$(printf '%s\n' "$*" | cut -d ' ' -f 2-)
+echo "Tags: $tags"
+# slash do not want: the tag string doubles as a directory name
+outdir=$(printf '%s\n' "$tags" | sed -e 's/\//_/g')
+
+# CHECK compares ISNEW numerically, so it must always hold a number.
+ISNEW=0
+
+# do we own the files
+if [ ! -O "$basedir" ]; then
+	echo "Fatal error: you don't own ${basedir}. Please fix ${basedir}. Stopping"
+	exit 1
+fi
+
+if [ ! -d "$basedir/$outdir" ]; then
+	ISNEW=1
+	# mkdir reports the reason itself; there is nothing to do without the directory
+	mkdir "$basedir/$outdir" || exit 1
+fi
+if [ ! -O "$basedir/$outdir" ]; then
+	# NOTE(review): this makes the tree world-writable - confirm that is intended
+	echo "You don't own the $basedir/$outdir, applying globally writeable permission on it"
+	chmod -R u=rwX,g=rwX,o=rwX "$basedir/$outdir"
+fi
+# an existing but empty directory counts as a new repository too
+if [ -z "$(ls "$basedir/$outdir")" ]; then
+	ISNEW=1
+fi
+
+if [ ! -d "$basedir/temp" ]; then
+	mkdir "$basedir/temp" || exit 1
+fi
+if [ ! -O "$basedir/temp" ]; then
+	echo "You don't own the $basedir/temp, applying globally writeable permission on it"
+	chmod -R u=rwX,g=rwX,o=rwX "$basedir/temp"
+fi
+
+# let's move to workdir; stop if that fails so the working files below are
+# never created in whatever directory the script was started from
+cd "$basedir/temp" || exit 1
+touch "$outdir-error"
+touch "$outdir-ok"
+touch "$outdir-list"
+touch "$outdir-newlist"
+# 
+
+# let's do the job!
+# status: rebuild the URL list and check local files
+# get:    same as status, then download what is missing
+# update: download using the new-file list already on disk
+if [ "$1" = "status" ]; then
+	GENERATE
+	CHECK
+elif [ "$1" = "get" ]; then
+	GENERATE
+	CHECK
+	FETCH
+elif [ "$1" = "update" ]; then
+	FETCH
+fi