view moefetch.sh @ 156:d3b002fd944e

fix: my attempt at speeding up things failed. reverting back to trusty grep -vf
author edhoprima
date Sat, 25 Apr 2009 16:16:50 +0000
parents 78ef726d3845
children ffca564daefa
line wrap: on
line source

#!/bin/sh

# Copyright (c) 2009, edogawaconan <me@myconan.net>
# 
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
# 
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

# Version 0.1-alpha1. Lots of bugs here. Use with care
# USE WITH CARE

# what it does: fetch every picture that has the specified tags.

# requirement: wget, libxslt, md5sum (or md5)

# configs
# Extra directories (colon-separated, PATH syntax) appended to PATH when the
# script looks up its tools: cut, sed, wc, md5(sum), wget, xsltproc.
# Leave empty if everything is already in PATH.
extrapath=

# md5 calculation command. Its output must look like:
#   <32-digit md5><space(s)><filename>
# GNU: "md5sum", BSD: "md5 -r"
md5="md5 -r"

# Server address (host name only; "http://" is prepended by the script).
# Danbooru only! I do not take responsibility of stupidity.
site="moe.imouto.org"

# Base directory. Make sure it is writeable: the script only checks that you
# own it and stops otherwise; no other permission check is done.
# Structure is $basedir/<tags> for downloaded files and $basedir/temp for
# the script's working files.
basedir="/home/ifail/test"

# not user modifiable from here

GENERATE()
{
	# Download the post index for $tags from $site and reduce it to a plain
	# list of image URLs, one per line.
	#
	# Globals read: site, tags, outdir
	# Files written (in the current directory):
	#   $outdir-xml   raw XML answer from the server
	#   $outdir-list  one URL per line, shaped <prefix>/<md5>.<ext>
	printf '\n'
	echo "Fetching xml file"
	wget "http://$site/post/index.xml?tags=$tags&offset=0&limit=100000" -O "$outdir-xml" -e continue=off
	echo "Processing XML file..."
	# The stylesheet (read from stdin) prints the file_url attribute of every
	# <post>. sed then drops whatever sits between the 32-digit md5 and the
	# extension, and grep discards every line that is not a URL.
	xsltproc - "$outdir-xml" <<'EOF' |
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="xml" indent="yes"/>
<xsl:template match="post">
<xsl:value-of select="@file_url" />
</xsl:template>
</xsl:stylesheet>
EOF
		sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([jp][pn]g\)/\1\2.\3/g' |
		grep ^http > "$outdir-list"
	# The unquoted inner substitution strips the padding some wc versions add.
	echo "$(echo $(wc -l < "$outdir-list")) file(s) available on server"
}

CHECK()
{
	# Verify the local copy of $outdir and work out what still has to be
	# downloaded.
	#
	# Globals read:    ISNEW, ISQUICK, md5, outdir
	# Globals written: file, totalerr
	# Must be called from the work directory, a sibling of the download
	# directory named "temp". Files used there:
	#   $outdir-list     (read)  URLs available on the server
	#   $outdir-error    (write) local files whose md5 differs from their name
	#   $outdir-ok       (write) local files that passed the check
	#   $outdir-newlist  (write) URLs that still have to be downloaded
	# Returns 1 if one of the directory changes fails; nothing is removed in
	# that case.
	if [ "$ISNEW" -ne 1 ]; then
		echo "Checking for errors..."
		: > "$outdir-error"
		: > "$outdir-ok"
		cd "../$outdir" || return 1
		for file in *
		do
			# Skip sub-directories and the literal "*" an empty directory leaves.
			[ -f "$file" ] || continue
			# A file is intact when its md5 equals the part of its name before
			# the first dot. "./" keeps names starting with "-" from being read
			# as options; $md5 is unquoted on purpose (it may carry arguments).
			if [ "$($md5 "./$file" | cut -d ' ' -f 1)" != "${file%%.*}" ]
			then
				echo "$file" >> "../temp/$outdir-error"
				echo "Error: $file"
			else
				echo "$file" >> "../temp/$outdir-ok"
			fi
			printf "."
		done
		echo
		cd ../temp || return 1
		# The unquoted inner substitution strips the padding some wc versions add.
		totalerr=$(echo $(wc -l < "$outdir-error"))
		echo "$totalerr file(s) error"

		echo "Generating list of new files..."
		# Everything on the server that is not an intact local file must be
		# fetched; this includes the corrupt files removed below. Names are
		# matched as fixed strings (-F) so "." is not a regex wildcard.
		if [ -s "$outdir-ok" ]; then
			grep -vFf "$outdir-ok" "$outdir-list" > "$outdir-newlist"
		else
			cat "$outdir-list" > "$outdir-newlist"
		fi
		echo "$(echo $(wc -l < "$outdir-newlist")) file(s) to be downloaded"
		cd "../$outdir" || return 1

		echo "Removing error files"
		if [ "$totalerr" -eq 0 ]; then
			echo "No error file. 0 file removed"
		else
			while IFS= read -r file
			do
				rm "./$file"
			done < "../temp/$outdir-error"
			echo "$totalerr file(s) removed"
		fi
		echo "$(echo $(ls | wc -l)) file(s) available locally"
		cd ../temp || return 1
	else
		if [ "$ISQUICK" -eq 1 ]; then
			echo "quick mode selected. Skipping check"
		else
			echo "Empty local repository"
		fi
		# Nothing to compare against: queue everything the server offers.
		cat "$outdir-list" > "$outdir-newlist"
	fi
}

FETCH()
{
	# Download every URL listed in $outdir-newlist into ../$outdir with a
	# background wget; its log goes to ../temp/$outdir.log.
	#
	# Globals read: outdir
	# Must be called from the work directory (a sibling of the download
	# directory named "temp"). On the download path the current directory is
	# left at ../$outdir.
	# Returns 1 if the download directory cannot be entered.
	if [ "$(echo $(wc -l < "$outdir-newlist"))" -eq 0 ]; then
		echo "No new file"
	else
		echo "Starting wget"
		cd "../$outdir" || return 1
		# -i consumes the very next word as its input file, so the list must
		# follow it directly; every option with an argument is kept separate.
		wget -b -e continue=on -o "../temp/$outdir.log" -i "../temp/$outdir-newlist"
	fi
}


# path initialization
# Only extend PATH when extra directories were configured: appending an empty
# $extrapath would leave a trailing ":" in PATH, which makes the shell search
# the current directory for commands.
if [ -n "$extrapath" ]; then
	PATH=${PATH}:$extrapath
fi
export PATH

# verify all programs required do indeed exist
# basic tools (every external command this script calls, except md5)
commands="cut sed wc wget xsltproc xargs rm mkdir chmod comm grep ls cat touch"
cmderr=" "
for cmd in $commands
do
	command -v "$cmd" >/dev/null || cmderr="$cmderr $cmd"
done
if [ x"$cmderr" != x" " ]; then
	echo "$cmderr doesn't exist in $PATH"
	exit 1
fi
#md5
# $md5 may carry arguments ("md5 -r"); only its first word is the program.
md5base=$(echo $md5 | cut -d ' ' -f 1)
if ! command -v "$md5base" >/dev/null; then
	echo "$md5base doesn't exist in $PATH"
	exit 1
fi
# Self-test: the digest of "test\n" must be the first field of the output.
# The substitution is quoted so that empty output fails the comparison
# instead of causing a test syntax error.
if [ "$(echo test | $md5 | cut -d ' ' -f 1)" != "d8e8fca2dc0f896fd7cb4cb0031ba249" ]; then
	echo "$md5 doesn't produce wanted output"
	exit 1
fi


# all green (part 1)! let's go (until we check the tag)

# initialization
# Go on only when an action and at least one tag were given, and the action
# is one we know; otherwise print the usage text and stop.
HELP="Usage: moefetch (quick)fetch|status <tags>"

[ "$#" -ge 2 ] || {
	echo "$HELP"
	exit 1
}

if [ "$1" = "status" ] || [ "$1" = "fetch" ] || [ "$1" = "quickfetch" ]; then
	echo "Starting..."
else
	echo "$HELP"
	exit 1
fi

# we did it indeed
# get started

# Everything after the action word is the tag list.
tags=$(printf '%s\n' "$*" | cut -d ' ' -f 2-)
echo "Tags: $tags"
# slash do not want
outdir=$(printf '%s\n' "$tags" | sed -e 's/\//_/g')
# These names would make $basedir/$outdir point at $basedir itself, at its
# parent or at the work directory, and the md5 check removes mismatching
# files from that directory.
case "$outdir" in
	""|.|..|temp)
		echo "Fatal error: \"$tags\" cannot be used as a directory name. Stopping"
		exit 1
	;;
esac

# do we own the files
ISNEW=0
if [ ! -O "$basedir" ]; then
	echo "Fatal error: you don't own ${basedir}. Please fix ${basedir}. Stopping"
	exit 1
fi

# download directory for this tag set
if [ ! -d "$basedir/$outdir" ]; then
	ISNEW=1
	mkdir "$basedir/$outdir" || exit 1
fi
if [ ! -O "$basedir/$outdir" ]; then
	echo "You don't own the $basedir/$outdir, applying globally writeable permission on it"
	chmod -R u=rwX,g=rwX,o=rwX "$basedir/$outdir"
fi
# An existing but empty directory counts as new too.
if [ "$(echo $(ls "$basedir/$outdir" | wc -l))" -eq 0 ]; then
	ISNEW=1
fi

# work directory holding the list files and the wget log
if [ ! -d "$basedir/temp" ]; then
	mkdir "$basedir/temp" || exit 1
fi
if [ ! -O "$basedir/temp" ]; then
	echo "You don't own the $basedir/temp, applying globally writeable permission on it"
	chmod -R u=rwX,g=rwX,o=rwX "$basedir/temp"
fi

# let's move to workdir
# Every later step uses paths relative to it, so stop if we cannot get there.
if ! cd "$basedir/temp"; then
	echo "Fatal error: cannot enter $basedir/temp. Stopping"
	exit 1
fi
touch "$outdir-error"
touch "$outdir-ok"
touch "$outdir-list"
touch "$outdir-newlist"
# 

# let's do the job!
# status:     build the server list and compare it with the local files
# fetch:      same, then download what is missing
# quickfetch: skip the md5 check (ISNEW/ISQUICK forced to 1) and download
ISQUICK=0
if [ "$1" = "status" ]; then
	GENERATE
	CHECK
elif [ "$1" = "fetch" ]; then
	GENERATE
	CHECK
	FETCH
elif [ "$1" = "quickfetch" ]; then
	GENERATE
	ISNEW=1
	ISQUICK=1
	CHECK
	FETCH
fi