| 148 | 1 #!/bin/sh | 
|  | 2 | 
|  | 3 # Copyright (c) 2009, edogawaconan <me@myconan.net> | 
|  | 4 # | 
|  | 5 # Permission to use, copy, modify, and/or distribute this software for any | 
|  | 6 # purpose with or without fee is hereby granted, provided that the above | 
|  | 7 # copyright notice and this permission notice appear in all copies. | 
|  | 8 # | 
|  | 9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | 
|  | 10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | 
|  | 11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | 
|  | 12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | 
|  | 13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | 
|  | 14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | 
|  | 15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | 
|  | 16 | 
|  | 17 # Version 0.1-alpha1. Lots of bugs here. Use with care | 
|  | 18 # USE WITH CARE | 
|  | 19 | 
|  | 20 # what it does: fetch every picture that has the specified tags. | 
|  | 21 | 
|  | 22 # requirement: wget, libxslt, md5sum (or md5) | 
|  | 23 | 
# configs
# Extra directories to search for: cut, sed, wc, md5(sum), wget, xsltproc.
# The value is appended to PATH after a ":" separator, so use PATH syntax
# (dir1:dir2). Leave empty when everything is already on PATH.
extrapath=

# md5 calculation, expected output: <32digit md5><space(s)><filename>
# gnu: "md5sum", bsd: "md5 -r"
# The value is expanded unquoted, so options after the program name are
# passed through as separate words. It is run both with a file name
# argument and with data on stdin.
md5="md5 -r"

# server address. Danbooru only! I do not take responsibility of stupidity.
# Host name only: the script prepends "http://" and appends the
# /post/index.xml query itself.
site="moe.imouto.org"

# base directory. Make sure it is writeable and owned by you: the script
# stops with a fatal error when the invoking user does not own it.
# Structure is $basedir/<tags> for downloaded files, plus $basedir/temp
# for the script's work files.
basedir="/home/ifail/test"
|  | 38 | 
|  | 39 # not user modifiable from here | 
|  | 40 | 
# GENERATE: download the post index for the requested tags and boil it
# down to a plain list of file URLs.
#
# Globals read: site, tags, outdir
# Files written (in the current directory):
#   $outdir-xml   raw XML answer from the server
#   $outdir-list  one URL per line
GENERATE()
{
	printf '\n'
	printf '%s\n' "Fetching xml file"
	wget "http://${site}/post/index.xml?tags=${tags}&offset=0&limit=100000" -O "${outdir}-xml" -e continue=off
	printf '%s\n' "Processing XML file..."
	# The stylesheet (read from stdin) prints the file_url attribute of every
	# <post>. sed then rewrites jpg/png URLs to <prefix>/<32-hex-md5>.<ext>,
	# dropping whatever sat between the md5 and the extension, and grep keeps
	# only the lines that start with "http".
	xsltproc - "${outdir}-xml" <<'EOF' | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([jp][pn]g\)/\1\2.\3/g' | grep ^http > "${outdir}-list"
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="xml" indent="yes"/>
<xsl:template match="post">
<xsl:value-of select="@file_url" />
</xsl:template>
</xsl:stylesheet>
EOF
	# The inner unquoted substitution lets echo squeeze away the padding that
	# some wc implementations put around the number.
	printf '%s file(s) available on server\n' "$(echo $(wc -l < "${outdir}-list"))"
	#output file: $outdir-list
}
|  | 59 | 
# CHECK: verify the local copy against the md5 embedded in each file name
# and work out which URLs still have to be downloaded.
#
# Globals read: ISNEW, ISQUICK, outdir, md5
# Must be called with the current directory set to the work directory
# (the sibling "temp" directory of ../$outdir). The current directory is
# left unchanged, so FETCH can run right after it.
#
# Files, all in the current directory:
#   reads  $outdir-list     URLs available on the server
#   writes $outdir-error    names of local files whose md5 does not match
#   writes $outdir-ok       names of local files that passed the check
#   writes $outdir-newlist  URLs that still need to be downloaded
#
# When ISNEW is 1 no verification happens and every URL in $outdir-list is
# copied to $outdir-newlist.
CHECK()
{
	if [ "$ISNEW" -ne 1 ]; then
		echo "Checking for errors..."
		# ": >" truncates to zero lines. "echo >" would leave one blank line
		# behind, which makes the error count at least 1 on every run.
		: > "$outdir-error"
		: > "$outdir-ok"
		for file in "../$outdir"/*
		do
			# Skips directories and the unexpanded pattern of an empty directory.
			[ -f "$file" ] || continue
			name=${file##*/}
			# $md5 is deliberately unquoted: it may carry options ("md5 -r").
			sum=$($md5 "$file" | cut -d ' ' -f 1)
			# The expected checksum is the file name up to its first dot.
			if [ "$sum" = "${name%%.*}" ]; then
				printf '%s\n' "$name" >> "$outdir-ok"
			else
				printf '%s\n' "$name" >> "$outdir-error"
				echo "Error: $name"
			fi
			printf "."
		done
		echo
		# The unquoted inner substitution lets echo squeeze the padding some
		# wc implementations emit.
		totalerr=$(echo $(wc -l < "$outdir-error"))
		echo "$totalerr file(s) error"

		echo "Generating list of new files..."
		# A URL is new when it mentions none of the verified local file names.
		# An empty pattern file is handled explicitly because grep
		# implementations disagree on what it matches.
		if [ -s "$outdir-ok" ]; then
			grep -v -F -f "$outdir-ok" "$outdir-list" > "$outdir-newlist"
		else
			cat "$outdir-list" > "$outdir-newlist"
		fi
		echo "$(echo $(wc -l < "$outdir-newlist")) file(s) to be downloaded"

		echo "Removing error files"
		if [ "$totalerr" -eq 0 ]; then
			echo "No error file. 0 file removed"
		else
			# One rm per line keeps names with blanks intact.
			while IFS= read -r file
			do
				rm -f -- "../$outdir/$file"
			done < "$outdir-error"
			echo "$totalerr file(s) removed"
		fi
		echo "$(echo $(ls "../$outdir" | wc -l)) file(s) available locally"
	else
		if [ "$ISQUICK" -eq 1 ]; then
			echo "quick mode selected. Skipping check"
		else
			echo "Empty local repository"
		fi
		cat "$outdir-list" > "$outdir-newlist"
	fi
}
|  | 108 | 
# FETCH: download every URL listed in $outdir-newlist into ../$outdir.
#
# Globals read: outdir
# Must be called from the work directory that holds $outdir-newlist.
# When there is something to download, the current directory becomes
# ../$outdir and wget is started in the background, logging to
# ../temp/$outdir.log.
# Returns 1 when the download directory cannot be entered.
FETCH()
{
	# The unquoted inner substitution lets echo squeeze the padding some wc
	# implementations emit.
	pending=$(echo $(wc -l < "$outdir-newlist"))
	if [ "${pending:-0}" -eq 0 ]
	then
		echo "No new file"
	else
		echo "Starting wget"
		cd "../$outdir" || return 1
		# -i must be followed directly by the list file. Written as "-bi -e ...",
		# wget takes "-e" as the input file and treats the list path as a URL.
		wget -b -e continue=on -i "../temp/$outdir-newlist" -o "../temp/$outdir.log"
	fi
}
|  | 120 | 
|  | 121 | 
# path initialization
# Append $extrapath only when it is set: "PATH=$PATH:" would end in an
# empty entry, which the shell treats as the current directory.
if [ -n "$extrapath" ]; then
	PATH="${PATH}:${extrapath}"
fi
export PATH

# verify all programs required do indeed exist
# basic tools
commands="cut sed wc wget xsltproc xargs rm mkdir chown chmod grep comm"
cmderr=" "
for cmd in $commands
do
	command -v "$cmd" >/dev/null 2>&1 || cmderr="$cmderr $cmd"
done
if [ "$cmderr" != " " ]; then
	echo "$cmderr doesn't exist in $PATH"
	exit 1
fi
#md5
# $md5 may carry options ("md5 -r"); only its first word is the program.
md5base=$(echo $md5 | cut -d ' ' -f 1)
if ! command -v "$md5base" >/dev/null 2>&1; then
	echo "$md5base doesn't exist in $PATH"
	exit 1
fi
# Self-test with a known checksum (md5 of "test" plus newline). The
# substitution is quoted so that empty output fails the comparison instead
# of turning the test into a syntax error that lets the script carry on.
if [ "$(echo test | $md5 | cut -d ' ' -f 1)" != "d8e8fca2dc0f896fd7cb4cb0031ba249" ]; then
	echo "$md5 doesn't produce wanted output"
	exit 1
fi
|  | 148 | 
|  | 149 | 
|  | 150 # all green (part 1)! let's go (until we check the tag) | 
|  | 151 | 
|  | 152 # initialization | 
|  | 153 # are we really doing it? | 
# Usage text, shown whenever the command line is not acceptable.
HELP="Usage: moefetch (quick)fetch|status <tags>"

# A mode word plus at least one tag is required, and the mode has to be
# one of the three known ones; anything else prints the usage and stops.
if [ "$#" -ge 2 ]; then
	case "$1" in
		fetch|quickfetch|status)
			echo "Starting..."
		;;
		*)
			echo "$HELP"
			exit 1
		;;
	esac
else
	echo "$HELP"
	exit 1
fi
|  | 170 | 
# we did it indeed
# get started

# Everything after the mode word ($1) is the tag query.
tags=$(echo "$@" | cut -d ' ' -f 2-)
echo "Tags: $tags"
# slash do not want
outdir=$(echo "$tags" | sed -e 's/\//_/g')

# These names would make the download directory coincide with $basedir,
# its parent, or the work directory, and the md5 check removes files whose
# names do not match their checksum. Refuse them.
case "$outdir" in
	""|.|..|temp)
		echo "Fatal error: \"$tags\" cannot be used as a directory name. Stopping"
		exit 1
	;;
esac

# ISNEW=1 means there is nothing local to verify.
ISNEW=0
# do we own the files
if [ ! -O "$basedir" ]; then
	echo "Fatal error: you don't own ${basedir}. Please fix ${basedir}. Stopping"
	exit 1
fi

if [ ! -d "$basedir/$outdir" ]; then
	ISNEW=1
	if ! mkdir "$basedir/$outdir"; then
		echo "Fatal error: cannot create $basedir/$outdir. Stopping"
		exit 1
	fi
fi
if [ ! -O "$basedir/$outdir" ]; then
	echo "You don't own the $basedir/$outdir, applying globally writeable permission on it"
	chmod -R u=rwX,g=rwX,o=rwX "$basedir/$outdir"
fi
# An existing but empty directory counts as new too.
localcount=$(echo $(ls "$basedir/$outdir" | wc -l))
if [ "${localcount:-0}" -eq 0 ]; then
	ISNEW=1
fi

if [ ! -d "$basedir/temp" ]; then
	if ! mkdir "$basedir/temp"; then
		echo "Fatal error: cannot create $basedir/temp. Stopping"
		exit 1
	fi
fi
if [ ! -O "$basedir/temp" ]; then
	echo "You don't own the $basedir/temp, applying globally writeable permission on it"
	chmod -R u=rwX,g=rwX,o=rwX "$basedir/temp"
fi

# let's move to workdir
# Everything below uses paths relative to the work directory, so carrying
# on after a failed cd would read, write and delete in the wrong place.
if ! cd "$basedir/temp"; then
	echo "Fatal error: cannot enter $basedir/temp. Stopping"
	exit 1
fi
# Make sure every work file exists before it is read.
touch "$outdir-error"
touch "$outdir-ok"
touch "$outdir-list"
touch "$outdir-newlist"
|  | 209 # | 
|  | 210 | 
# let's do the job!
# status     : refresh the server list and verify the local files
# fetch      : same as status, then download what is missing
# quickfetch : refresh the list, skip verification, download everything listed
ISQUICK=0
if [ "$1" = "status" ]; then
	GENERATE
	CHECK
elif [ "$1" = "fetch" ]; then
	GENERATE
	CHECK
	FETCH
elif [ "$1" = "quickfetch" ]; then
	GENERATE
	# Pretend the repository is new so CHECK skips the md5 pass.
	ISNEW=1
	ISQUICK=1
	CHECK
	FETCH
fi