| 148 | 1 #!/bin/sh | 
|  | 2 | 
|  | 3 # Copyright (c) 2009, edogawaconan <me@myconan.net> | 
|  | 4 # | 
|  | 5 # Permission to use, copy, modify, and/or distribute this software for any | 
|  | 6 # purpose with or without fee is hereby granted, provided that the above | 
|  | 7 # copyright notice and this permission notice appear in all copies. | 
|  | 8 # | 
|  | 9 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | 
|  | 10 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | 
|  | 11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | 
|  | 12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | 
|  | 13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | 
|  | 14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | 
|  | 15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | 
|  | 16 | 
|  | 17 # Version 0.1-alpha1. Lots of bugs here. Use with care | 
|  | 18 # USE WITH CARE | 
|  | 19 | 
|  | 20 # what it does: fetch every picture that has the specified tags. | 
|  | 21 | 
|  | 22 # requirement: wget, libxslt, md5sum (or md5) | 
|  | 23 | 
# configs
# Each setting can be overridden from the environment through the matching
# MOEFETCH_* variable, so the script does not have to be edited per machine.
# With none of those variables set the values are the built-in defaults.

# program additional paths for: cut, sed, wc, md5(sum), wget, xsltproc
# (appended to PATH; leave empty for none)
extrapath=${MOEFETCH_EXTRAPATH-}

# md5 calculation, expected output: <32digit md5><space(s)><filename>
# gnu: "md5sum", bsd: "md5 -r"
md5=${MOEFETCH_MD5:-md5 -r}

# server address. Danbooru only! I do not take responsibility of stupidity.
site=${MOEFETCH_SITE:-moe.imouto.org}

# base directory. You must own it and it must be writeable: the script
# refuses to run otherwise. Ownership of the files inside it is not checked.
# Structure is $basedir/<tags>
basedir=${MOEFETCH_BASEDIR:-/home/ifail/test}
|  | 38 | 
|  | 39 # not user modifiable from here | 
|  | 40 | 
# GENERATE: download the post index for $tags from $site and reduce it to a
# list of image URLs, one per line.
# Globals read: site, tags, outdir
# Files written (relative to the current directory):
#   $outdir-xml   the XML index as saved by wget
#   $outdir-list  image URLs, rewritten to <prefix>/<md5>.<ext> where the
#                 sed expression matches
# Returns: 0 on success; 1 when the index cannot be downloaded, in which
#          case $outdir-list is left empty.
GENERATE()
{
	echo
	echo "Fetching xml file"
	if ! wget "http://$site/post/index.xml?tags=$tags&offset=0&limit=100000" -O "$outdir-xml" -e continue=off
	then
		echo "Error: could not fetch the post list from $site" >&2
		# Leave an empty list behind so later steps never act on a
		# list produced by an earlier run.
		: > "$outdir-list"
		return 1
	fi
	echo "Processing XML file..."
	# xslt evilry: print the file_url attribute of every <post>. A newline
	# is emitted after each URL so the sed/grep stage sees one URL per
	# line even if the server's XML has no whitespace between posts.
	# The delimiter is quoted so the stylesheet is passed through verbatim.
	xsltproc - "$outdir-xml" <<'EOF' | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([jp][pn]g\)/\1\2.\3/g' | grep ^http > "$outdir-list"
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="text"/>
<xsl:template match="post">
<xsl:value-of select="@file_url" /><xsl:text>&#10;</xsl:text>
</xsl:template>
</xsl:stylesheet>
EOF
	# $(( )) strips the padding some wc implementations print.
	echo "$(( $(wc -l < "$outdir-list") )) file(s) available on server"
	#output file: $outdir-list
}
|  | 59 | 
# CHECK: verify the files already downloaded for the current tag set and
# work out which URLs still have to be fetched.
# Must be called with the current directory set to the temp directory that
# sits next to the image directory (../$outdir); it returns there as well.
# Globals read:    ISNEW, ISQUICK, outdir, md5
# Globals written: file, totalerr
# Files (in the temp directory):
#   $outdir-list     input: URLs available on the server
#   $outdir-error    local files whose md5 differs from their name
#   $outdir-ok       local files whose md5 matches their name
#   $outdir-newlist  output: URLs to download
# When ISNEW is 1 nothing is verified and every URL is queued.
CHECK()
{
	if [ "$ISNEW" -ne 1 ];then
		echo "Checking for errors..."
		# THE FILES
		# Start from empty lists. ":" writes nothing, whereas a bare
		# echo would leave a blank line that gets counted as an error.
		: > "$outdir-error"
		: > "$outdir-ok"
		cd "../$outdir" || return 1
		for file in *
		do
			# Skips directories and the literal "*" of an empty directory.
			[ -f "$file" ] || continue
			# A good file is named <md5>.<ext>; compare the real digest
			# with everything before the first dot of the name.
			if [ "$($md5 "./$file" | cut -d ' ' -f1 -)" != "${file%%.*}" ]
			then
				echo "$file" >> "../temp/$outdir-error"
				echo "Error: $file"
			else
				echo "$file" >> "../temp/$outdir-ok"
			fi
			printf "."
		done
		echo
		cd ../temp || return 1
		totalerr=$(( $(wc -l < "$outdir-error") ))
		echo "$totalerr file(s) error"

		echo "Generating list of new files..."
		# THE FILES
		# Queue every URL that does not contain the name of a verified
		# local file. The list holds URLs while the ok file holds bare
		# file names, so a fixed-string substring match is used.
		if [ -s "$outdir-ok" ]
		then
			grep -vFf "$outdir-ok" "$outdir-list" > "$outdir-newlist"
		else
			cat "$outdir-list" > "$outdir-newlist"
		fi
		echo "$(( $(wc -l < "$outdir-newlist") )) file(s) to be downloaded"
		cd "../$outdir" || return 1

		echo "Removing error files"
		if [ "$totalerr" -gt 0 ]
		then
			while IFS= read -r file
			do
				rm -f "./$file"
			done < "../temp/$outdir-error"
		fi
		echo "$totalerr file(s) removed"
		echo "$(( $(ls | wc -l) )) file(s) available locally"
		# Go back to the temp directory, not its parent: FETCH and the
		# branch below address the list files relative to it.
		cd ../temp || return 1
	else
		if [ "$ISQUICK" -eq 1 ]; then
			echo "quick mode selected. Skipping check"
		else
			echo "Empty local repository"
		fi
		cat "$outdir-list" > "$outdir-newlist"
	fi
}
|  | 107 | 
# FETCH: hand the pending URL list to wget.
# Must be called with the current directory set to the temp directory; the
# list is read from ./$outdir-newlist. When there is something to download
# the function changes into ../$outdir and leaves the shell there.
# Globals read: outdir
# Files written: ../temp/$outdir.log (wget log)
FETCH()
{
	# $(( )) strips the padding some wc implementations print and yields
	# 0 when the list cannot be read.
	if [ "$(( $(wc -l < "$outdir-newlist") ))" -eq 0 ]
	then
		echo "No new file"
	else
		echo "Starting wget"
		cd "../$outdir" || return 1
		# -i takes the next word as its input file, so it must sit
		# directly in front of the list (written as "-bi -e ..." it
		# would consume "-e" instead). -b sends wget to the background.
		wget -b -e continue=on -i "../temp/$outdir-newlist" -o "../temp/$outdir.log"
	fi
}
|  | 119 | 
|  | 120 | 
# path initialization
# Append only when extrapath is set: a trailing ":" would add an empty
# PATH entry, which the shell treats as the current directory.
if [ -n "$extrapath" ]; then
	PATH="${PATH}:$extrapath"
fi
export PATH

# verify all programs required do indeed exist
# basic tools (every external command this script calls, md5 aside)
commands="cut sed grep wc wget xsltproc xargs rm mkdir chmod comm touch ls cat"
cmderr=""
for cmd in $commands
do
	command -v "$cmd" >/dev/null 2>&1 || cmderr="$cmderr $cmd"
done
if [ -n "$cmderr" ]; then
	echo "${cmderr# } doesn't exist in $PATH" >&2
	exit 1
fi
#md5
# $md5 may carry options ("md5 -r"); only its first word is the program.
md5base=${md5%% *}
if ! command -v "$md5base" >/dev/null 2>&1; then
	echo "$md5base doesn't exist in $PATH" >&2
	exit 1
fi
# Self-test: the digest of "test\n" must come out as the first field.
# The result is quoted so that empty output counts as a failure instead of
# turning the comparison into a syntax error.
md5out=$(echo test | $md5 | cut -d ' ' -f 1 -)
if [ "$md5out" != "d8e8fca2dc0f896fd7cb4cb0031ba249" ]; then
	echo "$md5 doesn't produce wanted output" >&2
	exit 1
fi
|  | 147 | 
|  | 148 | 
# all green (part 1)! let's go (until we check the tag)

# initialization
# Usage text shown whenever the command line is not acceptable.
HELP="Usage: moefetch (quick)fetch|status <tags>"

# A mode word plus at least one tag is required.
[ $# -ge 2 ] || {
    echo "$HELP"
    exit 1
}

# The mode must be one of the three known words.
if [ "$1" = "status" ] || [ "$1" = "fetch" ] || [ "$1" = "quickfetch" ]; then
	echo "Starting..."
else
	echo "$HELP"
	exit 1
fi
|  | 169 | 
# we did it indeed
# get started

# Everything after the mode word is the tag list. printf is used instead of
# echo because echo may interpret backslashes in the tags.
tags=$(printf '%s\n' "$*" | cut -d ' ' -f 2- -)
echo "Tags: $tags"
# slash do not want
outdir=$(printf '%s\n' "$tags" | sed -e 's/\//_/g')
# These names would make the image directory coincide with $basedir, its
# parent, or the temp directory that holds the list files.
case "$outdir" in
	""|.|..|temp)
		echo "Fatal error: \"$tags\" maps to a reserved directory name. Stopping" >&2
		exit 1
	;;
esac

# do we own the files
ISNEW=0
if [ ! -O "$basedir" ]; then
	echo "Fatal error: you don't own ${basedir}. Please fix ${basedir}. Stopping"
	exit 1
fi
if [ ! -d "$basedir/$outdir" ]; then
	ISNEW=1
	mkdir "$basedir/$outdir" || exit 1
fi
if [ ! -O "$basedir/$outdir" ]; then
	echo "You don't own the $basedir/$outdir, applying globally writeable permission on it"
	chmod -R u=rwX,g=rwX,o=rwX "$basedir/$outdir"
fi
# An existing but empty directory counts as a new repository too.
if [ -z "$(ls "$basedir/$outdir")" ]; then
	ISNEW=1
fi
if [ ! -d "$basedir/temp" ]; then
	mkdir "$basedir/temp" || exit 1
fi
if [ ! -O "$basedir/temp" ]; then
	echo "You don't own the $basedir/temp, applying globally writeable permission on it"
	chmod -R u=rwX,g=rwX,o=rwX "$basedir/temp"
fi
# let's move to workdir
# Stop if that fails: everything below uses paths relative to it.
if ! cd "$basedir/temp"; then
	echo "Fatal error: cannot enter $basedir/temp. Stopping" >&2
	exit 1
fi
touch "$outdir-error" "$outdir-ok" "$outdir-list" "$outdir-newlist"
|  | 208 # | 
|  | 209 | 
# let's do the job!
# Run the steps that belong to the requested mode:
#   status      build the server list and check the local files
#   fetch       same, then download what is missing
#   quickfetch  build the list, skip the local check, download everything
ISQUICK=0
if [ "$1" = "status" ]; then
	GENERATE
	CHECK
elif [ "$1" = "fetch" ]; then
	GENERATE
	CHECK
	FETCH
elif [ "$1" = "quickfetch" ]; then
	GENERATE
	# Marking the repository as new makes CHECK queue every URL.
	ISNEW=1
	ISQUICK=1
	CHECK
	FETCH
fi