comparison moefetch.sh @ 159:75fe19903b74

Major cleanup
author edhoprima@gmail.com <edhoprima@gmail.com>
date Fri, 05 Jun 2009 15:20:36 +0000
parents cba73f6a96bb
children 68227a30d0b3
comparison
equal deleted inserted replaced
158:cba73f6a96bb 159:75fe19903b74
11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 16 #
17 # Version 0.1-alpha1. Lots of bugs here. Use with care 17 # Lots of bugs here. Use with care
18 # USE WITH CARE 18 # USE WITH CARE
19 19 #
20 # what it does: fetch every picture that has the specified tags. 20 # what it does: fetch every picture that has the specified TAGS.
21
22 # requirement: wget, libxslt, md5sum (or md5) 21 # requirement: wget, libxslt, md5sum (or md5)
23 22
24 # configs 23 # program additional paths for: cut, sed, wc, MD5(sum), wget, xsltproc, grep
25 # program additional paths for: cut, sed, wc, md5(sum), wget, xsltproc, grep 24 ADDITIONAL_PATH=
26 extrapath= 25
27 26 # custom md5 path with arguments, expected output: <32digit md5><space(s)><filename>
28 # md5 calculation, expected output: <32digit md5><space(s)><filename> 27 # Leave empty for "md5sum" (Linux, Solaris), "md5 -r" (*BSD)
29 # gnu: "md5sum", bsd: "md5 -r" 28 MD5=
30 md5="md5 -r" 29
31 30 # default server address. Danbooru only! I do not take responsibility of stupidity.
32 # server address. Danbooru only! I do not take responsibility of stupidity. 31 DEFAULT_SITE="moe.imouto.org"
33 site="moe.imouto.org"
34 32
35 # base directory. make sure it's writeable. I do not take responsibility if you don't own the folder and files as no check is done for this one. 33 # base directory. make sure it's writeable. I do not take responsibility if you don't own the folder and files as no check is done for this one.
36 # Structure is $basedir/<tags> 34 # Structure is ${BASE_DIR}/<TAGS>
37 basedir="/home/ifail/test" 35 # Absolute path only.
36 # Leave empty to use whatever folder you're running this at
37 BASE_DIR=""
38 38
39 # not user modifiable from here 39 # not user modifiable from here
40 40
41 GENERATE() 41 # useless welcome message. Also version
42 { 42 Msg_Welcome() {
43 MOEFETCHVERSION="0.1-beta"
44 cat <<EOF
45 moefetch ${MOEFETCHVERSION}
46 Copyright (c) 2009 edogawaconan <me@myconan.net>
47
48 EOF
49 }
50
51 # fatal error handler
52 Err_Fatal() {
53 echo "Fatal error: ${1}"
54 exit 1
55 }
56
57 # help message
58 Err_Help() {
59 cat <<EOF
60 Usage: moefetch (quick)fetch|status <TAGS>
61 EOF
62 exit 0
63 }
64
65 # generate link by transforming xml
66 Generate_Link() {
67 cd "${BASE_DIR}/temp"
43 echo 68 echo
44 echo "Fetching xml file" 69 echo "Fetching xml file"
45 wget "http://$site/post/index.xml?tags=$tags&offset=0&limit=100000" -O "$outdir-xml" -e continue=off 70 wget "http://${SITE}/post/index.xml?tags=${TAGS}&offset=0&limit=100000" -O "${SITE_DIR}-${TARGET_DIR}-xml" -e continue=off
46 echo "Processing XML file..." 71 echo "Processing XML file..."
47 # xslt evilry 72 # xslt evilry
48 xsltproc - "$outdir-xml" <<EOF | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([jp][pn]g\)/\1\2.\3/g' | grep ^http > "$outdir-list" 73 xsltproc - "${TARGET_DIR}-xml" <<EOF | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([jp][pn]g\)/\1\2.\3/g' | grep ^http > "${TARGET_DIR}-list"
49 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> 74 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
50 <xsl:output method="xml" indent="yes"/> 75 <xsl:output method="xml" indent="yes"/>
51 <xsl:template match="post"> 76 <xsl:template match="post">
52 <xsl:value-of select="@file_url" /> 77 <xsl:value-of select="@file_url" />
53 </xsl:template> 78 </xsl:template>
54 </xsl:stylesheet> 79 </xsl:stylesheet>
55 EOF 80 EOF
56 echo "`echo \`wc -l < \"$outdir-list\" \`` file(s) available on server" 81 echo "`echo \`wc -l < \"${SITE_DIR}-${TARGET_DIR}-list\" \`` file(s) available on server"
57 #output file: $outdir-list 82 #output file: ${TARGET_DIR}-list
58 } 83 }
59 84
60 CHECK() 85 # check tools availability
61 { 86 Check_Tools() {
62 if [ "$ISNEW" -ne 1 ];then 87 # verify all programs required do indeed exist
88 #MD5
89 if [ ! "${MD5}" ]; then
90 case `uname` in
91 *BSD) MD5="md5 -r";;
92 Linux|SunOS) MD5="md5sum";;
93 *) Fatal_Err "No known md5 tool for this platform. Please specify manually"
94 esac
95 fi
96 MD5_COMMAND=`echo ${MD5} | cut -d' ' -f1`
97 # basic tools
98 COMMANDS="cut sed wc wget xsltproc xargs rm mkdir chown comm grep ${MD5_COMMAND}"
99 for COMMAND in ${COMMANDS}
100 do
101 COMMAND_CHECK=`command -v "${COMMAND}"`
102 [ "${COMMAND_CHECK}" ] || Err_Fatal "${COMMAND} doesn't exist in ${PATH}"
103 done
104
105 # grep checking
106 # originally created for workaround on solaris
107 #if [ `uname` = "SunOS" ]; then
108 FAIL=""
109 echo "blah" > superrandomtestfile
110 echo "blah" > superrandomtestfile.2
111 grep -f superrandomtestfile.2 superrandomtestfile > /dev/null 2>&1 || FAIL=1
112 rm -f superrandomtestfile superrandomtestfile.2
113 [ "${FAIL}" ] && Err_Fatal "Your grep is not compatible. Please install or set path of correct grep"
114 }
115
116 # verify required folders exist and writeable
117 Check_Folders(){
118 [ -O "${BASE_DIR}" ] || Err_Fatal "You don't own ${BASE_DIR}. Please fix ${BASE_DIR}."
119 for FOLDER in temp trash deleted ${TARGET_DIR}
120 do
121 if [ ! -d "${BASE_DIR}/${FOLDER}" ]; then
122 mkdir "${BASE_DIR}/${FOLDER}" || Err_Fatal "${FOLDER} folder creation failed"
123 fi
124 if [ ! -O "${BASE_DIR}/${FOLDER}" ]; then
125 echo "You don't own the ${BASE_DIR}/{$FOLDER}, applying globally writeable permission on it"
126 chmod -R u=rwX,g=rwX,o=rwX "${BASE_DIR}/${FOLDER}" || Err_Fatal "Error changing ownership. This shouldn't happen"
127 fi
128 done
129 [ `echo \`ls "${BASE_DIR}/${TARGET_DIR}" | wc -l\`` -eq 0 ] && ISNEW=1
130 # let's move to workdir
131 cd "${BASE_DIR}/temp"
132 for i in error ok list newlist; do
133 touch "${TARGET_DIR}-${i}" || Fatal_Err "Error creating ${TARGET_DIR}-${i}. This shouldn't happen"
134 done
135 #
136 }
137
138 # check files correctness
139 Check_Files() {
140 if [ "$ISNEW" -ne 1 ]; then
63 echo "Checking for errors..." 141 echo "Checking for errors..."
64 # THE FILES 142 # THE FILES
65 printf "" > "$outdir-error" 143
66 cd "../$outdir" 144 # current dir: ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}
67 for file in `ls` 145 cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}"
146 for TRASH in `ls * | sed -e 's/\([0-9a-f]\{32\}.*\)//g' | grep -v ^$`
68 do 147 do
69 if [ `$md5 "$file" | cut -d ' ' -f1 -` != `echo "$file" | cut -d '.' -f1` ] 148 mv -f "${TRASH}" "${BASE_DIR}/trash"
149 echo "Moved ${TRASH} to ${BASE_DIR}/trash"
150 done
151 printf "" > "${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}-error"
152 for FILE in *
153 do
154 if [ `${MD5} "${FILE}" | cut -d ' ' -f1 -` != `echo "${FILE}" | cut -d '.' -f1` ]
70 then 155 then
71 echo "$file" >> "../temp/$outdir-error" 156 echo
72 echo "Error: $file" 157 echo "${FILE}" >> "${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}-error"
158 echo "Error: ${FILE}"
73 fi 159 fi
74 printf "." 160 printf "."
75 done 161 done
76 echo 162 echo
77 cd ../temp 163
78 totalerr=`echo \`wc -l < "$outdir-error"\`` 164 # current dir: ${BASE_DIR}/temp
79 echo "$totalerr file(s) error" 165 cd ${BASE_DIR}/temp
166 TOTAL_ERROR=`echo \`wc -l < "${SITE_DIR}-${TARGET_DIR}-error"\``
167 echo "${TOTAL_ERROR} file(s) error"
80 168
81 echo "Generating list of new files..." 169 echo "Generating list of new files..."
82 # THE FILES 170 # THE FILES
83 #ls "../$outdir" | grep -vf "$outdir-error" > "$outdir-ok" 171 #ls "../${TARGET_DIR}" | grep -vf "${TARGET_DIR}-error" > "${TARGET_DIR}-ok"
84 # 172 #
85 ls "../$outdir" | comm -1 -3 "$outdir-error" - > "$outdir-ok" 173 ls "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" | comm -1 -3 "${SITE_DIR}-${TARGET_DIR}-error" - > "${SITE_DIR}-${TARGET_DIR}-ok"
86 cat "$outdir-list" | grep -vf "$outdir-ok" > "$outdir-newlist" 174 cat "${SITE_DIR}-${TARGET_DIR}-list" | grep -vf "${SITE_DIR}-${TARGET_DIR}-ok" > "${SITE_DIR}-${TARGET_DIR}-newlist"
87 echo "`echo \`wc -l < \"$outdir-newlist\"\`` file(s) to be downloaded" 175 echo "`echo \`wc -l < \"${SITE_DIR}-${TARGET_DIR}-newlist\"\`` file(s) to be downloaded"
88 cd "../$outdir" 176
177 # back to target dir
178 cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}"
89 179
90 echo "Removing error files" 180 echo "Removing error files"
91 if [ $totalerr -eq 0 ]; then 181 if [ "${TOTAL_ERROR}" -eq 0 ]; then
92 echo "No error file. 0 file removed" 182 echo "No error file. 0 file removed"
93 else 183 else
94 cat "../temp/$outdir-error" | xargs rm 184 cat "${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}-error" | xargs rm
95 echo "$totalerr file(s) removed" 185 echo "${TOTAL_ERROR} file(s) removed"
96 fi 186 fi
97 echo "`echo \`ls | wc -l\`` file(s) available locally" 187 echo "`echo \`ls | wc -l\`` file(s) available locally"
98 cd ../temp 188 cd ../temp
99 else 189 else
100 if [ $ISQUICK -eq 1 ]; then 190 if [ "${ISQUICK}" ]; then
101 echo "quick mode selected. Skipping check" 191 echo "quick mode selected. Skipping check"
102 else 192 else
103 echo "Empty local repository" 193 echo "Empty local repository"
104 fi 194 fi
105 cat "$outdir-list" > "$outdir-newlist" 195 cd "${BASE_DIR}/temp"
196 cat "${SITE_DIR}-${TARGET_DIR}-list" > "${TARGET_DIR}-newlist"
106 fi 197 fi
107 } 198 }
108 199
109 FETCH() 200 # start downloading the images
110 { 201 Fetch_Images() {
111 if [ `echo \`wc -l < "$outdir-newlist"\`` -eq 0 ]; then 202 if [ `echo \`wc -l < "${TARGET_DIR}-newlist"\`` -eq 0 ]; then
112 echo "No new file" 203 echo "No new file"
113 else 204 else
114 echo "Starting wget" 205 echo "Starting wget"
115 cd "../$outdir" 206 cd "../${TARGET_DIR}"
116 wget -e continue=on -bi "../temp/$outdir-newlist" -o "../temp/$outdir.log" 207 wget -e continue=on -bi "../temp/${TARGET_DIR}-newlist" -o "../temp/${TARGET_DIR}.log"
117 fi 208 fi
118 } 209 }
119 210
120 211 # initialize base variables and initial command check
121 # path initialization 212 Init(){
122 export PATH=$extrapath:${PATH} 213 # Get base folder - current folder or fallback to ${HOME}
123 214 [ "${BASE_DIR}" ] || BASE_DIR="${PWD}"
124 # verify all programs required do indeed exist 215 [ "${BASE_DIR}" ] || BASE_DIR="{$HOME}"
216 [ "`echo ${BASE_DIR} | cut -c1 | grep \/`" ] || BASE_DIR="/${BASE_DIR}"
217 # path initialization
218 [ "${ADDITIONAL_PATH}" ] && PATH=${ADDITIONAL_PATH}:${PATH}
219 export PATH
125 220
126 221 # misc variables
127 # basic tools 222 ISQUICK=""
128 commands="cut sed wc wget xsltproc xargs rm mkdir chown comm grep" 223 ISNEW=""
129 cmderr=" " 224
130 for cmd in $commands 225 [ $# -lt 2 ] && Err_Help
131 do 226 case "$1" in
132 command -v "$cmd" >/dev/null || cmderr="$cmderr $cmd" 227 status|fetch|quickfetch)
133 done 228 echo "Starting..."
134 if [ x"$cmderr" != x" " ]; then 229 JOB="$1"
135 echo "$cmderr doesn't exist in $PATH" 230 ;;
136 exit 1 231 *)
137 fi 232 Err_Help
138 #md5 233 ;;
139 md5base=`echo $md5 | cut -d ' ' -f 1 -` 234 esac
140 if [ x`command -v "$md5base" >/dev/null || echo x` != "x" ]; then 235 shift
141 echo "$md5base doesn't exist in $PATH" 236 SITE=
142 exit 1 237 case "$1" in
143 fi 238 -s|--site)
144 mdtest= 239 shift
145 if [ `echo test | $md5 | cut -d ' ' -f 1 -` != "d8e8fca2dc0f896fd7cb4cb0031ba249" ]; then 240 SITE="$1"
146 echo "$md5 doesn't produce wanted output" 241 ;;
147 exit 1 242 *)
148 fi 243 SITE=DEFAULT_SITE
149 244 ;;
150 # grep checking 245 esac
151 # originally created for workaround on solaris 246 shift
152 #if [ `uname` = "SunOS" ]; then 247 TAGS="$@"
153 FAIL=0 248 echo "Tags: ${TAGS}"
154 echo "blah" > superrandomtestfile 249 # slash is not wanted for folder name
155 echo "blah" > superrandomtestfile.2 250 TARGET_DIR=`echo "${TAGS}" | sed -e 's/\//_/g'`
156 grep -f superrandomtestfile.2 superrandomtestfile > /dev/null 2>&1 || FAIL=1 251 SITE_DIR=`echo "${SITE}" | sed -e 's/\//_/g'`
157 rm superrandomtestfile superrandomtestfile.2 superrandomtestfile.3 > /dev/null 2>&1 252 }
158 if [ $FAIL = 1 ]; then 253
159 echo "Your grep is not compatible. Please install or set path of correct grep" 254 Msg_Welcome
160 exit 1 255 Init "$@"
161 fi 256 Check_Tools
162 #fi 257 Check_Folders
163 258
164 259
165 # all green (part 1)! let's go (until we check the tag) 260 # let's do the job!
166 261 case "${JOB}" in
167 # initialization 262 status)
168 # are we really doing it? 263 Generate_Link
169 HELP="Usage: moefetch (quick)fetch|status <tags>" 264 Check_Files
170
171 if [ $# -lt 2 ]; then
172 echo "$HELP"
173 exit 1
174 fi
175
176 case "$1" in
177 status|fetch|quickfetch)
178 echo "Starting..."
179 ;; 265 ;;
180 *) 266 fetch)
181 echo "$HELP" 267 Generate_Link
182 exit 1 268 Check_Files
269 Fetch_Images
270 ;;
271 quickfetch)
272 ISNEW=1
273 ISQUICK=1
274 Generate_Link
275 Check_Files
276 Fetch_Images
183 ;; 277 ;;
184 esac 278 esac
185
186 # we did it indeed
187 # get started
188
189 # do we own the files
190 tags=`echo "$@" | cut -d ' ' -f 2- -`
191 echo "Tags: $tags"
192 # slash do not want
193 outdir=`echo "$tags" | sed -e 's/\//_/g'`
194 ISNEW=0
195 if [ -O "$basedir" ]; then
196 if [ ! -d "$basedir/$outdir" ]; then
197 ISNEW=1
198 mkdir "$basedir/$outdir"
199 fi
200 if [ ! -O "$basedir/$outdir" ]; then
201 echo "You don't own the $basedir/$outdir, applying globally writeable permission on it"
202 chmod -R u=rwX,g=rwX,o=rwX "$basedir/$outdir"
203 fi
204 if [ `echo \`ls "$basedir/$outdir" | wc -l\`` -eq 0 ]; then
205 ISNEW=1
206 fi
207 if [ ! -d "$basedir/temp" ]; then
208 mkdir "$basedir/temp"
209 fi
210 if [ ! -O "$basedir/temp" ]; then
211 echo "You don't own the $basedir/temp, applying globally writeable permission on it"
212 chmod -R u=rwX,g=rwX,o=rwX "$basedir/temp"
213 fi
214 else
215 echo "Fatal error: you don't own ${basedir}. Please fix ${basedir}. Stopping"
216 exit 1
217 fi
218 # let's move to workdir
219 cd "$basedir/temp"
220 touch "$outdir-error"
221 touch "$outdir-ok"
222 touch "$outdir-list"
223 touch "$outdir-newlist"
224 #
225
226 # let's do the job!
227 ISQUICK=0
228 case "$1" in
229 status)
230 GENERATE
231 CHECK
232 ;;
233 fetch)
234 GENERATE
235 CHECK
236 FETCH
237 ;;
238 quickfetch)
239 GENERATE
240 ISNEW=1
241 ISQUICK=1
242 CHECK
243 FETCH
244 esac