You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
82 lines
3.1 KiB
82 lines
3.1 KiB
#!/bin/bash
##############################################################################
#
# Script to take output from Patreon Downloader and turn it into usable
# markdown, OpenDocument (ODT), EPUB, and HTML (with linked local images)
# files.
#
# Usage: pass an optional DIRECTORY argument. Every description.html found
#        beneath it is converted in place; the current directory is used
#        when the argument is missing or is not a directory.
#
# Patreon Downloader = https://github.com/AlexCSDev/PatreonDownloader
# Pandoc = https://pandoc.org
#
# Also calls detox (filename cleanup) and unzip (media extraction).
#
# Don't be a jerk with this, support independent artists and creators.
#
# (c) Steven Saus 2023
# Licensed under the MIT license
#
# You will get a LOT of "cannot determine media type" warnings; ignore them.
#
##############################################################################

# Deliberately no `set -e`: one post that fails to convert must not abort
# the rest of the batch. Failures are reported and reflected in the exit code.
set -uo pipefail

# sed expressions turning straight quotes into typographic ones. Order
# matters: the context-sensitive rules run first, then every remaining
# double quote becomes an opening quote and every apostrophe a right quote.
readonly -a SMART_QUOTE_SED=(
  -e 's/ "/ “/g'
  -e 's/" /” /g'
  -e 's/"\./”./g'
  -e 's/",/”,/g'
  -e 's/\."/.”/g'
  -e 's/,"/,”/g'
  -e 's/"/“/g'
  -e "s/'/’/g"
)

# Pandoc's markdown writer backslash-escapes quotes; once the quotes have
# been converted those backslashes are left over and must be removed.
readonly -a UNESCAPE_QUOTE_SED=(
  -e 's/\\“/“/g'
  -e 's/\\”/”/g'
  -e 's/\\’/’/g'
)

# Print a diagnostic on stderr.
warn() {
  printf '%s\n' "$*" >&2
}

# Filter: stdin -> stdout with straight quotes replaced by curly quotes.
smarten_quotes() {
  sed "${SMART_QUOTE_SED[@]}"
}

#######################################
# Derive the post title from a post directory named "[id] date title".
# Arguments: $1 - path of the post directory
# Outputs:   the title on stdout. The "[id] " prefix and the first word
#            after it (the date) are removed. A name without "] " is
#            returned whole, and a name with nothing after the date
#            returns the date itself, so the result is never empty for a
#            non-empty name.
#######################################
extract_title() {
  local name=${1%/}
  name=${name##*/}
  local title=${name}
  if [[ ${name} == *'] '* ]]; then
    # Strip only the FIRST "] " so titles that contain "] " stay intact.
    title=${name#*'] '}
    if [[ ${title} == *' '* ]]; then
      title=${title#* }
    fi
  fi
  printf '%s\n' "${title}"
}

#######################################
# Render a string as a YAML single-quoted scalar.
# Arguments: $1 - raw text
# Outputs:   quoted scalar on stdout (embedded ' doubled), so text holding
#            ": ", "#", etc. cannot break the metadata block.
#######################################
yaml_single_quote() {
  local text=$1
  local sq="'"
  printf "'%s'\n" "${text//${sq}/${sq}${sq}}"
}

#######################################
# Fix quotes in a markdown file and prepend a level-1 heading.
# Arguments: $1 - markdown file (rewritten in place)
#            $2 - heading text, inserted literally
# Returns:   0 on success, 1 if the file could not be rewritten
#######################################
polish_markdown() {
  local md=$1 heading=$2
  local tmp status
  tmp=$(mktemp "${md}.XXXXXX") || return 1
  # The heading goes through printf rather than a sed replacement, so
  # "&", "/" and "\" in a title are kept verbatim. The result is copied
  # back with cat (not mv) so the file keeps its permissions.
  if {
    printf '# %s\n \n' "${heading}"
    sed "${SMART_QUOTE_SED[@]}" "${UNESCAPE_QUOTE_SED[@]}" -- "${md}"
  } > "${tmp}" && cat -- "${tmp}" > "${md}"; then
    status=0
  else
    status=1
  fi
  rm -f -- "${tmp}"
  return "${status}"
}

#######################################
# Write the pandoc YAML metadata block used for the EPUB.
# Arguments: $1 - output file, $2 - title
#######################################
write_epub_metadata() {
  local out=$1 title=$2
  {
    printf '%s\n' '---'
    printf 'title: %s\n' "$(yaml_single_quote "${title}")"
    printf '%s\n' 'language: en-US'
    printf '%s\n' '...'
  } > "${out}"
}

#######################################
# Convert one post: description.html -> .odt, .md, _alt.odt, .epub,
# _final.html, plus the EPUB's images unpacked into media/.
# Arguments: $1 - path to a description.html
# Outputs:   progress messages on stdout, diagnostics on stderr
# Returns:   0 if every step succeeded, 1 otherwise
#######################################
convert_post() {
  local html=$1
  local post_dir title fancy_title stem base md epub
  local status=0 rc=0

  post_dir=$(dirname -- "${html}")
  printf '%s\n' "${post_dir}"

  title=$(extract_title "${post_dir}")
  fancy_title=$(printf '%s\n' "${title}" | smarten_quotes)
  printf '%s\n' "${fancy_title} Beginning"

  # proper detox for filename
  stem=$(printf '%s\n' "${fancy_title}" | detox --inline) || stem=''
  if [[ -z ${stem} ]]; then
    warn "No usable file name for '${post_dir}'; using 'description'."
    stem='description'
  fi
  base="${post_dir}/${stem}"
  md="${base}.md"
  epub="${base}.epub"

  pandoc "${html}" -f html -s -o "${base}.odt" || status=1
  # Every later step needs the markdown, so give up on this post without it.
  pandoc "${html}" -f html -s -t markdown -o "${md}" || return 1
  printf '%s\n' "${md}"

  polish_markdown "${md}" "${fancy_title}" || return 1

  # To see if images are more properly embedded here now
  pandoc "${md}" -t odt -o "${base}_alt.odt" || status=1

  # Now to make the epub
  write_epub_metadata "${post_dir}/title.txt" "${fancy_title}" || return 1
  pandoc -o "${epub}" "${post_dir}/title.txt" "${md}" || return 1

  # TRUST ME; if you have more than a few files with a few images, do NOT use
  # pandoc's ability to merge all these into a single epub. Instead use a
  # tool like Calibre to handle them, otherwise you may run out of memory.

  # Now to bring it back to HTML with images linked properly locally.
  pandoc "${epub}" -f epub -t html -o "${base}_final.html" || status=1

  # An EPUB is a zip archive, so unzip reads it directly. -o avoids the
  # interactive overwrite prompt on reruns; exit code 11 only means the
  # post had no media to extract.
  unzip -o -j "${epub}" 'EPUB/media/*' -d "${post_dir}/media" || rc=$?
  if (( rc != 0 && rc != 11 )); then
    status=1
  fi

  printf '%s\n' "${fixed_title_suffix:-${fancy_title} ending}"
  return "${status}"
}

#######################################
# Entry point: convert every description.html below the chosen directory.
# Arguments: $1 - optional directory (default: current directory)
# Returns:   0 if all posts converted cleanly, 1 if any post had problems
#######################################
main() {
  local root=${PWD}
  if [[ -d ${1:-} ]]; then
    root=$1
  fi

  local html failures=0
  # NUL-delimited paths survive any character in file names. The list is
  # read on fd 3 so commands inside the loop cannot swallow it via stdin.
  while IFS= read -r -d '' -u 3 html; do
    if ! convert_post "${html}"; then
      warn "Conversion problems for: ${html}"
      failures=$(( failures + 1 ))
    fi
  done 3< <(find "${root}" -type f -name 'description.html' -print0)

  (( failures == 0 ))
}

main "$@"