Multiple scripts that are useful but don't deserve their own repository.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
multiple_scripts/convert_patreon_downloader_...

82 lines
3.1 KiB

#!/bin/bash
##############################################################################
#
# Script to take output from Patreon Downloader and turn it into usable
# markdown, OpenDocument (ODT), EPUB, and HTML (with linked local images) files.
#
# Patreon Downloader = https://github.com/AlexCSDev/PatreonDownloader
# Pandoc = https://pandoc.org
#
# Don't be a jerk with this, support independent artists and creators.
#
# (c) Steven Saus 2023
# Licensed under the MIT license
#
# You will get a LOT of "cannot determine media type" warnings; ignore them.
#
##############################################################################
#convert to usable
# Usage: pass the Patreon Downloader output directory as the first argument.
# When the argument is missing or is not a directory, the current working
# directory is searched instead.
if [ -d "${1:-}" ]; then
  DIRECTORY="${1}"
else
  DIRECTORY="${PWD}"
fi

# sed rules that turn straight quotes into typographic ones. The same rules
# are applied to the post title and to the generated markdown. Order matters:
# the context-sensitive rules run first, the catch-all rules run last.
quote_rules=(
  -e 's/ "/ “/g' -e 's/" /” /g'
  -e 's/"\./”\./g' -e 's/"\,/”\,/g'
  -e 's/\."/\.”/g' -e 's/\,"/\,”/g'
  # A quote at the very end of a line closes a quotation; without this rule
  # it would fall through to the catch-all below and become an opening quote.
  -e 's/"$/”/'
  -e 's/"/“/g' -e "s/'/’/g"
)

#######################################
# Convert straight quotes to typographic quotes.
# Arguments: optional extra sed arguments (e.g. a file name)
# Outputs:   converted text on stdout (reads stdin when no file is given)
#######################################
smarten_quotes() {
  sed "${quote_rules[@]}" "$@"
}

#######################################
# Derive the post title from a post directory named "[id] date title".
# Only the last path component is examined, so brackets in parent
# directories cannot confuse the parsing, and the title itself may contain
# "] " or repeat the date without being truncated.
# Arguments: $1 - path of the post directory
# Outputs:   the title on stdout; the whole directory name when the name
#            does not follow the "[id] date title" pattern
#######################################
post_title_from_dir() {
  local dir_name rest
  dir_name=${1##*/}
  # Drop everything up to and including the first "] " (the "[id] " prefix).
  rest=${dir_name#*'] '}
  if [[ "${rest}" != "${dir_name}" && "${rest}" == *' '* ]]; then
    # Drop the first word (the posting date) and the space after it.
    printf '%s\n' "${rest#* }"
  else
    printf '%s\n' "${dir_name}"
  fi
}

#######################################
# Insert "# <heading>" plus a spacer line at the top of a file.
# The heading is written with printf rather than spliced into a sed
# replacement, so "&" and "\" in the heading are kept literally, and the
# heading is added even when the file is empty.
# Arguments: $1 - file to modify, $2 - heading text
# Returns:   non-zero when the file cannot be read or rewritten
#######################################
prepend_heading() {
  local target=$1 heading=$2 tmp_file rv=0
  tmp_file=$(mktemp) || return
  # Build the new content in a temp file, then copy it back so the target
  # keeps its own permissions.
  { printf '# %s\n \n' "${heading}" && cat -- "${target}"; } > "${tmp_file}" \
    && cat -- "${tmp_file}" > "${target}" || rv=$?
  rm -f -- "${tmp_file}"
  return "${rv}"
}

#######################################
# Write the pandoc YAML metadata block used for the epub.
# The title is emitted as a single-quoted YAML scalar so that titles
# containing ": ", a leading "#" or "[" and similar do not break the block.
# Arguments: $1 - metadata file to create, $2 - title
#######################################
write_epub_metadata() {
  local meta_file=$1 title=$2 sq="'"
  # Inside a single-quoted YAML scalar a literal ' is written as ''.
  title=${title//$sq/$sq$sq}
  {
    printf '%s\n' '---'
    printf "title: '%s'\n" "${title}"
    printf '%s\n' 'language: en-US'
    printf '%s\n' '...'
  } > "${meta_file}"
}

#######################################
# Convert one post: description.html -> .odt, .md, _alt.odt, .epub,
# _final.html, plus the extracted media directory, all written next to
# the source file.
# Arguments: $1 - path to a description.html file
# Returns:   1 when no output filename could be derived
#######################################
convert_post() {
  local html_file=$1
  local currdir title fixed_title filename base
  currdir=$(dirname "${html_file}")
  printf '%s\n' "${currdir}"
  title=$(post_title_from_dir "${currdir}")
  fixed_title=$(printf '%s\n' "${title}" | smarten_quotes)
  printf '%s\n' "${fixed_title} Beginning"
  # proper detox for filename
  filename=$(printf '%s\n' "${fixed_title}" | detox --inline)
  if [[ -z "${filename}" ]]; then
    # An empty name would create hidden ".odt"/".md" files; skip instead.
    printf 'Skipping %s: could not derive an output filename\n' \
      "${html_file}" >&2
    return 1
  fi
  base="${currdir}/${filename}"

  pandoc "${html_file}" -f html -s -o "${base}.odt"
  pandoc "${html_file}" -f html -s -t markdown -o "${base}.md"
  printf '%s\n' "${base}.md"
  # Smarten quotes in the markdown in place, then drop the backslashes that
  # are left in front of the now-typographic quotes.
  sed -i "${quote_rules[@]}" \
    -e 's/\\“/“/g' -e 's/\\”/”/g' -e 's/\\’/’/g' "${base}.md"
  # add header
  prepend_heading "${base}.md" "${fixed_title}"
  # To see if images are more properly embedded here now
  pandoc "${base}.md" -t odt -o "${base}_alt.odt"
  # Now to make the epub
  write_epub_metadata "${currdir}/title.txt" "${fixed_title}"
  pandoc -o "${base}.epub" "${currdir}/title.txt" "${base}.md"
  # TRUST ME; if you have more than a few files with a few images, do NOT use
  # pandoc's ability to merge all these into a single epub. Instead use a
  # tool like Calibre to handle them, otherwise you may run out of memory.
  # Now to bring it back to HTML with images linked properly locally.
  pandoc "${base}.epub" -f epub -t html -o "${base}_final.html"
  # An epub is a zip archive, so unzip can read it directly; no temporary
  # .zip copy is needed (and no pre-existing .zip can be clobbered).
  unzip -j "${base}.epub" "EPUB/media/*" -d "${currdir}/media"
  printf '%s\n' "${fixed_title} ending"
}

# Collect every description.html first (NUL-delimited, so any path is safe),
# then convert. Collecting up front keeps stdin free for tools that may
# prompt, such as unzip asking whether to overwrite a file.
html_files=()
while IFS= read -r -d '' f; do
  html_files+=("${f}")
done < <(find "${DIRECTORY}" -name 'description.html' -print0)

for f in "${html_files[@]}"; do
  convert_post "${f}"
done