#!/bin/bash
set -euo pipefail

# ---------------------------------------------------------------------------
# Command-line parsing and derived settings.
#
# Variables set here and used by the code below:
#   subreddit   - subreddit to archive (required, exactly one positional arg)
#   update      - "true" when -u/--update was given, otherwise "false"
#   start_from  - listing handle given with -s/--start-from, or empty
#   save_path   - directory the archive is written to
#   next_page   - suffix appended to the listing URL: empty for the first
#                 page, otherwise "&after=<handle>"
# ---------------------------------------------------------------------------
subreddit=
update=false
start_from=
while [[ $# -gt 0 ]]; do
  case "$1" in
    -u | --update)
      update="true"
      shift # past argument
      ;;
    -s | --start-from)
      # The handle is the word following the flag. Check that it is present:
      # with `set -u` a bare "$2" would abort with an unhelpful
      # "unbound variable" message.
      if [[ $# -lt 2 ]]; then
        echo "error: $1 requires a value" >&2
        exit 1
      fi
      start_from="$2"
      shift 2 # past argument and value
      ;;
    -h | --help)
      echo "Usage: download-reddit [OPTIONS]... [SUBREDDIT]"
      echo
      echo -e "  -h, --help"
      echo -e "\t\tRuns this command"
      echo -e "  -u, --update"
      echo -e "\t\tUpdates the existing archive instead of pulling it all anew"
      echo -e "  -s, --start-from <after handle>"
      echo -e "\t\tStarts archiving images from a specific handle onwards"

      exit 0
      ;;
    -*)
      echo "Unknown option $1" >&2
      exit 1
      ;;
    *)
      # Only a single subreddit is accepted.
      if [[ -n "$subreddit" ]]; then
        echo "Incorrect number of arguments" >&2
        exit 1
      fi
      subreddit="$1" # save positional arg
      shift          # past argument
      ;;
  esac
done

if [[ -z "$subreddit" ]]; then
  echo "error: No subreddit provided" >&2
  exit 1
fi

save_path="reddit/$subreddit"
# The listing URL is built as "...?raw_json=1${next_page}", so the handle has
# to be turned into a complete query parameter; stays empty when -s is absent.
next_page="${start_from:+&after=${start_from}}"

#######################################
# Download one image of a post into the archive directory, unless a file with
# the target name is already present.
# Globals:
#   save_path (read) - directory the image is written to
#   update    (read) - when "true", finding an already-present image ends
#                      the whole script with status 0
# Arguments:
#   $1 - image URL
#   $2 - post id, used as the file name prefix
#   $3 - index of the image within the post
# Outputs:
#   Progress messages on stdout; the downloaded file at
#   ${save_path}/<post id>_<index>.<extension>
# Returns:
#   0 when the file already existed or was downloaded; otherwise the status
#   of the failing command (curl, shuf or sleep).
#######################################
function save_image() {
  local image=$1
  local post_id=$2
  local image_num=$3
  local url_path file_name extension image_save_path random

  # Take the extension from the last path component only, after dropping any
  # "?query" or "#fragment", so URL parameters never leak into the file name.
  url_path=${image%%[?#]*}
  file_name=${url_path##*/}
  extension=
  if [[ "$file_name" == *.* ]]; then
    extension=".${file_name##*.}"
  fi
  image_save_path="${save_path}/${post_id}_${image_num}${extension}"

  if [[ -e "$image_save_path" ]]; then
    echo "File $image_save_path already exists, ignoring"
    # A full `if` (rather than a `test && ... && exit` list) keeps the
    # function's status at 0 when update is off; a non-zero status here would
    # make a caller running under `set -e` abort instead of skipping the file.
    if [[ "$update" == true ]]; then
      echo "Found an image we've already downloaded, we've updated to the lastest results. Finishing early"
      exit 0
    fi
  else
    echo "Downloading image: $image"
    curl -so "$image_save_path" "$image"
    # Pause 0 or 1 seconds between downloads.
    random=$(shuf -i 0-2 -n1)
    sleep $((random % 2))
  fi
}

# ---------------------------------------------------------------------------
# Main archive loop.
#
# Fetches the subreddit listing page by page. For every post it stores the
# post's JSON as ${save_path}/<post name>.json and hands each image URL found
# in the post to save_image. The loop ends after the page whose `.data.after`
# is null has been processed (save_image may end the script earlier when
# running with --update).
# ---------------------------------------------------------------------------

# jq filter: every image URL of a post. Tries the post's media_metadata, then
# the crosspost parents' media_metadata, then the preview images of either.
readonly image_urls_filter='.data?.media_metadata[]?.s?.u // .data?.crosspost_parent_list[]?.media_metadata[]?.s?.u // .data?.preview?.images[]?.source.url // .data?.crosspost_parent_list[]?.preview?.images[]?.source.url'
# jq filter: media ids of a gallery post (or of its crosspost parents), in
# the order they are listed in gallery_data.
readonly gallery_ids_filter='.data?.gallery_data?.items[]?.media_id // .data.crosspost_parent_list[].gallery_data.items[].media_id'

echo "Downloading for $save_path"
sleep 2
mkdir -p "$save_path"
while true; do
  json_response=$(curl -s "https://www.reddit.com/r/$subreddit/.json?raw_json=1${next_page}" \
    -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:150.0) Gecko/20100101 Firefox/150.0')

  # Stop with an error on anything that is not a listing (invalid JSON, an
  # error document, ...) instead of silently archiving nothing.
  if ! jq -e '.data.children | type == "array"' <<<"$json_response" &>/dev/null; then
    echo "error: unexpected response while listing r/$subreddit" >&2
    exit 1
  fi

  # `.data.after` is null on the last page. `// empty` turns that into an
  # empty string with a zero exit status, so the last page is still processed
  # below and the loop can finish cleanly afterwards.
  after=$(jq -r '.data.after // empty' <<<"$json_response")
  if [[ -n "$after" ]]; then
    next_page="&after=${after}"
    echo "setting after to $next_page"
  fi

  while IFS= read -r obj; do
    post_id=$(jq -er '.data.name' <<<"$obj")
    printf '%s\n' "$obj" > "${save_path}/${post_id}.json"

    # Collect the post's image URLs once; the gallery branch scans this list
    # for every gallery item.
    images=()
    while IFS= read -r image; do
      images+=("$image")
    done < <(jq -cr "$image_urls_filter" <<<"$obj")

    image_num=1
    # Order by gallery_data if it exists
    if jq -e '.data?.gallery_data // .data?.crosspost_parent_list[]?.gallery_data' <<<"$obj" &>/dev/null; then
      while IFS= read -r image_id; do
        # ${arr[@]+...} keeps an empty array from tripping `set -u` on older
        # bash versions.
        for image in ${images[@]+"${images[@]}"}; do
          # An image belongs to a gallery item when its URL contains the
          # item's media id.
          if [[ "$image" == *"$image_id"* ]]; then
            save_image "$image" "$post_id" "$image_num"
            image_num=$((image_num + 1))
          fi
        done
      done < <(jq -r "$gallery_ids_filter" <<<"$obj")
    else
      for image in ${images[@]+"${images[@]}"}; do
        save_image "$image" "$post_id" "$image_num"
        image_num=$((image_num + 1))
      done
    fi

    # Pause 0 or 1 seconds between posts.
    random=$(shuf -i 0-2 -n1)
    sleep $((random % 2))
  done < <(jq -c '.data.children[]' <<<"$json_response")

  if [[ -z "$after" ]]; then
    echo "Reached the last page of r/$subreddit, done"
    break
  fi
done
