kv-download/src/media/fetch.go
2024-11-30 12:54:32 -08:00

270 lines
6.7 KiB
Go

package media
import (
"crypto/md5"
"errors"
"fmt"
"github.com/dustin/go-humanize"
"golang.org/x/sync/errgroup"
"html/template"
"media-roller/src/utils"
"net/http"
"path/filepath"
"regexp"
"strings"
)
/**
This file downloads media from a URL and saves it to disk.
*/
import (
"bytes"
"github.com/rs/zerolog/log"
"io"
"os"
"os/exec"
)
// Media describes one downloaded file stored in the directory of a media ID.
type Media struct {
	// Id is the ID of the directory that contains the file.
	Id string
	// Name is the file name inside that directory.
	Name string
	// SizeInBytes is the size of the file on disk, or 0 when it could not be determined.
	SizeInBytes int64
	// HumanSize is SizeInBytes formatted for display.
	HumanSize string
}
var (
	// fetchIndexTmpl is the page template rendered by the handlers in this file. It is parsed
	// once at package initialization; template.Must panics if parsing fails.
	fetchIndexTmpl = template.Must(template.ParseFiles("templates/media/index.html"))

	// downloadDir is where the media files are saved. Always has a trailing slash.
	downloadDir = getDownloadDir()

	// idCharSet reports whether a string is a non-empty run of ASCII letters and digits.
	idCharSet = regexp.MustCompile(`^[a-zA-Z0-9]+$`).MatchString
)
// Index renders the landing page. The only value handed to the template is the cached yt-dlp
// version. If rendering fails, the error is logged and a 500 response is sent.
func Index(w http.ResponseWriter, _ *http.Request) {
	pageData := map[string]string{"ytDlpVersion": CachedYtDlpVersion}

	renderErr := fetchIndexTmpl.Execute(w, pageData)
	if renderErr == nil {
		return
	}

	log.Error().Msgf("Error rendering template: %v", renderErr)
	http.Error(w, "Internal error", http.StatusInternalServerError)
}
// FetchMedia handles the HTML flow: it reads the "url" query parameter, fetches (or reuses)
// the media for it, and renders the index template with the outcome.
//
// The template receives "url", "media", "error" and "ytDlpVersion". A failed fetch still
// renders the page so the problem can be shown to the user. When a URL was supplied and the
// fetch failed without a yt-dlp message, the error text itself is used as the message so the
// failure is not silently dropped. A request without a URL renders the page with no error.
//
// If rendering fails, the error is logged and a 500 response is sent, regardless of whether
// the fetch succeeded.
func FetchMedia(w http.ResponseWriter, r *http.Request) {
	url := getUrl(r)

	media, ytdlpErrorMessage, err := getMediaResults(url)
	if err != nil && ytdlpErrorMessage == "" && url != "" {
		// Not every failure comes with yt-dlp output (e.g. a directory lookup error);
		// fall back to the error text, matching what FetchMediaApi reports to its clients.
		ytdlpErrorMessage = err.Error()
	}

	data := map[string]interface{}{
		"url":          url,
		"media":        media,
		"error":        ytdlpErrorMessage,
		"ytDlpVersion": CachedYtDlpVersion,
	}

	if err := fetchIndexTmpl.Execute(w, data); err != nil {
		log.Error().Msgf("Error rendering template: %v", err)
		http.Error(w, "Internal error", http.StatusInternalServerError)
	}
}
func FetchMediaApi(w http.ResponseWriter, r *http.Request) {
url := getUrl(r)
medias, _, err := getMediaResults(url)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
if len(medias) == 0 {
http.Error(w, "Media not found", http.StatusBadRequest)
return
}
// just take the first one
streamFileToClientById(w, r, medias[0].Id)
}
// getUrl returns the first "url" query parameter of the request with surrounding whitespace
// removed, or "" when the parameter is absent.
func getUrl(r *http.Request) string {
	raw := r.URL.Query().Get("url")
	return strings.TrimSpace(raw)
}
// getMediaResults returns the media files for inputUrl, downloading them first when they are
// not on disk yet.
//
// It returns the files found, a yt-dlp error message (only set when the download itself
// failed), and an error. An empty inputUrl yields a "missing URL" error.
func getMediaResults(inputUrl string) ([]Media, string, error) {
	if inputUrl == "" {
		return nil, "", errors.New("missing URL")
	}

	normalized := utils.NormalizeUrl(inputUrl)
	log.Info().Msgf("Got input '%s' and extracted '%s'", inputUrl, normalized)

	// NOTE: This system is for a simple use case, meant to run at home. It is not a robust
	// design: the URL is hashed and files are written to a directory derived from that hash,
	// so concurrent requests for the same URL would interfere with each other. That is
	// acceptable for a simple home system; future work can make this more sophisticated.
	cacheId := GetMD5Hash(normalized)

	// Serve straight from disk when the media has been downloaded before.
	cached, err := getAllFilesForId(cacheId)
	if err != nil {
		return nil, "", err
	}
	if len(cached) > 0 {
		return cached, "", nil
	}

	// Nothing on disk yet, so go fetch it.
	downloadedId, ytdlpMessage, err := downloadMedia(normalized)
	if err != nil {
		return nil, ytdlpMessage, err
	}

	fresh, err := getAllFilesForId(downloadedId)
	if err != nil {
		return nil, "", err
	}
	return fresh, "", nil
}
// downloadMedia runs yt-dlp for url and writes its output into the media directory named
// after the MD5 hash of url.
//
// It returns the ID (directory name) of the download, an error message, and an error. On
// failure the ID is empty; the message is the trimmed stderr output of yt-dlp when the
// process ran, or the error text when the process could not be set up or started.
//
// yt-dlp's stdout and stderr are mirrored to this process's stdout and stderr while it runs.
func downloadMedia(url string) (string, string, error) {
	// The id will be used as the name of the parent directory of the output files
	id := GetMD5Hash(url)
	name := getMediaDirectory(id) + "%(id)s.%(ext)s"

	log.Info().Msgf("Downloading %s to %s", url, id)

	cmd := exec.Command("yt-dlp",
		"--format", "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
		"--merge-output-format", "mp4",
		"--trim-filenames", "40",
		"--restrict-filenames",
		"--write-info-json",
		"--verbose",
		"--output", name,
		// End of options: the URL originates from the request, so make sure a value that
		// starts with "-" can never be parsed as a yt-dlp flag.
		"--",
		url)

	stdoutIn, err := cmd.StdoutPipe()
	if err != nil {
		log.Error().Msgf("Error creating stdout pipe: %v", err)
		return "", err.Error(), err
	}
	stderrIn, err := cmd.StderrPipe()
	if err != nil {
		log.Error().Msgf("Error creating stderr pipe: %v", err)
		return "", err.Error(), err
	}

	// Only stderr is kept in memory, because it is the only stream reported back to the
	// caller. stdout is mirrored to the console without being buffered.
	var stderrBuf bytes.Buffer
	stderr := io.MultiWriter(os.Stderr, &stderrBuf)

	if err := cmd.Start(); err != nil {
		log.Error().Msgf("Error starting command: %v", err)
		return "", err.Error(), err
	}

	// Drain both pipes concurrently so yt-dlp can never block on a full pipe.
	var errStdout, errStderr error
	eg := errgroup.Group{}
	eg.Go(func() error {
		_, errStdout = io.Copy(os.Stdout, stdoutIn)
		return nil
	})
	_, errStderr = io.Copy(stderr, stderrIn)
	_ = eg.Wait() // the goroutine always returns nil; its result is carried in errStdout

	log.Info().Msgf("Done with %s", id)

	// Wait must only be called once all reads from the pipes have completed.
	err = cmd.Wait()
	if err != nil {
		log.Error().Err(err).Msgf("cmd.Run() failed with %s", err)
		return "", strings.TrimSpace(stderrBuf.String()), err
	}

	// Capture problems do not fail the download, but each one is reported.
	if errStdout != nil {
		log.Error().Msgf("failed to capture stdout: %v", errStdout)
	}
	if errStderr != nil {
		log.Error().Msgf("failed to capture stderr: %v", errStderr)
	}

	return id, "", nil
}
// getMediaDirectory returns the directory that holds the files for id: the download
// directory followed by id and a trailing slash.
// The id is expected to be validated before this is called.
func getMediaDirectory(id string) string {
	dir := downloadDir + id
	return dir + "/"
}
// getAllFilesForId lists the media files stored in the directory for id. Files ending in
// ".json" (the manifest written next to each video) are skipped.
// The id is expected to be validated prior to calling this func.
//
// Returns:
//   - nil, nil when the directory does not exist;
//   - an "ID not found" error when the directory exists but is empty;
//   - any other error from opening or reading the directory.
//
// A file whose size cannot be read is still listed, with a size of 0.
func getAllFilesForId(id string) ([]Media, error) {
	root := getMediaDirectory(id)

	dir, err := os.Open(root)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, nil
		}
		return nil, err
	}
	// Close the directory handle on every return path so repeated lookups do not leak
	// file descriptors.
	defer dir.Close()

	names, err := dir.Readdirnames(0) // 0 to read all files and folders
	if err != nil {
		return nil, err
	}
	if len(names) == 0 {
		return nil, errors.New("ID not found: " + id)
	}

	var medias []Media
	// We expect two files to be produced for each video, a json manifest and an mp4.
	for _, name := range names {
		if strings.HasSuffix(name, ".json") {
			continue
		}

		var size int64
		if fi, statErr := os.Stat(root + name); statErr == nil {
			size = fi.Size()
		}

		medias = append(medias, Media{
			Id:          id,
			Name:        filepath.Base(name),
			SizeInBytes: size,
			HumanSize:   humanize.Bytes(uint64(size)),
		})
	}

	return medias, nil
}
// getFileFromId returns the path of the first non-".json" file found in the directory for id.
// The id is expected to be validated prior to calling this func.
//
// It returns an error when the directory cannot be opened or read, when it is empty
// ("ID not found"), or when it only contains ".json" files ("unable to find file").
//
// TODO: This needs to handle multiple files in the directory
func getFileFromId(id string) (string, error) {
	root := getMediaDirectory(id)

	dir, err := os.Open(root)
	if err != nil {
		return "", err
	}
	// Close the directory handle on every return path so repeated lookups do not leak
	// file descriptors.
	defer dir.Close()

	names, err := dir.Readdirnames(0) // 0 to read all files and folders
	if err != nil {
		return "", err
	}
	if len(names) == 0 {
		return "", errors.New("ID not found")
	}

	// We expect two files to be produced, a json manifest and an mp4. We want to return the mp4.
	// Sometimes the video file might not have an mp4 extension, so filter out the json file
	// instead of looking for ".mp4".
	for _, name := range names {
		if !strings.HasSuffix(name, ".json") {
			// TODO: This is just returning the first file found. We need to handle multiple
			return root + name, nil
		}
	}

	return "", errors.New("unable to find file")
}
// GetMD5Hash returns the MD5 digest of url as a 32-character lowercase hexadecimal string.
// In this file the result is used as the ID of a download.
func GetMD5Hash(url string) string {
	digest := md5.Sum([]byte(url))
	return fmt.Sprintf("%x", digest[:])
}
// isValidId reports whether id matches the idCharSet pattern.
func isValidId(id string) bool {
	ok := idCharSet(id)
	return ok
}
// getDownloadDir returns the directory downloads are written to, always ending in "/".
// The MR_DOWNLOAD_DIR environment variable takes precedence; when it is unset or empty the
// relative directory "downloads/" is used.
func getDownloadDir() string {
	configured := os.Getenv("MR_DOWNLOAD_DIR")

	switch {
	case configured == "":
		return "downloads/"
	case strings.HasSuffix(configured, "/"):
		return configured
	default:
		return configured + "/"
	}
}