Hadithi is an open-source, Bash-based command-line tool that enables AI and ML developers to easily convert YouTube, torrent, and enterprise videos into high-quality datasets for fine-tuning large language models (LLMs).
# Live Preview
#
# Motion-extraction stage.
#
# Variables read here (all set outside this section):
#   clipped_video_dataset        path prefix of the clipped-video directory
#                                (the sub-directory name used to be appended
#                                directly, so a trailing slash is presumably
#                                expected -- confirm against the definition)
#   clipped_video_dataset_count  number of entries counted in that directory
#   motioned_video_dataset       path prefix for the generated .mp4 files
#   log_directory                path prefix for emptycleans.txt
#   DB width height batch_size video_duration foundry_identifier
#                                forwarded unchanged to noaudio.sh

# Print every regular file located directly inside each sub-directory of the
# dataset directory $1, one path per line. Sub-directories and the files
# within each of them are emitted in random order (sort -R).
# Hidden entries are skipped, matching the earlier `ls`-based listing.
# Paths containing a newline are not supported because the list is line based.
hadithi_list_clip_files() {
  local dataset_dir=$1
  local clip_dir

  # -H: follow the dataset directory itself if it is a symlink.
  find -H "$dataset_dir" -mindepth 1 -maxdepth 1 -type d ! -name '.*' |
    sort -R |
    while IFS= read -r clip_dir; do
      find "$clip_dir" -mindepth 1 -maxdepth 1 -type f ! -name '.*' | sort -R
    done
}

# Run ffmpeg's scene-change filter over every clip named in list file $1 and
# write each result to "<$2><timestamp>.mp4".
# Each line of the list file is a path wrapped in single quotes; entries that
# no longer point to a regular file are skipped.
hadithi_extract_motion() {
  local list_file=$1
  local output_prefix=$2
  local entry clip clip_name

  while IFS= read -r entry; do
    # Strip the single quotes added when the list was written.
    clip=${entry#\'}
    clip=${clip%\'}
    [[ -f "$clip" ]] || continue

    clip_name=${clip##*/}
    printf '%s  ----  %s\n' "$clip" "${clip_name%.*}"

    # The whole ffmpeg invocation must be ONE command: the output path is its
    # last argument, and stdin is redirected from /dev/null so ffmpeg cannot
    # swallow the remaining lines of the list this loop is reading.
    ffmpeg -i "$clip" \
      -vf 'select=gt(scene\,0.001),setpts=N/(25*TB)' -strftime 1 \
      "${output_prefix}$(date +%Y%m%d%H%M%S%N).mp4" </dev/null ||
      printf 'ffmpeg failed for %s\n' "$clip" >&2
  done <"$list_file"
}

# Entry point of the stage: report an empty dataset on stderr, otherwise list
# the clips, extract the moving parts of each one and finally hand over to
# noaudio.sh (which, per the original comment, removes the audio tracks).
hadithi_motion_stage() {
  local clip_list="${log_directory}emptycleans.txt"
  local clip

  if [[ "${clipped_video_dataset_count:-0}" -eq 0 ]]; then
    printf 'no data found in %s directory\n' "$clipped_video_dataset" >&2
    return 0
  fi
  [[ "$clipped_video_dataset_count" -gt 0 ]] || return 0

  # Entries are appended, one single-quoted path per line. Redirecting the
  # whole loop guarantees the file exists even when no clip was found.
  # NOTE(review): the list is never truncated in this section -- confirm it is
  # reset elsewhere, otherwise entries from earlier runs are processed again.
  hadithi_list_clip_files "$clipped_video_dataset" |
    while IFS= read -r clip; do
      printf "'%s'\n" "$clip"
    done >>"$clip_list"

  hadithi_extract_motion "$clip_list" "$motioned_video_dataset"

  # remove audio tracks
  # Arguments are quoted so an empty value keeps its position.
  bash "$(pwd)"/noaudio.sh "$DB" "$width" "$height" "$batch_size" \
    "$video_duration" "$foundry_identifier"
}

hadithi_motion_stage