#!/bin/sh
#
# Copy a line-oriented file, dropping every line longer than a given limit.
#
# Usage:   <script> input_file max_chars output_file
# Example: <script> data.jsonl 10000 cleaned.jsonl
#
# Arguments:
#   input_file   existing regular file to read
#   max_chars    non-negative integer; a line is kept when awk's length()
#                of that line is <= max_chars
#   output_file  file to write (created, or truncated if it already exists)
#
# Exit status: 0 on success; 1 on a usage, validation or processing error.
# Diagnostics are written to stderr; the completion summary to stdout.

if [ $# -ne 3 ]; then
  printf "Usage: %s input_file max_chars output_file\n" "$0" >&2
  printf "Example: %s data.jsonl 10000 cleaned.jsonl\n" "$0" >&2
  exit 1
fi

input_file=$1
max_chars=$2
output_file=$3

# Accept digits only. A non-numeric value would make awk fall back to a
# string comparison and silently produce a meaningless result.
case $max_chars in
  '' | *[!0-9]*)
    printf "Error: max_chars must be a non-negative integer, got '%s'\n" \
      "$max_chars" >&2
    exit 1
    ;;
esac

# Check if input file exists
if [ ! -f "$input_file" ]; then
  printf "Error: Input file '%s' not found\n" "$input_file" >&2
  exit 1
fi

# The output redirection truncates its target before awk reads anything, so
# writing onto the input file would destroy the data being filtered.
if [ "$input_file" -ef "$output_file" ]; then
  printf "Error: Output file '%s' is the same file as the input\n" \
    "$output_file" >&2
  exit 1
fi

# Process the file: keep lines whose length is at most max_chars.
# The input is supplied on stdin rather than as an operand so that a file
# name containing '=' or starting with '-' is not interpreted by awk as a
# variable assignment or an option.
if ! awk -v max="$max_chars" 'length($0) <= max' <"$input_file" >"$output_file"; then
  printf "Error: Failed to process '%s' into '%s'\n" \
    "$input_file" "$output_file" >&2
  exit 1
fi

printf "Processing complete. Lines longer than %s characters have been removed.\n" "$max_chars"
printf "Result saved to: %s\n" "$output_file"