chore: stuff
This commit is contained in:
23
scripts/truncate.sh
Executable file
23
scripts/truncate.sh
Executable file
@@ -0,0 +1,23 @@
|
||||
#!/bin/sh
#
# truncate.sh - copy a text file, dropping every line longer than a limit.
#
# Usage: truncate.sh input_file max_chars output_file
#
#   input_file   existing regular file to read
#   max_chars    non-negative integer; a line is kept when awk's length()
#                of the line is less than or equal to this value
#   output_file  destination file, created or overwritten
#
# Exit status: 0 on success; 1 on wrong usage, invalid arguments, or when
# the filtering step fails. Diagnostics are written to stderr.

if [ $# -ne 3 ]; then
  {
    printf "Usage: %s input_file max_chars output_file\n" "$0"
    printf "Example: %s data.jsonl 10000 cleaned.jsonl\n" "$0"
  } >&2
  exit 1
fi

input_file=$1
max_chars=$2
output_file=$3

# Check if input file exists
if [ ! -f "$input_file" ]; then
  printf "Error: Input file '%s' not found\n" "$input_file" >&2
  exit 1
fi

# Reject anything that is not a plain decimal integer; otherwise awk would
# silently fall back to a string comparison against the line length.
case $max_chars in
  '' | *[!0-9]*)
    printf "Error: max_chars must be a non-negative integer, got '%s'\n" "$max_chars" >&2
    exit 1
    ;;
esac

# The output redirection truncates its target before awk reads anything, so
# filtering a file onto itself would destroy the data. Refuse that case.
# shellcheck disable=SC3013 # -ef is supported by dash, bash, busybox ash and ksh
if [ "$input_file" -ef "$output_file" ]; then
  printf "Error: input and output refer to the same file '%s'\n" "$input_file" >&2
  exit 1
fi

# Process the file: keep lines whose length is at most max_chars.
# The input is fed on stdin rather than as an operand so that a file name
# containing '=' (an awk assignment operand) or a leading '-' is read as data.
if ! awk -v max="$max_chars" 'length($0) <= max + 0' <"$input_file" >"$output_file"; then
  printf "Error: failed to filter '%s' into '%s'\n" "$input_file" "$output_file" >&2
  exit 1
fi

printf "Processing complete. Lines longer than %s characters have been removed.\n" "$max_chars"
printf "Result saved to: %s\n" "$output_file"
|
||||
Reference in New Issue
Block a user