Files
zh-en-wn-dataset/scripts/avg_length.sh
2025-02-13 17:25:01 +06:00

43 lines
882 B
Bash
Executable File

#!/bin/bash
#
# avg_length.sh - report the average line length of a text file and show
# the line whose length is closest to that average.
#
# Usage: avg_length.sh <filename>
#
# Output (stdout):
#   Average line length: <average, rounded to an integer>
#   Closest matching line (#<line number>):
#   <content of that line>
#   Length: <length of that line>
#
# Exit status: 0 on success; 1 on a wrong argument count, a path that is
# not a regular file, or a file that contains no lines.
#
# NOTE(review): awk's length() counts characters or bytes depending on the
# awk implementation and the active locale - confirm which is intended for
# multi-byte text.
set -euo pipefail

if [[ $# -ne 1 ]]; then
  echo "Usage: $0 <filename>" >&2
  exit 1
fi
file="$1"

# -f is false both for missing paths and for non-regular files (directories).
if [[ ! -f "$file" ]]; then
  echo "Error: File '$file' does not exist" >&2
  exit 1
fi

# Pass 1: average line length, rounded to the nearest integer.
# The file is fed on stdin rather than as an awk operand: awk interprets an
# operand of the form name=value as a variable assignment and a leading '-'
# as an option, so such file names would otherwise be misread.
avg_length=$(awk '
  {
    total_length += length($0)
    count++
  }
  END {
    if (count > 0) printf "%.0f", total_length / count
  }
' < "$file")

# Pass 1 prints nothing when the file has no lines; there is no average to
# report and no line to match, so fail instead of printing an empty report.
if [[ -z "$avg_length" ]]; then
  echo "Error: File '$file' is empty" >&2
  exit 1
fi

# Pass 2: find the first line whose length is closest to the rounded average.
# The strict '<' keeps the earliest line when several are equally close.
awk -v target="$avg_length" '
  {
    curr_length = length($0)
    diff = curr_length - target
    if (diff < 0) diff = -diff   # absolute distance from the average
    if (NR == 1 || diff < min_diff) {
      min_diff = diff
      closest_line = $0
      line_num = NR
      actual_length = curr_length
    }
  }
  END {
    print "Average line length: " target
    print "Closest matching line (#" line_num "):"
    print closest_line
    print "Length: " actual_length
  }
' < "$file"