Extract. Analyze. Report.
DATA IS POWER.
Logs, CSVs, system outputs—awk transforms raw text into meaningful data. It's not just a command, it's a complete programming language for text processing.
STRUCTURE FROM CHAOS.
awk automatically splits text into fields and lets you process them individually. Need to extract column 3 from a CSV? One command. Done.
Click a lesson to begin
What is awk? Fields and records. (Beginner)
$1, $2, $NF and field variables. (Beginner)
Pattern matching with conditions. (Beginner)
NR, NF, FS, OFS, and more. (Beginner)
printf for formatted output. (Beginner)
Arithmetic and string operators. (Intermediate)
User-defined variables and arrays. (Intermediate)
if, while, for loops. (Intermediate)
String, mathematical, time functions. (Intermediate)
Combine awk with other commands. (Advanced)
Create reports from log files. (Advanced)
Use awk in bash scripts. (Advanced)
awk is a powerful text processing language. It reads input line by line, splits each line into fields, and lets you process or extract data based on patterns.
awk 'pattern { action }' file
awk 'pattern { action }'
# Print all lines (default action when pattern matches)
awk '1' file.txt
awk '{ print }' file.txt
# Print specific fields
awk '{ print $1 }' file.txt
# Work with pipes
ps aux | awk '{ print $1, $11 }'
1. What does $0 represent in awk?
$1 — First field
$2 — Second field
$3 — Third field
$NF — Last field (NF = number of fields)
$(NF-1) — Second to last field
$0 — Entire line
# /etc/passwd extraction
awk -F: '{ print $1, $5 }' /etc/passwd
# Print first and last fields
ls -l | awk '{ print $1, $NF }'
# Print last field of each line
awk '{ print $NF }' file.txt
# Print everything except first field
awk '{ $1=""; print $0 }' file.txt
# -F sets field separator
awk -F: '{ print $1 }' /etc/passwd
# CSV processing
awk -F, '{ print $2 }' data.csv
# Space as separator (default)
awk -F' ' '{ print $2 }' file.txt
1. How do you print the last field in awk?
Patterns filter which lines are processed. Only lines matching the pattern get the action applied.
# Print lines containing 'root'
awk '/root/ { print }' /etc/passwd
# Print username and home for root
awk -F: '/root/ { print $1, $6 }' /etc/passwd
# Print lines where first field equals 'admin'
awk '$1 == "admin" { print }' file.txt
# Print lines where third field > 100
awk '$3 > 100 { print }' file.txt
# Print lines NOT containing 'nobody'
awk '!/nobody/ { print }' /etc/passwd
# Lines starting with 'a'
awk '/^a/ { print }' file.txt
# Lines ending with '.log'
awk '/\.log$/ { print }' file.txt
# Lines with 4 or more fields
awk 'NF >= 4 { print }' file.txt
1. How do you negate a pattern in awk?
NR — Number of current Record (line number)
NF — Number of Fields in current record
FS — Field Separator (default: whitespace)
RS — Record Separator (default: newline)
OFS — Output Field Separator (default: space)
ORS — Output Record Separator (default: newline)
$0 — Entire current record
$1..$NF — Individual fields
# Add line numbers
awk '{ print NR, $0 }' file.txt
# Print number of fields per line
awk '{ print NF, $0 }' file.txt
# Change output separator to colon
awk -F: '{ print $1":"$3 }' /etc/passwd
# Same using OFS
awk -F: -v OFS=":" '{ print $1, $3 }' /etc/passwd
# BEGIN runs before processing
awk 'BEGIN { print "Processing..." } /pattern/ { print } END { print "Done" }'
# Initialize variables
awk 'BEGIN { sum=0 } { sum+=$1 } END { print sum }' file.txt
1. What does NR represent in awk?
# Basic print
awk '{ print $1 }' file.txt
# Print multiple items
awk '{ print $1, $2, $3 }' file.txt
# Concatenate without separator
awk '{ print $1 $2 }' file.txt
# Print literal text
awk '{ print "User:", $1, "Home:", $6 }' /etc/passwd
# printf doesn't add newline by default
awk '{ printf "%s ", $1 }' file.txt
awk '{ printf "%s\n", $1 }' file.txt
# Format specifiers
awk '{ printf "%-15s %10d\n", $1, $2 }' file.txt
# %-15s = left-aligned string, 15 chars
# %10d = right-aligned integer, 10 chars
%s — String
%d — Integer
%f — Floating point
%-10s — Left-aligned string, 10 chars
%10s — Right-aligned string, 10 chars
%.2f — 2 decimal places
1. What does printf add that print doesn't?
+ Addition
- Subtraction
* Multiplication
/ Division
% Modulo
++ Increment
-- Decrement
+= Add and assign
-= Subtract and assign
# Add 10 to first field
awk '{ $1 = $1 + 10; print }' file.txt
# Calculate average
awk '{ sum+=$1; count++ } END { print sum/count }' file.txt
# Integer division
awk '{ print int($1 / $2) }' file.txt
~ Matches regex
!~ Does not match regex
== Equals
!= Not equals
< Less than
> Greater than
<= Less than or equal
>= Greater than or equal
1. What operator checks regex match?
# Initialize and use
awk '{ sum = $1 + $2; print sum }' file.txt
# String variables
awk '{ name = $1; print "Hello " name }' file.txt
# Arrays
awk '{ arr[$1] = $2 } END { for (k in arr) print k, arr[k] }' file.txt
# Count occurrences
awk '{ count[$1]++ } END { for (item in count) print item, count[item] }' file.txt
# Sum by category
awk '{ sum[$2] += $3 } END { for (cat in sum) print cat, sum[cat] }' file.txt
# Check if key exists
if (key in array) print "exists"
# Read from another file
awk '{ getline line < "other.txt"; print $0, line }' file.txt
1. How do you count occurrences with awk arrays?
# Classify each line by field 3: >100 is High, 51-100 is Medium, otherwise Low.
awk '{
if ($3 > 100) {
print "High:", $0
} else if ($3 > 50) {
print "Medium:", $0
} else {
print "Low:", $0
}
}' file.txt
# Walk every field of each line and report the index of any field
# whose text matches /error/.
awk '{
i = 1
while (i <= NF) {
if ($i ~ /error/) print "Error in field", i
i++
}
}' file.txt
# C-style for loop: average of the fields on each line
awk '{
sum = 0                      # reset per record — without this the total
                             # leaks across lines and the "average" drifts
for (i = 1; i <= NF; i++) {
sum += $i
}
print sum/NF
}' file.txt
# For iterating arrays
awk '{ for (k in arr) print k, arr[k] }' file.txt
1. What loop structure does awk support?
length(s) — Length of string
substr(s,i,n) — Substring from position i, length n
split(s,arr,sep) — Split string into array
gsub(r,s,t) — Global substitute
sub(r,s,t) — First substitute
tolower(s) — Convert to lowercase
toupper(s) — Convert to uppercase
# Replace first occurrence
awk '{ sub(/old/, "new"); print }' file.txt
# Replace all occurrences
awk '{ gsub(/old/, "new"); print }' file.txt
# Get substring (chars 1-5)
awk '{ print substr($1, 1, 5) }' file.txt
# Length of each line
awk '{ print length($0) }' file.txt
sin(x) — sine
cos(x) — cosine
sqrt(x) — square root
int(x) — integer part
rand() — random number in [0,1)
srand(x) — seed for rand
1. What function does global substitution?
# ps with awk
ps aux | awk '{ print $1, $11 }' | sort | uniq
# df with awk
df -h | awk '{ print $5, $6 }' | sort -rn
# du with awk
du -sh /* | awk '{ print $1 }' | sort -h
# log analysis (awk reads files directly — no cat needed)
awk '/ERROR/ { print $1, $2, $NF }' app.log
# Find top consumers
ps aux --sort=-%cpu | awk 'NR==1 { print } NR>1 && $3>10 { print }'
# Extract IP addresses
netstat -tuln | awk '/LISTEN/ { print $4 }'
# Sum column from output
grep "error" app.log | awk '{ sum+=$2 } END { print sum }'
1. Why combine awk with other commands?
# Count log lines by severity. The +0 forces numeric output so a
# severity that never appears prints 0 instead of an empty string.
awk 'BEGIN { print "=== Error Report ===" }
/ERROR/ { errors++ }
/WARNING/ { warnings++ }
/INFO/ { info++ }
END {
print "Errors:", errors+0
print "Warnings:", warnings+0
print "Info:", info+0
}' app.log
# sales.csv: product,region,amount
# Aggregate column 3 (amount) three ways: per product, per region, and overall.
awk -F, '{
product[$1] += $3
region[$2] += $3
total += $3
}
END {
print "=== Sales by Product ==="
# NOTE: for-in visits array keys in unspecified order in awk
for (p in product) print p, product[p]
print "\n=== Sales by Region ==="
for (r in region) print r, region[r]
print "\nTotal:", total
}' sales.csv
1. When does the END block execute?
#!/usr/bin/awk -f
# Calculate count, average, max, and min of column 2 in a CSV file.
BEGIN {
FS=","
print "Processing data..."
}
{
sum += $2
count++
# Seed max/min from the first record. The previous `min == 0` test
# silently broke on zero or negative values, and comparing against an
# uninitialized max (treated as 0) broke on all-negative data.
if (count == 1 || $2 > max) max = $2
if (count == 1 || $2 < min) min = $2
}
END {
# Guard empty input: the unguarded sum/count divided by zero.
if (count == 0) { print "No records"; exit 1 }
print "Records:", count
print "Average:", sum/count
print "Max:", max
print "Min:", min
}
#!/bin/bash
# Summarize a log file: severity counts, then the five most frequent
# final words on ERROR lines. Optional first argument overrides app.log.
logfile="${1:-app.log}"
printf '%s\n' "=== Log Summary ==="
awk '/ERROR/ {e++} /WARNING/ {w++} /INFO/ {i++}
END {print "Errors:", e+0, "Warnings:", w+0, "Info:", i+0}' "$logfile"
printf '%s\n' "=== Top 5 Error Messages ==="
awk '/ERROR/ {print $NF}' "$logfile" | sort | uniq -c | sort -rn | head -5
You've mastered awk! You now understand:
awk is a complete text processing language. When you need to extract data, create reports, or transform structured text, awk handles it elegantly.
Master awk and you'll turn raw log files and CSV data into meaningful insights instantly.
Extract. Analyze. Report.