Coding challenges are a great resource for learning coding techniques and improve analytical thinking, this is a collection of challenges from different platforms.
Write a program to count the frequencies of unique words from standard input, then print them out with their frequencies, ordered most frequent first.
A word is a case insensitive sequence of non-white-space characters, it means
Hello
and hello
are the same word, but bye
and bye.
are different.
Rules:
Do not load the whole input to memory, read it by chunks (unless it is a small piece of text).
Don’t parallelize the algorithm.
Don’t implement fancy hash tables (unless the language doesn’t have a built-in hash table).
Use only the language standard library.
Input Format
The whole input is a single test case. It may contain multiple lines.
Constraints:
- Text will be only ASCII character.
Output Format
The output should be a line-separated list of words and their frequency.
Usage
$ go build -o solution main.go
$ wget 'https://raw.githubusercontent.com/benhoyt/countwords/master/kjvbible.txt'
$ for _ in $(seq 10); do cat kjvbible.txt; done | ./solution
Sample
input00.txt
The foo the foo the
defenestration the
output00.txt
the 4
foo 2
defenestration 1
Solution
main.go
package main
import (
"fmt"
"io"
"os"
"runtime/pprof"
"sort"
)
func main() {
if cpup := os.Getenv("CPUPROFILE"); cpup != "" {
f, err := os.Create(cpup)
if err != nil {
fmt.Fprintf(os.Stderr, "Can't create CPU profile file: %v", err)
os.Exit(1)
}
defer f.Close()
if err := pprof.StartCPUProfile(f); err != nil {
fmt.Fprintf(os.Stderr, "Can't profile CPU usage: %v", err)
os.Exit(1)
}
defer pprof.StopCPUProfile()
}
wc := NewWordCounter()
if _, err := io.Copy(wc, os.Stdin); err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
for _, word := range wc.MostCommon() {
fmt.Println(word.Word, word.Count)
}
if memp := os.Getenv("MEMPROFILE"); memp != "" {
f, err := os.Create(memp)
if err != nil {
fmt.Fprintf(os.Stderr, "Can't create memory profile file: %v", err)
os.Exit(1)
}
defer f.Close()
if err := pprof.WriteHeapProfile(f); err != nil {
fmt.Fprintf(os.Stderr, "Can't profile memory usage: %v", err)
os.Exit(1)
}
}
}
type WordCount struct {
Word string
Count int
}
type WordCounter struct {
buf []byte
words map[string]*int
}
func NewWordCounter() *WordCounter {
return &WordCounter{
words: make(map[string]*int),
}
}
func (wc *WordCounter) addWord() {
if len(wc.buf) == 0 {
return
}
// Clean word buffer after counting a word.
defer func() { wc.buf = wc.buf[:0] }()
if n, ok := wc.words[string(wc.buf)]; ok {
*n++
return
}
n := 1
wc.words[string(wc.buf)] = &n
}
func (wc *WordCounter) MostCommon() []WordCount {
// Flush any data in the buffer.
wc.addWord()
counts := make([]WordCount, 0, len(wc.words))
for word, count := range wc.words {
counts = append(counts, WordCount{word, *count})
}
sort.Slice(counts, func(i, j int) bool {
return counts[i].Count > counts[j].Count
})
return counts
}
func (wc *WordCounter) Write(p []byte) (int, error) {
for _, b := range p {
if b == ' ' || b == '\n' {
wc.addWord()
continue
}
// To lower case.
if b >= 'A' && b <= 'Z' {
b += 'a' - 'A'
}
wc.buf = append(wc.buf, b)
}
return len(p), nil
}
Privacy policy
This site does not use third-party tracking cookies!
If you use private source products, worrying about privacy and using this products is like worrying about global warming and not recycling.. So just don’t.. 😒