Reading a file line by line in Go
Question
I'm unable to find file.ReadLine
function in Go. I can figure out how to quickly write one, but I am just wondering if I'm overlooking something here. How does one read a file line by line?
Accepted Answer
NOTE: The accepted answer was correct in early versions of Go. The highest voted answer contains the more recent, idiomatic way to achieve this.
There is function ReadLine in package bufio
.
Please note that if the line does not fit into the read buffer, the function will return an incomplete line. If you want to always read a whole line in your program by a single call to a function, you will need to encapsulate the ReadLine
function into your own function which calls ReadLine
in a for-loop.
bufio.ReadString('\n')
isn't fully equivalent to ReadLine
because ReadString
is unable to handle the case when the last line of a file does not end with the newline character.
Popular Answer
In Go 1.1 and newer, the simplest way to do this is with a bufio.Scanner
. Here is a simple example that reads lines from a file:
package main
import (
"bufio"
"fmt"
"log"
"os"
)
// main prints every line of /path/to/file.txt to standard output. It stops
// the program through log.Fatal when the file cannot be opened or when the
// scanner reports an error.
func main() {
	f, openErr := os.Open("/path/to/file.txt")
	if openErr != nil {
		log.Fatal(openErr)
	}
	defer f.Close()

	lines := bufio.NewScanner(f)
	for {
		if !lines.Scan() {
			break
		}
		text := lines.Text()
		fmt.Println(text)
	}

	// Scan returns false both at end of input and on failure; Err tells the
	// two cases apart.
	scanErr := lines.Err()
	if scanErr != nil {
		log.Fatal(scanErr)
	}
}
This is the cleanest way to read from a Reader
line by line.
There is one caveat: Scanner does not deal well with lines longer than 65536 characters. If that is an issue for you, then you should probably roll your own on top of Reader.Read()
.
Read more... Read less...
Use:
reader.ReadString('\n')
- If you don't mind that the line could be very long (i.e. use a lot of RAM). It keeps the
\n
at the end of the string returned.
reader.ReadLine()
- If you care about limiting RAM consumption and don't mind the extra work of handling the case where the line is greater than the reader's buffer size.
I tested the various solutions suggested by writing a program to test the scenarios which are identified as problems in other answers:
- A file with a 4MB line.
- A file which doesn't end with a line break.
I found that:
- The
Scanner
solution does not handle long lines. - The
ReadLine
solution is complex to implement. - The
ReadString
solution is the simplest and works for long lines.
Here is code which demonstrates each solution, it can be run via go run main.go
, or at https://play.golang.org/p/RAW3sGblbas
package main
import (
"bufio"
"bytes"
"fmt"
"io"
"os"
)
// readFileWithReadString prints the length and a preview (limited to 50
// bytes via limitLength) of every line in the file named fn, reading it with
// bufio.Reader.ReadString.
//
// It returns nil once the whole file has been read, or the error that
// prevented the file from being opened or read. Reaching the end of the file
// is not reported as an error.
func readFileWithReadString(fn string) (err error) {
	fmt.Println("readFileWithReadString")

	file, err := os.Open(fn)
	if err != nil {
		return err
	}
	defer file.Close()

	// Start reading from the file with a reader.
	reader := bufio.NewReader(file)
	for {
		line, readErr := reader.ReadString('\n')
		if readErr != nil && readErr != io.EOF {
			fmt.Printf(" > Failed with error: %v\n", readErr)
			return readErr
		}

		// ReadString may return data together with io.EOF (last line without
		// a trailing newline), so the line is handled before the EOF check.
		// An empty string at EOF means the file ended right after a newline:
		// there is no further line to report.
		if len(line) > 0 {
			// Process the line here.
			fmt.Printf(" > Read %d characters\n", len(line))
			fmt.Printf(" > > %s\n", limitLength(line, 50))
		}

		if readErr == io.EOF {
			// End of file is the normal way out of the loop, not a failure.
			return nil
		}
	}
}
// readFileWithScanner prints the length and a preview (limited to 50 bytes
// via limitLength) of each line of the file named fn, using a bufio.Scanner
// with its default settings. It returns the error from opening the file or
// the error reported by the scanner, and nil otherwise.
func readFileWithScanner(fn string) (err error) {
	fmt.Println("readFileWithScanner (scanner fails with long lines)")

	// Don't use this, it doesn't work with long lines...
	src, openErr := os.Open(fn)
	if openErr != nil {
		return openErr
	}
	defer src.Close()

	// Start reading from the file using a scanner.
	sc := bufio.NewScanner(src)
	for {
		if !sc.Scan() {
			break
		}
		text := sc.Text()
		// Process the line here.
		fmt.Printf(" > Read %d characters\n", len(text))
		fmt.Printf(" > > %s\n", limitLength(text, 50))
	}

	if scanErr := sc.Err(); scanErr != nil {
		fmt.Printf(" > Failed with error %v\n", scanErr)
		return scanErr
	}
	return nil
}
// readFileWithReadLine prints the length and a preview (limited to 50 bytes
// via limitLength) of every line in the file named fn, reading it with
// bufio.Reader.ReadLine and stitching together lines that are longer than
// the reader's buffer.
//
// It returns nil once the whole file has been read, or the error that
// prevented the file from being opened or read. Reaching the end of the file
// is not reported as an error, and no empty line is reported for it.
func readFileWithReadLine(fn string) (err error) {
	fmt.Println("readFileWithReadLine")

	file, err := os.Open(fn)
	if err != nil {
		return err
	}
	defer file.Close()

	// Start reading from the file with a reader.
	reader := bufio.NewReader(file)
	for {
		var buffer bytes.Buffer
		var readErr error

		// ReadLine sets isPrefix while the current line has more data than
		// fit in one call, so keep collecting chunks until the line is
		// complete or reading fails.
		for isPrefix := true; isPrefix && readErr == nil; {
			var chunk []byte
			chunk, isPrefix, readErr = reader.ReadLine()
			buffer.Write(chunk)
		}

		// Any error other than EOF ends the function; the original loop
		// could keep spinning on such an error.
		if readErr != nil && readErr != io.EOF {
			fmt.Printf(" > Failed with error: %v\n", readErr)
			return readErr
		}

		// At EOF there is a line to report only if chunks were collected
		// before the end was hit; otherwise EOF just marks the end of input.
		if readErr == nil || buffer.Len() > 0 {
			line := buffer.String()
			// Process the line here.
			fmt.Printf(" > Read %d characters\n", len(line))
			fmt.Printf(" > > %s\n", limitLength(line, 50))
		}

		if readErr == io.EOF {
			return nil
		}
	}
}
// main runs the two demonstrations in order: the long-line scenario first,
// then the scenario with a file that has no trailing line break.
func main() {
	scenarios := []func(){
		testLongLines,
		testLinesThatDoNotFinishWithALinebreak,
	}
	for _, run := range scenarios {
		run()
	}
}
// testLongLines writes the fixture longline.txt and then reads it back with
// each of the three strategies (ReadString, Scanner, ReadLine), separating
// their output with blank lines. If the fixture cannot be written, the
// failure is printed and the readers are not run.
func testLongLines() {
	fmt.Println("Long lines")
	fmt.Println()

	// There is nothing meaningful to read back if the fixture was not written.
	if err := createFileWithLongLine("longline.txt"); err != nil {
		fmt.Printf("creating longline.txt: %v\n", err)
		return
	}

	// The readers' return values are not inspected here; this function only
	// drives the demonstration.
	readFileWithReadString("longline.txt")
	fmt.Println()
	readFileWithScanner("longline.txt")
	fmt.Println()
	readFileWithReadLine("longline.txt")
	fmt.Println()
}
// testLinesThatDoNotFinishWithALinebreak writes the fixture nolinebreak.txt
// and then reads it back with each of the three strategies (ReadString,
// Scanner, ReadLine), separating their output with blank lines. If the
// fixture cannot be written, the failure is printed and the readers are not
// run.
func testLinesThatDoNotFinishWithALinebreak() {
	fmt.Println("No linebreak")
	fmt.Println()

	// There is nothing meaningful to read back if the fixture was not written.
	if err := createFileThatDoesNotEndWithALineBreak("nolinebreak.txt"); err != nil {
		fmt.Printf("creating nolinebreak.txt: %v\n", err)
		return
	}

	// The readers' return values are not inspected here; this function only
	// drives the demonstration.
	readFileWithReadString("nolinebreak.txt")
	fmt.Println()
	readFileWithScanner("nolinebreak.txt")
	fmt.Println()
	readFileWithReadLine("nolinebreak.txt")
	fmt.Println()
}
// createFileThatDoesNotEndWithALineBreak creates (or truncates) the file
// named fn and writes a single line of text to it without a trailing
// newline.
//
// It returns the first error encountered while creating, writing, flushing
// or closing the file, and nil if all of them succeed.
func createFileThatDoesNotEndWithALineBreak(fn string) (err error) {
	file, err := os.Create(fn)
	if err != nil {
		return err
	}
	// A failed Close can mean the data never reached the disk, so surface
	// it unless an earlier error is already being returned.
	defer func() {
		if cerr := file.Close(); err == nil {
			err = cerr
		}
	}()

	w := bufio.NewWriter(file)
	if _, err = w.WriteString("Does not end with linebreak."); err != nil {
		return err
	}
	return w.Flush()
}
// createFileWithLongLine creates (or truncates) the file named fn and writes
// two lines to it: a 4 MiB line consisting of the letter 'a' terminated by a
// newline, followed by "Second line." without a trailing newline.
//
// It returns the first error encountered while creating, writing, flushing
// or closing the file, and nil if all of them succeed.
func createFileWithLongLine(fn string) (err error) {
	file, err := os.Create(fn)
	if err != nil {
		return err
	}
	// A failed Close can mean the data never reached the disk, so surface
	// it unless an earlier error is already being returned.
	defer func() {
		if cerr := file.Close(); err == nil {
			err = cerr
		}
	}()

	w := bufio.NewWriter(file)

	fs := 1024 * 1024 * 4 // 4MB

	// Create a 4MB long line consisting of the letter a.
	for i := 0; i < fs; i++ {
		if err = w.WriteByte('a'); err != nil {
			return err
		}
	}

	// Terminate the line with a break.
	if err = w.WriteByte('\n'); err != nil {
		return err
	}

	// Put in a second line, which doesn't have a linebreak.
	if _, err = w.WriteString("Second line."); err != nil {
		return err
	}
	return w.Flush()
}
// limitLength returns at most the first length bytes of s.
//
// Strings that already fit are returned unchanged. A zero or negative
// length yields the empty string instead of panicking. The cut is made on
// bytes, so a multi-byte UTF-8 character at the boundary may be split.
func limitLength(s string, length int) string {
	if length <= 0 {
		return ""
	}
	if len(s) <= length {
		return s
	}
	return s[:length]
}
I tested on:
- go version go1.15 darwin/amd64
- go version go1.7 windows/amd64
- go version go1.6.3 linux/amd64
- go version go1.7.4 darwin/amd64
The test program outputs:
Long lines
readFileWithReadString
> Read 4194305 characters
> > aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
> Read 12 characters
> > Second line.
readFileWithScanner (scanner fails with long lines)
> Failed with error bufio.Scanner: token too long
readFileWithReadLine
> Read 4194304 characters
> > aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
> Read 12 characters
> > Second line.
> Read 0 characters
> >
No linebreak
readFileWithReadString
> Read 28 characters
> > Does not end with linebreak.
readFileWithScanner (scanner fails with long lines)
> Read 28 characters
> > Does not end with linebreak.
readFileWithReadLine
> Read 28 characters
> > Does not end with linebreak.
> Read 0 characters
> >
EDIT: As of go1.1, the idiomatic solution is to use bufio.Scanner
I wrote up a way to easily read each line from a file. The Readln(*bufio.Reader) function returns a line (sans \n) from the underlying bufio.Reader struct.
// Readln returns a single line (without the ending \n)
// from the input buffered reader.
// An error is returned iff there is an error with the
// buffered reader.
// Readln returns a single line (without the line ending) from the buffered
// reader r, joining the pieces of a line that is longer than r's buffer.
//
// When the input ends while a line has already been partly collected, that
// line is returned with a nil error so callers looping on err == nil do not
// lose it; the following call then reports io.EOF. Any other error is
// returned together with whatever data had been collected so far.
func Readln(r *bufio.Reader) (string, error) {
	var ln []byte
	for {
		chunk, isPrefix, err := r.ReadLine()
		ln = append(ln, chunk...)
		if err != nil {
			if err == io.EOF && len(ln) > 0 {
				// The final line exactly filled the buffer and had no
				// newline: hand it out now, EOF comes on the next call.
				return string(ln), nil
			}
			return string(ln), err
		}
		if !isPrefix {
			return string(ln), nil
		}
	}
}
You can use Readln to read every line from a file. The following code reads every line in a file and outputs each line to stdout.
// Open the file named by fi, print each of its lines to stdout via Readln,
// and report any read failure other than reaching the end of the file.
f, err := os.Open(fi)
if err != nil {
	fmt.Printf("error opening file: %v\n", err)
	os.Exit(1)
}
// Release the file handle when the surrounding function returns.
defer f.Close()

r := bufio.NewReader(f)
s, e := Readln(r)
for e == nil {
	fmt.Println(s)
	s, e = Readln(r)
}
if e == io.EOF {
	// Readln can hand back collected data together with the error; print
	// it so the final line is not lost.
	if len(s) > 0 {
		fmt.Println(s)
	}
} else {
	// Anything other than io.EOF is a real read failure that the loop
	// above would otherwise hide.
	fmt.Printf("error reading file: %v\n", e)
}
Cheers!
There are two common ways to read a file line by line.
- Use bufio.Scanner
- Use ReadString/ReadBytes/... in bufio.Reader
In my testcase, ~250MB, ~2,500,000 lines, bufio.Scanner(time used: 0.395491384s) is faster than bufio.Reader.ReadString(time_used: 0.446867622s).
Source code: https://github.com/xpzouying/go-practice/tree/master/read_file_line_by_line
Reading the file using bufio.Scanner:
// scanFile reads the file named by logfile line by line with a
// bufio.Scanner and discards every line. It terminates the program through
// log.Fatalf if the file cannot be opened or the scanner reports an error.
func scanFile() {
	// os.Open opens the file read-only, which is all that is needed here.
	f, err := os.Open(logfile)
	if err != nil {
		// log.Fatalf exits the process, so nothing after it would run.
		log.Fatalf("open file error: %v", err)
	}
	defer f.Close()

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		_ = sc.Text() // GET the line string
	}
	if err := sc.Err(); err != nil {
		log.Fatalf("scan file error: %v", err)
	}
}
Reading the file using bufio.Reader:
// readFileLines reads the file named by logfile line by line with
// bufio.Reader.ReadString and discards every line, including a final line
// that is not terminated by a newline. It terminates the program through
// log.Fatalf if the file cannot be opened or a read fails.
func readFileLines() {
	// os.Open opens the file read-only, which is all that is needed here.
	f, err := os.Open(logfile)
	if err != nil {
		// log.Fatalf exits the process, so nothing after it would run.
		log.Fatalf("open file error: %v", err)
	}
	defer f.Close()

	rd := bufio.NewReader(f)
	for {
		line, err := rd.ReadString('\n')
		if err != nil && err != io.EOF {
			log.Fatalf("read file line error: %v", err)
		}

		// ReadString can return data together with io.EOF when the last
		// line has no trailing newline, so use the line before leaving.
		if len(line) > 0 {
			_ = line // GET the line string
		}

		if err == io.EOF {
			break
		}
	}
}
Example from this gist
// readLine prints every line of the file at path to standard output.
//
// Failures are reported on standard output as "<error>: <path>": both a
// failure to open the file and an error hit while scanning it (for example
// a line too long for the scanner's buffer), so truncated output is never
// silent.
func readLine(path string) {
	inFile, err := os.Open(path)
	if err != nil {
		fmt.Println(err.Error() + `: ` + path)
		return
	}
	defer inFile.Close()

	scanner := bufio.NewScanner(inFile)
	for scanner.Scan() {
		fmt.Println(scanner.Text()) // the line
	}
	// Scan returns false both at end of input and on failure; Err tells the
	// two cases apart.
	if err := scanner.Err(); err != nil {
		fmt.Println(err.Error() + `: ` + path)
	}
}
However, scanning stops with an error when a line is larger than the Scanner's buffer.
When that happens, I use reader := bufio.NewReader(inFile)
and build up my own buffer, either using ch, err := reader.ReadByte()
or len, err := reader.Read(myBuffer)
Another way that I use (replace os.Stdin with file like above), this one concats when lines are long (isPrefix) and ignores empty lines:
// readLines reads standard input to its end and returns every non-blank
// line with surrounding whitespace removed. Lines longer than the reader's
// buffer are joined back together. The result is never nil.
func readLines() []string {
	return collectNonBlankLines(os.Stdin)
}

// collectNonBlankLines returns the whitespace-trimmed, non-blank lines read
// from src. Reading stops at the first error, including io.EOF; whatever was
// collected up to that point is returned.
func collectNonBlankLines(src io.Reader) []string {
	r := bufio.NewReader(src)
	lines := []string{}
	var pending []byte

	// flush turns the collected bytes into a line, keeps it unless it is
	// blank, and always empties the buffer so that a blank line can never
	// leak into the next line or into the final result.
	flush := func() {
		if str := strings.TrimSpace(string(pending)); len(str) > 0 {
			lines = append(lines, str)
		}
		pending = pending[:0]
	}

	for {
		chunk, isPrefix, err := r.ReadLine()
		if err != nil {
			break
		}
		pending = append(pending, chunk...)
		// isPrefix means the line continues in the next chunk.
		if !isPrefix {
			flush()
		}
	}

	// Input may end in the middle of a long line; keep what was collected.
	flush()
	return lines
}
You can also use ReadString with \n as a separator:
// Open the file named by filename and read it line by line with ReadString,
// including a final line that is not terminated by a newline.
f, err := os.Open(filename)
if err != nil {
	fmt.Println("error opening file ", err)
	os.Exit(1)
}
defer f.Close()

r := bufio.NewReader(f)
for {
	path, err := r.ReadString('\n') // '\n' is byte 10 (0x0A)
	if err != nil && err != io.EOF {
		return err // if you return error
	}

	// ReadString can return data together with io.EOF when the last line
	// has no trailing newline, so handle the line before checking for EOF.
	if len(path) > 0 {
		// process the line (still carrying its trailing '\n', if any) here
		_ = path
	}

	if err == io.EOF {
		// do something here
		break
	}
}