【Python】72-Line Code Lines Counter, Simple and Practical!

Translation Notice
This article was machine-translated using DeepSeek-R1.

Original Version: Authored in Chinese by myself

Accuracy Advisory: Potential discrepancies may exist between translations

Precedence: The Chinese text shall prevail in case of ambiguity

Feedback: Technical suggestions regarding translation quality are welcomed

Preface

Recently I suddenly wanted to know how many lines of code I’ve written, so I created this little tool…

Preparation

First consider the desired output:

Language	Lines	Size	Files
A	12345	300 KB	193
B	2345	165 KB	98

The program outputs a table sorted by code lines.
Basic code framework:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24


# -*- encoding: utf-8 -*-
import ...

# Code lines counter class
class CodeLinesCounter(object):
    """Skeleton of the code lines counter; scan and report are still stubs."""

    # Unit table of (label, bytes per unit); each unit is 1024 times the previous one
    SIZES = [(label, 1024 ** power) for power, label in enumerate(('B', 'KB', 'MB', 'GB', 'TB'))]

    # @param languages: dict, {file_extension: language}
    def __init__(self, languages):
        self._languages = languages
        # One (lines, size, files) total per extension, all starting at zero
        self._codelines = dict.fromkeys(languages, (0, 0, 0))
        # Counters for files handled successfully / with an error
        self._successful = 0
        self._error = 0

    # @param directory: Directory to scan
    # @param log: Whether to print logs
    def scan(self, directory, log=False):
        """Placeholder: only announces the scan when logging is enabled."""
        if not log:
            return
        print('Scanning', directory)

    def report(self):
        """Placeholder for printing the results; does nothing yet."""

# Build a counter for the extensions we care about, scan the drive, print the totals.
counter = CodeLinesCounter(
    languages={
        'py': 'Python',
        'c': 'C',
        'cpp': 'C++',
        'java': 'Java',
        'js': 'JavaScript',
        'html': 'HTML',
        'css': 'CSS',
        'txt': 'Plain text',
    }
)
# Scan the E drive. Note: the path must be written 'E:/'; 'E:' cannot be used.
counter.scan('E:/')
counter.report()

Now let's move on to the core implementation.

Implementation

File Scanning

First, we need to list the files under the root directory, which is what os.walk does.
os.walk(rootdir) returns a generator that yields one (root, dirs, files) tuple for the root directory and for each of its subdirectories.
For example, given this folder structure:

1
2
3
4
5
6
7
8


Folder
|   file1
+---Folder1
|       file2
|       file3
\---Folder2
    |   file4
    \---Folder3

Running:

1
2
3
4


import os

# Each item yielded by os.walk is a (root, dirs, files) triple; unpacking it
# into print shows the three parts separated by spaces on one line.
for entry in os.walk('Folder'):
    print(*entry)

Output:

1
2
3
4


Folder					['Folder1', 'Folder2']	['file1']
Folder\Folder1			[]						['file2', 'file3']
Folder\Folder2			['Folder3']				['file4']
Folder\Folder2\Folder3	[]						[]

In each tuple, the first element is the current directory, the second is the list of its subdirectories, and the third is the list of its files.
Implementation code:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46


# -*- encoding: utf-8 -*-
from os.path import join, getsize, abspath
from os import walk

class CodeLinesCounter(object):
    """Tally files and bytes per language (line counting is still a placeholder).

    A file belongs to a language when the text after the last '.' in its
    name is a key of the ``languages`` mapping given to the constructor.
    """

    # (unit name, bytes per unit) pairs in ascending order, used to format sizes
    SIZES = [('B', 1), ('KB', 1024), ('MB', 1024**2), ('GB', 1024**3), ('TB', 1024**4)]

    def __init__(self, languages):
        """Create a counter.

        :param languages: dict mapping a file extension (without the dot)
            to the language name shown in the report
        """
        self._languages = languages
        # Running totals, {suffix: (lines, size in bytes, number of files)}
        self._results = {suffix: (0, 0, 0) for suffix in languages}
        # Number of files tallied successfully / files that raised an error
        self._successful = self._error = 0

    def scan(self, directory, log=False):
        """Walk ``directory`` recursively and add every matching file to the totals.

        A file whose size cannot be read is reported, counted as an error and
        skipped. Ctrl+C stops the scan early; totals gathered so far are kept.

        :param directory: directory to scan
        :param log: whether to print progress messages
        """
        if log: print('Scanning', directory)
        try:
            for root, _, files in walk(abspath(directory)):
                for filename in files:
                    # rpartition leaves `dot` empty when the name has no '.',
                    # so a file simply called 'c' is not mistaken for C source
                    _, dot, suffix = filename.rpartition('.')
                    filename = join(root, filename)
                    if dot and suffix in self._results:
                        try:
                            fileSize = getsize(filename) # getsize returns bytes
                        except OSError as e:
                            # One unreadable file must not abort the whole scan
                            print(filename, '[Unknown error: %s]' % e)
                            self._error += 1
                            continue
                        lines, size, numFiles = self._results[suffix]
                        lines += 1 # Temporary line count, set to 1 first
                        self._results[suffix] = (lines, size + fileSize, numFiles + 1)
                        self._successful += 1
                    if log: print(filename)
        except KeyboardInterrupt:
            print('\nUser stopped operation')
        else:
            if log: print('Scan finished')

    def report(self):
        """Print one tab-separated row per language, largest (lines, size, files) first."""
        print('Language\tLines\tSize\tFiles')
        for suffix, (lines, size, files) in sorted(self._results.items(), key=lambda x: x[1], reverse=True):
            print(self._languages[suffix], lines, self.__format_size(size), files, sep='\t')

    # Unit conversion
    def __format_size(self, bytes):
        """Return ``bytes`` as a string with two decimals in the largest fitting unit.

        :param bytes: size in bytes
        :return: e.g. '1.50 KB'; sizes beyond the last unit are expressed in
            that last unit
        """
        for suffix, size in self.SIZES:
            if bytes < size * 1024:
                return '%.2f %s' % (bytes / size, suffix)
        # Larger than every unit in SIZES: fall back to the biggest one
        largestName, largestSize = self.SIZES[-1]
        return '%.2f %s' % (bytes / largestSize, largestName)

# Build a counter for the extensions we care about, scan the drive, print the totals.
counter = CodeLinesCounter(
    languages={
        'py': 'Python',
        'c': 'C',
        'cpp': 'C++',
        'java': 'Java',
        'js': 'JavaScript',
        'html': 'HTML',
        'css': 'CSS',
        'txt': 'Plain text',
    }
)
counter.scan('E:/')
counter.report()

Sample output (manually formatted):

1
2
3
4
5
6
7
8
9


Language        Lines   Size            Files
C++             667     671.51 KB       667
Python          317     981.01 KB       317
HTML            38      466.52 KB       38
Plain text      34      90.69 KB        34
JavaScript      19      1.43 MB         19
CSS             9       341.04 KB       9
C               2       20.45 KB        2
Java            1       676.00 B        1

Next step: actual line counting.

Line Counting

Blank lines shouldn't be counted. In scan, replace the temporary `lines += 1` statement with:

1
2
3
4


# Read the file as UTF-8 and count only the lines that contain something
# other than whitespace.
with open(filename, mode='r', encoding='utf-8') as source:
    for text in source:
        if not text or text.isspace():
            continue # Blank line: not counted
        lines += 1

But when running:

1
2
3


Traceback (most recent call last):
  ...
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb5 in position 355: invalid start byte

Some files use GBK encoding. Improved code:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25


try:
    ln = 0
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            if line and not line.isspace():
                ln += 1
except UnicodeDecodeError: # Try GBK
    try:
        ln = 0
        with open(filename, 'r', encoding='gbk') as f:
            for line in f:
                if line and not line.isspace():
                    ln += 1
    except:
        print(filename, '[Error: unknown encoding]')
        self._error += 1
    else:
        lines += ln
except Exception as e:
    print(filename, '[Unknown error: %s]' % e)
    self._error += 1
    continue
lines += ln
if log: print(f'{filename} [{ln}]')
self._successful += 1

Now the results are correct:

1
2
3
4
5
6
7
8
9


Language        Lines   Size            Files
C++             35595   671.51 KB       667
JavaScript      24485   1.43 MB         19
Python          24130   982.16 KB       317
CSS             8203    341.04 KB       9
HTML            6138    466.52 KB       38
Plain text      741     90.69 KB        34
C               557     20.45 KB        2
Java            29      676.00 B        1

Final step: table formatting.

Table Formatting

Use PrettyTable library:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72


# -*- encoding: utf-8 -*-
from os.path import join, getsize, abspath
from os import walk
from prettytable import PrettyTable

class CodeLinesCounter(object):
    """Count non-blank lines, bytes and files per language under a directory.

    A file belongs to a language when the text after the last '.' in its
    name is a key of the ``languages`` mapping given to the constructor.
    """

    # (unit name, bytes per unit) pairs in ascending order, used to format sizes
    SIZES = [('B', 1), ('KB', 1024), ('MB', 1024**2), ('GB', 1024**3), ('TB', 1024**4)]
    # Encodings tried, in this order, when reading a file
    _ENCODINGS = ('utf-8', 'gbk')

    def __init__(self, languages):
        """Create a counter.

        :param languages: dict mapping a file extension (without the dot)
            to the language name shown in the report
        """
        self._languages = languages
        # Running totals, {suffix: (lines, size in bytes, number of files)}
        self._results = {suffix: (0, 0, 0) for suffix in languages}
        # Number of files counted successfully / files that failed
        self._successful = self._error = 0

    def scan(self, directory, log=False):
        """Walk ``directory`` recursively and add every matching file to the totals.

        A file that cannot be read or decoded is reported, counted as an error
        and left out of the totals. Ctrl+C stops the scan early; totals
        gathered so far are kept.

        :param directory: directory to scan
        :param log: whether to print progress messages
        """
        if log: print('Scanning', directory)
        try:
            for root, _, files in walk(abspath(directory)):
                for filename in files:
                    # rpartition leaves `dot` empty when the name has no '.',
                    # so a file simply called 'c' is not mistaken for C source
                    _, dot, suffix = filename.rpartition('.')
                    filename = join(root, filename)
                    if not dot or suffix not in self._results:
                        if log: print(filename, '[None]')
                        continue
                    try:
                        fileSize = getsize(filename)
                        ln = self._count_lines(filename)
                    except Exception as e:
                        # One unreadable file must not abort the whole scan
                        print(filename, '[Unknown error: %s]' % e)
                        self._error += 1
                        continue
                    if ln is None:
                        print(filename, '[Error: unknown encoding]')
                        self._error += 1
                        continue
                    # Totals are updated once per file, only after it was read in full
                    lines, size, numFiles = self._results[suffix]
                    self._results[suffix] = (lines + ln, size + fileSize, numFiles + 1)
                    if log: print(f'{filename} [{ln}]')
                    self._successful += 1
        except KeyboardInterrupt:
            print('\nUser stopped operation')
        else:
            if log: print('Scan finished')

    @classmethod
    def _count_lines(cls, path):
        """Return the number of non-blank lines in ``path``.

        Each encoding in ``_ENCODINGS`` is tried in turn.

        :param path: file to read
        :return: the line count, or None when no encoding can decode the file
        """
        for encoding in cls._ENCODINGS:
            try:
                with open(path, 'r', encoding=encoding) as f:
                    return sum(1 for line in f if not line.isspace())
            except UnicodeDecodeError:
                continue # Wrong encoding, try the next one
        return None

    def report(self):
        """Print the totals as a PrettyTable, largest (lines, size, files) first.

        The table title shows how many files succeeded and how many failed.
        """
        table = PrettyTable(['Language', 'Lines', 'Size', 'Files'], title=f'Scan result (OK {self._successful}, Error {self._error})')
        for suffix, (lines, size, files) in sorted(self._results.items(), key=lambda x: x[1], reverse=True):
            table.add_row([self._languages[suffix], lines, self.__format_size(size), files])
        print(table)

    def __format_size(self, bytes):
        """Return ``bytes`` as a string with two decimals in the largest fitting unit.

        :param bytes: size in bytes
        :return: e.g. '1.50 KB'; sizes beyond the last unit are expressed in
            that last unit
        """
        for suffix, size in self.SIZES:
            if bytes < size * 1024:
                return '%.2f %s' % (bytes / size, suffix)
        # Larger than every unit in SIZES: fall back to the biggest one
        largestName, largestSize = self.SIZES[-1]
        return '%.2f %s' % (bytes / largestSize, largestName)

# Build a counter for the extensions we care about, scan the drive, print the totals.
counter = CodeLinesCounter(
    languages={
        'py': 'Python',
        'c': 'C',
        'cpp': 'C++',
        'java': 'Java',
        'js': 'JavaScript',
        'html': 'HTML',
        'css': 'CSS',
        'txt': 'Plain text',
    }
)
counter.scan('E:/')
counter.report()

Output:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14


+----------------------------------------+
|     Scan result (OK 1087, Error 0)     |
+------------+-------+-----------+-------+
|  Language  | Lines |    Size   | Files |
+------------+-------+-----------+-------+
|    C++     | 35595 | 671.51 KB |  667  |
| JavaScript | 24485 |  1.43 MB  |   19  |
|   Python   | 24130 | 982.16 KB |  317  |
|    CSS     |  8203 | 341.04 KB |   9   |
|    HTML    |  6138 | 466.52 KB |   38  |
| Plain text |  741  |  90.69 KB |   34  |
|     C      |  557  |  20.45 KB |   2   |
|    Java    |   29  |  676.00 B |   1   |
+------------+-------+-----------+-------+

Conclusion

Final code (without comments):

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72


# -*- encoding: utf-8 -*-
from os.path import join, getsize, abspath
from os import walk
from prettytable import PrettyTable

class CodeLinesCounter(object):
    """Count non-blank lines, bytes and files per language under a directory.

    A file belongs to a language when the text after the last '.' in its
    name is a key of the ``languages`` mapping given to the constructor.
    """

    # (unit name, bytes per unit) pairs in ascending order, used to format sizes
    SIZES = [('B', 1), ('KB', 1024), ('MB', 1024**2), ('GB', 1024**3), ('TB', 1024**4)]
    # Encodings tried, in this order, when reading a file
    _ENCODINGS = ('utf-8', 'gbk')

    def __init__(self, languages):
        """Create a counter.

        :param languages: dict mapping a file extension (without the dot)
            to the language name shown in the report
        """
        self._languages = languages
        # Running totals, {suffix: (lines, size in bytes, number of files)}
        self._results = {suffix: (0, 0, 0) for suffix in languages}
        # Number of files counted successfully / files that failed
        self._successful = self._error = 0

    def scan(self, directory, log=False):
        """Walk ``directory`` recursively and add every matching file to the totals.

        A file that cannot be read or decoded is reported, counted as an error
        and left out of the totals. Ctrl+C stops the scan early; totals
        gathered so far are kept.

        :param directory: directory to scan
        :param log: whether to print progress messages
        """
        if log: print('Scanning', directory)
        try:
            for root, _, files in walk(abspath(directory)):
                for filename in files:
                    # rpartition leaves `dot` empty when the name has no '.',
                    # so a file simply called 'c' is not mistaken for C source
                    _, dot, suffix = filename.rpartition('.')
                    filename = join(root, filename)
                    if not dot or suffix not in self._results:
                        if log: print(filename, '[None]')
                        continue
                    try:
                        fileSize = getsize(filename)
                        ln = self._count_lines(filename)
                    except Exception as e:
                        # One unreadable file must not abort the whole scan
                        print(filename, '[Unknown error: %s]' % e)
                        self._error += 1
                        continue
                    if ln is None:
                        print(filename, '[Error: unknown encoding]')
                        self._error += 1
                        continue
                    # Totals are updated once per file, only after it was read in full
                    lines, size, numFiles = self._results[suffix]
                    self._results[suffix] = (lines + ln, size + fileSize, numFiles + 1)
                    if log: print(f'{filename} [{ln}]')
                    self._successful += 1
        except KeyboardInterrupt:
            print('\nUser stopped operation')
        else:
            if log: print('Scan finished')

    @classmethod
    def _count_lines(cls, path):
        """Return the number of non-blank lines in ``path``.

        Each encoding in ``_ENCODINGS`` is tried in turn.

        :param path: file to read
        :return: the line count, or None when no encoding can decode the file
        """
        for encoding in cls._ENCODINGS:
            try:
                with open(path, 'r', encoding=encoding) as f:
                    return sum(1 for line in f if not line.isspace())
            except UnicodeDecodeError:
                continue # Wrong encoding, try the next one
        return None

    def report(self):
        """Print the totals as a PrettyTable, largest (lines, size, files) first.

        The table title shows how many files succeeded and how many failed.
        """
        table = PrettyTable(['Language', 'Lines', 'Size', 'Files'], title=f'Scan result (OK {self._successful}, Error {self._error})')
        for suffix, (lines, size, files) in sorted(self._results.items(), key=lambda x: x[1], reverse=True):
            table.add_row([self._languages[suffix], lines, self.__format_size(size), files])
        print(table)

    def __format_size(self, bytes):
        """Return ``bytes`` as a string with two decimals in the largest fitting unit.

        :param bytes: size in bytes
        :return: e.g. '1.50 KB'; sizes beyond the last unit are expressed in
            that last unit
        """
        for suffix, size in self.SIZES:
            if bytes < size * 1024:
                return '%.2f %s' % (bytes / size, suffix)
        # Larger than every unit in SIZES: fall back to the biggest one
        largestName, largestSize = self.SIZES[-1]
        return '%.2f %s' % (bytes / largestSize, largestName)

# Build a counter for the extensions we care about, scan the drive, print the totals.
counter = CodeLinesCounter(
    languages={
        'py': 'Python',
        'c': 'C',
        'cpp': 'C++',
        'java': 'Java',
        'js': 'JavaScript',
        'html': 'HTML',
        'css': 'CSS',
        'txt': 'Plain text',
    }
)
counter.scan('E:/')
counter.report()

Future improvements:

Add regex file ignore
matplotlib visualization
PyQt5 GUI
… (Welcome valuable suggestions!)