emacs.d/clones/ruslanspivak.com/eofnotchar/index.html

683 lines
45 KiB
HTML
Raw Normal View History

2022-10-07 19:32:11 +02:00
<!DOCTYPE html>
<html lang="en"
xmlns:og="http://ogp.me/ns#"
xmlns:fb="https://www.facebook.com/2008/fbml">
<head>
<title>EOF is not a character - Ruslan's Blog</title>
<!-- Using the latest rendering mode for IE -->
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="canonical" href="https://ruslanspivak.com/eofnotchar/">
<meta name="author" content="Ruslan Spivak" />
<meta name="description" content="I was reading Computer Systems: A Programmer's Perspective the other day and in the chapter on Unix I/O the authors mention that there is no explicit “EOF character” at the end of a file." />
<meta property="og:site_name" content="Ruslan's Blog" />
<meta property="og:type" content="article"/>
<meta property="og:title" content="EOF is not a character"/>
<meta property="og:url" content="https://ruslanspivak.com/eofnotchar/"/>
<meta property="og:description" content="I was reading Computer Systems: A Programmer's Perspective the other day and in the chapter on Unix I/O the authors mention that there is no explicit “EOF character” at the end of a file."/>
<meta property="article:published_time" content="2020-03-01" />
<meta property="article:section" content="blog" />
<meta property="article:author" content="Ruslan Spivak" />
<meta property="og:image"
content="https://ruslanspivak.com/eofnotchar/eofnotchar_notachar.png"/>
<meta name="twitter:card" content="summary">
<meta name="twitter:domain" content="https://ruslanspivak.com">
<meta property="twitter:image"
content="https://ruslanspivak.com/eofnotchar/eofnotchar_notachar.png"/>
<!-- Bootstrap -->
<link rel="stylesheet" href="../theme/css/bootstrap.min.css" type="text/css"/>
<link href="../theme/css/font-awesome.min.css" rel="stylesheet">
<link href="../theme/css/pygments/tango.css" rel="stylesheet">
<link href="../theme/css/typogrify.css" rel="stylesheet">
<link rel="stylesheet" href="../theme/css/style.css" type="text/css"/>
<link href="../static/custom.css" rel="stylesheet">
<link href="../feeds/all.atom.xml" type="application/atom+xml" rel="alternate"
title="Ruslan's Blog ATOM Feed"/>
</head>
<body>
<div class="navbar navbar-default navbar-fixed-top" role="navigation">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-ex1-collapse">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a href="../index.html" class="navbar-brand">
Ruslan's Blog </a>
</div>
<div class="collapse navbar-collapse navbar-ex1-collapse">
<ul class="nav navbar-nav">
</ul>
<ul class="nav navbar-nav navbar-right">
<li><a href="../pages/about.html"><i class="fa fa-question"></i><span class="icon-label">About</span></a></li>
<li><a href="../archives.html"><i class="fa fa-th-list"></i><span class="icon-label">Archives</span></a></li>
</ul>
</div>
<!-- /.navbar-collapse -->
</div>
</div> <!-- /.navbar -->
<!-- Banner -->
<!-- End Banner -->
<div class="container">
<div class="row">
<div class="col-sm-9">
<section id="content">
<article>
<header class="page-header">
<h1>
<a href="index.html"
rel="bookmark"
title="Permalink to EOF is not a character">
<span class="caps">EOF</span> is not a&nbsp;character
</a>
</h1>
</header>
<div class="entry-content">
<div class="panel">
<div class="panel-body">
<footer class="post-info">
<span class="label label-default">Date</span>
<span class="published">
<i class="fa fa-calendar"></i><time datetime="2020-03-01T11:53:00-05:00"> Sun, March 01, 2020</time>
</span>
</footer><!-- /.post-info --> </div>
</div>
<p><strong>Update Mar 14, 2020</strong>: I&#8217;m working on an update to the article based on all the feedback I&#8217;ve received so far. Stay tuned!
<br/>
<br/></p>
<p>I was reading <em>Computer Systems: A Programmer&#8217;s Perspective</em> the other day and in the chapter on Unix I/O the authors mention that <strong><em>there is no explicit &#8220;<span class="caps">EOF</span> character&#8221; at the end of a file</em></strong>.</p>
<p><img alt="" src="eofnotchar_notachar.png" width="640"></p>
<p>If you&#8217;ve spent some time reading and/or playing with Unix I/O and have written some C programs that read text files and run on Unix/Linux, that statement is probably obvious. But let&#8217;s take a closer look at the following two points related to the statement in the&nbsp;book:</p>
<ol>
<li><span class="caps">EOF</span> is not a&nbsp;character</li>
<li><span class="caps">EOF</span> is not a character you find at the end of a&nbsp;file</li>
</ol>
<p><br/>
1. Why would anyone say or think that <span class="caps">EOF</span> is a character? I think it may be because in some C programs you can find code that explicitly checks for <span class="caps">EOF</span> using <em>getchar()</em> and <em>getc()</em>&nbsp;routines:</p>
<div class="highlight"><pre><span></span> <span class="cp">#include</span> <span class="cpf">&lt;stdio.h&gt;</span><span class="cp"></span>
<span class="p">...</span>
<span class="k">while</span> <span class="p">((</span><span class="n">c</span> <span class="o">=</span> <span class="n">getchar</span><span class="p">())</span> <span class="o">!=</span> <span class="n">EOF</span><span class="p">)</span>
<span class="n">putchar</span><span class="p">(</span><span class="n">c</span><span class="p">);</span>
<span class="n">OR</span>
<span class="kt">FILE</span> <span class="o">*</span><span class="n">fp</span><span class="p">;</span>
<span class="kt">int</span> <span class="n">c</span><span class="p">;</span>
<span class="p">...</span>
<span class="k">while</span> <span class="p">((</span><span class="n">c</span> <span class="o">=</span> <span class="n">getc</span><span class="p">(</span><span class="n">fp</span><span class="p">))</span> <span class="o">!=</span> <span class="n">EOF</span><span class="p">)</span>
<span class="n">putc</span><span class="p">(</span><span class="n">c</span><span class="p">,</span> <span class="n">stdout</span><span class="p">);</span>
</pre></div>
<p>And if you check the <em>man</em> page for <em>getchar()</em> or <em>getc()</em>, you&#8217;ll read that both routines get the next character from the input stream. So that could be what leads to confusion about the nature of <span class="caps">EOF</span>, but that&#8217;s just me speculating. Let&#8217;s get back to the point that <span class="caps">EOF</span> is not a&nbsp;character.</p>
<p>What is a character anyway? A <em>character</em> is the smallest component of a text. &#8216;A&#8217;, &#8216;a&#8217;, &#8216;B&#8217;, &#8216;b&#8217; are all different characters. A character has a numeric value that is called a <a href="https://docs.python.org/3/howto/unicode.html"><em>code point</em></a> in the Unicode standard. For example, the English character &#8216;A&#8217; has a numeric value of 65 in decimal. You can check this quickly in a Python&nbsp;shell:</p>
<div class="highlight"><pre><span></span>$ python
&gt;&gt;&gt; ord(&#39;A&#39;)
65
&gt;&gt;&gt; chr(65)
&#39;A&#39;
</pre></div>
<p><br/>
Or you could look it up in the <span class="caps">ASCII</span> table on your Unix/Linux&nbsp;box:</p>
<div class="highlight"><pre><span></span>$ man ascii
</pre></div>
<p><img alt="" src="eofnotchar_asciitable.png" width="640"></p>
<p><br/></p>
<p>Let&#8217;s check the value of <span class="caps">EOF</span> by writing a little C program. In <span class="caps">ANSI</span> C, <span class="caps">EOF</span> is defined in <em>&lt;stdio.h&gt;</em> as part of the standard library. Its value is usually -1. Save the following code in file <em>printeof.c</em>, compile it, and run&nbsp;it:</p>
<div class="highlight"><pre><span></span><span class="cp">#include</span> <span class="cpf">&lt;stdio.h&gt;</span><span class="cp"></span>
<span class="kt">int</span> <span class="nf">main</span><span class="p">(</span><span class="kt">int</span> <span class="n">argc</span><span class="p">,</span> <span class="kt">char</span> <span class="o">*</span><span class="n">argv</span><span class="p">[])</span>
<span class="p">{</span>
<span class="n">printf</span><span class="p">(</span><span class="s">&quot;EOF value on my system: %d</span><span class="se">\n</span><span class="s">&quot;</span><span class="p">,</span> <span class="n">EOF</span><span class="p">);</span>
<span class="k">return</span> <span class="mi">0</span><span class="p">;</span>
<span class="p">}</span>
</pre></div>
<p><br/></p>
<div class="highlight"><pre><span></span>$ gcc -o printeof printeof.c
$ ./printeof
EOF value on my system: -1
</pre></div>
<p>Okay, so on my system the value is -1 (I tested it both on Mac <span class="caps">OS</span> and Ubuntu Linux). Is there a character with a numerical value of -1? Again, you could check the available numeric values in the <span class="caps">ASCII</span> table or check the official Unicode page to find the legitimate range of numeric values for representing characters. But let&#8217;s fire up a Python shell and use the built-in <em>chr()</em> function to return a character for&nbsp;-1:</p>
<div class="highlight"><pre><span></span>$ python
&gt;&gt;&gt; chr<span class="o">(</span>-1<span class="o">)</span>
Traceback <span class="o">(</span>most recent call last<span class="o">)</span>:
File <span class="s2">&quot;&lt;stdin&gt;&quot;</span>, line <span class="m">1</span>, in &lt;module&gt;
ValueError: chr<span class="o">()</span> arg not in range<span class="o">(</span>0x110000<span class="o">)</span>
</pre></div>
<p>As expected, there is no character with a numeric value of -1. Okay, so <span class="caps">EOF</span> (as seen in C programs) is not a&nbsp;character.</p>
<p>On to the second&nbsp;point.</p>
<p><br/>
2. Is <span class="caps">EOF</span> a character that you can find at the end of a file? I think at this point you already know the answer, but let&#8217;s double check our&nbsp;assumption.</p>
<p>Let&#8217;s take a simple text file <a href="https://github.com/rspivak/2x25/blob/master/eofnotchar/helloworld.txt">helloworld.txt</a> and get a hexdump of the contents of the file. We can use <em>xxd</em> for&nbsp;that:</p>
<div class="highlight"><pre><span></span>$ cat helloworld.txt
Hello world!
$ xxd helloworld.txt
<span class="m">00000000</span>: <span class="m">4865</span> 6c6c 6f20 776f 726c <span class="m">6421</span> 0a Hello world!.
</pre></div>
<p>As you can see, the last character at the end of the file is the hex <em>0a</em>. You can find in the <span class="caps">ASCII</span> table that <em>0a</em> represents <em>nl</em>, the newline character. Or you can check it in a Python&nbsp;shell:</p>
<div class="highlight"><pre><span></span>$ python
&gt;&gt;&gt; chr<span class="o">(</span>0x0a<span class="o">)</span>
<span class="s1">&#39;\n&#39;</span>
</pre></div>
<p><br/>
Okay. If <span class="caps">EOF</span> is not a character and it&#8217;s not a character that you find at the end of a file, what is it&nbsp;then?</p>
<p><strong><em><span class="caps">EOF</span> (end-of-file)</em></strong> is a condition provided by the kernel that can be detected by an&nbsp;application.</p>
<p>Let&#8217;s see how we can detect the <span class="caps">EOF</span> condition in various programming languages when reading a text file using high-level I/O routines provided by the languages. For this purpose, we&#8217;ll write a very simple <a href="https://en.wikipedia.org/wiki/Cat_(Unix)"><em>cat</em></a> version called <em>mcat</em> that reads an <span class="caps">ASCII</span>-encoded text file byte by byte (character by character) and explicitly checks for <span class="caps">EOF</span>. Let&#8217;s write our <em>cat</em> version in the following programming&nbsp;languages:</p>
<ul>
<li><span class="caps">ANSI</span>&nbsp;C</li>
<li>Python</li>
<li>Go</li>
<li>JavaScript&nbsp;(node.js)</li>
</ul>
<p>You can find source code for all of the examples in this article on <a href="https://github.com/rspivak/2x25/tree/master/eofnotchar">GitHub</a>. Okay, let&#8217;s get started with the venerable C programming&nbsp;language.</p>
<ol>
<li>
<p><span class="caps">ANSI</span> C (a modified <em>cat</em> version from <em>The C Programming Language</em>&nbsp;book)</p>
<div class="highlight"><pre><span></span><span class="cm">/* mcat.c */</span>
<span class="cp">#include</span> <span class="cpf">&lt;stdio.h&gt;</span><span class="cp"></span>
<span class="kt">int</span> <span class="nf">main</span><span class="p">(</span><span class="kt">int</span> <span class="n">argc</span><span class="p">,</span> <span class="kt">char</span> <span class="o">*</span><span class="n">argv</span><span class="p">[])</span>
<span class="p">{</span>
<span class="kt">FILE</span> <span class="o">*</span><span class="n">fp</span><span class="p">;</span>
<span class="kt">int</span> <span class="n">c</span><span class="p">;</span>
<span class="k">if</span> <span class="p">((</span><span class="n">fp</span> <span class="o">=</span> <span class="n">fopen</span><span class="p">(</span><span class="o">*++</span><span class="n">argv</span><span class="p">,</span> <span class="s">&quot;r&quot;</span><span class="p">))</span> <span class="o">==</span> <span class="nb">NULL</span><span class="p">)</span> <span class="p">{</span>
<span class="n">printf</span><span class="p">(</span><span class="s">&quot;mcat: can&#39;t open %s</span><span class="se">\n</span><span class="s">&quot;</span><span class="p">,</span> <span class="o">*</span><span class="n">argv</span><span class="p">);</span>
<span class="k">return</span> <span class="mi">1</span><span class="p">;</span>
<span class="p">}</span>
<span class="k">while</span> <span class="p">((</span><span class="n">c</span> <span class="o">=</span> <span class="n">getc</span><span class="p">(</span><span class="n">fp</span><span class="p">))</span> <span class="o">!=</span> <span class="n">EOF</span><span class="p">)</span>
<span class="n">putc</span><span class="p">(</span><span class="n">c</span><span class="p">,</span> <span class="n">stdout</span><span class="p">);</span>
<span class="n">fclose</span><span class="p">(</span><span class="n">fp</span><span class="p">);</span>
<span class="k">return</span> <span class="mi">0</span><span class="p">;</span>
<span class="p">}</span>
</pre></div>
<p>Compile</p>
<div class="highlight"><pre><span></span>$ gcc -o mcat mcat.c
</pre></div>
<p>Run</p>
<div class="highlight"><pre><span></span>$ ./mcat helloworld.txt
Hello world!
</pre></div>
<p><br/>
Quick explanation of the code&nbsp;above:</p>
<ul>
<li>The program opens a file passed as a command line&nbsp;argument</li>
<li>The <em>while</em> loop copies data from the file to the standard output one byte at a time until it reaches the end of the&nbsp;file.</li>
<li>On reaching <span class="caps">EOF</span>, the program closes the file and&nbsp;terminates</li>
</ul>
</li>
<li>
<p>Python&nbsp;3</p>
<p>Python doesn&#8217;t have a mechanism to explicitly check for <span class="caps">EOF</span> like in <span class="caps">ANSI</span> C, but if you read a text file one character at a time, you can determine the <em>end-of-file</em> condition by checking if the character read is&nbsp;empty:</p>
<div class="highlight"><pre><span></span><span class="c1"># mcat.py</span>
<span class="kn">import</span> <span class="nn">sys</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span> <span class="k">as</span> <span class="n">fin</span><span class="p">:</span>
<span class="k">while</span> <span class="bp">True</span><span class="p">:</span>
<span class="n">c</span> <span class="o">=</span> <span class="n">fin</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="c1"># read max 1 char</span>
<span class="k">if</span> <span class="n">c</span> <span class="o">==</span> <span class="s1">&#39;&#39;</span><span class="p">:</span> <span class="c1"># EOF</span>
<span class="k">break</span>
<span class="k">print</span><span class="p">(</span><span class="n">c</span><span class="p">,</span> <span class="n">end</span><span class="o">=</span><span class="s1">&#39;&#39;</span><span class="p">)</span>
</pre></div>
<p><br/></p>
<div class="highlight"><pre><span></span>$ python mcat.py helloworld.txt
Hello world!
</pre></div>
<p>Python 3.8+ (a shorter version of the above using <a href="https://docs.python.org/3/whatsnew/3.8.html#assignment-expressions">the walrus operator</a>):</p>
<div class="highlight"><pre><span></span><span class="c1"># mcat38.py</span>
<span class="kn">import</span> <span class="nn">sys</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span> <span class="k">as</span> <span class="n">fin</span><span class="p">:</span>
<span class="k">while</span> <span class="p">(</span><span class="n">c</span> <span class="p">:</span><span class="o">=</span> <span class="n">fin</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span> <span class="o">!=</span> <span class="s1">&#39;&#39;</span><span class="p">:</span> <span class="c1"># read max 1 char at a time until EOF</span>
<span class="k">print</span><span class="p">(</span><span class="n">c</span><span class="p">,</span> <span class="n">end</span><span class="o">=</span><span class="s1">&#39;&#39;</span><span class="p">)</span>
</pre></div>
<p><br/></p>
<div class="highlight"><pre><span></span>$ python3.8 mcat38.py helloworld.txt
Hello world!
</pre></div>
</li>
<li>
<p>Go</p>
<p>In Go we can explicitly check if the error returned by <a href="https://tour.golang.org/methods/21">Read()</a> is <span class="caps">EOF</span>.</p>
<div class="highlight"><pre><span></span><span class="o">//</span> <span class="n">mcat</span><span class="o">.</span><span class="n">go</span>
<span class="n">package</span> <span class="n">main</span>
<span class="kn">import</span> <span class="p">(</span>
<span class="s2">&quot;fmt&quot;</span>
<span class="s2">&quot;os&quot;</span>
<span class="s2">&quot;io&quot;</span>
<span class="p">)</span>
<span class="n">func</span> <span class="n">main</span><span class="p">()</span> <span class="p">{</span>
<span class="nb">file</span><span class="p">,</span> <span class="n">err</span> <span class="p">:</span><span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">Open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">Args</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
<span class="k">if</span> <span class="n">err</span> <span class="o">!=</span> <span class="n">nil</span> <span class="p">{</span>
<span class="n">fmt</span><span class="o">.</span><span class="n">Fprintf</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">Stderr</span><span class="p">,</span> <span class="s2">&quot;mcat: %v</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">,</span> <span class="n">err</span><span class="p">)</span>
<span class="n">os</span><span class="o">.</span><span class="n">Exit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="p">}</span>
<span class="nb">buffer</span> <span class="p">:</span><span class="o">=</span> <span class="n">make</span><span class="p">([]</span><span class="n">byte</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> <span class="o">//</span> <span class="mi">1</span><span class="o">-</span><span class="n">byte</span> <span class="nb">buffer</span>
<span class="k">for</span> <span class="p">{</span>
<span class="n">bytesread</span><span class="p">,</span> <span class="n">err</span> <span class="p">:</span><span class="o">=</span> <span class="nb">file</span><span class="o">.</span><span class="n">Read</span><span class="p">(</span><span class="nb">buffer</span><span class="p">)</span>
<span class="k">if</span> <span class="n">err</span> <span class="o">==</span> <span class="n">io</span><span class="o">.</span><span class="n">EOF</span> <span class="p">{</span>
<span class="k">break</span>
<span class="p">}</span>
<span class="n">fmt</span><span class="o">.</span><span class="n">Print</span><span class="p">(</span><span class="n">string</span><span class="p">(</span><span class="nb">buffer</span><span class="p">[:</span><span class="n">bytesread</span><span class="p">]))</span>
<span class="p">}</span>
<span class="nb">file</span><span class="o">.</span><span class="n">Close</span><span class="p">()</span>
<span class="p">}</span>
</pre></div>
<p><br/></p>
<div class="highlight"><pre><span></span>$ go run mcat.go helloworld.txt
Hello world!
</pre></div>
</li>
<li>
<p>JavaScript&nbsp;(node.js)</p>
<p>There is no explicit check for <span class="caps">EOF</span>, but the <a href="https://nodejs.org/api/stream.html#stream_event_end"><em>end</em> event</a> on a stream is fired when the end of a file is reached and a <em>read</em> operation tries to read more&nbsp;data.</p>
<div class="highlight"><pre><span></span><span class="cm">/* mcat.js */</span>
<span class="kr">const</span> <span class="nx">fs</span> <span class="o">=</span> <span class="nx">require</span><span class="p">(</span><span class="s1">&#39;fs&#39;</span><span class="p">);</span>
<span class="kr">const</span> <span class="nx">process</span> <span class="o">=</span> <span class="nx">require</span><span class="p">(</span><span class="s1">&#39;process&#39;</span><span class="p">);</span>
<span class="kr">const</span> <span class="nx">fileName</span> <span class="o">=</span> <span class="nx">process</span><span class="p">.</span><span class="nx">argv</span><span class="p">[</span><span class="mi">2</span><span class="p">];</span>
<span class="kd">var</span> <span class="nx">readable</span> <span class="o">=</span> <span class="nx">fs</span><span class="p">.</span><span class="nx">createReadStream</span><span class="p">(</span><span class="nx">fileName</span><span class="p">,</span> <span class="p">{</span>
<span class="nx">encoding</span><span class="o">:</span> <span class="s1">&#39;utf8&#39;</span><span class="p">,</span>
<span class="nx">fd</span><span class="o">:</span> <span class="kc">null</span><span class="p">,</span>
<span class="p">});</span>
<span class="nx">readable</span><span class="p">.</span><span class="nx">on</span><span class="p">(</span><span class="s1">&#39;readable&#39;</span><span class="p">,</span> <span class="kd">function</span><span class="p">()</span> <span class="p">{</span>
<span class="kd">var</span> <span class="nx">chunk</span><span class="p">;</span>
<span class="k">while</span> <span class="p">((</span><span class="nx">chunk</span> <span class="o">=</span> <span class="nx">readable</span><span class="p">.</span><span class="nx">read</span><span class="p">(</span><span class="mi">1</span><span class="p">))</span> <span class="o">!==</span> <span class="kc">null</span><span class="p">)</span> <span class="p">{</span>
<span class="nx">process</span><span class="p">.</span><span class="nx">stdout</span><span class="p">.</span><span class="nx">write</span><span class="p">(</span><span class="nx">chunk</span><span class="p">);</span> <span class="cm">/* chunk is one byte */</span>
<span class="p">}</span>
<span class="p">});</span>
<span class="nx">readable</span><span class="p">.</span><span class="nx">on</span><span class="p">(</span><span class="s1">&#39;end&#39;</span><span class="p">,</span> <span class="p">()</span> <span class="p">=&gt;</span> <span class="p">{</span>
<span class="nx">console</span><span class="p">.</span><span class="nx">log</span><span class="p">(</span><span class="s1">&#39;\nEOF: There will be no more data.&#39;</span><span class="p">);</span>
<span class="p">});</span>
</pre></div>
<p><br/></p>
<div class="highlight"><pre><span></span>$ node mcat.js helloworld.txt
Hello world!
EOF: There will be no more data.
</pre></div>
</li>
</ol>
<p><br/>
How do the high-level I/O routines in the examples above determine the <em>end-of-file</em> condition? On Linux systems the routines either directly or indirectly use the <a href="https://en.wikipedia.org/wiki/Read_(system_call)">read()</a> system call provided by the kernel. The <em>getc()</em> function (or macro) in C, for example, uses the <em>read()</em> system call and returns <span class="caps">EOF</span> if <em>read()</em> indicated the <em>end-of-file</em> condition. The <a href="https://en.wikipedia.org/wiki/Read_(system_call)">read()</a> system call returns 0 to indicate the <span class="caps">EOF</span>&nbsp;condition.</p>
<p><img alt="" src="eofnotchar_stdsysio.png" width="400"></p>
<p>Let&#8217;s write a <em>cat</em> version called <em>syscat</em> using Unix system calls only, both for fun and potentially some profit. Let&#8217;s do that in C&nbsp;first:</p>
<div class="highlight"><pre><span></span><span class="cm">/* syscat.c */</span>
<span class="cp">#include</span> <span class="cpf">&lt;sys/types.h&gt;</span><span class="cp"></span>
<span class="cp">#include</span> <span class="cpf">&lt;sys/stat.h&gt;</span><span class="cp"></span>
<span class="cp">#include</span> <span class="cpf">&lt;fcntl.h&gt;</span><span class="cp"></span>
<span class="cp">#include</span> <span class="cpf">&lt;unistd.h&gt;</span><span class="cp"></span>
<span class="kt">int</span> <span class="nf">main</span><span class="p">(</span><span class="kt">int</span> <span class="n">argc</span><span class="p">,</span> <span class="kt">char</span> <span class="o">*</span><span class="n">argv</span><span class="p">[])</span>
<span class="p">{</span>
<span class="kt">int</span> <span class="n">fd</span><span class="p">;</span>
<span class="kt">char</span> <span class="n">c</span><span class="p">;</span>
<span class="n">fd</span> <span class="o">=</span> <span class="n">open</span><span class="p">(</span><span class="n">argv</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">O_RDONLY</span><span class="p">,</span> <span class="mi">0</span><span class="p">);</span>
<span class="k">while</span> <span class="p">(</span><span class="n">read</span><span class="p">(</span><span class="n">fd</span><span class="p">,</span> <span class="o">&amp;</span><span class="n">c</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">)</span>
<span class="n">write</span><span class="p">(</span><span class="n">STDOUT_FILENO</span><span class="p">,</span> <span class="o">&amp;</span><span class="n">c</span><span class="p">,</span> <span class="mi">1</span><span class="p">);</span>
<span class="k">return</span> <span class="mi">0</span><span class="p">;</span>
<span class="p">}</span>
</pre></div>
<p><br/></p>
<div class="highlight"><pre><span></span>$ gcc -o syscat syscat.c
$ ./syscat helloworld.txt
Hello world!
</pre></div>
<p>In the code above, you can see that we use the fact that the <em>read()</em> function returns 0 to indicate <span class="caps">EOF</span>.</p>
<p>And the same in Python&nbsp;3:</p>
<div class="highlight"><pre><span></span><span class="c1"># syscat.py</span>
<span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="n">fd</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">os</span><span class="o">.</span><span class="n">O_RDONLY</span><span class="p">)</span>
<span class="k">while</span> <span class="bp">True</span><span class="p">:</span>
<span class="n">c</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="n">fd</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">c</span><span class="p">:</span> <span class="c1"># EOF</span>
<span class="k">break</span>
<span class="n">os</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">fileno</span><span class="p">(),</span> <span class="n">c</span><span class="p">)</span>
</pre></div>
<p><br/></p>
<div class="highlight"><pre><span></span>$ python syscat.py helloworld.txt
Hello world!
</pre></div>
<p>And in Python 3.8+ using <a href="https://docs.python.org/3/whatsnew/3.8.html#assignment-expressions">the walrus operator</a>:</p>
<div class="highlight"><pre><span></span><span class="c1"># syscat38.py</span>
<span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="n">fd</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">open</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">os</span><span class="o">.</span><span class="n">O_RDONLY</span><span class="p">)</span>
<span class="k">while</span> <span class="n">c</span> <span class="p">:</span><span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">read</span><span class="p">(</span><span class="n">fd</span><span class="p">,</span> <span class="mi">1</span><span class="p">):</span>
<span class="n">os</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">fileno</span><span class="p">(),</span> <span class="n">c</span><span class="p">)</span>
</pre></div>
<p><br/></p>
<div class="highlight"><pre><span></span>$ python3.8 syscat38.py helloworld.txt
Hello world!
</pre></div>
<p><br/>
Let&#8217;s recap the main points about <span class="caps">EOF</span>&nbsp;again:</p>
<ul>
<li><span class="caps">EOF</span> is not a&nbsp;character</li>
<li><span class="caps">EOF</span> is not a character that you find at the end of a&nbsp;file</li>
<li><span class="caps">EOF</span> is a condition provided by the kernel that can be detected by an application <s>when a <em>read</em> operation reaches the end of a file</s></li>
</ul>
<p><strong>Update Mar 3, 2020</strong>: Let&#8217;s recap the main points about <span class="caps">EOF</span> with added details for more&nbsp;clarity:</p>
<ul>
<li><span class="caps">EOF</span> in <span class="caps">ANSI</span> C is not a character. It&#8217;s a constant defined in <em>&lt;stdio.h&gt;</em> and its value is usually&nbsp;-1</li>
<li><span class="caps">EOF</span> is not a character in the <span class="caps">ASCII</span> or Unicode character&nbsp;set</li>
<li><span class="caps">EOF</span> is not a character that you find at the end of a file on Unix/Linux&nbsp;systems</li>
<li>There is no explicit &#8220;<span class="caps">EOF</span> character&#8221; at the end of a file on Unix/Linux&nbsp;systems</li>
<li><span class="caps">EOF</span> (end-of-file) is a condition provided by the kernel that can be detected by an application <s>when a <em>read</em> operation reaches the end of a file</s> (if <em>k</em> is the current file position and <em>m</em> is the size of a file, performing a <em>read()</em> when <em>k &gt;= m</em> triggers the&nbsp;condition)</li>
</ul>
<p><strong>Update Mar 14, 2020</strong>: I&#8217;m working on an update to the article based on all the feedback I&#8217;ve received so far. Stay&nbsp;tuned!</p>
<p><br/>
Happy learning and have a great&nbsp;day!</p>
<p><br/>
<em>Resources used in preparation for this article (some links are affiliate&nbsp;links):</em></p>
<ol>
<li><a target="_blank" rel="noopener" href="https://www.amazon.com/gp/product/013409266X/ref=as_li_tl?ie=UTF8&amp;camp=1789&amp;creative=9325&amp;creativeASIN=013409266X&amp;linkCode=as2&amp;tag=russblo0b-20&amp;linkId=ec2bfa5062cddb0c6f86266ba481c625">Computer Systems: A Programmer&#8217;s Perspective (3rd Edition)</a><img src="https://ir-na.amazon-adsystem.com/e/ir?t=russblo0b-20&amp;l=am2&amp;o=1&amp;a=013409266X" width="1" height="1" border="0" alt="" style="border:none !important; margin:0px !important;" /></li>
<li><a target="_blank" rel="noopener" href="https://www.amazon.com/gp/product/0131103628/ref=as_li_tl?ie=UTF8&amp;camp=1789&amp;creative=9325&amp;creativeASIN=0131103628&amp;linkCode=as2&amp;tag=russblo0b-20&amp;linkId=97a792c45446683f7235710c2f8c899d">C Programming Language, 2nd Edition</a><img src="https://ir-na.amazon-adsystem.com/e/ir?t=russblo0b-20&amp;l=am2&amp;o=1&amp;a=0131103628" width="1" height="1" border="0" alt="" style="border:none !important; margin:0px !important;" /></li>
<li><a target="_blank" rel="noopener" href="https://www.amazon.com/gp/product/013937681X/ref=as_li_tl?ie=UTF8&amp;camp=1789&amp;creative=9325&amp;creativeASIN=013937681X&amp;linkCode=as2&amp;tag=russblo0b-20&amp;linkId=b8b462e767809ac396966bbb3e79af76">The Unix Programming Environment (Prentice-Hall Software Series)</a><img src="https://ir-na.amazon-adsystem.com/e/ir?t=russblo0b-20&amp;l=am2&amp;o=1&amp;a=013937681X" width="1" height="1" border="0" alt="" style="border:none !important; margin:0px !important;" /></li>
<li><a target="_blank" rel="noopener" href="https://www.amazon.com/gp/product/0321637739/ref=as_li_tl?ie=UTF8&amp;camp=1789&amp;creative=9325&amp;creativeASIN=0321637739&amp;linkCode=as2&amp;tag=russblo0b-20&amp;linkId=f9fc233797afcaf2c103f7aac24d717d">Advanced Programming in the <span class="caps">UNIX</span> Environment, 3rd Edition</a><img src="https://ir-na.amazon-adsystem.com/e/ir?t=russblo0b-20&amp;l=am2&amp;o=1&amp;a=0321637739" width="1" height="1" border="0" alt="" style="border:none !important; margin:0px !important;" /></li>
<li><a target="_blank" rel="noopener" href="https://www.amazon.com/gp/product/0134190440/ref=as_li_tl?ie=UTF8&amp;camp=1789&amp;creative=9325&amp;creativeASIN=0134190440&amp;linkCode=as2&amp;tag=russblo0b-20&amp;linkId=3e0104678e6eb68f11fb29e4cda46bd1">Go Programming Language, The (Addison-Wesley Professional Computing Series)</a><img src="https://ir-na.amazon-adsystem.com/e/ir?t=russblo0b-20&amp;l=am2&amp;o=1&amp;a=0134190440" width="1" height="1" border="0" alt="" style="border:none !important; margin:0px !important;" /></li>
<li><a href="https://docs.python.org/3/howto/unicode.html">Unicode <span class="caps">HOWTO</span></a></li>
<li><a href="https://nodejs.org/api/stream.html">Node.js Stream&nbsp;module</a></li>
<li><a href="https://golang.org/pkg/io/">Go io&nbsp;package</a></li>
<li><a href="https://en.wikipedia.org/wiki/Cat_(Unix)">cat&nbsp;(Unix)</a></li>
<li><a href="https://en.wikipedia.org/wiki/End-of-file">End-of-file</a></li>
<li><a href="https://en.wikipedia.org/wiki/End-of-Transmission_character">End-of-Transmission&nbsp;character</a></li>
</ol>
<p><br/></p>
<p>If you want to get my newest articles in your inbox, then enter your email address below and click "Get Updates!"</p>
<!-- Begin MailChimp Signup Form -->
<link rel="stylesheet" type="text/css"
href="https://cdn-images.mailchimp.com/embedcode/classic-081711.css">
<style type="text/css">
  /* Page-specific rules for the signup box, declared after the MailChimp stylesheet linked above. */

  /* Outer container: light grey panel that clears any left float. */
  #mc_embed_signup {
    clear: left;
    background: #f5f5f5;
    font: 18px Helvetica, Arial, sans-serif;
  }

  /* Centre the form contents. */
  #mc_embed_signup form {
    padding: 20px 0 10px 3%;
    text-align: center;
  }

  /* Text inputs sit inline at 40% of the field-group width. */
  #mc_embed_signup .mc-field-group input {
    width: 40%;
    display: inline;
  }

  /* Response messages span the full width. */
  #mc_embed_signup div.response {
    width: 100%;
  }
</style>
<div id="mc_embed_signup">
  <!-- Newsletter signup: posts the NAME and EMAIL fields to the list-manage.com endpoint and opens the response in a new tab. -->
  <form
    action="https://ruslanspivak.us4.list-manage.com/subscribe/post?u=7dde30eedc045f4670430c25f&amp;id=6f69f44e03"
    method="post"
    id="mc-embedded-subscribe-form"
    name="mc-embedded-subscribe-form"
    class="validate"
    target="_blank" rel="noopener" novalidate>
    <div id="mc_embed_signup_scroll">
      <div class="mc-field-group">
        <label for="mce-NAME">Enter Your First Name *</label>
        <!-- autocomplete tokens let browsers offer the user's stored name and email -->
        <input type="text" value="" name="NAME" class="required" id="mce-NAME" autocomplete="given-name">
      </div>
      <div class="mc-field-group">
        <label for="mce-EMAIL">Enter Your Best Email *</label>
        <input type="email" value="" name="EMAIL" class="required email" id="mce-EMAIL" autocomplete="email">
      </div>
      <!-- Empty containers for error and success messages; hidden by default. -->
      <div id="mce-responses" class="clear">
        <div class="response" id="mce-error-response" style="display:none"></div>
        <div class="response" id="mce-success-response" style="display:none"></div>
      </div>
      <!-- real people should not fill this in and expect good things - do not remove this or risk form bot signups-->
      <!-- aria-hidden keeps this off-screen, unlabeled bot-trap field out of the accessibility tree; tabindex="-1" already keeps it out of the tab order. -->
      <div style="position: absolute; left: -5000px;" aria-hidden="true"><input type="text" name="b_7dde30eedc045f4670430c25f_6f69f44e03" tabindex="-1" value=""></div>
      <div class="clear"><input type="submit" value="Get Updates!" name="subscribe" id="mc-embedded-subscribe" class="button" style="background-color: rgb(63, 146, 236);"></div>
    </div>
  </form>
</div>
<!-- <script type='text/javascript' src='//s3.amazonaws.com/downloads.mailchimp.com/js/mc-validate.js'></script><script type='text/javascript'>(function($) {window.fnames = new Array(); window.ftypes = new Array();fnames[1]='NAME';ftypes[1]='text';fnames[0]='EMAIL';ftypes[0]='email';}(jQuery));var $mcj = jQuery.noConflict(true);</script> -->
<!--End mc_embed_signup-->
</div>
<!-- /.entry-content -->
<hr/>
<section class="comments" id="comments">
  <h2>Comments</h2>
  <!-- Placeholder element for the Disqus thread (NOTE(review): this id is the one Disqus's universal embed code expects; do not rename). -->
  <div id="disqus_thread"></div>
  <script type="text/javascript">
    /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
    // Declared with top-level `var`, so these are globals visible to the script injected below.
    var disqus_shortname = 'ruslanspivak'; // required: replace example with your forum shortname
    var disqus_identifier = 'eof-is-not-a-character';
    var disqus_url = 'https://ruslanspivak.com/eofnotchar/';
    var disqus_config = function () {
      this.language = "en";
    };
    /* * * DON'T EDIT BELOW THIS LINE * * */
    (function () {
      // Build an async <script> element for the forum's embed.js and append it to <head> (or <body> if there is no head).
      var dsq = document.createElement('script');
      dsq.type = 'text/javascript';
      dsq.async = true;
      // Explicit https: a protocol-relative URL inherits the page's scheme and cannot load when the page is opened from file://.
      dsq.src = 'https://' + disqus_shortname + '.disqus.com/embed.js';
      (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
    })();
  </script>
  <noscript>Please enable JavaScript to view the <a href="https://disqus.com/?ref_noscript">comments powered by
  Disqus.</a></noscript>
  <a href="https://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
</section>
</article>
</section>
</div>
<div class="col-sm-3" id="sidebar">
  <!-- Sidebar: social profile links, a list of popular posts, and the affiliate-link disclaimer. -->
  <!-- The Font Awesome <i> icons are decorative (each sits next to a text label), so they carry aria-hidden="true". -->
  <aside>
    <section class="well well-sm">
      <ul class="list-group list-group-flush">
        <li class="list-group-item"><h4><i class="fa fa-home fa-lg" aria-hidden="true"></i><span class="icon-label">Social</span></h4>
          <ul class="list-group" id="social">
            <li class="list-group-item"><a href="https://github.com/rspivak/"><i class="fa fa-github-square fa-lg" aria-hidden="true"></i> github</a></li>
            <li class="list-group-item"><a href="https://twitter.com/rspivak"><i class="fa fa-twitter-square fa-lg" aria-hidden="true"></i> twitter</a></li>
            <li class="list-group-item"><a href="https://linkedin.com/in/ruslanspivak/"><i class="fa fa-linkedin-square fa-lg" aria-hidden="true"></i> linkedin</a></li>
          </ul>
        </li>
        <li class="list-group-item"><h4><i class="fa fa-home fa-lg" aria-hidden="true"></i><span class="icon-label">Popular posts</span></h4>
          <ul class="list-group" id="popularposts">
            <li class="list-group-item"
                style="font-size: 15px; word-break: normal;">
              <a href="../lsbaws-part1/index.html">
                Let's Build A Web Server. Part 1.
              </a>
            </li>
            <li class="list-group-item"
                style="font-size: 15px; word-break: normal;">
              <a href="../lsbasi-part1/index.html">
                Let's Build A Simple Interpreter. Part 1.
              </a>
            </li>
            <li class="list-group-item"
                style="font-size: 15px; word-break: normal;">
              <a href="../lsbaws-part2/index.html">
                Let's Build A Web Server. Part 2.
              </a>
            </li>
            <li class="list-group-item"
                style="font-size: 15px; word-break: normal;">
              <a href="../lsbaws-part3/index.html">
                Let's Build A Web Server. Part 3.
              </a>
            </li>
            <li class="list-group-item"
                style="font-size: 15px; word-break: normal;">
              <a href="../lsbasi-part2/index.html">
                Let's Build A Simple Interpreter. Part 2.
              </a>
            </li>
          </ul>
        </li>
        <li class="list-group-item">
          <h4>
            <span>Disclaimer</span>
          </h4>
          <p id="disclaimer-text"> Some of the links on this site
            have my Amazon referral id, which provides me with a small
            commission for each sale. Thank you for your support.
          </p>
        </li>
      </ul>
    </section>
  </aside>
</div>
</div>
</div>
<footer>
  <!-- Page footer: copyright notice on the left, "Back to top" link on the right. -->
  <div class="container">
    <hr>
    <div class="row">
      <div class="col-xs-10">&copy; 2020 Ruslan Spivak
        <!-- &middot; Powered by <a href="https://github.com/DandyDev/pelican-bootstrap3" target="_blank">pelican-bootstrap3</a>, -->
        <!-- <a href="http://docs.getpelican.com/" target="_blank">Pelican</a>, -->
        <!-- <a href="http://getbootstrap.com" target="_blank">Bootstrap</a> -->
        <!-- -->
      </div>
      <!-- The arrow icon is decorative; the adjacent link text carries the meaning, so the icon is hidden from assistive technology. -->
      <div class="col-xs-2"><p class="pull-right"><i class="fa fa-arrow-up" aria-hidden="true"></i> <a href="index.html#">Back to top</a></p></div>
    </div>
  </div>
</footer>
<!-- Theme scripts, loaded from the local theme directory near the end of the body. -->
<!-- NOTE(review): jQuery is included before the Bootstrap bundle, presumably because Bootstrap's plugins depend on it; confirm before reordering. -->
<script src="../theme/js/jquery.min.js"></script>
<!-- Include all compiled plugins (below), or include individual files as needed -->
<script src="../theme/js/bootstrap.min.js"></script>
<!-- Enable responsive features in IE8 with Respond.js (https://github.com/scottjehl/Respond) -->
<script src="../theme/js/respond.min.js"></script>
<!-- Disqus -->
<script type="text/javascript">
  /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
  var disqus_shortname = 'ruslanspivak'; // required: replace example with your forum shortname
  /* * * DON'T EDIT BELOW THIS LINE * * */
  (function () {
    // Inject the forum's count.js asynchronously into <head> (or <body> if there is no head).
    var s = document.createElement('script');
    s.async = true;
    s.type = 'text/javascript';
    // Explicit https: a protocol-relative URL inherits the page's scheme and cannot load when the page is opened from file://.
    s.src = 'https://' + disqus_shortname + '.disqus.com/count.js';
    (document.getElementsByTagName('HEAD')[0] || document.getElementsByTagName('BODY')[0]).appendChild(s);
  }());
</script>
<!-- End Disqus Code -->
<!-- Google Analytics Universal -->
<script type="text/javascript">
  // analytics.js loader: defines a ga() stub that queues its calls in ga.q, records a timestamp in ga.l,
  // then inserts an async <script> for analytics.js before the first <script> element in the document.
  // The URL is explicit https: a protocol-relative URL inherits the page's scheme and cannot load from file://.
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
  // Queue creation of the tracker for this property, then a pageview hit.
  ga('create', 'UA-2572871-3', 'auto');
  ga('send', 'pageview');
</script>
<!-- End Google Analytics Universal Code -->
</body>
</html>